# Makam pitch sequence classification with LSTM

### Preprocessing pitch files

Before proceeding, the pitch files in the CompMusic Dunya makam corpus need to be converted into the quantized pitch series encoding, constructed as described in the pseudocode below.

### Library importing for file reading and preprocessing

In [None]:
import glob
import os
import numpy as np
from keras.preprocessing import sequence

### File reading

In [None]:
#quantized pitch file directory
q_read_dir = "./otmm_makam_recognition_dataset/qdata/"
octq_read_dir = "./otmm_makam_recognition_dataset/octfold_qdata/"

#Makam list for more efficient file searching during label retrieval
makams = ["Acemasiran", "Acemkurdi", "Bestenigar", "Beyati", "Hicaz", "Hicazkar", "Huseyni", "Huzzam", "Karcigar", "Kurdilihicazkar", "Mahur", "Muhayyer", "Neva", "Nihavent", "Rast", "Saba", "Segah", "Sultaniyegah", "Suzinak", "Ussak"]


def load_pitch_series(read_dir, makam_names, label_dir="./otmm_makam_recognition_dataset/data/"):
    """Read the quantized pitch files under read_dir and pair each with its makam label.

    A file is considered when its name contains '.pitch'. Its label is the
    first entry of makam_names for which the file
    label_dir/<makam>/<name[:-4] + ".pitch"> exists.

    Files for which no label is found are skipped (and counted in a printed
    message) so that the returned series and labels always stay aligned.

    Returns a tuple (series, labels, longest):
        series  -- list with one entry per file, each a list of its text lines
        labels  -- list of makam names, same length and order as series
        longest -- number of lines of the longest file kept (0 if none)
    """
    series = []
    labels = []
    longest = 0
    unlabeled = []
    for root, _dirs, files in os.walk(read_dir):
        for name in files:
            if '.pitch' not in name:
                continue
            #retrieve label from parent of original path
            #NOTE(review): name[:-4] removes only four characters although
            #".pitch" is six characters long -- confirm this matches the naming
            #of the quantized files.
            original_name = name[:-4] + ".pitch"
            label = None
            for makam in makam_names:
                if os.path.isfile(os.path.join(label_dir, makam, original_name)):
                    label = makam
                    break
            if label is None:
                #without a label the sample cannot be used for training
                unlabeled.append(name)
                continue
            with open(os.path.join(root, name)) as f:
                content = f.readlines()
            series.append(content)
            labels.append(label)
            longest = max(longest, len(content))
    if unlabeled:
        print("Skipped %d pitch file(s) without a makam label" % len(unlabeled))
    return series, labels, longest


#X: pitch series split per line, y: makam labels, max_length: longest series
X, y, max_length = load_pitch_series(q_read_dir, makams)

### Preprocessing
Padding input sequences

In [None]:
max_seq_length = 10000

#Integer encoding: every distinct pitch line becomes one token id.
#Id 0 is left unused because pad_sequences pads with 0 by default.
#NOTE(review): treating each whole line as one token is an assumption --
#confirm it matches the intended quantized encoding.
token_ids = {}
pitch_seqs = []
for series in X:
    #setdefault evaluates len(token_ids) + 1 before inserting, so ids are 1, 2, 3, ...
    pitch_seqs.append([token_ids.setdefault(line.strip(), len(token_ids) + 1) for line in series])

#number of distinct ids including the padding id 0
vocabulary_size = len(token_ids) + 1

#sequence padding
X = sequence.pad_sequences(pitch_seqs, maxlen=max_seq_length)

### Library importing for deep learning

In [None]:
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from sklearn.model_selection import train_test_split

### Train - Test split

In [None]:
#Hold out 33% of the samples for testing; the fixed seed makes the split repeatable
split = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
X_train, X_test, y_train, y_test = split

### Building the LSTM

In [None]:
#Word embedding
#Size of the vector learned for every token id. This is a free hyper-parameter;
#the value 8 was chosen after the layout of one quantized line (pitch value of
#length 5, comma, significance value, newline character).
embedding_vector_len = 8

#Network topology
model = Sequential()

#vocabulary_size must be larger than every integer id in the input (largest id + 1)
model.add(Embedding(vocabulary_size, embedding_vector_len, input_length=max_seq_length))
model.add(Dropout(0.2))
model.add(LSTM(100))
model.add(Dropout(0.2))
#One output unit per makam. Softmax because every recording carries exactly one
#makam label, so the outputs form a probability distribution over the makams.
model.add(Dense(len(makams), activation='softmax'))

#Compile model
#categorical_crossentropy is the multi-class counterpart of the softmax output;
#it expects one-hot encoded labels with len(makams) columns.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()