## Import the Necessary Libraries

In [1]:
import numpy as np
from music21 import *
from collections import Counter
import os 
import pandas as pd
from sklearn.model_selection import train_test_split

## Initialize the Conversion of MIDI data into CSV data

In [2]:
def read_midi(file):
    print("Loading Music File:", file)
    notes = []
    midi = converter.parse(file)
    s2 = instrument.partitionByInstrument(midi)

    for part in s2.parts:
        if 'Piano' in str(part): 
            notes_to_parse = part.recurse() 
            for element in notes_to_parse:
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append('.'.join(str(n) for n in element.normalOrder))
    return np.array(notes)

def get_sequences(notes_array, timesteps=32, future_steps=8):
    notes_ = [element for note_ in notes_array for element in note_]
    freq = dict(Counter(notes_))
    frequent_notes = [note_ for note_, count in freq.items() if count >= 50]

    new_music = []
    for notes in notes_array:
        temp = [note_ for note_ in notes if note_ in frequent_notes]
        new_music.append(temp)

    new_music = np.array(new_music, dtype=object)

    x, y = [], []
    for note_ in new_music:
        for i in range(0, len(note_) - (timesteps + future_steps), 1):
            input_ = note_[i:i + timesteps]
            output = note_[i + 1 + timesteps:i + 1 + timesteps + future_steps]
            x.append(input_)
            y.append(output)
            
    x = np.array(x)
    y = np.array(y)

    unique_notes = list(set(x.ravel()))
    note_to_int = dict((note_, number) for number, note_ in enumerate(unique_notes))
    x_seq = [[note_to_int[j] for j in i] for i in x]
    y_seq = [[note_to_int[j] for j in i] for i in y]

    return np.array(x_seq), np.array(y_seq), unique_notes, note_to_int

## Start the Conversion Process

In [3]:
train_size = 0.7
validation_size = 0.2 
test_size = 0.1
path = '/home/admin1/Desktop/LSTM/schubert/'

files = [i for i in os.listdir(path) if i.endswith(".mid")]
notes_array = np.array([read_midi(os.path.join(path, i)) for i in files], dtype=object)

x_seq, y_seq, unique_notes, note_to_int = get_sequences(notes_array, timesteps=32, future_steps=8)

X_train, X_rem, y_train, y_rem = train_test_split(x_seq, y_seq, train_size=train_size)
X_valid, X_test, y_valid, y_test = train_test_split(X_rem, y_rem, test_size=float(test_size) / float(validation_size + test_size))

train_dataset = [{'x_tr': X_train[i], 'future': y_train[i]} for i in range(len(X_train))]
validation_dataset = [{'x_val': X_valid[i], 'future': y_valid[i]} for i in range(len(X_valid))]
test_dataset = [{'x_test': X_test[i], 'future': y_test[i]} for i in range(len(X_test))]

df_tr = pd.DataFrame(train_dataset)
df_val = pd.DataFrame(validation_dataset)
df_test = pd.DataFrame(test_dataset)
df_notes = pd.DataFrame(unique_notes)

# Specify the path for the folder
folder_path = '/home/admin1/Desktop/LSTM/csv_dataset/'

# Create the folder if it doesn't exist
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

# Save the datasets into CSV files in the specified folder
df_tr.to_csv(os.path.join(folder_path, 'trainset.csv'), index=False)
df_val.to_csv(os.path.join(folder_path, 'validationset.csv'), index=False)
df_test.to_csv(os.path.join(folder_path, 'testset.csv'), index=False)
df_notes.to_csv(os.path.join(folder_path, 'notes.csv'), index=False)

Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D935_2.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schuim-3.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D850_1.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schuim-4.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schuim-2.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D850_2.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schu_143_3.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d960_2.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d960_1.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schumm-4.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schu_143_1.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schumm-5.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d960_3.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D850_3.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D850_4.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d760_4.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d760_2.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d760_1.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schumm-1.mid




Loading Music File: /home/admin1/Desktop/LSTM/schubert/schumm-2.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d760_3.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schub_d960_4.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D935_1.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D935_4.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schumm-3.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schubert_D935_3.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schu_143_2.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schumm-6.mid
Loading Music File: /home/admin1/Desktop/LSTM/schubert/schuim-1.mid


## Display the created CSV files

In [4]:
df_tr

Unnamed: 0,x_tr,future
0,"[144, 73, 136, 29, 38, 5, 94, 141, 10, 38, 56,...","[136, 29, 148, 64, 132, 132, 10, 164]"
1,"[58, 79, 79, 121, 51, 37, 37, 37, 37, 37, 37, ...","[37, 46, 2, 37, 37, 37, 126, 37]"
2,"[93, 52, 155, 127, 129, 181, 129, 28, 131, 92,...","[71, 49, 118, 132, 103, 29, 159, 60]"
3,"[121, 24, 46, 25, 76, 6, 56, 69, 69, 147, 69, ...","[62, 10, 60, 19, 69, 21, 114, 122]"
4,"[97, 176, 180, 45, 86, 135, 176, 2, 180, 135, ...","[176, 176, 43, 176, 43, 176, 43, 135]"
...,...,...
54868,"[37, 143, 98, 53, 25, 143, 25, 143, 98, 148, 2...","[42, 144, 50, 47, 144, 155, 47, 144]"
54869,"[2, 76, 171, 98, 14, 110, 117, 9, 110, 67, 67,...","[76, 171, 171, 76, 171, 126, 171, 56]"
54870,"[42, 126, 169, 126, 42, 56, 176, 37, 42, 126, ...","[126, 93, 11, 162, 18, 11, 145, 171]"
54871,"[21, 69, 92, 21, 69, 37, 142, 69, 17, 142, 92,...","[89, 21, 179, 28, 80, 179, 52, 80]"


In [5]:
df_val

Unnamed: 0,x_val,future
0,"[57, 95, 9, 95, 43, 180, 95, 43, 180, 137, 9, ...","[117, 56, 76, 160, 22, 99, 171, 126]"
1,"[92, 4, 42, 70, 28, 50, 11, 70, 50, 28, 145, 4...","[42, 145, 92, 145, 42, 46, 145, 56]"
2,"[97, 92, 99, 17, 92, 99, 17, 89, 92, 37, 99, 1...","[48, 99, 105, 17, 152, 92, 48, 99]"
3,"[46, 38, 56, 172, 144, 46, 88, 38, 36, 142, 38...","[80, 87, 168, 80, 168, 80, 25, 168]"
4,"[143, 10, 143, 10, 143, 164, 10, 143, 10, 110,...","[135, 177, 135, 177, 135, 177, 135, 177]"
...,...,...
15673,"[165, 46, 124, 75, 158, 38, 165, 46, 124, 75, ...","[46, 21, 38, 46, 21, 38, 46, 38]"
15674,"[171, 171, 68, 68, 68, 68, 68, 68, 68, 68, 68,...","[64, 64, 161, 64, 64, 173, 159, 29]"
15675,"[126, 168, 113, 113, 123, 165, 172, 126, 80, 1...","[22, 48, 48, 127, 152, 165, 126, 172]"
15676,"[180, 42, 123, 99, 97, 92, 113, 160, 117, 37, ...","[180, 57, 89, 44, 37, 100, 160, 117]"


In [6]:
df_test

Unnamed: 0,x_test,future
0,"[126, 38, 145, 145, 15, 145, 15, 46, 50, 162, ...","[162, 68, 171, 162, 145, 15, 56, 136]"
1,"[4, 30, 138, 21, 51, 116, 30, 136, 39, 30, 131...","[26, 116, 39, 138, 51, 4, 116, 138]"
2,"[149, 51, 103, 115, 115, 103, 159, 115, 115, 1...","[180, 117, 180, 103, 123, 160, 42, 170]"
3,"[109, 131, 109, 131, 109, 131, 109, 131, 16, 8...","[89, 87, 15, 44, 89, 89, 89, 89]"
4,"[90, 29, 159, 29, 49, 29, 49, 63, 165, 124, 63...","[136, 159, 10, 29, 132, 29, 49, 29]"
...,...,...
7834,"[160, 56, 113, 170, 42, 22, 123, 170, 42, 22, ...","[171, 126, 99, 169, 97, 126, 99, 170]"
7835,"[37, 15, 97, 90, 51, 51, 116, 120, 167, 58, 16...","[24, 71, 37, 99, 145, 169, 76, 38]"
7836,"[162, 47, 11, 162, 160, 11, 76, 97, 99, 17, 99...","[97, 50, 92, 58, 50, 8, 97, 99]"
7837,"[25, 97, 39, 170, 113, 47, 89, 86, 180, 37, 87...","[25, 44, 97, 89, 16, 47, 44, 89]"


In [7]:
df_notes

Unnamed: 0,0
0,0.2.6
1,9.11
2,E-5
3,0.6
4,6.10.1
...,...
177,8.0.3
178,7.10.1
179,3.9
180,E-3
