In [1]:
!pip install mido --quiet
import mido

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━[0m [32m41.0/50.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.3/50.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import glob

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelEncoder
import os
import mido
import string
from tqdm import tqdm

np.random.seed(42)  # makes the randomness deterministic

%matplotlib inline
# todo: try %matplotlib widget
plt.rcParams['figure.figsize'] = (15, 5)
plt.rcParams['axes.grid'] = True

In [None]:
import string
def msg2dict(msg):
    result = dict()
    if 'note_on' in msg:
        on_ = True
    elif 'note_off' in msg:
        on_ = False
    else:
        on_ = None
    result['time'] = int(msg[msg.rfind('time'):].split(' ')[0].split('=')[1].translate(
        str.maketrans({a: None for a in string.punctuation})))

    if on_ is not None:
        for k in ['note', 'velocity']:
            result[k] = int(msg[msg.rfind(k):].split(' ')[0].split('=')[1].translate(
                str.maketrans({a: None for a in string.punctuation})))
    return [result, on_]

def switch_note(last_state, note, velocity, on_=True):
    # piano has 88 notes, corresponding to note id 21 to 108, any note out of this range will be ignored
    result = [0] * 88 if last_state is None else last_state.copy()
    if 21 <= note <= 108:
        result[note-21] = velocity if on_ else 0
    return result

def get_new_state(new_msg, last_state):
    new_msg, on_ = msg2dict(str(new_msg))
    new_state = switch_note(last_state, note=new_msg['note'], velocity=new_msg['velocity'], on_=on_) if on_ is not None else last_state
    return [new_state, new_msg['time']]

def track2seq(track):
    # piano has 88 notes, corresponding to note id 21 to 108, any note out of the id range will be ignored
    result = []
    last_state, last_time = get_new_state(str(track[0]), [0]*88)
    for i in range(1, len(track)):
        new_state, new_time = get_new_state(track[i], last_state)
        if new_time > 0:
            result += [last_state]*new_time
        last_state, last_time = new_state, new_time
    return result

def mid2arry(mid, min_msg_pct=0.1):
    tracks_len = [len(tr) for tr in mid.tracks]
    min_n_msg = max(tracks_len) * min_msg_pct
    # convert each track to nested list
    all_arys = []
    for i in range(len(mid.tracks)):
        if len(mid.tracks[i]) > min_n_msg:
            ary_i = track2seq(mid.tracks[i])
            all_arys.append(ary_i)
    # make all nested list the same length
    max_len = max([len(ary) for ary in all_arys])
    for i in range(len(all_arys)):
        if len(all_arys[i]) < max_len:
            all_arys[i] += [[0] * 88] * (max_len - len(all_arys[i]))
    all_arys = np.array(all_arys)
    all_arys = all_arys.max(axis=0)
    # trim: remove consecutive 0s in the beginning and at the end
    sums = all_arys.sum(axis=1)
    ends = np.where(sums > 0)[0]
    return all_arys[min(ends): max(ends)]

In [None]:
# The wrapper function to load a MIDI file and extract features
def extract_features(file_path):
    mid = mido.MidiFile(file_path, clip=True)

    return mid2arry(mid)

In [None]:
# Specify your directory containing MIDI folders
main_dir = '/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/'

# Prepare lists to store filenames and lengths
filenamelist = []
lengths = []

# Walk through all subdirectories
for dirpath, dirnames, filenames in os.walk(main_dir):
    for filename in tqdm(filenames):
      if filename.endswith('.mid'):
            full_file_path = os.path.join(dirpath, filename)  # get full file path
            mid = mido.MidiFile(full_file_path, clip=True)
            # Compute the total number of messages in all tracks
            total_msgs = sum(len(track) for track in mid.tracks)
            filenamelist.append(filename)
            lengths.append(total_msgs)

# Create a DataFrame
df = pd.DataFrame({
    'filename': filenamelist,
    'length': lengths
})

100%|██████████| 1/1 [00:00<00:00, 321.67it/s]
100%|██████████| 42/42 [00:18<00:00,  2.28it/s]
100%|██████████| 43/43 [00:14<00:00,  2.98it/s]
100%|██████████| 39/39 [00:21<00:00,  1.82it/s]
100%|██████████| 42/42 [00:26<00:00,  1.60it/s]
100%|██████████| 42/42 [00:17<00:00,  2.45it/s]
100%|██████████| 43/43 [00:12<00:00,  3.32it/s]
100%|██████████| 42/42 [00:25<00:00,  1.62it/s]
100%|██████████| 43/43 [00:24<00:00,  1.77it/s]
100%|██████████| 42/42 [00:16<00:00,  2.53it/s]


In [None]:
df

Unnamed: 0,filename,length
0,bartok414.mid,755
1,bartok422.mid,2820
2,bartok425.mid,655
3,bartok403.mid,363
4,bartok407.mid,1200
...,...,...
364,chopin052.mid,5481
365,chopin054.mid,5067
366,chopin067.mid,2902
367,chopin064.mid,2465


In [None]:
df.describe()

Unnamed: 0,length
count,369.0
mean,9062.140921
std,11852.857223
min,228.0
25%,2174.0
50%,4802.0
75%,10627.0
max,92747.0


In [None]:
min_index = df['length'].idxmin()
min_row = df.loc[min_index]
print(min_row)

filename    bartok396.mid
length                228
Name: 6, dtype: object


In [None]:
max_index = df['length'].idxmax()
max_row = df.loc[max_index]
print(max_row)

filename    mendelssohn272.mid
length                   92747
Name: 139, dtype: object


In [None]:
def trim_midi_to_length(midi_file, desired_length, output_file):
    # Load MIDI file
    mid = mido.MidiFile(midi_file)

    # Create a new MIDI file to store the trimmed data
    trimmed_midi = mido.MidiFile()

    for i, track in enumerate(mid.tracks):
        trimmed_track = mido.MidiTrack()
        time_passed = 0
        for msg in track:
            # Update time passed
            time_passed += msg.time
            # If the time passed has exceeded the desired length, stop adding events
            if time_passed > desired_length * mid.ticks_per_beat:
                break
            # Add the message to the new track
            trimmed_track.append(msg)
        # Add the new track to the new MIDI file
        trimmed_midi.tracks.append(trimmed_track)

    # Save the new MIDI file
    trimmed_midi.save(output_file)

In [None]:
trim_midi_to_length('/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/bartok/bartok396.mid', 20, '/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/Trimmed/train/bartok/bartok396_trim.mid')

In [None]:
mid1 = mido.MidiFile('/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/Trimmed/train/bartok/bartok396_trim.mid', clip=True)

In [None]:
mid_array1 = mid2arry(mid1)
bytes = mid_array1.nbytes
kilobytes = bytes / 1024
megabytes = kilobytes / 1024
gigabytes = megabytes / 1024
print(megabytes, 'MB')

6.35198974609375 MB


In [None]:
mid_array1.shape

(9461, 88)

In [None]:
trim_midi_to_length('/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/mendelssohn/mendelssohn276.mid', 20, '/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/Trimmed/train/mendelssohn/mendelssohn276_trim.mid')

In [None]:
mid2 = mido.MidiFile('/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/Trimmed/train/mendelssohn/mendelssohn276_trim.mid', clip=True)
mid_array2 = mid2arry(mid2)
bytes = mid_array2.nbytes
kilobytes = bytes / 1024
megabytes = kilobytes / 1024
gigabytes = megabytes / 1024
print(megabytes, 'MB')

1.4354248046875 MB


In [None]:
mid_array2.shape

(2138, 88)

In [None]:
def trim_midi_to_length_rf(midi_file, desired_length):
    # Load MIDI file
    mid = mido.MidiFile(midi_file)

    # Create a new MIDI file to store the trimmed data
    trimmed_midi = mido.MidiFile()

    for i, track in enumerate(mid.tracks):
        trimmed_track = mido.MidiTrack()
        time_passed = 0
        for msg in track:
            # Update time passed
            time_passed += msg.time
            # If the time passed has exceeded the desired length, stop adding events
            if time_passed > desired_length * mid.ticks_per_beat:
                break
            # Add the message to the new track
            trimmed_track.append(msg)
        # Add the new track to the new MIDI file
        trimmed_midi.tracks.append(trimmed_track)

    # Return the trimmed MIDI file
    return trimmed_midi

In [None]:
# The wrapper function to load a MIDI file and extract features
def extract_features1(midi_file):
    return mid2arry(midi_file)

In [None]:
# Define counters
file_count = 0

save_dir = "/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/Trimmed/train/"

# Loop through the files in your train directory
for dirpath, dirnames, filenames in os.walk('/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/'):
    for filename in tqdm(filenames, desc=os.path.basename(dirpath)):
        if filename.endswith('.mid'):
            full_file_path = os.path.join(dirpath, filename)  # get full file path

            try:
                midi_features = extract_features1(trim_midi_to_length_rf(full_file_path, 15))

                # Get the composer's name from the directory name
                composer = os.path.basename(dirpath)

                # Save this file's data and label
                np.save(f'{save_dir}/{composer}/data_{file_count}.npy', np.array(midi_features))
                np.save(f'{save_dir}/{composer}/labels_{file_count}.npy', np.array(composer))

                file_count += 1

            except Exception as e:
                print(f"Skipping file {full_file_path} due to error: {e}")
                continue

100%|██████████| 1/1 [00:00<00:00, 10356.31it/s]
bartok: 100%|██████████| 43/43 [01:22<00:00,  1.92s/it]
bach: 100%|██████████| 43/43 [01:04<00:00,  1.49s/it]
schumann: 100%|██████████| 39/39 [01:12<00:00,  1.87s/it]
mendelssohn: 100%|██████████| 44/44 [01:21<00:00,  1.86s/it]
handel: 100%|██████████| 42/42 [01:16<00:00,  1.83s/it]
byrd: 100%|██████████| 43/43 [01:02<00:00,  1.45s/it]
mozart:  24%|██▍       | 10/42 [00:21<00:57,  1.79s/it]

Skipping file /content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/mozart/mozart008.mid due to error: min() arg is an empty sequence


mozart: 100%|██████████| 42/42 [01:23<00:00,  1.98s/it]
hummel:  23%|██▎       | 10/43 [00:16<00:59,  1.79s/it]

Skipping file /content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/hummel/hummel338.mid due to error: min() arg is an empty sequence


hummel:  88%|████████▊ | 38/43 [01:08<00:09,  2.00s/it]

Skipping file /content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/hummel/hummel340.mid due to error: min() arg is an empty sequence


hummel: 100%|██████████| 43/43 [01:19<00:00,  1.85s/it]


Skipping file /content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/NN_midi_files_extended/train/hummel/hummel300.mid due to error: min() arg is an empty sequence


chopin: 100%|██████████| 42/42 [01:03<00:00,  1.51s/it]


In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
load_dir = "/content/drive/MyDrive/Colab Notebooks/Data/Composer_Dataset/Trimmed/train/"

combined_features = []
combined_labels = []

# Loop through the files in your saved directory
for dirpath, dirnames, filenames in tqdm(os.walk(load_dir), desc="Loading arrays"):
    for filename in filenames:
        if filename.startswith('data_') and filename.endswith('.npy'):
            # Load the data array and append it to the combined_features list
            data_array = np.load(os.path.join(dirpath, filename))
            combined_features.append(data_array)
            number = filename.split('_')[1].split('.')[0]
            labelsname = f'labels_{number}.npy'
            # Load the labels array and append it to the combined_labels list
            labels_array = np.load(os.path.join(dirpath, labelsname))
            combined_labels.append(labels_array)

# Get maximum length of the sequences
max_len = max([len(seq) for seq in combined_features])
# Pad each sequence to the maximum length
padded_features = pad_sequences(combined_features, maxlen=max_len, padding='post', dtype='float32')

Loading arrays: 10it [05:38, 33.83s/it]


In [4]:
padded_features.shape

(373, 18290, 88)

In [5]:
from sklearn.preprocessing import MinMaxScaler
# Reshape it into 2D
array_2d = padded_features.reshape(-1, padded_features.shape[-1])

# Apply MinMaxScaler
scaler = MinMaxScaler()
scaled_array_2d = scaler.fit_transform(array_2d)

# Reshape it back into 3D
padded_features = scaled_array_2d.reshape(padded_features.shape)

In [6]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Initialize the LabelEncoder
le = LabelEncoder()

# Fit the LabelEncoder on your labels, and then transform the labels
encoded_labels = le.fit_transform(combined_labels)

# One-hot encode the integer labels
one_hot_labels = to_categorical(encoded_labels)

In [7]:
one_hot_labels.shape

(373, 9)

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, MaxPooling1D, Dense, Dropout, TimeDistributed, Flatten, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
# Define your model
model = Sequential()

# Convolutional Layer
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(18290, 88)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

# Convolutional Layer 2
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

# LSTM Layers
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))

# Dense Layer
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))

# Output Layer
# Replace 'num_classes' with the number of composers in your dataset
model.add(Dense(9, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define a callback to prevent overfitting
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# Train the model
history = model.fit(padded_features, one_hot_labels, epochs=50, batch_size=32, validation_split=0.2, callbacks=[es])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping


In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

# Define your model
model = Sequential()

# Convolutional Layer 1
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(18290, 88)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

# Convolutional Layer 2
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

# Flatten Layer
model.add(Flatten())

# Dense Layer 1
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))

# Output Layer
model.add(Dense(9, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define a callback to prevent overfitting
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# Train the model
history = model.fit(padded_features, one_hot_labels, epochs=50, batch_size=32, validation_split=0.2, callbacks=[es])



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping


In [10]:
model.save('/content/drive/MyDrive/Colab Notebooks/models/MSAAI511_FP_LSTM_model.h5') # you can provide your desired path and filename here
print("Model saved successfully!")

Model saved successfully!


In [10]:
from sklearn.metrics import classification_report

# Suppose X_test is your test data
y_pred_probs = model.predict(padded_features)

# The model's predict function will return a matrix of probabilities.
# To convert these probabilities into concrete class predictions,
# you can take the class with the highest probability for each sample as the predicted class:
y_pred = np.argmax(y_pred_probs, axis=1)

# Suppose y_test are the true labels of the test data
y_true = np.argmax(one_hot_labels, axis=1)  # if y_test is one-hot encoded

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        42
           1       0.11      1.00      0.20        42
           2       0.00      0.00      0.00        42
           3       0.00      0.00      0.00        43
           4       0.00      0.00      0.00        41
           5       0.00      0.00      0.00        41
           6       0.00      0.00      0.00        43
           7       0.00      0.00      0.00        41
           8       0.00      0.00      0.00        38

    accuracy                           0.11       373
   macro avg       0.01      0.11      0.02       373
weighted avg       0.01      0.11      0.02       373



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
y_true

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8,
       8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
       8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

In [12]:
y_pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,