## Data Pre-processing

In [4]:
import os
import numpy as np
import pretty_midi
from music21 import converter, instrument, note, chord
import warnings

# Hiding pesky warnings about missing instrument info in MIDI files
warnings.filterwarnings('ignore', category=UserWarning, module='music21')

# Function to extract notes and chords from a MIDI file
def extract_notes_and_chords(file_path):
    notes = []
    midi = converter.parse(file_path)
    parts = instrument.partitionByInstrument(midi)
    if parts:  # file has instrument parts
        notes_to_parse = parts.parts[0].recurse()
    else:  # file has notes in a flat structure
        notes_to_parse = midi.flat.notes

    for element in notes_to_parse:
        if isinstance(element, note.Note):
            notes.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            notes.append('.'.join(str(n) for n in element.normalOrder))

    return notes

# Path to your dataset
dataset_path = 'C:/Users/Mohammad/Desktop/Final_project'

# Dictionary to hold the notes and chords for each composer
composer_notes = {'Bach': [], 'Beethoven': [], 'Chopin': [], 'Mozart': []}

# Iterate through each composer's folder
for composer in composer_notes.keys():
    folder_path = os.path.join(dataset_path, composer)
    for file in os.listdir(folder_path):
        if file.endswith('.mid'):
            file_path = os.path.join(folder_path, file)
            notes = extract_notes_and_chords(file_path)
            composer_notes[composer].extend(notes)

# Print sample notes and chords
for composer, notes in composer_notes.items():
    print(f"{composer}: {notes[:10]}...")

# Save extracted notes and chords for further use
np.save('composer_notes.npy', composer_notes)


Bach: ['F3', 'A3', 'C4', 'F4', 'A4', 'C4', 'F4', 'A4', 'F3', 'A3']...
Beethoven: ['0.3.7', 'C5', 'E-5', 'D5', '7.11.2', 'G4', '7.9.11', '0.2', 'E5', '4.7.10.0']...
Chopin: ['10.3', '10.3', '10.3', '8.0', '10.3', '8.0', '10.3', 'E-5', '10.3', '10.3']...
Mozart: ['G4', 'C5', 'C5', 'B4', 'C5', 'D5', 'D5', 'C5', 'D5', 'E5']...


## Encode Notes and Chords

In [34]:
# Flatten all notes and chords
all_notes = []
for notes in composer_notes.values():
    all_notes.extend(notes)

# Create a sorted list of unique notes and chords
unique_notes = sorted(set(all_notes))

# Create a dictionary to map notes and chords to integers
note_to_int = {note: number for number, note in enumerate(unique_notes)}

# Convert notes and chords to integers
composer_sequences = {composer: [note_to_int[note] for note in notes] for composer, notes in composer_notes.items()}

# Save mappings and sequences
np.save('note_to_int.npy', note_to_int)
np.save('composer_sequences.npy', composer_sequences)

# Print sample encoded sequences
for composer, sequence in composer_sequences.items():
    print(f"{composer}: {sequence[:10]}...")


Bach: [900, 841, 867, 901, 842, 867, 901, 842, 900, 841]...
Beethoven: [58, 868, 882, 875, 648, 914, 687, 21, 889, 466]...
Chopin: [199, 199, 199, 697, 199, 697, 199, 882, 199, 199]...
Mozart: [914, 868, 868, 854, 868, 875, 875, 868, 875, 889]...


## Create Input Sequences and Labels

In [35]:
# Define the sequence length
sequence_length = 100

# Create input sequences and labels
input_sequences = []
labels = []
label_map = {'Bach': 0, 'Beethoven': 1, 'Chopin': 2, 'Mozart': 3}

for composer, sequence in composer_sequences.items():
    for i in range(len(sequence) - sequence_length):
        # Extract the sequence of notes and chords
        input_seq = sequence[i:i + sequence_length]
        # The label is the next note/chord
        label = sequence[i + sequence_length]
        
        input_sequences.append(input_seq)
        labels.append(label_map[composer])

# Convert to numpy arrays
input_sequences = np.array(input_sequences)
labels = np.array(labels)

# Print the shape of the data
print(f"Input sequences shape: {input_sequences.shape}")
print(f"Labels shape: {labels.shape}")


Input sequences shape: (464657, 100)
Labels shape: (464657,)


## Model Building | LSTM Model

In [45]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Embedding

# Explicit input shape declaration
input_shape = (sequence_length, )
embedding_input = Input(shape=input_shape)

# Define the embedding layer without using input_length
embedding_layer = Embedding(input_dim=len(unique_notes), output_dim=128)(embedding_input)

# Define LSTM layers
lstm_out1 = LSTM(128, return_sequences=True)(embedding_layer)
dropout_out1 = Dropout(0.3)(lstm_out1)
lstm_out2 = LSTM(128)(dropout_out1)
dropout_out2 = Dropout(0.3)(lstm_out2)

# Define output layer
output = Dense(4, activation='softmax')(dropout_out2)

# Build and compile the model
lstm_model = Model(inputs=embedding_input, outputs=output)
lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the model summary
lstm_model.summary()


## Model Training

In [46]:
# Training the model
history = lstm_model.fit(input_sequences, labels, epochs=20, batch_size=64, validation_split=0.2)

# Save the model for future use
lstm_model.save('composer_lstm_model.h5')

# Plot the training and validation accuracy and loss
import matplotlib.pyplot as plt

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()


Epoch 1/20
[1m5809/5809[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m653s[0m 112ms/step - accuracy: 0.8164 - loss: 0.5086 - val_accuracy: 0.0186 - val_loss: 6.8116
Epoch 2/20
[1m 997/5809[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m8:00[0m 100ms/step - accuracy: 0.9793 - loss: 0.0669