<a href="https://colab.research.google.com/github/ishikapachori/music-generation-using-rnn/blob/main/Music_generation_using_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import glob
import os
from music21 import converter, instrument, note, chord, stream
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation, BatchNormalization, Input
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
import pickle
import random
from tqdm import tqdm
from google.colab import files

# Reset Keras' global state before building a new model, so that re-running
# this cell does not accumulate state from models created by earlier runs.
# NOTE(review): the original comment called this "clear GPU memory"; whether
# device memory is actually released depends on the backend — confirm.
tf.keras.backend.clear_session()

# Step 1: Data Preparation
def extract_notes_from_midi(file_path):
    """Extract all notes and chords from a MIDI file.

    Each single note is encoded as ``str(element.pitch)``; each chord is
    encoded as the values of ``element.normalOrder`` joined with ``'.'``.

    Args:
        file_path: Path of the MIDI file to parse.

    Returns:
        A list of string tokens in the order they were encountered. If the
        file cannot be processed, the error is printed and an empty list is
        returned so that one bad file does not abort a batch.
    """
    try:
        midi = converter.parse(file_path)

        # Handle multi-instrument files by selecting only one instrument
        try:
            partitioned = instrument.partitionByInstrument(midi)
            notes_to_parse = partitioned.parts[0].recurse()
        except Exception:
            # Best-effort fallback to the flattened score when partitioning
            # yields nothing usable. Deliberately `except Exception` rather
            # than a bare `except:` so that KeyboardInterrupt / SystemExit
            # still stop the run instead of being silently swallowed.
            notes_to_parse = midi.flat.notes

        notes = []
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

        return notes
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return []

def prepare_sequences(notes, sequence_length=30):  # Reduced sequence length
    """Turn a flat list of note tokens into training arrays.

    Every window of ``sequence_length`` consecutive tokens becomes one input
    row and the token immediately following the window becomes its target.

    Returns:
        A tuple ``(network_input, network_output, n_vocab, pitchnames,
        note_to_int)`` where ``network_input`` holds the integer-encoded
        windows divided by ``n_vocab`` and ``network_output`` is the one-hot
        encoding of the targets.
    """
    # Vocabulary: every distinct token, in sorted order
    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)

    # Token -> integer id lookup
    note_to_int = {pitch: idx for idx, pitch in enumerate(pitchnames)}

    # Encode the whole piece once, then slice windows out of the encoded list
    encoded = [note_to_int[token] for token in notes]
    window_starts = range(len(notes) - sequence_length)
    windows = [encoded[begin:begin + sequence_length] for begin in window_starts]
    targets = [encoded[begin + sequence_length] for begin in window_starts]

    # One row per window, scaled by the vocabulary size
    n_patterns = len(windows)
    network_input = np.reshape(windows, (n_patterns, sequence_length)) / float(n_vocab)

    # One-hot encode the targets
    network_output = tf.keras.utils.to_categorical(targets, num_classes=n_vocab)

    return (network_input, network_output, n_vocab, pitchnames, note_to_int)

# Step 2: Build the RNN Model
def build_model(network_input, n_vocab, sequence_length=30):  # Reduced sequence length
    """Assemble and compile the stacked-LSTM classifier.

    The network takes inputs of shape ``(sequence_length, 1)`` and ends in a
    softmax over ``n_vocab`` classes. ``network_input`` is accepted but not
    used when building the model.
    """
    layer_stack = [
        Input(shape=(sequence_length, 1)),
        # First recurrent stage keeps the full sequence for the next LSTM
        LSTM(256, return_sequences=True, recurrent_dropout=0.2),
        BatchNormalization(),
        Dropout(0.5),
        # Second recurrent stage collapses the sequence to a single vector
        LSTM(256),
        BatchNormalization(),
        Dropout(0.5),
        # L2-regularised dense head
        Dense(128, kernel_regularizer=l2(0.01)),
        Activation('relu'),
        BatchNormalization(),
        Dropout(0.5),
        # Class scores over the vocabulary
        Dense(n_vocab),
        Activation('softmax'),
    ]
    model = tf.keras.Sequential(layer_stack)

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.0005),  # Adjusted learning rate
        metrics=['accuracy'],
    )
    return model

# Step 3: Train the Model
def train_model(model, network_input, network_output, epochs=300, batch_size=32):  # Increased epochs
    """Fit ``model`` and checkpoint it whenever training accuracy improves.

    ``network_input`` is reshaped to ``[samples, time steps, 1]`` before being
    passed to ``model.fit``; 20% of the data is used as the validation split.

    Returns:
        ``(model, history)`` where ``history`` is whatever ``model.fit`` returns.
    """
    # Add the trailing feature axis expected by the LSTM input
    sample_count = len(network_input)
    step_count = network_input.shape[1]
    lstm_ready_input = np.reshape(network_input, (sample_count, step_count, 1))

    # Keep a checkpoint each time the monitored training accuracy improves
    best_accuracy_saver = ModelCheckpoint(
        "weights-improvement-{epoch:02d}-{accuracy:.4f}.keras",
        monitor='accuracy',
        verbose=1,
        save_best_only=True,
        mode='max'
    )

    history = model.fit(
        lstm_ready_input,
        network_output,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[best_accuracy_saver],
        validation_split=0.2
    )

    return model, history

# Step 4: Generate Music
def generate_notes(model, network_input, pitchnames, note_to_int, n_vocab, num_notes=500, temperature=1.0):
    """Generate notes using the trained model.

    Args:
        model: Trained model exposing ``predict``; it is called with arrays of
            shape ``(1, sequence_length, 1)``.
        network_input: Seed sequences, one per row, whose values are ALREADY
            scaled by ``1 / n_vocab`` (the form ``prepare_sequences`` returns
            and the form the model was trained on).
        pitchnames: Vocabulary list; output index ``i`` maps to ``pitchnames[i]``.
        note_to_int: Unused; kept so existing callers keep working.
        n_vocab: Vocabulary size, used to scale each sampled index before it
            is fed back into the rolling input pattern.
        num_notes: Number of tokens to generate.
        temperature: Sampling temperature; values below 1 sharpen the
            predicted distribution, values above 1 flatten it.

    Returns:
        A list of ``num_notes`` tokens drawn from ``pitchnames``.

    Raises:
        ValueError: If ``temperature`` is not positive, or if
            ``network_input`` is empty (raised by ``np.random.randint``).
    """
    if temperature <= 0:
        raise ValueError("temperature must be a positive number")

    # Pick a random sequence from the input as a starting point for the
    # prediction. randint's upper bound is exclusive, so len(network_input)
    # (not len - 1) is what makes every pattern eligible and keeps a
    # single-pattern input from raising.
    start = np.random.randint(0, len(network_input))
    pattern = network_input[start]
    prediction_output = []

    # Generate notes
    for _ in tqdm(range(num_notes)):
        # Reshape the pattern to match the input shape of the model.
        # The pattern is already normalised (seed rows come pre-scaled and
        # every appended value is index / n_vocab), so it must NOT be divided
        # by n_vocab again here — doing so feeds the model inputs on a
        # different scale than it saw during training.
        x = np.reshape(pattern, (1, len(pattern), 1))

        # Make a prediction
        prediction = model.predict(x, verbose=0)[0]

        # Apply temperature to adjust the randomness of the prediction.
        # Work in float64 and subtract the max logit before exponentiating so
        # small temperatures cannot underflow every term to zero.
        logits = np.log(np.asarray(prediction, dtype=np.float64) + 1e-10) / temperature
        exp_preds = np.exp(logits - np.max(logits))
        probabilities = exp_preds / np.sum(exp_preds)

        # Sample the index of the next token
        index = np.random.choice(len(probabilities), p=probabilities)

        # Map the index to the actual note
        prediction_output.append(pitchnames[index])

        # Slide the window: drop the oldest value, append the new one in the
        # same normalised scale as the rest of the pattern
        pattern = np.append(pattern[1:], index / float(n_vocab))

    return prediction_output

def create_midi(prediction_output, filename="generated_music.mid"):
    """Write the generated tokens to ``filename`` as a MIDI file.

    Tokens containing a ``'.'`` or consisting only of digits are treated as
    chords (dot-separated integers); every other token is treated as a single
    note name. Elements are placed 0.5 apart so they do not stack.
    """
    output_notes = []
    position = 0

    for token in prediction_output:
        is_chord = token.isdigit() or '.' in token

        if not is_chord:
            # Single note, given by name
            element = note.Note(token)
            element.storedInstrument = instrument.Piano()
        else:
            # Chord: build one Note per dot-separated integer
            members = []
            for pitch_value in token.split('.'):
                member = note.Note(int(pitch_value))
                member.storedInstrument = instrument.Piano()
                members.append(member)
            element = chord.Chord(members)

        element.offset = position
        output_notes.append(element)

        # Advance the timeline for the next element
        position += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=filename)
    print(f"MIDI file saved at: {os.path.abspath(filename)}")  # Debug statement

def evaluate_model(model, test_input, test_output):
    """Return ``(loss, accuracy)`` from ``model.evaluate`` on the given data.

    ``test_input`` is first given a trailing axis of size 1, i.e. reshaped to
    ``(samples, time steps, 1)``.
    """
    sample_count = len(test_input)
    step_count = test_input.shape[1]
    lstm_ready_input = np.reshape(test_input, (sample_count, step_count, 1))

    test_loss, test_accuracy = model.evaluate(lstm_ready_input, test_output)
    return test_loss, test_accuracy

def plot_training_history(history):
    """Draw accuracy and loss curves side by side, save them, and show them.

    Reads the ``accuracy``/``val_accuracy`` and ``loss``/``val_loss`` series
    from ``history.history`` and writes the figure to ``training_history.png``.
    """
    # (train key, validation key, title, y-axis label, legend position)
    panels = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
        ('loss', 'val_loss', 'Model Loss', 'Loss', 'upper right'),
    )

    plt.figure(figsize=(12, 5))

    for slot, (train_key, val_key, title, y_label, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc=legend_loc)

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

# Main function to run the entire pipeline
# Main function to run the entire pipeline
def main():
    """Run the whole pipeline: load MIDI, train, evaluate, generate, download.

    Reads every ``midi_songs/*.mid`` file, stores the extracted notes and the
    vocabulary mappings under ``data/``, trains the model, saves it to
    ``trained_model.keras``, plots the training history, evaluates on the
    tail of the data, and finally writes and offers ``generated_music.mid``
    for download.

    Returns:
        None. Returns early (after printing a message) when no MIDI files are
        found or when too few notes were extracted to build a single
        training sequence.
    """
    # Step 1: Get all MIDI files
    midi_files = glob.glob("midi_songs/*.mid")

    if not midi_files:
        print("No MIDI files found. Please place MIDI files in a directory named 'midi_songs'")
        return

    # Extract notes from MIDI files
    notes = []
    for file in tqdm(midi_files, desc="Processing MIDI files"):
        notes.extend(extract_notes_from_midi(file))

    # Save notes to file for later use
    os.makedirs("data", exist_ok=True)
    with open('data/notes', 'wb') as notes_file:
        pickle.dump(notes, notes_file)

    # Step 2: Prepare sequences
    sequence_length = 30  # Reduced sequence length

    # Unreadable files contribute no notes, so the corpus can end up too short
    # to form even one (input window, next note) pair. Stop here with a clear
    # message instead of failing later inside Keras on empty arrays.
    if len(notes) <= sequence_length:
        print(
            f"Not enough notes to train: extracted {len(notes)}, "
            f"but more than {sequence_length} are required."
        )
        return

    network_input, network_output, n_vocab, pitchnames, note_to_int = prepare_sequences(notes, sequence_length)

    # Save pitch names and note to int mapping for later use
    with open('data/pitchnames', 'wb') as pitchnames_file:
        pickle.dump(pitchnames, pitchnames_file)
    with open('data/note_to_int', 'wb') as mapping_file:
        pickle.dump(note_to_int, mapping_file)

    # Step 3: Build the model
    model = build_model(network_input, n_vocab, sequence_length)
    model.summary()

    # Step 4: Train the model
    model, history = train_model(model, network_input, network_output, epochs=300, batch_size=32)  # Increased epochs

    # Save the model
    model.save('trained_model.keras')

    # Plot training history
    plot_training_history(history)

    # Evaluate model on the last 20% of the sequences.
    # NOTE(review): train_model uses validation_split=0.2, which per the Keras
    # docs also holds out the tail of the data, so this slice is not an
    # independent test set — confirm whether a separate hold-out is wanted.
    test_size = int(len(network_input) * 0.2)
    if test_size > 0:
        test_input = network_input[-test_size:]
        test_output = network_output[-test_size:]

        test_loss, test_accuracy = evaluate_model(model, test_input, test_output)
        print(f"Test accuracy: {test_accuracy*100:.2f}%")
    else:
        # With test_size == 0 the slice [-0:] would select the ENTIRE dataset
        # rather than nothing, silently reporting training accuracy.
        print("Too few sequences to hold out an evaluation slice; skipping evaluation.")

    # Generate music unconditionally
    print("Generating music...")
    prediction_output = generate_notes(model, network_input, pitchnames, note_to_int, n_vocab, num_notes=500)
    create_midi(prediction_output, "generated_music.mid")
    print("Music generated and saved as 'generated_music.mid'")

    # Provide download link for the generated MIDI file
    print("Download the generated music file:")
    files.download("generated_music.mid")

if __name__ == "__main__":
    # Make sure the input folder and the artefact folder exist before running
    for required_dir in ("midi_songs", "data"):
        os.makedirs(required_dir, exist_ok=True)

    main()

Processing MIDI files: 100%|██████████| 3/3 [00:01<00:00,  1.84it/s]


Epoch 1/300
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 260ms/step - accuracy: 0.0108 - loss: 7.0706
Epoch 1: accuracy improved from -inf to 0.02609, saving model to weights-improvement-01-0.0261.keras
15/15 ━━━━━━━━━━━━━━━━━━━━ 13s 345ms/step - accuracy: 0.0117 - loss: 7.0579 - val_accuracy: 0.0000e+00 - val_loss: 5.9127
Epoch 2/300
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 267ms/step - accuracy: 0.0893 - loss: 6.0395
Epoch 2: accuracy improved from 0.02609 to 0.07609, saving model to weights-improvement-02-0.0761.keras
15/15 ━━━━━━━━━━━━━━━━━━━━ 5s 325ms/step - accuracy: 0.0885 - loss: 6.0438 - val_accuracy: 0.0000e+00 - val_loss: 5.8818
Epoch 3/300
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 282ms/step - accuracy: 0.0765 - loss: 5.8684
Epoch 3: accuracy improved from 0.07609 to 0.07826, saving model to weights-improvement-03-0.0783.keras
[1m15/15[0m [3