<a href="https://colab.research.google.com/github/dhanaKankanala/Music_Generation/blob/main/GRU_Music_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

pip install --upgrade music21

Collecting music21
  Downloading music21-9.5.0-py3-none-any.whl.metadata (5.1 kB)
Downloading music21-9.5.0-py3-none-any.whl (20.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.1/20.1 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: music21
  Attempting uninstall: music21
    Found existing installation: music21 9.3.0
    Uninstalling music21-9.3.0:
      Successfully uninstalled music21-9.3.0
Successfully installed music21-9.5.0


In [None]:
#DataFlair Automatic Music Generation Project
#load all the libraries
from music21 import *
import glob
import os
from tqdm import tqdm
import numpy as np
import random
from tensorflow.keras.layers import LSTM,Dense,Input,Dropout, GRU,BatchNormalization
from tensorflow.keras.models import Sequential,Model,load_model
from keras.regularizers import l2
from sklearn.model_selection import train_test_split

In [None]:
# Upload the raw dataset archive (musicgenerationdata.zip) into the Colab runtime.
# NOTE(review): the recorded run saved the upload as "musicgenerationdata (1).zip",
# while the extraction cell reads /content/midi_data/musicgenerationdata.zip -
# confirm the archive really is at that path before extracting.
from google.colab import files
uploaded = files.upload()
#Upload the raw zip datafile(musicgenerationdata.zip)

Saving musicgenerationdata.zip to musicgenerationdata (1).zip


In [None]:
import os
import zipfile

# Archive to unpack; change this if your upload was stored under another name.
zip_path = "/content/midi_data/musicgenerationdata.zip"
# Directory that receives the archive contents.
extract_path = "/content/midi_data"

# Create the destination if needed, then unpack every archive member into it.
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("Files extracted to:", extract_path)


Files extracted to: /content/midi_data


In [None]:
# Root folder of the extracted dataset; the per-composer MIDI folders are
# looked up beneath it (under "All Midi Files").
DATA_DIR = '/content/midi_data/musicgenerationdata'

In [None]:
def read_files(file):
  """Extract the piano notes, chords and rests of one MIDI file as string tokens.

  Args:
    file: path of the MIDI file to parse.

  Returns:
    A list of tokens in the order the piano part(s) are traversed:
    the pitch name (e.g. 'C4') for a single note, the dot-joined normal-order
    pitch classes (e.g. '0.4.7') for a chord, and 'Rest' for a rest.
    The list is empty when no part's description contains 'Piano'.
  """
  notes=[]
  #parse the midi file
  midi=converter.parse(file)
  #separate all instruments from the file
  instrmt=instrument.partitionByInstrument(midi)
  #NOTE(review): guard in case partitioning yields nothing - confirm against the music21 version in use
  if instrmt is None:
    return notes

  for part in instrmt.parts:
    #fetch data only of Piano instrument
    #(skipping here also avoids iterating an unset part and re-reading an
    # earlier piano part once per remaining instrument)
    if 'Piano' not in str(part):
      continue

    #iterate over all the sub stream elements of this part
    for element in part.recurse():
      if isinstance(element, note.Note):
        notes.append(str(element.pitch))
      elif isinstance(element, chord.Chord):
        #encode a chord as its pitch classes joined by '.'
        #(rebuilding a Note per pitch class and appending its octave produced
        # tokens such as '7None' that cannot be decoded afterwards)
        notes.append('.'.join(str(n) for n in element.normalOrder))
      elif isinstance(element, note.Rest):
        notes.append('Rest')

  #return the list of notes
  return notes

#composer sub-folders (inside "All Midi Files") whose MIDI files are used
file_path=["schubert","chopin","bach","balakir","beeth","borodin","brahms","burgm","debussy","granados","grieg","haydn","liszt","mendelssohn","mozart","muss","schumann","tschai"]

#gather the MIDI files of every listed composer
#(assigning the glob result inside the loop kept only the last composer's files)
all_files=[]
for composer in file_path:
  pattern=os.path.join(DATA_DIR,'All Midi Files/'+composer+'/*.mid')
  print("files",pattern)
  #sorted so the file order, and therefore the note order, is the same on every run
  all_files.extend(sorted(glob.glob(pattern)))
# all_files = glob.glob(os.path.join(DATA_DIR, '**', '*.mid'), recursive=True) # Use this line if you want to train on entire directory

#reading each midi file
notes_array = [read_files(i) for i in tqdm(all_files,position=0,leave=True)]

files /content/midi_data/musicgenerationdata/All Midi Files/schubert/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/chopin/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/bach/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/balakir/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/beeth/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/borodin/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/brahms/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/burgm/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/debussy/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/granados/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/grieg/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/haydn/*.mid
files /content/midi_data/musicgenerationdata/All Midi Files/liszt/*.mid
files /content/midi_data/musicgenerationdata/All Mi

100%|██████████| 12/12 [00:31<00:00,  2.63s/it]


In [None]:
from collections import Counter

#flatten the per-file token lists into one long list
notess = [token for seq in notes_array for token in seq]

#notes with their frequency
#Counter counts everything in one pass (list.count per unique note is quadratic)
#and keeps first-seen order, so the vocabulary order no longer depends on
#set iteration order, which can change between interpreter runs
freq=dict(Counter(notess))

#unique notes
unique_notes = list(freq)
print("Unique Notes:",len(unique_notes))

#get the threshold frequency: how many notes occur at least i times
for i in range(30,100,20):
  print(i,":",sum(1 for count in freq.values() if count>=i))

Unique Notes: 244
30 : 96
50 : 65
70 : 56
90 : 51


In [None]:
#keep only the notes that occur at least 50 times
freq_notes={name:count for name,count in freq.items() if count>=50}

#rebuild every file's sequence, dropping notes outside the frequent set
new_notes=[]
for sequence in notes_array:
  new_notes.append([name for name in sequence if name in freq_notes])

In [None]:
#index -> note lookup, numbered in the iteration order of freq_notes
ind2note={index:name for index,name in enumerate(freq_notes)}

#note -> index lookup (the inverse of ind2note)
note2ind={name:index for index,name in ind2note.items()}

In [None]:
#timestep: number of consecutive notes the model sees before predicting the next one
timesteps=50

#store values of input and output
x=[] ; y=[]

for seq in new_notes:
  #translate the whole sequence to indices once instead of once per window
  encoded=[note2ind[name] for name in seq]
  for start in range(len(encoded)-timesteps):
    #input is the window of `timesteps` notes beginning at `start`
    #output is the note that immediately follows that window
    x.append(encoded[start:start+timesteps])
    y.append(encoded[start+timesteps])

#every window is exactly `timesteps` long by construction, so no length
#filter is needed (the former post-filter indexed y against an already
#filtered x and could only have misaligned the pairs)
x_new = np.array(x)
y_new = np.array(y)

In [None]:
#give the inputs a trailing feature axis -> (samples, timesteps, 1)
#and turn the targets into a single column -> (samples, 1)
x_new = x_new.reshape(len(x_new),timesteps,1)
y_new = y_new.reshape(-1,1)

#split the inputs and targets into training and testing sets
#70% for training and 30% for testing (test_size=0.3)
x_train,x_test,y_train,y_test = train_test_split(x_new,y_new,test_size=0.3,random_state=36)

In [None]:

#create the model
model = Sequential()
#declare the input shape (timesteps, features) with an explicit Input layer;
#Keras warns when input_shape is passed to a layer of a Sequential model
model.add(Input(shape=(x_new.shape[1], x_new.shape[2])))
#recurrent stack: LSTM(256) -> GRU(512) -> GRU(256), each followed by
#batch normalisation and 20% dropout
#NOTE(review): the first recurrent layer is an LSTM while the others are GRUs - confirm the mix is intended
model.add(LSTM(256, return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(GRU(512, return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.2))
#last recurrent layer returns only its final state
model.add(GRU(256))
model.add(BatchNormalization())
model.add(Dropout(0.2))
#L2-regularised hidden layer
model.add(Dense(256, activation='relu',kernel_regularizer=l2(0.001)))

#fully connected layer for the output with softmax activation
#one unit per note in the vocabulary
model.add(Dense(len(note2ind),activation='softmax'))
model.summary()



  super().__init__(**kwargs)


In [None]:
from keras.optimizers import Adamax

# compile the model using the Adamax optimizer
# targets are integer note indices, hence the sparse categorical loss
optimizer = Adamax(learning_rate=0.001)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer,metrics=['accuracy'])
print(x_train.shape,y_train.shape,"shapes",x_test.shape,y_test.shape)
#train the model on the training set and validate on the testing set after every epoch
#the returned History is kept so the per-epoch metrics stay available and the
#cell does not end by echoing the History object's repr
#NOTE(review): the recorded run reports training accuracy far above validation
#accuracy - consider early stopping on val_loss
history = model.fit(
 x_train,y_train,
 batch_size=128,epochs=80,
 validation_data=(x_test,y_test))

(9723, 50, 1) (9723, 1) shapes (4168, 50, 1) (4168, 1)
Epoch 1/80
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 3s/step - accuracy: 0.2488 - loss: 3.9965 - val_accuracy: 0.3244 - val_loss: 3.6425
Epoch 2/80
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 2s/step - accuracy: 0.3221 - loss: 3.5056 - val_accuracy: 0.3249 - val_loss: 3.6607
Epoch 3/80
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 2s/step - accuracy: 0.3197 - loss: 3.4257 - val_accuracy: 0.3251 - val_loss: 3.6246
Epoch 4/80
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 3s/step - accuracy: 0.3335 - loss: 3.3147 - val_accuracy: 0.3201 - val_loss: 3.6071
Epoch 5/80
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 2s/step - accuracy: 0.3366 - loss: 3.2224 - val_accuracy: 0.3203 - val_loss: 3.4865
Epoch 6/80
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 2s/step - accuracy: 0.3458 - loss: 3.1357 - val_accuracy: 0.328

<keras.src.callbacks.history.History at 0x7efef5e95ed0>

In [None]:
# Measure loss and accuracy on the held-out split
# (the same data that was passed as validation_data during training).
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc} Test Loss: {test_loss}")


In [None]:
# Measure loss and accuracy on the training split, for comparison with the
# held-out numbers.
train_loss, train_acc = model.evaluate(x_train, y_train)
print(f"Train Accuracy: {train_acc} Train loss: {train_loss}")

[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 204ms/step - accuracy: 0.9930 - loss: 0.1988
Train Accuracy: 0.9918749332427979 Train loss: 0.1997397392988205


In [None]:
#save the trained model to "s2s1.keras"; the generation cell reloads it from this file
model.save("s2s1.keras")

In [None]:
# Generate a note sequence by repeatedly sampling the model's next-note prediction.
from keras.models import load_model
import numpy as np

NUM_GENERATED_NOTES = 200  # how many notes to generate
TOP_K = 7                  # sample only among the k most probable next notes

# Load the trained model
model = load_model("s2s1.keras")

# Pick a random seed window from the test set.
# randint's upper bound is exclusive, so passing len(x_test) (not len(x_test) - 1)
# lets every window, including the last one, be chosen.
index = np.random.randint(len(x_test))

# Get the corresponding sequence from x_test, shaped (1, timesteps, 1) for the model
music_pattern = x_test[index].reshape(1, timesteps, 1)
out_pred = []  # List to store generated notes

for _ in range(NUM_GENERATED_NOTES):
    # Predict next-note probabilities; verbose=0 silences the per-call progress bar
    pred_probs = model.predict(music_pattern, verbose=0)[0]

    # Select the TOP_K most probable notes
    top_indices = np.argsort(pred_probs)[-TOP_K:]

    # Renormalise in float64 so the kept probabilities sum to 1 for np.random.choice
    top_probs = pred_probs[top_indices].astype(np.float64)
    top_probs /= top_probs.sum()

    # Sample the next note among the kept candidates
    pred_index = int(np.random.choice(top_indices, p=top_probs))

    # Append the predicted note only if it is valid
    # (guards against a saved model whose output width differs from ind2note)
    if pred_index in ind2note:
        out_pred.append(ind2note[pred_index])
    else:
        print(f"Warning: Skipped invalid index {pred_index}")

    # Slide the window: drop the oldest note index and append the new one
    music_pattern = np.append(music_pattern.ravel()[1:], pred_index).reshape(1, timesteps, 1)

print("Generated Music Sequence:", out_pred)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 623ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [None]:
from music21 import stream, note, chord
import copy


def _chord_member_pitch(token):
    """Return the integer encoded by one chord-member token, or None if it has none.

    Accepts plain digits ('7') as well as digits followed by a literal 'None'
    ('7None'), the form in which chord members appear in the recorded output
    of this notebook.
    """
    if token.endswith('None'):
        token = token[:-len('None')]
    return int(token) if token.isdigit() else None


# Convert the generated tokens into music21 objects, one per time step.
output_notes = []

for offset, pattern in enumerate(out_pred):
    new_note = None  # stays None when the token cannot be converted

    if pattern == 'Rest':
        new_note = note.Rest()

    elif '.' in pattern or _chord_member_pitch(pattern) is not None:
        # Chord token: dot-separated integers (a single integer is a one-note chord)
        members = []
        for current_note in pattern.split('.'):
            pitch_number = _chord_member_pitch(current_note)
            if pitch_number is None:
                print(f"Skipping invalid note: {current_note}")  # Debugging
                continue
            temp_note = note.Note(pitch_number)
            temp_note.storedInstrument = instrument.Piano()
            members.append(temp_note)

        if len(members) > 1:
            new_note = chord.Chord(members)
        elif members:
            # Only one usable member: keep it as a plain note instead of dropping it
            new_note = members[0]

    else:
        # Anything else is treated as a pitch name such as 'C4'
        try:
            new_note = note.Note(pattern)
            new_note.storedInstrument = instrument.Piano()
        except Exception as e:
            print(f"Skipping invalid pattern: {pattern} - Error: {e}")
            continue  # Skip invalid notes

    # Explicit None check: do not rely on the truthiness of music21 objects
    if new_note is not None:
        new_note.offset = offset  # Set offset for timing
        output_notes.append(copy.deepcopy(new_note))  # Ensure unique object reference

# Save the MIDI file
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp='pred_music4.mid')

print("MIDI file successfully created: pred_music4.mid")





Skipping invalid note: 7None
Skipping invalid note: 11None
Skipping invalid note: 2None
Skipping invalid note: 11None
Skipping invalid note: 2None
Skipping invalid note: 7None
Skipping invalid note: 0None
Skipping invalid note: 9None
Skipping invalid note: 2None
Skipping invalid note: 7None
Skipping invalid note: 11None
Skipping invalid note: 9None
Skipping invalid note: 2None
Skipping invalid note: 9None
Skipping invalid note: 2None
Skipping invalid note: 11None
Skipping invalid note: 2None
Skipping invalid note: 11None
Skipping invalid note: 2None
Skipping invalid note: 9None
Skipping invalid note: 0None
Skipping invalid note: 7None
Skipping invalid note: 11None
Skipping invalid note: 8None
Skipping invalid note: 0None
Skipping invalid note: 3None
Skipping invalid note: 8None
Skipping invalid note: 0None
Skipping invalid note: 3None
Skipping invalid note: 8None
Skipping invalid note: 0None
Skipping invalid note: 3None
Skipping invalid note: 8None
Skipping invalid note: 0None
Skipping