## Music Generation Training Notebook

In [None]:
import numpy as np


In [1]:
# Mount Google Drive at /content/drive so that the files under
# /content/drive/MyDrive used by the following cells are reachable.
# This import is only available inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8, so no
# newline translation is applied to the file contents.
with open('/content/drive/MyDrive/Music_Generation/input.txt', 'rb') as f:
    input_text = f.read().decode('utf-8')

In [None]:
# Peek at the first 100 characters of the corpus.
input_text[:100]

'X: 1\nT:A and D\n% Nottingham Music Database\nS:EF\nY:AB\nM:4/4\nK:A\nM:6/8\nP:A\nf|"A"ecc c2f|"A"ecc c2f|"A"'

### Convert every character into an integer and build the lookup dictionaries

In [None]:
def generate_keys(text):
    """Build the character <-> index lookup tables for ``text``.

    The distinct characters of ``text`` are sorted and numbered from 0.

    Args:
        text: string whose characters form the vocabulary.

    Returns:
        Tuple ``(char_to_idx, idx_to_char)``: a dict mapping each character to
        its index, and the inverse dict mapping each index to its character.
        The size of both dicts is also printed.
    """
    vocabulary = sorted(set(text))

    # Position in the sorted vocabulary is the character's index.
    idx_to_char = dict(enumerate(vocabulary))
    char_to_idx = {symbol: position for position, symbol in idx_to_char.items()}

    print("len of the  char_to_idx ", len(char_to_idx))
    print("len of the  idx_to_char ", len(idx_to_char))
    return char_to_idx, idx_to_char


In [None]:
# Build the vocabulary lookup tables from the full corpus; the call also
# prints the size of each table.
char_to_idx, idx_to_char = generate_keys(input_text)

len of the  char_to_idx  86
len of the  idx_to_char  86


In [None]:
import json
# Persist the character -> index mapping as JSON on Drive.
# NOTE(review): presumably so later code can reuse the same encoding -- confirm.
with open('/content/drive/MyDrive/Music_Generation/char_to_idx','w') as f:
      json.dump(char_to_idx,f)

In [None]:
# Total number of characters in the corpus.
len(input_text)

129665

### Generate Batches

In [None]:
def generate_batchs(T, vocab_size, batch_size=None, seq_len=None):
    """Yield ``(X, Y)`` training batches that keep the text contiguous row-wise.

    The encoded text is cut into ``batch_size`` contiguous stripes of
    ``len(T) // batch_size`` characters each. Row ``r`` of every batch is read
    from stripe ``r``, and consecutive batches advance ``seq_len`` characters
    along each stripe, so row ``r`` of batch ``k + 1`` continues exactly where
    row ``r`` of batch ``k`` stopped.

    Example with 129,665 characters, ``batch_size=16`` and ``seq_len=64``
    (stripe length 8,104, 126 batches):

                 batch 0            batch 1           ...  batch 125
        row 0  : T[0:64]            T[64:128]         ...  T[8000:8064]
        row 1  : T[8104:8168]       T[8168:8232]      ...  T[16104:16168]
          ...
        row 15 : T[121560:121624]   T[121624:121688]  ...  T[129560:129624]

    Args:
        T: 1-D integer numpy array of character indices, each < ``vocab_size``.
        vocab_size: number of distinct characters (depth of the one-hot targets).
        batch_size: rows per batch. When ``None`` the notebook-level
            ``batch_size`` variable is used.
        seq_len: characters per row. When ``None`` the notebook-level
            ``seq_len`` variable is used.

    Yields:
        ``X``: float array of shape ``(batch_size, seq_len)`` holding the input
        character indices.
        ``Y``: float array of shape ``(batch_size, seq_len, vocab_size)`` holding
        the one-hot encoding of the character that follows each input.
        Nothing is yielded when a stripe is too short for one full sequence.
    """
    # Fall back to the notebook configuration when sizes are not passed in.
    if batch_size is None:
        batch_size = globals()['batch_size']
    if seq_len is None:
        seq_len = globals()['seq_len']

    length = T.shape[0]
    batch_char = length // batch_size  # characters per stripe
    positions = np.arange(seq_len)

    # The target of the last column is T[offset + seq_len], so a batch is only
    # produced while start + seq_len stays strictly inside the stripe.
    for start in range(0, batch_char - seq_len, seq_len):
        X = np.zeros((batch_size, seq_len))
        Y = np.zeros((batch_size, seq_len, vocab_size))

        for batch_index in range(batch_size):
            offset = batch_char * batch_index + start
            X[batch_index] = T[offset:offset + seq_len]
            # One-hot encode the next character for every position in the row.
            Y[batch_index, positions, T[offset + 1:offset + seq_len + 1]] = 1
        yield X, Y



In [None]:
# Training configuration shared by the cells below.
vocab_size = len(char_to_idx)  # number of distinct characters in the corpus
batch_size = 16                # sequences (rows) per batch
seq_len = 64                   # characters per sequence
# Alias for code that refers to the sequence length as `batch_sequence`,
# so that name resolves on a fresh kernel.
batch_sequence = seq_len
vocab_size

86

### Defining the model

In [31]:
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding


# Directory on the mounted Drive that save_weights() writes checkpoints to
# and load_weights() reads them from.
MODEL_DIR = '/content/drive/MyDrive/Music_Generation/model'

def save_weights(epoch, model):
    """Save ``model``'s weights to ``MODEL_DIR/weights.<epoch>.h5``.

    Args:
        epoch: value substituted into the checkpoint file name.
        model: object exposing ``save_weights(path)``.
    """
    # exist_ok=True creates the directory only when it is missing, without the
    # race between a separate existence check and the creation.
    os.makedirs(MODEL_DIR, exist_ok=True)
    model.save_weights(os.path.join(MODEL_DIR, 'weights.{}.h5'.format(epoch)))

def load_weights(epoch, model):
    """Load the weights stored in ``MODEL_DIR/weights.<epoch>.h5`` into ``model``.

    Args:
        epoch: value substituted into the checkpoint file name.
        model: object exposing ``load_weights(path)``.
    """
    checkpoint_name = 'weights.{}.h5'.format(epoch)
    checkpoint_path = os.path.join(MODEL_DIR, checkpoint_name)
    model.load_weights(checkpoint_path)

def build_model(batch_size, seq_len, vocab_size):
    """Assemble the (uncompiled) character-level network.

    Layers, in order: an Embedding of width 512, three stateful LSTM layers of
    256 units that each return the full sequence and are each followed by
    Dropout(0.2), a per-timestep Dense projection to ``vocab_size`` and a
    softmax activation.

    Args:
        batch_size: fixed batch dimension of the input.
        seq_len: fixed number of timesteps per input row.
        vocab_size: size of the embedding vocabulary and of the output layer.

    Returns:
        The ``Sequential`` model; compiling it is left to the caller.
    """
    embedding_dim = 512
    lstm_units = 256
    dropout_rate = 0.2
    num_lstm_layers = 3

    net = Sequential()
    # The batch shape is fixed up front via batch_input_shape.
    net.add(Embedding(vocab_size, embedding_dim, batch_input_shape=(batch_size, seq_len)))

    for _ in range(num_lstm_layers):
        net.add(LSTM(lstm_units, return_sequences=True, stateful=True))
        net.add(Dropout(dropout_rate))

    # Apply the same Dense projection to every returned timestep.
    net.add(TimeDistributed(Dense(vocab_size)))
    net.add(Activation('softmax'))
    return net

In [None]:
import os
def train(text, epochs=100, save_freq=10):
    """Train the character-level model on ``text`` and checkpoint it periodically.

    Relies on notebook-level names: ``batch_size``, ``seq_len``, ``vocab_size``,
    ``char_to_idx``, ``build_model``, ``generate_batchs`` and ``save_weights``.

    Args:
        text: training corpus; every character must be a key of ``char_to_idx``.
        epochs: number of passes over the batches.
        save_freq: save the weights every ``save_freq`` epochs; a falsy value
            disables checkpointing.
    """
    # Model architecture
    model = build_model(batch_size, seq_len, vocab_size)
    model.summary()  # summary() prints the table itself
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train data generation: convert the complete text into numerical indices.
    T = np.asarray([char_to_idx[c] for c in text], dtype=np.int32)

    print("Length of text:" + str(T.size))

    # NOTE(review): the LSTM layers are stateful and their state is never
    # reset between epochs -- confirm whether that carry-over is intended.
    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))

        losses, accs = [], []

        for X, Y in generate_batchs(T, vocab_size):
            loss, acc = model.train_on_batch(X, Y)
            losses.append(loss)
            accs.append(acc)

        # Report the average over all batches of the epoch, not the last batch.
        print('epoch {}: loss = {}, acc = {}'.format(epoch + 1, np.mean(losses), np.mean(accs)))

        if save_freq and (epoch + 1) % save_freq == 0:
            save_weights(epoch + 1, model)
            print('Saved checkpoint to', 'weights.{}.h5'.format(epoch + 1))

In [None]:
# Train on the full corpus with the default epochs=100 and save_freq=10.
train(input_text)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (16, 64, 512)             44032     
_________________________________________________________________
lstm (LSTM)                  (16, 64, 256)             787456    
_________________________________________________________________
dropout (Dropout)            (16, 64, 256)             0         
_________________________________________________________________
lstm_1 (LSTM)                (16, 64, 256)             525312    
_________________________________________________________________
dropout_1 (Dropout)          (16, 64, 256)             0         
_________________________________________________________________
lstm_2 (LSTM)                (16, 64, 256)             525312    
_________________________________________________________________
dropout_2 (Dropout)          (16, 64, 256)             0