In [1]:
import os
import json
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding

Using TensorFlow backend.


In [2]:
# ---- Training hyper-parameters ----
# Number of parallel character streams fed to the network per training step.
BATCH_SIZE = 16
# Number of time-steps (characters) in each training sequence.
SEQ_LENGTH = 64

# ---- File locations (relative to the notebook's working directory) ----
# Folder holding the corpus, the character map and the training log.
data_directory = "../Data/"
# Text corpus, looked up inside data_directory.
data_file = "Data_Tunes.txt"
# JSON file (inside data_directory) receiving the character -> index mapping.
charIndex_json = "char_to_index.json"
# Folder where weight checkpoints are written during training.
model_weights_directory = "../Data/Model_Weights/"

In [3]:
def read_batches(all_chars, unique_chars, batch_size = None, seq_length = None):
    """Yield successive (X, Y) batches for next-character prediction.

    The corpus is split into `batch_size` contiguous streams of
    `len(all_chars) // batch_size` characters each. Row `r` of every batch is
    read from stream `r`, and consecutive batches advance by `seq_length`
    characters, so row `r` of one batch continues exactly where row `r` of the
    previous batch stopped (the layout a stateful RNN relies on).

    Parameters
    ----------
    all_chars : 1-D array of int
        The whole corpus encoded as character indices.
    unique_chars : int
        Vocabulary size, i.e. the width of the one-hot target vectors.
    batch_size : int, optional
        Number of rows per batch. Defaults to the module-level BATCH_SIZE.
    seq_length : int, optional
        Number of time-steps per row. Defaults to the module-level SEQ_LENGTH.

    Yields
    ------
    X : float64 ndarray, shape (batch_size, seq_length)
        Character index at every time-step.
    Y : float64 ndarray, shape (batch_size, seq_length, unique_chars)
        One-hot encoding of the character that FOLLOWS each time-step of X.
    """
    # Fall back to the notebook-wide configuration. The original code mixed the
    # constants with literal 16 / 64, which broke as soon as either was changed.
    if batch_size is None:
        batch_size = BATCH_SIZE
    if seq_length is None:
        seq_length = SEQ_LENGTH

    all_chars = np.asarray(all_chars)
    # Characters available to each row's stream, e.g. 155222 // 16 = 9701.
    batch_chars = all_chars.shape[0] // batch_size

    # Index helpers, computed once and broadcast against each other below.
    row_offsets = np.arange(batch_size)[:, np.newaxis] * batch_chars   # (batch_size, 1)
    steps = np.arange(seq_length)[np.newaxis, :]                       # (1, seq_length)
    rows = np.arange(batch_size)[:, np.newaxis]                        # (batch_size, 1)

    # Stopping before `batch_chars - seq_length` guarantees that the look-ahead
    # position (`+ 1`) of the last time-step is still inside the corpus.
    # With the default configuration this is range(0, 9637, 64) -> 151 batches.
    for start in range(0, batch_chars - seq_length, seq_length):
        positions = row_offsets + start + steps                        # (batch_size, seq_length)
        X = all_chars[positions].astype(np.float64)
        Y = np.zeros((batch_size, seq_length, unique_chars))
        # The label of each time-step is the next character in the stream.
        Y[rows, steps, all_chars[positions + 1]] = 1
        yield X, Y

In [4]:
def built_model(batch_size, seq_length, unique_chars):
    """Assemble the (uncompiled) character-level network.

    Layer stack, in order: a 512-dimensional embedding, a 256-unit and a
    128-unit stateful LSTM (each followed by 20% dropout), and a per-time-step
    dense projection onto the vocabulary with a softmax on top.

    batch_size and seq_length fix the input shape through `batch_input_shape`;
    unique_chars is used both as the embedding's input dimension and as the
    size of the per-time-step output.

    Returns the Sequential model; compiling it is left to the caller.
    """
    network = Sequential()

    # Declared top to bottom; both LSTMs return the full sequence so that a
    # prediction is produced at every time-step.
    stack = [
        Embedding(input_dim = unique_chars, output_dim = 512,
                  batch_input_shape = (batch_size, seq_length)),
        LSTM(256, return_sequences = True, stateful = True),
        Dropout(0.2),
        LSTM(128, return_sequences = True, stateful = True),
        Dropout(0.2),
        TimeDistributed(Dense(unique_chars)),
        Activation("softmax"),
    ]
    for layer in stack:
        network.add(layer)

    return network

In [5]:
def training_model(data, epochs = 80):
    """Train the character-level model on `data` and persist its artefacts.

    Parameters
    ----------
    data : str
        The full training text; every distinct character becomes one class.
    epochs : int
        Number of full passes over the corpus.

    Side effects
    ------------
    * writes the character -> index mapping to `charIndex_json` inside
      `data_directory`;
    * saves the model weights to `model_weights_directory` after every 10th
      epoch as ``Weights_<epoch>.h5``;
    * writes ``log.csv`` (columns Epoch, Loss, Accuracy) to `data_directory`
      once all epochs are finished. The logged loss/accuracy of an epoch are
      those of its LAST batch, not an average over the epoch.
    """
    #mapping character to index (sorted so the mapping is reproducible between runs)
    char_to_index = {ch: i for (i, ch) in enumerate(sorted(list(set(data))))}
    print("Number of unique characters in our whole tunes database = {}".format(len(char_to_index))) #87

    with open(os.path.join(data_directory, charIndex_json), mode = "w") as f:
        json.dump(char_to_index, f)

    unique_chars = len(char_to_index)

    model = built_model(BATCH_SIZE, SEQ_LENGTH, unique_chars)
    model.summary()
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

    all_characters = np.asarray([char_to_index[c] for c in data], dtype = np.int32)
    print("Total number of characters = "+str(all_characters.shape[0])) #155222

    epoch_number, loss, accuracy = [], [], []

    for epoch in range(epochs):
        print("Epoch {}/{}".format(epoch+1, epochs))
        final_epoch_loss, final_epoch_accuracy = 0, 0
        epoch_number.append(epoch+1)

        # The LSTMs are stateful: their hidden state is carried from one batch
        # to the next. Every epoch restarts at the beginning of the corpus, so
        # the state left over from the end of the previous pass must be
        # dropped, otherwise the first batch starts from an unrelated context.
        model.reset_states()

        #reading the batches one-by-one and training the model on each of them in turn
        for i, (x, y) in enumerate(read_batches(all_characters, unique_chars)):
            final_epoch_loss, final_epoch_accuracy = model.train_on_batch(x, y) #check documentation of train_on_batch here: https://keras.io/models/sequential/
            print("Batch: {}, Loss: {}, Accuracy: {}".format(i+1, final_epoch_loss, final_epoch_accuracy))
        loss.append(final_epoch_loss)
        accuracy.append(final_epoch_accuracy)

        #saving weights after every 10 epochs
        if (epoch + 1) % 10 == 0:
            if not os.path.exists(model_weights_directory):
                os.makedirs(model_weights_directory)
            model.save_weights(os.path.join(model_weights_directory, "Weights_{}.h5".format(epoch+1)))
            print('Saved Weights at epoch {} to file Weights_{}.h5'.format(epoch+1, epoch+1))

    #creating dataframe and record all the losses and accuracies at each epoch
    log_frame = pd.DataFrame(
        {"Epoch": epoch_number, "Loss": loss, "Accuracy": accuracy},
        columns = ["Epoch", "Loss", "Accuracy"],
    )
    # Written next to the other artefacts; built from data_directory so the
    # location follows the configuration instead of a hard-coded path.
    log_frame.to_csv(os.path.join(data_directory, "log.csv"), index = False)

In [None]:
# Read the whole training corpus into memory. The context manager closes the
# file handle even if read() raises, which the manual open()/close() pair did not.
with open(os.path.join(data_directory, data_file), mode = 'r') as file:
    data = file.read()

# Start training only when this code runs as the main program / notebook kernel.
if __name__ == "__main__":
    training_model(data)

Number of unique characters in our whole tunes database = 87
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (16, 64, 512)             44544     
_________________________________________________________________
lstm_1 (LSTM)                (16, 64, 256)             787456    
_________________________________________________________________
dropout_1 (Dropout)          (16, 64, 256)             0         
_________________________________________________________________
lstm_2 (LSTM)                (16, 64, 128)             197120    
_________________________________________________________________
dropout_2 (Dropout)          (16, 64, 128)             0         
_________________________________________________________________
time_distributed_1 (TimeDist (16, 64, 87)              11223     
___________________________________________________________

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Batch: 1, Loss: 4.465232849121094, Accuracy: 0.0068359375
Batch: 2, Loss: 4.438962459564209, Accuracy: 0.173828125
Batch: 3, Loss: 4.410113334655762, Accuracy: 0.134765625
Batch: 4, Loss: 4.36024808883667, Accuracy: 0.1025390625
Batch: 5, Loss: 4.114752292633057, Accuracy: 0.1435546875
Batch: 6, Loss: 3.8069310188293457, Accuracy: 0.166015625
Batch: 7, Loss: 3.7187938690185547, Accuracy: 0.1630859375
Batch: 8, Loss: 3.7469322681427, Accuracy: 0.1416015625
Batch: 9, Loss: 3.753999948501587, Accuracy: 0.1357421875
Batch: 10, Loss: 3.603203773498535, Accuracy: 0.1328125
Batch: 11, Loss: 3.3470730781555176, Accuracy: 0.1455078125
Batch: 12, Loss: 3.5461196899414062, Accuracy: 0.138671875
Batch: 13, Loss: 3.7733259201049805, Accuracy: 0.11328125
Batch: 14, Loss: 3.4946980476379395, Accuracy: 0.1298828125
Batch: 15, Loss: 3.735079765319824, Accuracy: 0.115234375
Batch: 16, Loss: 3.4264934062957764, Accuracy: 0.1572265625
Batch: 17, Loss: 3.3588309288024902, Accuracy: 0.1767578125
Batch: 18, 

Batch: 141, Loss: 2.590959072113037, Accuracy: 0.314453125
Batch: 142, Loss: 2.532702684402466, Accuracy: 0.322265625
Batch: 143, Loss: 2.6209139823913574, Accuracy: 0.3017578125
Batch: 144, Loss: 2.5920519828796387, Accuracy: 0.3125
Batch: 145, Loss: 2.514592170715332, Accuracy: 0.328125
Batch: 146, Loss: 2.6072092056274414, Accuracy: 0.3037109375
Batch: 147, Loss: 2.5779454708099365, Accuracy: 0.3076171875
Batch: 148, Loss: 2.532719135284424, Accuracy: 0.3134765625
Batch: 149, Loss: 2.584620952606201, Accuracy: 0.2958984375
Batch: 150, Loss: 2.503286838531494, Accuracy: 0.318359375
Batch: 151, Loss: 2.6077685356140137, Accuracy: 0.3173828125
Epoch 2/80
Batch: 1, Loss: 2.4259793758392334, Accuracy: 0.333984375
Batch: 2, Loss: 2.297786235809326, Accuracy: 0.34765625
Batch: 3, Loss: 2.5091071128845215, Accuracy: 0.328125
Batch: 4, Loss: 2.5784358978271484, Accuracy: 0.3115234375
Batch: 5, Loss: 2.4553093910217285, Accuracy: 0.3486328125
Batch: 6, Loss: 2.3079426288604736, Accuracy: 0.35

Batch: 130, Loss: 2.074617385864258, Accuracy: 0.4521484375
Batch: 131, Loss: 1.9177411794662476, Accuracy: 0.470703125
Batch: 132, Loss: 1.9631651639938354, Accuracy: 0.4755859375
Batch: 133, Loss: 1.9061217308044434, Accuracy: 0.478515625
Batch: 134, Loss: 1.877782940864563, Accuracy: 0.4814453125
Batch: 135, Loss: 1.8094730377197266, Accuracy: 0.517578125
Batch: 136, Loss: 1.8410136699676514, Accuracy: 0.4833984375
Batch: 137, Loss: 1.7223100662231445, Accuracy: 0.48828125
Batch: 138, Loss: 1.6585032939910889, Accuracy: 0.53515625
Batch: 139, Loss: 1.7189562320709229, Accuracy: 0.50390625
Batch: 140, Loss: 1.873392105102539, Accuracy: 0.4892578125
Batch: 141, Loss: 1.8363512754440308, Accuracy: 0.5068359375
Batch: 142, Loss: 1.8479151725769043, Accuracy: 0.4892578125
Batch: 143, Loss: 1.9093008041381836, Accuracy: 0.4716796875
Batch: 144, Loss: 1.836427927017212, Accuracy: 0.5068359375
Batch: 145, Loss: 1.75357985496521, Accuracy: 0.4990234375
Batch: 146, Loss: 1.893491506576538, Ac

Batch: 118, Loss: 1.5193297863006592, Accuracy: 0.5732421875
Batch: 119, Loss: 1.5805888175964355, Accuracy: 0.5615234375
Batch: 120, Loss: 1.6668641567230225, Accuracy: 0.5087890625
Batch: 121, Loss: 1.7208971977233887, Accuracy: 0.4912109375
Batch: 122, Loss: 1.6000341176986694, Accuracy: 0.5498046875
Batch: 123, Loss: 1.5809786319732666, Accuracy: 0.5625
Batch: 124, Loss: 1.6103386878967285, Accuracy: 0.5458984375
Batch: 125, Loss: 1.6580562591552734, Accuracy: 0.515625
Batch: 126, Loss: 1.649338960647583, Accuracy: 0.4931640625
Batch: 127, Loss: 1.5367767810821533, Accuracy: 0.5673828125
Batch: 128, Loss: 1.7625815868377686, Accuracy: 0.5029296875
Batch: 129, Loss: 1.6100306510925293, Accuracy: 0.5302734375
Batch: 130, Loss: 1.8294484615325928, Accuracy: 0.4736328125
Batch: 131, Loss: 1.6639070510864258, Accuracy: 0.509765625
Batch: 132, Loss: 1.7186518907546997, Accuracy: 0.5302734375
Batch: 133, Loss: 1.6241557598114014, Accuracy: 0.5302734375
Batch: 134, Loss: 1.6352674961090088

Batch: 106, Loss: 1.5289676189422607, Accuracy: 0.5283203125
Batch: 107, Loss: 1.6356877088546753, Accuracy: 0.5166015625
Batch: 108, Loss: 1.5782313346862793, Accuracy: 0.5439453125
Batch: 109, Loss: 1.6269041299819946, Accuracy: 0.505859375
Batch: 110, Loss: 1.3230836391448975, Accuracy: 0.5810546875
Batch: 111, Loss: 1.4979180097579956, Accuracy: 0.5224609375
Batch: 112, Loss: 1.4887721538543701, Accuracy: 0.5537109375
Batch: 113, Loss: 1.5080269575119019, Accuracy: 0.576171875
Batch: 114, Loss: 1.572899341583252, Accuracy: 0.51953125
Batch: 115, Loss: 1.6234164237976074, Accuracy: 0.53515625
Batch: 116, Loss: 1.5752021074295044, Accuracy: 0.5224609375
Batch: 117, Loss: 1.591491460800171, Accuracy: 0.54296875
Batch: 118, Loss: 1.3498282432556152, Accuracy: 0.59375
Batch: 119, Loss: 1.4069349765777588, Accuracy: 0.58984375
Batch: 120, Loss: 1.5624773502349854, Accuracy: 0.5107421875
Batch: 121, Loss: 1.6026251316070557, Accuracy: 0.5126953125
Batch: 122, Loss: 1.447351336479187, Accu

In [4]:
# Load the per-epoch training log (log.csv inside data_directory, the file
# training_model() writes once training completes) and display it as the cell output.
# Relies on `os`, `pd` and `data_directory` from the earlier cells.
log = pd.read_csv(os.path.join(data_directory, "log.csv"))
log

Unnamed: 0,Epoch,Loss,Accuracy
0,1,2.643317,0.290039
1,2,1.873376,0.496094
2,3,1.548782,0.557617
3,4,1.417467,0.597656
4,5,1.348234,0.585938
5,6,1.265394,0.618164
6,7,1.186394,0.630859
7,8,1.145774,0.642578
8,9,1.097427,0.656250
9,10,1.073594,0.650391
