In [44]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="3"
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding

#import all modules.

In [45]:
# Batch geometry shared by the batch generator and the model definition:
# number of parallel sequences per batch, and characters per sequence.
batchSize, seqLength = 16, 64

# Output locations: the saved character-to-index mapping (JSON) and the
# directory that receives the model weight checkpoints.
model_weights_directory = 'Model_Weights/'
cij = "char_to_index.json"

In [59]:
def makeBatch(allChars, uniqueChars, batch_size=None, seq_length=None):
    """Yield (X, Y) training batches cut from an encoded character array.

    The array is split into ``batch_size`` contiguous rows of
    ``len(allChars) // batch_size`` characters each. Every yielded batch takes
    the next ``seq_length``-wide window from each row, so row ``i`` of one
    batch continues where row ``i`` of the previous batch stopped.

    Parameters
    ----------
    allChars : np.ndarray
        1-D array of integer character indices.
    uniqueChars : int
        Length of the one-hot axis of ``Y``.
    batch_size : int, optional
        Rows per batch. Defaults to the module-level ``batchSize``.
    seq_length : int, optional
        Characters per row and stride between consecutive batches.
        Defaults to the module-level ``seqLength``.

    Yields
    ------
    X : np.ndarray, shape (batch_size, seq_length), float64
        Input character indices.
    Y : np.ndarray, shape (batch_size, seq_length, uniqueChars), float64
        One-hot encoding of the character that follows each input position.
    """
    # Fall back to the notebook-wide configuration; previously the loops
    # hard-coded 16 and 64 and silently ignored any change to those settings.
    if batch_size is None:
        batch_size = batchSize
    if seq_length is None:
        seq_length = seqLength

    size = allChars.shape[0]
    batchCapacity = size // batch_size

    rows = np.arange(batch_size)[:, None]   # (batch_size, 1) row selector
    offsets = np.arange(seq_length)         # (seq_length,) position within a window
    row_starts = rows * batchCapacity       # first character of every row

    # Stopping at batchCapacity - seq_length keeps the "+ 1" target lookup
    # inside the array for the last row.
    for s in range(0, batchCapacity - seq_length, seq_length):
        positions = row_starts + s + offsets             # (batch_size, seq_length)

        X = allChars[positions].astype(np.float64)

        Y = np.zeros((batch_size, seq_length, uniqueChars))
        # Target for each position is the next character in the text.
        Y[rows, offsets, allChars[positions + 1]] = 1
        yield X, Y

In [60]:
def built_model(batch_size, seq_length, uniqueChars):
    """Assemble the (uncompiled) character-level network.

    Layer stack, in order: a 512-wide embedding over ``uniqueChars`` symbols
    with a fixed ``(batch_size, seq_length)`` input shape, three stateful
    256-unit LSTM layers that return full sequences (each followed by 20%
    dropout), a per-timestep dense projection back to ``uniqueChars`` and a
    softmax activation.

    Returns the ``Sequential`` model; compiling is left to the caller.
    """
    net = Sequential()

    net.add(
        Embedding(
            input_dim=uniqueChars,
            output_dim=512,
            batch_input_shape=(batch_size, seq_length),
            name="embd_1",
        )
    )

    # Three identical recurrent stages, named lstm_1/drp_1 ... lstm_3/drp_3.
    for stage in (1, 2, 3):
        net.add(
            LSTM(
                256,
                return_sequences=True,
                stateful=True,
                name="lstm_{}".format(stage),
            )
        )
        net.add(Dropout(0.2, name="drp_{}".format(stage)))

    net.add(TimeDistributed(Dense(uniqueChars)))
    net.add(Activation("softmax"))

    # TODO : load weights.

    return net

In [61]:
def train_model(data, epochs = 40, save_every = 1):
    """Train the character-level model on ``data`` and persist its artefacts.

    Parameters
    ----------
    data : str
        Training text; every distinct character becomes one class.
    epochs : int
        Number of passes over the batches produced by ``makeBatch``.
    save_every : int
        Save a weight checkpoint every ``save_every`` epochs. The default of 1
        saves after every epoch.

    Side effects
    ------------
    * Writes the character-to-index mapping as JSON to the path in ``cij``.
    * Saves ``Weights_<epoch>.h5`` files under ``model_weights_directory``.
    * Writes ``log.csv`` with the loss and accuracy of the last batch of each
      completed epoch. The log is written even if training is interrupted.

    Raises
    ------
    ValueError
        If ``save_every`` is smaller than 1.
    """
    if save_every < 1:
        raise ValueError("save_every must be a positive integer")

    #mapping character to index number
    cti = {ch: i for (i, ch) in enumerate(sorted(set(data)))}
    print("Number of unique characters included in our ABC database = {}".format(len(cti)))
    #Will print the number of different characters in our database

    with open(cij, mode = "w") as f:
        json.dump(cti, f)

    uniqueChars = len(cti)

    model = built_model(batchSize, seqLength, uniqueChars)
    model.summary()
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

    allChar = np.asarray([cti[c] for c in data], dtype = np.int32)
    print("Total number of characters = "+str(allChar.shape[0]))

    epoch_number, loss, accuracy = [], [], []

    try:
        for epoch in range(epochs):
            print("Epoch {}/{}".format(epoch+1, epochs))

            # The LSTM layers are stateful and every epoch restarts each batch
            # row at the beginning of its slice of the text, so the state left
            # over from the end of the previous epoch has to be cleared first.
            model.reset_states()

            final_epoch_loss, final_epoch_accuracy = 0, 0

            #Batches are read one-by-one and the model is trained on each in turn.
            for i, (x, y) in enumerate(makeBatch(allChar, uniqueChars)):
                final_epoch_loss, final_epoch_accuracy = model.train_on_batch(x, y)
                print("Batch: {}, Loss: {}, Accuracy: {}".format(i+1, final_epoch_loss, final_epoch_accuracy))

            # Record the epoch only once it has finished, so the three lists
            # always have equal length even if training is interrupted.
            epoch_number.append(epoch+1)
            loss.append(final_epoch_loss)
            accuracy.append(final_epoch_accuracy)

            #saving weights every `save_every` epochs
            if (epoch + 1) % save_every == 0:
                os.makedirs(model_weights_directory, exist_ok = True)
                model.save_weights(os.path.join(model_weights_directory, "Weights_{}.h5".format(epoch+1)))
                print('Saved Weights at epoch {} to file Weights_{}.h5'.format(epoch+1, epoch+1))
    finally:
        #record the loss and accuracy of every completed epoch
        log_frame = pd.DataFrame(columns = ["Epoch", "Loss", "Accuracy"])
        log_frame["Epoch"] = epoch_number
        log_frame["Loss"] = loss
        log_frame["Accuracy"] = accuracy
        log_frame.to_csv("log.csv", index = False)


In [62]:
# Training corpus: a plain-text file read in full into memory.
Datafile = "Morris and Waltzes.txt"

# The context manager closes the handle even if the read raises.
with open(Datafile, mode='r') as file:
    data = file.read()

if __name__ == "__main__":
    train_model(data)
    

Number of unique characters included in our ABC database = 82
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embd_1 (Embedding)           (16, 64, 512)             41984     
_________________________________________________________________
lstm_1 (LSTM)                (16, 64, 256)             787456    
_________________________________________________________________
drp_1 (Dropout)              (16, 64, 256)             0         
_________________________________________________________________
lstm_2 (LSTM)                (16, 64, 256)             525312    
_________________________________________________________________
drp_2 (Dropout)              (16, 64, 256)             0         
_________________________________________________________________
lstm_3 (LSTM)                (16, 64, 256)             525312    
_________________________________________________________________
drp_3 (Dropout

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Batch: 1, Loss: 4.420258522033691, Accuracy: 0.005859375
Batch: 2, Loss: 4.105154991149902, Accuracy: 0.1240234375
Batch: 3, Loss: 3.930936336517334, Accuracy: 0.0986328125
Batch: 4, Loss: 3.596402406692505, Accuracy: 0.1220703125
Batch: 5, Loss: 3.327986001968384, Accuracy: 0.1259765625
Batch: 6, Loss: 3.503667116165161, Accuracy: 0.1240234375
Batch: 7, Loss: 3.5223934650421143, Accuracy: 0.142578125
Batch: 8, Loss: 3.4347286224365234, Accuracy: 0.13671875
Batch: 9, Loss: 3.630373239517212, Accuracy: 0.115234375
Batch: 10, Loss: 3.937164306640625, Accuracy: 0.0576171875
Batch: 11, Loss: 3.478513240814209, Accuracy: 0.091796875
Batch: 12, Loss: 3.3535823822021484, Accuracy: 0.1123046875
Batch: 13, Loss: 3.374765157699585, Accuracy: 0.1142578125
Batch: 14, Loss: 3.4170644283294678, Accuracy: 0.1298828125
Batch: 15, Loss: 3.323763608932495, Accuracy: 0.1318359375
Batch: 16, Loss: 3.5368857383728027, Accuracy: 0.1318359375
Batch: 17, Loss: 3.4910178184509277, Accuracy: 0.119140625
Batch: 

KeyboardInterrupt: 

In [None]:
# Load the per-epoch training log written by train_model and show it as
# the cell's output.
log = pd.read_csv(filepath_or_buffer="log.csv")
log