In [1]:
import numpy as np
from tensorflow.keras.layers import Dense,LSTM,Embedding,Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
def getdata(path):
    ip_text=[]
    op_text=[]
    for line in open(path):
        ip_line = "<start> "+line
        op_line=line+" <end>"
        ip_text.append(ip_line)
        op_text.append(op_line)
    return ip_text,op_text
        
    

In [3]:
ip_text,op_text = getdata("poetry.txt")

In [4]:
op_text[0]


'Two roads diverged in a yellow wood,\n <end>'

In [5]:
ip_text[0]

'<start> Two roads diverged in a yellow wood,\n'

In [6]:
all_text = ip_text+op_text

In [7]:
tokenizer = Tokenizer(num_words=5000,filters='')
tokenizer.fit_on_texts(all_text)

In [8]:
word_to_index=(tokenizer.word_index) #dictionary 

In [9]:
ip_seq = tokenizer.texts_to_sequences(ip_text)
op_seq = tokenizer.texts_to_sequences(op_text)

In [10]:
max_len=0
for item in ip_seq:
    max_len=max(len(item),max_len)
max_len   

13

In [11]:
ip_padded_seq = pad_sequences(ip_seq,maxlen=13)
op_padded_seq = pad_sequences(op_seq,maxlen=13)


In [12]:
ip_padded_seq[0]

array([  0,   0,   0,   0,   0,   1, 101, 506, 507,  10,   8, 508, 890])

In [13]:
op_padded_seq[0]

array([  0,   0,   0,   0,   0, 101, 506, 507,  10,   8, 508, 890,   2])

In [14]:
#Word Vectors Loading
word2vec = {}
with open('C:/Users/Asus/Desktop/ML PROJECTS/Glove Vectors/glove.6B.100d.txt',"r",encoding="utf-8") as f:
    for lines in f:
        value = lines.split()
        word=value[0]
        vector=np.asarray(value[1:],dtype="float32")
        word2vec[word]=vector


In [15]:
max_vocab_size=20000
numwords=min(max_vocab_size,len(word_to_index)+1) # as keras tokenizes from 1
emb_matrix=np.zeros((numwords,100)) #100 dim vectors
for word,index in word_to_index.items():
    if index<max_vocab_size:
        vector = word2vec.get(word) #don't use [''] to prevent eroor,.get returns null if word not found.
        if vector is not None:
            emb_matrix[index]=vector

In [16]:
n_seq=len(ip_seq)

In [17]:
#For training
one_hot_vectors=np.zeros((n_seq,max_len,numwords))#n_Seq 2D arrays with rows being 13 and cols being numwords
for index,vector in enumerate(op_padded_seq):
    for wordind,word in enumerate(vector):
        if(word>0):
            one_hot_vectors[index][wordind][word]=1 #go to right sentence,go to appropirate wordindex,set 1 to the correct output.
            

In [18]:
#embedding layer
embedding = Embedding(numwords,100,weights=[emb_matrix],trainable=False)

In [19]:
inputs = Input(shape=(13))
initial_h=Input(shape=(50))
initial_c=Input(shape=(50))
x=embedding(inputs) #13*100 each ip
lstm = LSTM(50,return_sequences=True,return_state=True) #13*50 op now
x,_,_ = lstm(x,initial_state=[initial_h,initial_c]) 
dense = Dense(numwords,activation='softmax')
op=dense(x)
model = Model([inputs,initial_h,initial_c],op)

In [20]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 13)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 13, 100)      332400      input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 50)]         0                                            
______________________________________________________________________________________________

In [21]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
checkpoint = ModelCheckpoint('poetrybest.h5',save_best_only=True)

In [23]:
z=np.zeros((n_seq,50))
m=model.fit([ip_padded_seq,z,z],one_hot_vectors,batch_size=64,epochs=500,validation_split=0.2,callbacks=[checkpoint])

Train on 1264 samples, validate on 317 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500


Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500


Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500


Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500


Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500


Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500


Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500


Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500


Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500


Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [24]:
model.save_weights('500epoch.h5')

In [25]:
input2=Input(shape=(1))
x = embedding(input2)
x,h,c = lstm(x,initial_state=[initial_h,initial_c])
op2 = dense(x)
sample_model = Model([input2,initial_h,initial_c],[op2,h,c])

idtoword={Id:word for word,Id in word_to_index.items()}



In [28]:
sample_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           multiple             332400      input_4[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 50)]         0                                            
____________________________________________________________________________________________

In [31]:
ip_padded_seq.shape
max_len


<tf.Tensor 'lstm_1/Identity:0' shape=(None, 1, 50) dtype=float32>

In [75]:
def sample_line():
    np_ip = np.array([[word_to_index['<start>']]])
    h=np.zeros((1,50))
    c=np.zeros((1,50))
    endtok = word_to_index['<end>']
    op_sent=[]
    for _ in range(max_len):
        o,h,c = sample_model.predict([np_ip,h,c])
        probs = o[0][0]
        probs[0]=0
        probs/=probs.sum()
        idx = np.random.choice(len(probs),p=probs) #element with higher p has more chance to get selected
        if idx==endtok:
            break
        op_sent.append(idtoword.get(idx))
        np_ip[0][0]=idx
    #return probs,o,np_ip
    print(' '.join(op_sent))
        
    
    

In [80]:
for i in range(4):
    sample_line()

panther shake eyes.)
 the life pipe's best struck it i didn't recognize,

names attic,
 one found your would think in this throve

governor rested out out of flowers out used mad. and hold consideration? 

course
 taken do but i can told tempt i'll 'toffile, it peas.



In [93]:
for i in range(4):
    sample_line()

afraid we is are up from the dead 
 the wizard

beside mountain
 come a is well at a four they back to greatness
seemed, our 'the but they heard held in a passing 

grafton proclaimed means that all there back in a show not cost 

