In [62]:
import numpy as np
import pandas as pd
import sys
import os
import pickle
import tqdm
import tensorflow as tf
from keras.models import Sequential
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Activation #for our model architecture
from keras.layers import Dense, Dropout, LSTM
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ModelCheckpoint
#Natural Language Toolkit for NLP
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from string import punctuation

In [78]:
#load data
# Read the whole corpus through a context manager so the file handle is closed
# as soon as the text is in memory (the bare open(...).read() leaked it).
with open("/hafiz.txt", "r") as corpus_file: # "r" stands for read only file
    poems = corpus_file.read()
# Normalise to lower case: this shrinks the character vocabulary the model has to learn.
poems = poems.lower() #highly recommended - I didn't do it at first and the outcome was not satisfying
# Strip every character listed in string.punctuation.
poems = poems.translate(str.maketrans("", "", punctuation)) #no punctuations

In [79]:
# Build the character <-> integer lookup tables used to encode the corpus.
DT = os.path.basename("/hafiz.txt")  # file name without directory; prefixes every artifact written below
# NOTE: despite its name, `words` is the sorted string of distinct *characters* in the corpus.
words = ''.join(sorted(set(poems)))
CI = {c: i for i, c in enumerate(words)}  # character -> integer id
IC = {i: c for i, c in enumerate(words)}  # integer id -> character

# Persist both mappings so text can be decoded later without rebuilding the vocabulary.
# Context managers guarantee the pickle files are flushed and closed.
with open(f"{DT}-CI.pickle", "wb") as ci_file:
    pickle.dump(CI, ci_file)
with open(f"{DT}-IC.pickle", "wb") as ic_file:
    pickle.dump(IC, ic_file)

# The model works on numbers, so encode the whole corpus as an array of integer ids.
intTxt = np.array([CI[c] for c in poems])

In [80]:
# Stream the integer-encoded corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(intTxt)
# Cut the stream into fixed chunks of 201 ids; a trailing chunk shorter than 201
# is discarded so every chunk has the same length.
sequences = char_dataset.batch(batch_size=201, drop_remainder=True)

In [81]:
def SPL(exp):
    """Split one chunk of character ids into (input window, next id) samples.

    Every sample pairs 100 consecutive ids with the id that immediately
    follows them. Windows start at offsets 0, 1, ... up to (but excluding)
    ``(len(exp) - 1) // 2``; for a 201-element chunk that is 100 samples.

    Args:
        exp: indexable sequence of character ids supporting slicing and ``len``.

    Returns:
        A ``tf.data.Dataset`` of ``(window, target)`` pairs, in offset order.
    """
    n_offsets = (len(exp) - 1) // 2
    # Offset 0 seeds the dataset; later offsets are appended one by one.
    data = tf.data.Dataset.from_tensors((exp[:100], exp[100]))
    for start in range(1, n_offsets):
        sample = (exp[start: start + 100], exp[start + 100])
        data = data.concatenate(tf.data.Dataset.from_tensors(sample))
    return data

# Expand every fixed-length chunk into its (input window, next character) samples
# via SPL and flatten the per-chunk results into one stream of samples.
dataset = sequences.flat_map(SPL)

In [83]:
# Vocabulary size: `words` holds the sorted distinct characters of the corpus, so its
# length is the number of classes (used as the one-hot depth and the output layer width).
chars = len(words)

def encode(in_, out_):
    """One-hot encode an (input window, target character) sample.

    Args:
        in_: integer character ids of the input window.
        out_: integer id of the character that follows the window.

    Returns:
        ``(x, y)`` where ``x`` is ``in_`` and ``y`` is ``out_``, each one-hot
        encoded with depth ``chars`` (the module-level vocabulary size).
    """
    # The label must come from out_: encoding in_ twice would make the target a copy
    # of the input window instead of the next character the model should predict.
    return tf.one_hot(in_, chars), tf.one_hot(out_, chars)

In [84]:
# One-hot encode every (input window, target character) sample.
# NOTE: this rebinds `dataset` to its own transformation, so re-running this cell
# without first re-running the cell that builds `dataset` applies `encode` twice.
dataset = dataset.map(encode)

In [85]:
# Endless, shuffled stream of fixed-size batches (incomplete batches are dropped).
ds = (
    dataset
    .repeat()
    .shuffle(buffer_size=1024)
    .batch(batch_size=128, drop_remainder=True)
)
# Sanity check: vocabulary size, then corpus length in characters (one value per line).
print(len(words), len(intTxt), sep="\n")

30
38574


In [86]:
# Two stacked LSTM layers followed by a softmax over the character vocabulary.
model = Sequential([
    # Input: 100 time steps, each a one-hot vector of width `chars`.
    # return_sequences=True makes this layer emit its output at every time step,
    # which the second LSTM needs as its input sequence.
    LSTM(256, input_shape=(100, chars), return_sequences=True),
    Dropout(0.3),
    # Second LSTM returns only its final output, summarising the whole window.
    LSTM(256),
    # One probability per character in the vocabulary.
    Dense(chars, activation="softmax"),
])
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 100, 256)          293888    
_________________________________________________________________
dropout_4 (Dropout)          (None, 100, 256)          0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dense_3 (Dense)              (None, 30)                7710      
Total params: 826,910
Trainable params: 826,910
Non-trainable params: 0
_________________________________________________________________


In [87]:
# Training pipeline: repeat indefinitely, shuffle within a 1024-sample buffer and
# emit fixed-size batches. BATCH_SIZE is shared with steps_per_epoch below so the
# two values cannot drift apart.
BATCH_SIZE = 64
ds = dataset.repeat().shuffle(1024).batch(BATCH_SIZE, drop_remainder=True)

# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs("out", exist_ok=True)

# The model must be compiled before it can be trained.
# categorical_crossentropy matches the task: one class per character (multiclass),
# with one-hot targets - binary_crossentropy would be the wrong loss here.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# `ds` repeats forever, so Keras needs an explicit number of batches per epoch.
# The 15000 offset was chosen empirically for this corpus (38574 characters).
model.fit(ds, steps_per_epoch=(len(intTxt) - 15000) // BATCH_SIZE, epochs=32)

# Persist the trained model; the generation cell reloads weights from this exact path.
model.save(f"out/{DT}-{100}.h5")

Train for 368 steps
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [101]:
# Reload the character <-> integer mappings under the same file names they were
# saved with ("<DT>-CI.pickle" / "<DT>-IC.pickle").
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote.
with open(f"{DT}-CI.pickle", "rb") as ci_file:
    CI_ = pickle.load(ci_file)
with open(f"{DT}-IC.pickle", "rb") as ic_file:
    IC_ = pickle.load(ic_file)

size = len(CI_)  # vocabulary size, taken from the mapping actually used for encoding below
model.load_weights(f"out/{DT}-{100}.h5")  #loading the file saved

# Seed text: must only contain characters present in the vocabulary, i.e. lower case
# and without punctuation, because the corpus was normalised that way.
pred = "hafiz shirazi"
produced_poem = "" #The poem produced

for i in tqdm.tqdm(range(600), "Poem generating"): #generates 600 characters of poem
    # One-hot encode the current seed, right-aligned in a 100-step window;
    # the unused leading time steps stay all-zero.
    X = np.zeros((1, 100, size))
    for t, char in enumerate(pred):
        X[0, (100 - len(pred)) + t, CI_[char]] = 1
    predicted = model.predict(X, verbose=0)[0]
    # Greedy decoding: always take the most probable next character.
    ind_ = np.argmax(predicted)
    char_ = IC_[ind_]
    produced_poem += char_ #append it to previous produced poem
    # Slide the seed forward by one character so the next step sees the new output.
    pred = pred[1:] + char_


print(produced_poem)

Poem generating: 100%|███████████████████████████████████████████████████████████████| 600/600 [00:11<00:00, 50.49it/s]

den when dispate
if this will not suit such be love small
who is the doing that is no mamy framer not ensurrow the skne
to alwats out own face
hafiz graceful yetts heart and speech of your good
on the straight a cul i vist
stee and in this curnigi sert
the angels in this her hair her heart master will keve a healt will wake
the one kempassidg those
full of his own path her bidde the skn

o mess of divers flowers and in the hand of my lovers sweet lips
and hawars that senver though may have seemed depaver
and the wind secrets of your face and hair
partred my friend
brought a charm fragrant bree



