In [67]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from setuptools.dist import sequence

from cnn_1 import history_ann

In [68]:
df =  pd.read_csv('qoute_dataset.csv')

In [69]:
df.shape

(3038, 2)

In [70]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3038 entries, 0 to 3037
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   quote   3038 non-null   object
 1   Author  3038 non-null   object
dtypes: object(2)
memory usage: 47.6+ KB


In [71]:
df.isnull().sum()

quote     0
Author    0
dtype: int64

In [72]:
quotes = df['quote']

In [73]:
quotes.head()

0    “The world as we have created it is a process ...
1    “It is our choices, Harry, that show what we t...
2    “There are only two ways to live your life. On...
3    “The person, be it gentleman or lady, who has ...
4    “Imperfection is beauty, madness is genius and...
Name: quote, dtype: object

In [74]:
import string

In [75]:
quotes = quotes.str.lower()
quotes.head()

0    “the world as we have created it is a process ...
1    “it is our choices, harry, that show what we t...
2    “there are only two ways to live your life. on...
3    “the person, be it gentleman or lady, who has ...
4    “imperfection is beauty, madness is genius and...
Name: quote, dtype: object

In [76]:


translator = str.maketrans('', '', string.punctuation)
quotes = quotes.apply(lambda x: x.translate(translator))

In [77]:
quotes.head()

0    “the world as we have created it is a process ...
1    “it is our choices harry that show what we tru...
2    “there are only two ways to live your life one...
3    “the person be it gentleman or lady who has no...
4    “imperfection is beauty madness is genius and ...
Name: quote, dtype: object

In [78]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [79]:

vocab_size = 10000

tokinizer = Tokenizer(num_words=vocab_size)
tokinizer.fit_on_texts(quotes)

In [80]:
word_index = tokinizer.word_index
print(len(word_index))

8978


In [81]:
list(word_index.items())[:10]

[('the', 1),
 ('you', 2),
 ('to', 3),
 ('and', 4),
 ('a', 5),
 ('i', 6),
 ('is', 7),
 ('of', 8),
 ('that', 9),
 ('it', 10)]

In [82]:
sequence = tokinizer.texts_to_sequences(quotes)

In [83]:
quotes[:1]

0    “the world as we have created it is a process ...
Name: quote, dtype: object

In [84]:
for i in range(4):
    print(quotes[i])

“the world as we have created it is a process of our thinking it cannot be changed without changing our thinking”
“it is our choices harry that show what we truly are far more than our abilities”
“there are only two ways to live your life one is as though nothing is a miracle the other is as though everything is a miracle”
“the person be it gentleman or lady who has not pleasure in a good novel must be intolerably stupid”


In [85]:
for i in range(4):
    print(sequence[i])

[713, 62, 29, 19, 16, 946, 10, 7, 5, 1156, 8, 70, 293, 10, 145, 12, 809, 104, 752, 70, 2461]
[947, 7, 70, 871, 373, 9, 433, 21, 19, 465, 14, 294, 52, 54, 70, 3676]
[1337, 14, 53, 201, 714, 3, 81, 15, 36, 37, 7, 29, 329, 93, 7, 5, 1157, 1, 101, 7, 29, 329, 126, 7, 5, 3677]
[713, 116, 12, 10, 2462, 32, 1043, 30, 82, 13, 601, 11, 5, 74, 1338, 119, 12, 2463, 3678]


In [86]:
X = []
y = []

for seq in sequence:
    for i in range(1,len(seq)):
        input_seq = seq[:i]
        output_seq = seq[i]
        X.append(input_seq)
        y.append(output_seq)

In [87]:
max_len = max(len(x) for x in X)
print(max_len)

745


In [88]:

from tensorflow.keras.preprocessing.sequence import pad_sequences
X_padded = pad_sequences(X, maxlen=max_len, padding='pre')

In [89]:
y =  np.array(y)

In [90]:
X_padded.shape

(85271, 745)

In [91]:
y.shape

(85271,)

In [92]:

from tensorflow.keras.utils import to_categorical
y_one_hot = to_categorical(y, num_classes=vocab_size)

In [93]:
y_one_hot.shape

(85271, 10000)

In [95]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,LSTM,SimpleRNN,Dense

In [96]:
embedding_dim = 50
rnn_units = 128


In [99]:

rnn_model = Sequential()

rnn_model.add(
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len)
)
rnn_model.add(SimpleRNN(units=rnn_units))
rnn_model.add(Dense(units=vocab_size, activation='softmax'))



In [100]:
rnn_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])
rnn_model.summary()

In [101]:

lstm_model = Sequential()
lstm_model.add(
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len)
)
lstm_model.add(LSTM(units=rnn_units))
lstm_model.add(Dense(units=vocab_size, activation='softmax'))



In [102]:

lstm_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [103]:
lstm_model.summary()

In [108]:
history_rnn = rnn_model.fit(
    X_padded, y_one_hot,
    epochs=10,batch_size=128,
    validation_split=0.1,verbose=1
)

Epoch 1/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 194ms/step - accuracy: 0.0638 - loss: 6.3516 - val_accuracy: 0.0617 - val_loss: 6.8951
Epoch 2/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 202ms/step - accuracy: 0.0791 - loss: 6.1767 - val_accuracy: 0.0752 - val_loss: 6.8941
Epoch 3/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 183ms/step - accuracy: 0.0882 - loss: 6.0597 - val_accuracy: 0.0833 - val_loss: 6.8950
Epoch 4/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 182ms/step - accuracy: 0.0946 - loss: 5.9653 - val_accuracy: 0.0876 - val_loss: 6.8909
Epoch 5/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 180ms/step - accuracy: 0.1009 - loss: 5.8793 - val_accuracy: 0.0870 - val_loss: 6.8933
Epoch 6/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 180ms/step - accuracy: 0.1092 - loss: 5.7884 - val_accuracy: 0.0945 - val_loss: 6.8782
Epoc

## train only if you have gpu

In [109]:
# history_lstm = lstm_model.fit(
#     X_padded, y_one_hot,
#     epochs=100,batch_size=128,
#     validation_split=0.1,verbose=1
# )

Epoch 1/100
[1m 15/600[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:43[0m 690ms/step - accuracy: 0.0165 - loss: 9.1859

KeyboardInterrupt: 

In [111]:

from tensorflow.keras.models import load_model

lstm_model = load_model("lstm_model.h5")



In [112]:

lstm_model.save("lstm_model.h5")



In [113]:

index_to_word = {}
for word, index in word_index.items():
  index_to_word[index] = word

In [114]:

def predictor(model,tokenizer,text,max_len):
  text = text.lower()

  seq = tokenizer.texts_to_sequences([text])[0]
  seq = pad_sequences([seq], maxlen=max_len, padding='pre')

  pred = model.predict(seq,verbose = 0)
  pred_index = np.argmax(pred)
  return index_to_word[pred_index]

In [115]:
seed_text = "what are you"
next_word = predictor(lstm_model,tokinizer,seed_text,max_len)
print(next_word)


worrying


In [116]:
def generate_text(model,tokenizer,seed_text,max_len,n_words):
  for _ in range(n_words):
    next_word = predictor(model,tokenizer,seed_text,max_len)
    if next_word == "":
      break
    seed_text += " " + next_word
  return seed_text

In [117]:


seed = "are you a "
generate_text = generate_text(lstm_model,tokinizer,seed,max_len,10)
print(generate_text)

are you a  little girl you can only give her monotony and stale


In [118]:

import pickle
with open("tokenizer.pkl", "wb") as f:
  pickle.dump(tokinizer, f)


In [119]:
with open("max_len.pkl", "wb") as f:
  pickle.dump(max_len, f)
     