<a href="https://colab.research.google.com/github/akesh-0909/Quotes-Generator-using-LSTM-/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Loading Required Packages

In [32]:
from bs4   import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
from tqdm import tqdm
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive'
}


## Scraping 1000 quots and saving to remote repository

In [None]:
pages=50
quots_list=[]
authors_list = []
for page in tqdm(range(1,1+pages)):
  url = f"https://www.azquotes.com/top_quotes.html?p={page}"
  response = requests.get(url,headers=headers)
  soup = BeautifulSoup(response.content, 'html.parser')
  quots_list.extend(soup.find_all('a',class_="title"))
  authors_list.extend(soup.find_all('div',class_="author"))

quots_list = [q.text for q in quots_list]
authors_list = [a.text for a in authors_list]

pd.DataFrame({'quote': quots_list, 'author': authors_list}).to_csv("/content/drive/MyDrive/Projects/LSTM RNN GRU etx/quots.csv")

In [None]:
# pd.DataFrame({'quote': [q.text for q in quots_list], 'author': [a.text for a in authors_list]})

In [None]:
print("No of quots",len(quots_list),len(authors_list))

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Loading data From remote repository

In [15]:
quots = pd.read_csv("/content/drive/MyDrive/Projects/LSTM RNN GRU etx/quots.csv")[['quote']]

- **Fitting Tokenizer on Training Data**

In [16]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(quots['quote'].tolist())

In [18]:
print("Size of vocabulary",len(tokenizer.word_index))

Size of vocabulary 2869


In [None]:
quots['quote'][10]

"A strong nation, like a strong person, can afford to be gentle, firm, thoughtful, and restrained. It can afford to extend a helping hand to others. It's a weak nation, like a weak person, that must behave with bluster and boasting and rashness and other signs of insecurity."

In [None]:
tokenizer.texts_to_sequences(quots['quote'][10])[0]

[7]

- **Tokenizing and creating sequential inputs**

In [19]:
input_sequences= []
for quote in quots['quote']:
  token_list = tokenizer.texts_to_sequences([quote])[0]
  for i in range(1,len(token_list)):
    n_gram_sequence = token_list[:i+1]
    input_sequences.append(n_gram_sequence)

In [25]:
# input_sequences

In [20]:
max_len = max([len(x) for x in input_sequences])
print(max_len)

98


- **Padding Input sequences**

In [21]:
padded_inputs = pad_sequences(input_sequences,maxlen=max_len,padding='pre')

# Splitting data into inputs and target

In [22]:
X = padded_inputs[:,:-1]
Y = padded_inputs[:,-1]

In [None]:
X.shape, Y.shape

((17802, 97), (17802,))

- **Reshaping our target for multiclass representation with values binary**

In [23]:

Y = to_categorical(Y,num_classes=len(tokenizer.word_index)+1)

In [27]:
len(tokenizer.word_index)+1

2870

## Initializing our model with Embeddings, LSTM, and Dense layer

In [None]:
model = Sequential()
model.add(Embedding(len(tokenizer.word_index)+1, 100, input_length=97))
model.add(LSTM(500,return_sequences=True))
model.add(LSTM(500))
model.add(Dense(2870, activation='softmax'))

In [None]:
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

In [None]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 97, 100)           287000    
                                                                 
 gru (GRU)                   (None, 97, 500)           903000    
                                                                 
 gru_1 (GRU)                 (None, 500)               1503000   
                                                                 
 dense_2 (Dense)             (None, 2870)              1437870   
                                                                 
Total params: 4130870 (15.76 MB)
Trainable params: 4130870 (15.76 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# callback = tf.keras.callbacks.EarlyStopping(monitor='loss',patience=10)

-  **Fitting Training data to model**

In [None]:
model.fit(X,Y,epochs=50,verbose=1,validation_split=0.2)

- **Saving our model in remote repository**

In [None]:
model.save("/content/drive/MyDrive/Projects/LSTM RNN GRU etx/my_model.keras")

In [1]:
# import pandas as pd
# import tensorflow as tf
# from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
quots = pd.read_csv("/content/drive/MyDrive/Projects/LSTM RNN GRU etx/quots.csv")[['quote','author']]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(quots['quote'].tolist())

- **Loading Model**


In [3]:
model = tf.keras.models.load_model("/content/drive/MyDrive/Projects/LSTM RNN GRU etx/my_model.keras")

In [4]:
import time

# text = "revenge"
def results(text,words):
  for i in range(int(words)):
    # tokenize
    token_text = tokenizer.texts_to_sequences([text])[0]
    # padding
    padded_token_text = pad_sequences([token_text], maxlen=97, padding='pre')
    # predict
    pos = np.argmax(model.predict(padded_token_text))

    for word,index in tokenizer.word_index.items():
      if index == pos:
        text = text + " " + word
        break

  return text


In [5]:
results('love',10)



'love is friendship that has caught fire it is quiet understanding'

In [6]:
pip install gradio

Collecting gradio
  Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.1 (from gradio)
  Downloading gradio_client-1.0.1-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.1/318.1 kB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

## Creating Interface using Gradio

In [7]:
import gradio as gr


demo = gr.Interface(
      fn=(lambda Prompt,nwords : results(Prompt,nwords)),
    inputs=["text", gr.Slider(value=13, minimum=10, maximum=60, step=1,label='How many words ?')],
    outputs=[gr.Textbox(label="Results", lines=7)],
)

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://5c66a4ad2f30c252b9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [9]:
from gradio_client import Client

client = Client("https://5c66a4ad2f30c252b9.gradio.live/")
result = client.predict(
		Prompt="Hello!!",
		nwords=13,
		api_name="/predict")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loaded as API: https://5c66a4ad2f30c252b9.gradio.live/ ✔
