In [238]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings('ignore')
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from flask import Flask, render_template, request

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/bhagavad-gita-versewise/bhagavad_gita.csv


# ****DATA Loading

In [239]:
data = pd.read_csv('/kaggle/input/bhagavad-gita-versewise/bhagavad_gita.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,verse_number,verse_in_sanskrit,sanskrit_verse_transliteration,translation_in_english,meaning_in_english,translation_in_hindi,meaning_in_hindi
0,0,"Chapter 1, Verse 1",धृतराष्ट्र उवाच |धर्मक्षेत्रे कुरुक्षेत्रे समव...,dhṛitarāśhtra uvācha dharma-kṣhetre kuru-kṣhet...,"Dhritarashtra said: O Sanjay, after gathering ...",The two armies had gathered on the battlefield...,धृतराष्ट्र ने कहाः हे संजय! कुरुक्षेत्र की पवि...,राजा धृतराष्ट्र जन्म से नेत्रहीन होने के अतिरि...
1,1,"Chapter 1, Verse 2",सञ्जय उवाच ।दृष्ट्वा तु पाण्डवानीकं व्यूढं दुर...,sañjaya uvācha dṛiṣhṭvā tu pāṇḍavānīkaṁ vyūḍha...,Sanjay said: On observing the Pandava army sta...,"Sanjay understood Dhritarashtra’s concern, who...",संजय ने कहाः हे राजन्! पाण्डवों की सेना की व्य...,धृतराष्ट्र इस बात की पुष्टि करना चाहता था कि क...
2,2,"Chapter 1, Verse 3",पश्यैतां पाण्डुपुत्राणामाचार्य महतीं चमूम् ।व्...,paśhyaitāṁ pāṇḍu-putrāṇām āchārya mahatīṁ cham...,Duryodhan said: Respected teacher!Behold the m...,Duryodhana asked Dronacharya to look at the sk...,दुर्योधन ने कहाः पूज्य आचार्य! पाण्डु पुत्रों ...,दुर्योधन एक कुशल कूटनीतिज्ञ के रूप में अपने से...
3,3,"Chapter 1, Verse 4-6",अत्र शूरा महेष्वासा भीमार्जुनसमा युधि युयुधान...,atra śhūrā maheṣhvāsā bhīmārjuna-samā yudhiyuy...,Behold in their ranks are many powerful warrio...,"Due to his anxiety, the Pandava army seemed mu...",यहाँ इस सेना में भीम और अर्जुन के समान बलशाली ...,अपने सम्मख संकट को मंडराते देखकर दुर्योधन को प...
4,4,"Chapter 1, Verse 4-6",अत्र शूरा महेष्वासा भीमार्जुनसमा युधि युयुधान...,atra śhūrā maheṣhvāsā bhīmārjuna-samā yudhiyuy...,Behold in their ranks are many powerful warrio...,"Due to his anxiety, the Pandava army seemed mu...",यहाँ इस सेना में भीम और अर्जुन के समान बलशाली ...,अपने सम्मख संकट को मंडराते देखकर दुर्योधन को प...


# ****DATA Insight

In [240]:
data.shape

(701, 8)

In [241]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 701 entries, 0 to 700
Data columns (total 8 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Unnamed: 0                      701 non-null    int64 
 1   verse_number                    701 non-null    object
 2   verse_in_sanskrit               701 non-null    object
 3   sanskrit_verse_transliteration  701 non-null    object
 4   translation_in_english          701 non-null    object
 5   meaning_in_english              701 non-null    object
 6   translation_in_hindi            701 non-null    object
 7   meaning_in_hindi                700 non-null    object
dtypes: int64(1), object(7)
memory usage: 43.9+ KB


In [242]:
data.isnull().sum()

Unnamed: 0                        0
verse_number                      0
verse_in_sanskrit                 0
sanskrit_verse_transliteration    0
translation_in_english            0
meaning_in_english                0
translation_in_hindi              0
meaning_in_hindi                  1
dtype: int64

In [243]:
data.columns

Index(['Unnamed: 0', 'verse_number', 'verse_in_sanskrit',
       'sanskrit_verse_transliteration', 'translation_in_english',
       'meaning_in_english', 'translation_in_hindi', 'meaning_in_hindi'],
      dtype='object')

# DATA Preprocessing

In [244]:
if 'Unnamed: 0' in data.columns:
    data.drop('Unnamed: 0', axis=1, inplace=True)

data.shape

(701, 7)

# ****Tokenization

In [245]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
sequences = tokenizer.texts_to_sequences(data)

#  ****Splitting

In [246]:
sequences_padded = pad_sequences(sequences)
sequences = np.array(sequences_padded)

In [247]:
X = sequences[:, :-1]
y = sequences[:, -1]
vocab_size = len(tokenizer.word_index) + 1

# ****ONE-Hot Encoding

In [248]:
from keras.utils import to_categorical

y = to_categorical(y, num_classes=vocab_size)


# ****Defining the model

In [249]:
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=X.shape[1]))
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(100))
model.add(Dense(100, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))

# ****Compiling and Training the model

In [250]:
# Compile and train the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=100, verbose=2)

# Use the model to generate text
def generate_text(seed_text, model, tokenizer, max_sequence_len):
    input_text = seed_text
    for _ in range(max_sequence_len):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict_classes(token_list, verbose=0)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        input_text += " " + output_word
    return input_text

Epoch 1/100
1/1 - 4s - loss: 2.3026 - accuracy: 0.1429 - 4s/epoch - 4s/step
Epoch 2/100
1/1 - 0s - loss: 2.2978 - accuracy: 0.4286 - 10ms/epoch - 10ms/step
Epoch 3/100
1/1 - 0s - loss: 2.2931 - accuracy: 0.2857 - 10ms/epoch - 10ms/step
Epoch 4/100
1/1 - 0s - loss: 2.2880 - accuracy: 0.2857 - 11ms/epoch - 11ms/step
Epoch 5/100
1/1 - 0s - loss: 2.2823 - accuracy: 0.2857 - 11ms/epoch - 11ms/step
Epoch 6/100
1/1 - 0s - loss: 2.2762 - accuracy: 0.2857 - 11ms/epoch - 11ms/step
Epoch 7/100
1/1 - 0s - loss: 2.2695 - accuracy: 0.2857 - 9ms/epoch - 9ms/step
Epoch 8/100
1/1 - 0s - loss: 2.2621 - accuracy: 0.2857 - 12ms/epoch - 12ms/step
Epoch 9/100
1/1 - 0s - loss: 2.2539 - accuracy: 0.2857 - 11ms/epoch - 11ms/step
Epoch 10/100
1/1 - 0s - loss: 2.2450 - accuracy: 0.2857 - 11ms/epoch - 11ms/step
Epoch 11/100
1/1 - 0s - loss: 2.2350 - accuracy: 0.2857 - 11ms/epoch - 11ms/step
Epoch 12/100
1/1 - 0s - loss: 2.2237 - accuracy: 0.2857 - 10ms/epoch - 10ms/step
Epoch 13/100
1/1 - 0s - loss: 2.2111 - accu

# Saving The Model

In [251]:
model.save('Bhagvad_gita_chatbot_model.h5')


# Loading The Model

In [252]:
model = load_model('Bhagvad_gita_chatbot_model.h5')

# ****Creating a Function to Handle User Input

In [253]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
sequences = tokenizer.texts_to_sequences(data)

In [254]:
def handle_message(message):
    # Preprocess the message
    token_list = tokenizer.texts_to_sequences([message])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

    # Get the model's response
    predicted = model.predict_classes(token_list, verbose=0)

    # Postprocess the response
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted:
            output_word = word
            break

    return output_word

# ****Creating a User Interface And Connecting the UI to the Model

In [255]:
model = load_model('Bhagvad_gita_chatbot_model.h5')

In [256]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
sequences = tokenizer.texts_to_sequences(data)

In [257]:
!pip install --upgrade gradio
!pip install --upgrade pydantic

[0m

In [258]:
#import gradio as gr

# can't be run on kaggle

In [260]:

#def respond(message):
    # Preprocess the message
#    token_list = tokenizer.texts_to_sequences([message])[0]
#    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

    # Get the model's response
#    predicted = model.predict_classes(token_list, verbose=0)
 #   output_word = ""
 #   for word, index in tokenizer.word_index.items():
 #       if index == predicted:
  #          output_word = word
 #           break
  #  return output_word

#iface = gr.Interface(fn=respond, inputs="text", outputs="text")
#iface.launch()
