In [1]:
# Cell 1: Import necessary libraries
import nltk
import string
import random
import json
import pickle
import numpy as np
import tensorflow as tf

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import load_model

In [2]:
# Cell 2: Download necessary NLTK data for tokenization and lemmatization
# These resources back the nltk.word_tokenize and WordNetLemmatizer calls used
# by the preprocessing helpers later in this notebook.
nltk.download('punkt')  # Download the tokenizer data
nltk.download('wordnet')  # Download the lemmatizer data

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [3]:
# Cell 3: Define preprocessing functions for FAQ handling
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """
    Return the lemma of every token, in the same order as the input.
    Each token is passed through the shared WordNetLemmatizer instance `lemmer`.
    """
    return list(map(lemmer.lemmatize, tokens))

# Translation table that deletes every character in string.punctuation.
# Built once here rather than on each call: LemNormalize is used as the
# TfidfVectorizer tokenizer, so it runs once per document on every query.
_REMOVE_PUNCT_TABLE = str.maketrans('', '', string.punctuation)

def LemNormalize(text):
    """
    Normalizes the text by converting to lowercase, removing punctuation, and lemmatizing.

    Args:
        text: The raw string to normalize.

    Returns:
        A list of lemmatized tokens produced by lowercasing `text`, deleting all
        string.punctuation characters, tokenizing with nltk.word_tokenize and
        passing the tokens through LemTokens.
    """
    return LemTokens(nltk.word_tokenize(text.lower().translate(_REMOVE_PUNCT_TABLE)))

In [4]:
# Cell 4: Load FAQs from a text file
def load_faqs(filename, delimiter=',', encoding='latin-1'):
    """
    Loads FAQs from a file and splits them into questions and answers.

    Each line is split on the FIRST occurrence of `delimiter`, so the answer
    itself may contain the delimiter. Surrounding whitespace is trimmed from
    both parts. Lines without the delimiter, and lines whose question or
    answer is empty after trimming, are skipped so they cannot pollute the
    FAQ corpus or produce blank replies.

    Args:
        filename: Path of the text file to read.
        delimiter: Separator between question and answer (default ',').
        encoding: Text encoding used to open the file (default 'latin-1').

    Returns:
        A tuple (questions, answers) of two parallel lists of strings.
    """
    questions = []
    answers = []
    with open(filename, 'r', encoding=encoding) as file:
        for line in file:
            question, separator, answer = line.partition(delimiter)
            question = question.strip()
            answer = answer.strip()
            # `separator` is empty when the delimiter is absent from the line.
            if not separator or not question or not answer:
                continue
            questions.append(question)
            answers.append(answer)
    return questions, answers

# Read the FAQ file, then lowercase every question so matching is case-insensitive
sent_tokens, answer_tokens = load_faqs("BankFAQs.doc")
sent_tokens = list(map(str.lower, sent_tokens))

In [5]:
# Cell 5: Load intents for greeting and intent classification
# Load the intents JSON file containing patterns and responses for different intents.
# The context manager closes the file handle as soon as parsing finishes.
with open('intents.json') as intents_file:
    intents = json.load(intents_file)

In [6]:
# Cell 6: Preprocess intents data for training the model
words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']  # Characters to ignore during processing

# Tokenize every pattern: collect all tokens in `words`, remember each
# (token list, tag) pair in `documents`, and record each tag once in `classes`.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lemmatize words and prepare sorted lists of unique words and classes.
# The punctuation filter is applied to the raw token, before lowercasing.
words = sorted({lemmer.lemmatize(word.lower()) for word in words if word not in ignore_letters})
classes = sorted(set(classes))

# Save processed words and classes for later use. Context managers make sure
# the pickle files are flushed and closed before anything else reads them.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [7]:
# Cell 7: Prepare training data for the intent classification model
training = []
output_empty = [0] * len(classes)

for document in documents:
    # Lemmatized tokens of this pattern, held in a set for O(1) membership tests.
    word_patterns = {lemmer.lemmatize(word.lower()) for word in document[0]}
    # Bag of words: 1 where the vocabulary word occurs in the pattern, else 0.
    bag = [1 if word in word_patterns else 0 for word in words]

    # One-hot label row for the document's intent tag.
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append(bag + output_row)

# Shuffle and convert to numpy array for training.
# NOTE: no random seed is set here, so the row order differs between runs.
random.shuffle(training)
training = np.array(training)

# Each row is [bag-of-words features | one-hot label]; split at len(words).
train_x = training[:, :len(words)]  # Feature set
train_y = training[:, len(words):]  # Label set

In [8]:
# Cell 8: Build and train the intent classification model
# Feed-forward classifier: bag-of-words vector in, one probability per intent out.
model = tf.keras.Sequential()
# Declare the input with an explicit Input object instead of `input_shape=` on
# the first Dense layer, which Keras warns against for Sequential models.
model.add(tf.keras.Input(shape=(len(train_x[0]),)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(len(train_y[0]), activation='softmax'))

# Compile and train the model
sgd = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# train_x / train_y are already numpy arrays, so they are passed as-is.
model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)
model.save('chatbot_model.h5')  # Save the trained model
print('Intent model training complete')

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0193 - loss: 3.6134
Epoch 2/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0429 - loss: 3.5096     
Epoch 3/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0717 - loss: 3.4346
Epoch 4/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1527 - loss: 3.3191
Epoch 5/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1783 - loss: 3.2362
Epoch 6/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2405 - loss: 3.0108
Epoch 7/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3145 - loss: 2.9020
Epoch 8/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3675 - loss: 2.5912
Epoch 9/200
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Intent model training complete


In [9]:
# Cell 9: Load the trained model
# Loaded through tf.keras, the same API that built and saved the model, so the
# save and load sides cannot end up on different Keras implementations.
model = tf.keras.models.load_model('chatbot_model.h5')



In [10]:
# Cell 10: Define utility functions for handling chatbot interactions
def clean_up_sentence(sentence):
    """
    Splits the sentence into tokens and reduces each lowercased token to its lemma.
    Returns the lemmas as a list, in the order the tokens appear in the sentence.
    """
    return [lemmer.lemmatize(token.lower()) for token in nltk.word_tokenize(sentence)]

def bag_of_words(sentence):
    """
    Creates a bag of words representation for the input sentence.

    Args:
        sentence: The raw user sentence.

    Returns:
        A numpy array with one entry per word in the global vocabulary `words`:
        1 if that word occurs among the cleaned-up tokens of `sentence`, else 0.
    """
    # A set gives constant-time lookups, replacing the nested scan over every
    # (token, vocabulary word) pair.
    present = set(clean_up_sentence(sentence))
    return np.array([1 if word in present else 0 for word in words])

def predict_class(sentence, threshold=0.25):
    """
    Predicts the class of the sentence using the trained model.

    Args:
        sentence: The raw user sentence.
        threshold: Predictions whose probability is not strictly greater than
            this value are discarded (default 0.25).

    Returns:
        A list of dicts {'intent': <tag>, 'probability': <str>} sorted from
        most to least probable. The list is empty when no prediction exceeds
        the threshold.
    """
    bow = bag_of_words(sentence)
    # verbose=0 keeps Keras' per-call progress bar out of the chat transcript.
    res = model.predict(np.array([bow]), verbose=0)[0]
    ranked = sorted(
        ((index, prob) for index, prob in enumerate(res) if prob > threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [{'intent': classes[index], 'probability': str(prob)} for index, prob in ranked]

def get_response(intents_list, intents_json, fallback="I am sorry! I don't understand you."):
    """
    Retrieves a response based on the predicted intent.

    Args:
        intents_list: Predictions as returned by predict_class; only the first
            (most probable) entry's 'intent' tag is used.
        intents_json: Parsed intents data with an 'intents' list whose items
            carry a 'tag' and a list of 'responses'.
        fallback: Text returned when no response can be chosen.

    Returns:
        A random response of the first intent whose tag matches, or `fallback`
        when `intents_list` is empty, no tag matches, or the matching intent
        has no responses. (Previously these cases raised IndexError or
        UnboundLocalError.)
    """
    if not intents_list:
        return fallback
    tag = intents_list[0]['intent']
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            responses = intent['responses']
            return random.choice(responses) if responses else fallback
    return fallback

def faq_response(user_response):
    """
    Generates a response based on the FAQ dataset using cosine similarity.

    The FAQ questions in `sent_tokens` plus the user query are vectorized
    together with TF-IDF (tokenized by LemNormalize), and the query vector is
    compared with every question vector.

    Args:
        user_response: The user's query text.

    Returns:
        The answer from `answer_tokens` paired with the most similar question,
        or an apology message when there are no FAQs or the best similarity
        score is 0.
    """
    fallback = "I am sorry! I don't understand you."
    # With no FAQ questions there is nothing to compare against; indexing the
    # empty similarity row below would raise IndexError.
    if not sent_tokens:
        return fallback

    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens + [user_response])
    # Last row is the user query; all rows before it are the FAQ questions.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1])[0]

    # Sort once: the last argsort index is the best match and also gives its score.
    idx = scores.argsort()[-1]
    if scores[idx] == 0:
        return fallback
    return answer_tokens[idx]

In [11]:
# Cell 11: Chatbot interaction loop
print("Hello! I am FinGuard. Start typing your text to talk to me. For ending the conversation type 'bye'!")

while True:
    # Trim surrounding whitespace so inputs such as "bye " still match the exit keywords.
    user_response = input("").strip().lower()

    # Nothing typed: prompt again instead of sending an empty string to the model.
    if not user_response:
        continue

    if user_response == 'bye':
        print('FinGuard: Goodbye!')
        break
    elif user_response in ['thanks', 'thank you']:
        # Thanking the bot also ends the conversation.
        print('FinGuard: You are welcome!')
        break
    else:
        # Prefer a confidently classified intent; otherwise fall back to FAQ lookup.
        ints = predict_class(user_response)
        if ints:
            response = get_response(ints, intents)
        else:
            response = faq_response(user_response)
        print(f'FinGuard: {response}')

Hello! I am FinGuard. Start typing your text to talk to me. For ending the conversation type 'bye'!
bye
FinGuard: Goodbye!


In [55]:
# Additional setup for Streamlit GUI
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the import on the next line sees the package.
%pip install streamlit
import streamlit as st



In [56]:
# Install Streamlit silently into the running kernel's environment
%pip install -q streamlit

In [67]:
# Prints this machine's public IPv4 address as reported by ipv4.icanhazip.com.
# NOTE(review): presumably needed as the access password on the localtunnel
# landing page — confirm. The printed address is saved in the cell output.
!wget -q -O - ipv4.icanhazip.com  # Fetch external IP address

34.16.222.47


In [68]:
# Installs the localtunnel npm package globally (-g); the launch cell further
# down invokes it through npx.
!npm install -g localtunnel  # Install localtunnel for exposing local apps

[K[?25h
changed 22 packages, and audited 23 packages in 795ms

3 packages are looking for funding
  run `npm fund` for details

1 [33m[1mmoderate[22m[39m severity vulnerability

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [69]:
# `&` puts the Streamlit server in the background so localtunnel can start in
# the same shell; port 8501 matches the local URL Streamlit reports on startup.
!streamlit run FinGuard_GUI.py & npx localtunnel --port 8501
# Run the Streamlit app and expose it via localtunnel


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.16.222.47:8501[0m
[0m
your url is: https://kind-rockets-dress.loca.lt
2024-08-03 03:40:16.710828: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-03 03:40:16.745547: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-03 03:40:16.755795: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alr