In [1]:
import pandas as pd

import os
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split as tts
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder as LE
from sklearn.metrics.pairwise import cosine_similarity

import nltk
from nltk.corpus import stopwords
import datetime

import speech_recognition as sr
import pyttsx3 
import pyaudio
import pickle

In [2]:
# Load the chatbot corpus. The CSV is decoded as ISO-8859-1 and parsed with
# pandas' pure-Python engine.
data = pd.read_csv(
    "Chatbot Data.csv",
    encoding="ISO-8859-1",
    engine="python",
)

# Name the three columns; the rest of the notebook refers to them by these labels.
data.columns = ["Question", "Answer", "Class"]

# Question texts as a NumPy array.
questions = data["Question"].values

In [3]:
# NLTK's English stop-word list, kept as a set for membership tests.
stop_words = set(stopwords.words('english'))


def cleanup(sentence):
    """Return ``sentence`` with stop words removed.

    The text is split with ``nltk.word_tokenize``; every token that is an
    exact (case-sensitive) member of ``stop_words`` is dropped, and the
    remaining tokens are joined back together with single spaces.
    No stemming is applied.
    """
    kept_tokens = []
    for token in nltk.word_tokenize(sentence):
        if token not in stop_words:
            kept_tokens.append(token)
    return ' '.join(kept_tokens)
    

In [4]:
# Stop-word-stripped copy of every question; each entry is coerced to str
# before being handed to cleanup().
X = [cleanup(str(question)) for question in questions]

In [5]:
# Restore the previously saved model from the 'model_pkl' file.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load a 'model_pkl' that comes from a trusted source.
with open('model_pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [6]:
# Defining entities: the sentence encoder and the class-label encoder.
from sentence_transformers import SentenceTransformer

# Sentence-embedding model loaded from "Tranformer_saved"
# (presumably a locally saved model directory -- note the spelling; verify).
t_model = SentenceTransformer("Tranformer_saved")

# Fit the label encoder on the Class column. fit_transform fits `le` and
# returns the encoded labels, which the cell displays as its output.
# NOTE(review): the recorded output shows a single label 8 inside the run of
# 3s -- possibly an inconsistent Class value in the CSV; confirm.
le = LE()
le.fit_transform(data['Class'])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9])

In [7]:
def get_response(usrText):
    """Return the stored answer whose question best matches ``usrText``.

    The message is stripped, lowercased and passed through ``cleanup``, then
    embedded with ``t_model``. ``model`` predicts a class for that embedding,
    ``le`` maps the prediction back to its class label, and the questions of
    that class are ranked by cosine similarity to the user's embedding. The
    answer belonging to the top-ranked question is returned.

    Parameters
    ----------
    usrText : str or None
        Raw user message. ``None`` or whitespace-only text produces a short
        prompt instead of a prediction.

    Returns
    -------
    str
        The matched answer followed by three spaces, or the prompt message
        when there is no usable input.
    """
    # A missing query parameter arrives as None; answer politely instead of
    # failing on .strip().
    if usrText is None or not usrText.strip():
        return "Sorry, I didn't catch that. Please type your question."

    t_usr = t_model.encode([cleanup(usrText.strip().lower())])

    # One sample in, one label out: unwrap the single predicted class label.
    class_ = le.inverse_transform(model.predict(t_usr))[0]

    questionset = data[data['Class'] == class_]

    # Embed every candidate question in one batched call rather than one
    # encode() call per question. Questions are embedded as stored (they are
    # not passed through cleanup), as before.
    candidates = questionset['Question'].astype(str).tolist()
    sims = cosine_similarity(t_model.encode(candidates), t_usr).ravel()

    # argmax returns the first maximum, the same tie-break as
    # list.index(max(...)).
    best = int(sims.argmax())

    # Positional lookup within the filtered frame.
    return questionset['Answer'].iloc[best] + "   "


In [8]:
def speech_to_text():
    """Record one utterance from the microphone and return its transcript.

    Opens ``sr.Microphone()``, calibrates briefly for ambient noise, listens
    for a phrase and sends the audio to ``Recognizer.recognize_google``.
    Progress and the recognized text are printed along the way.

    Returns
    -------
    str or None
        The lowercased transcript, or ``None`` when the recognition request
        fails (``sr.RequestError``) or the speech could not be understood
        (``sr.UnknownValueError``); a message is printed in both cases.
    """
    r = sr.Recognizer()

    try:
        with sr.Microphone() as source2:
            # Short calibration so the recognizer can account for background noise.
            r.adjust_for_ambient_noise(source2, duration=0.2)

            print("Start Talking : \n")
            audio2 = r.listen(source2)

        # The microphone is released before the recognition call is made.
        print("Recognizing : ......\n")
        MyText = r.recognize_google(audio2).lower()

        print("\nDid you say :   " + MyText)
        return MyText

    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))

    except sr.UnknownValueError:
        # Raised when the recognizer cannot make sense of the audio.
        print("Could not understand the audio")

    return None

In [None]:
from flask import Flask, render_template, request

# Flask application exposing the chatbot over HTTP.
app = Flask(__name__)
app.static_folder = 'static'


@app.route("/")
def home():
    """Serve the chat page rendered from the ``index.html`` template."""
    return render_template("index.html")


@app.route("/get")
def get_bot_response():
    """Return the bot's reply to the ``msg`` query parameter."""
    # Default to '' so a request without ``msg`` never hands None to
    # get_response, which expects a string.
    userText = request.args.get('msg', '')
    return get_response(userText)


if __name__ == "__main__":
    app.run()

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
