## UFC Text-based Chatbot
### Capable of answering questions about pound-for-pound rankings, information on each weight division, and athletes' detailed records.

### Importing libraries

In [None]:
import numpy as np
import nltk #natural language tool kit for NLP
import string
import random

### Opening and reading the data

In [None]:
# Load the chatbot's knowledge base and pre-tokenize it.
# NOTE(review): the path is an absolute, machine-specific Windows path;
# this cell only works where that file exists.
# The `with` block guarantees the file handle is closed once the text has
# been read; undecodable bytes are dropped because of errors='ignore'.
with open(r'C:\Users\17097\Documents\ufcchatbot.txt', 'r', errors = 'ignore') as file:
    raw_doc = file.read()

# Lower-case everything so later matching is case-insensitive.
raw_doc = raw_doc.lower()

# Fetch the NLTK resources requested by this notebook before tokenizing.
nltk.download('punkt')
nltk.download('wordnet')

# Sentence-level tokens are the candidate answers; word-level tokens are
# kept alongside them for inspection.
sent_tokens = nltk.sent_tokenize(raw_doc)
word_tokens = nltk.word_tokenize(raw_doc)

In [None]:
# Notebook preview: display the first four entries of the sentence-token list.
sent_tokens[:4]

In [None]:
# Notebook preview: display the first five entries of the word-token list.
word_tokens[:5]

### Text preprocessing using NLTK

In [None]:
# WordNet is a dictionary included in NLTK.
# One module-level lemmatizer instance is created here and reused by
# LemTokens for every token it processes.
lmtz = nltk.stem.WordNetLemmatizer() # converts words into their root (lemma) form

def LemTokens(tokens):
    """Lemmatize every token and return the results as a new list.

    Each element of ``tokens`` is passed, in order, to the module-level
    lemmatizer ``lmtz``; the input iterable itself is not modified.
    """
    return list(map(lmtz.lemmatize, tokens))

# Translation table for str.translate(): every character in
# string.punctuation is mapped to None, which deletes it from the text.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemNormalize(text):
    """Normalize ``text`` into a list of lemmatized word tokens.

    Steps, in order: lower-case the text, delete punctuation using
    ``remove_punct_dict``, split into words with ``nltk.word_tokenize``,
    then lemmatize the words with ``LemTokens``.
    """
    without_punct = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)

## Defining the greeting function

In [None]:
# Phrases recognised as a greeting, and the replies the bot may pick from.
GREET_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
GREET_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "sup"]

def greet(sentence):
    """Return a random greeting if ``sentence`` contains a known greeting.

    Matching is case-insensitive and works on whole words, so "hi" does not
    match inside "high". Punctuation attached to a word ("hello!") is
    ignored, and multi-word greetings such as "what's up" are matched as a
    phrase.

    Returns:
        A randomly chosen element of ``GREET_RESPONSES`` when a greeting is
        found, otherwise ``None``.
    """
    # Strip punctuation only from the edges of each word so the apostrophe
    # inside "what's" survives.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]

    # Pad with spaces so every greeting, including multi-word ones, can be
    # matched on word boundaries with a plain substring test.
    padded = " " + " ".join(words) + " "

    if any(" " + greeting + " " in padded for greeting in GREET_INPUTS):
        return random.choice(GREET_RESPONSES)

    return None

## Response generation

In [None]:
from sklearn.metrics.pairwise import cosine_similarity #measures similarity between two vectors
from sklearn.feature_extraction.text import TfidfVectorizer #transform text into numbers and feed it to ML model

In [None]:
def response(user_response):
    """Return the corpus sentence most similar to the latest query.

    The query is read from the module-level ``sent_tokens`` list, not from
    the argument: the caller must append the query as the LAST element of
    ``sent_tokens`` before calling this function. ``user_response`` is kept
    in the signature for callers but is not used here.

    All sentences (corpus plus query) are turned into TF-IDF vectors, the
    query vector is compared with every vector by cosine similarity, and the
    best match other than the query itself is returned.

    Returns:
        The best-matching sentence from ``sent_tokens``, or the string
        "I can't understand" when nothing has a non-zero similarity or there
        is no corpus sentence to compare against.
    """
    # With fewer than two entries there is no corpus sentence besides the
    # query, so there is no second-best score to read below.
    if len(sent_tokens) < 2:
        return "I can't understand"

    vectorizer = TfidfVectorizer(tokenizer = LemNormalize, stop_words = 'english')
    # Fit once; the matrix has one row per entry of sent_tokens.
    tfidf = vectorizer.fit_transform(sent_tokens)

    # Similarity of the query (last row) against every row, as a 1-D array.
    scores = cosine_similarity(tfidf[-1], tfidf).flatten()

    # The query matches itself best, so the second-highest score belongs to
    # the best corpus candidate.
    best_idx = scores.argsort()[-2]

    if scores[best_idx] == 0:
        return "I can't understand"

    return sent_tokens[best_idx]

## Defining conversation start/end protocols

In [None]:
# Interactive chat loop: read a line, answer it, and stop on 'bye',
# 'thanks' or 'thank you'.
flag = True

print("BOT: My name is Daniel. Ask me about UFC rankings! If you wish to exit, type 'bye' at any time")

while flag:

    # Trim surrounding whitespace so inputs like "bye " are still recognised.
    user_response = input().strip().lower()

    if user_response == 'bye':
        flag = False
        print("BOT: Goodbye! Take care <3")

    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("BOT: You are welcome..")

    else:
        # Call greet() once and reuse its result for both the check and
        # the reply.
        greeting = greet(user_response)

        if greeting is not None:
            print("BOT: " + greeting)

        else:
            # NOTE(review): word_tokens / final_words are updated here but
            # response() only reads sent_tokens.
            word_tokens = word_tokens + nltk.word_tokenize(user_response)
            final_words = list(set(word_tokens))

            # response() expects the query to be the last sentence token.
            sent_tokens.append(user_response)
            try:
                reply = response(user_response)
            finally:
                # Always drop the query again, even if response() raises.
                # pop() removes exactly the entry appended above, whereas
                # remove() could delete an identical corpus sentence.
                sent_tokens.pop()

            print("BOT: " + reply)