# ChatBot to Recommend Movies and Restaurants

In [1]:
import csv 
import string
import nltk
import random
import warnings
import re
import numpy as np

# <H1>Movie Recommendation ChatBot</H1>

In [2]:
# Accumulate, per movie id (CSV column 1), the running rating total (CSV column 2)
# and the number of ratings seen: movie_ratings[movie_id] == [rating_sum, rating_count].
with open('ratings.csv', mode='r') as file:
    csvFile = csv.reader(file)
    movie_ratings = {}
    for line in csvFile:
        rated_movie_id = int(line[1])
        rating_value = float(line[2])
        if rated_movie_id in movie_ratings:
            running_total, rating_count = movie_ratings[rated_movie_id]
            movie_ratings[rated_movie_id] = [rating_value + running_total, rating_count + 1]
        else:
            movie_ratings[rated_movie_id] = [rating_value, 1]

In [3]:
# Build the movie lookup tables from movies.csv (column 0 = id, 1 = title, 2 = genres):
#   movie_name[id]        -> original title string
#   movie_genres[id]      -> lower-cased genres with punctuation replaced by spaces
#   movie_name_token[i]   -> "<id> <cleaned title>"   (input for the title TF-IDF index)
#   sent_token_movie[i]   -> "<id> <cleaned genres>"  (input for the genre TF-IDF index)
with open('movies.csv', mode='r') as file:
    csvFile = csv.reader(file)
    movie_name = {}
    sent_token_movie = []
    movie_name_token = []
    movie_genres = {}
    # Maps every punctuation character to a single space.
    punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    for line in csvFile:
        raw_id, title, genres = line[0], line[1], line[2]
        cleaned_genres = genres.lower().translate(punct_to_space)
        cleaned_title = title.lower().translate(punct_to_space)
        sent_token_movie.append(str(raw_id) + " " + cleaned_genres)
        movie_name_token.append(str(raw_id) + " " + cleaned_title)
        numeric_id = int(raw_id)
        movie_name[numeric_id] = title
        movie_genres[numeric_id] = cleaned_genres

In [4]:
# Shared WordNet lemmatizer used by the tokenizer helpers below.
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Return a list holding `lemmer.lemmatize(token)` for every token in `tokens`."""
    lemmatized = []
    for token in tokens:
        lemmatized.append(lemmer.lemmatize(token))
    return lemmatized


def LemNormalize(text):
    """Lower-case `text`, split it with `nltk.word_tokenize`, and lemmatize each token."""
    lowered = text.lower()
    word_tokens = nltk.word_tokenize(lowered)
    return LemTokens(word_tokens)

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

In [6]:
def sortFirst(val):
    """Sort-key helper: return the item stored at index 0 of `val`."""
    first_item = val[0]
    return first_item

In [7]:
# TF-IDF index used to locate the movie a user names. Despite the "genre" in the
# variable names, it is fitted on `movie_name_token`, i.e. the "<id> <cleaned title>"
# strings, tokenized and lemmatized by LemNormalize.
TfidfVec_search_movie_genre = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
tfidf_search_movie_genre = TfidfVec_search_movie_genre.fit_transform(movie_name_token)



In [8]:
def search_movie_genre(user_query):
    """Find the movie whose title best matches `user_query`.

    The query is lower-cased, punctuation is replaced by spaces, and the result is
    compared (cosine similarity over TF-IDF vectors) with every entry of
    `movie_name_token`.

    Returns:
        (genres, movie_id) of the best-scoring entry, or ("", 0) when the best
        similarity score is exactly 0.
    """
    punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    cleaned_query = user_query.lower().translate(punct_to_space)
    query_vector = TfidfVec_search_movie_genre.transform([cleaned_query])
    similarities = cosine_similarity(query_vector, tfidf_search_movie_genre)
    # The last position of the ascending argsort is the highest-scoring entry.
    best_idx = similarities.argsort()[0][-1]
    best_score = similarities[0][best_idx]
    if best_score != 0:
        # Each entry starts with the movie id, followed by a space.
        matched_id = int(movie_name_token[best_idx].split(" ")[0])
        return movie_genres[matched_id], matched_id
    return "", 0

In [9]:
# TF-IDF index over `sent_token_movie`, the "<id> <cleaned genres>" strings; it is
# queried with a movie's genre string to rank other movies by genre similarity.
TfidfVec_search_similar_genre = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
tfidf_search_similar_genre = TfidfVec_search_similar_genre.fit_transform(sent_token_movie)

In [10]:
def find_similar_genre(user_query):
    """Score every movie by genre similarity to the movie named in `user_query`.

    Returns:
        (scored, movie_id) where `scored` is a list of (similarity, position) tuples
        sorted by similarity, highest first, and `position` indexes `sent_token_movie`.
        Returns ([], 0) when no title matches the query, or when the matched title
        (the text before its first "(", cleaned, minus its last character) does not
        occur inside the cleaned query.
    """
    query_tags, query_movie_id = search_movie_genre(user_query)
    if query_tags == "":
        return [], 0

    punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    # Presumably the dropped last character is the space before a "(year)" suffix.
    bare_title = movie_name[query_movie_id].split("(")[0].lower().translate(punct_to_space)[:-1]
    cleaned_query = user_query.lower().translate(punct_to_space)
    if bare_title not in cleaned_query:
        return [], 0

    genre_vector = TfidfVec_search_similar_genre.transform([query_tags.lower().translate(punct_to_space)])
    similarities = cosine_similarity(genre_vector, tfidf_search_similar_genre)
    scored = [(score, position) for position, score in enumerate(similarities[0])]
    scored.sort(key=sortFirst, reverse=True)
    return scored, query_movie_id

In [11]:
def sort_found_genre(user_query):
    """Rank movies similar to the one named in `user_query` and print the top five.

    Candidates come from `find_similar_genre`. The queried movie itself and movies
    without any rating are skipped. The remaining movies are ordered by genre
    similarity, then by mean rating (both descending).

    If the query contains a number between 0 and 5, movies whose mean rating is
    below it are dropped. Note that the *first* number anywhere in the query is
    used, so digits that belong to a movie title are read as the rating.

    Finally, only movies sharing at least one whole genre word with the queried
    movie are kept.

    Returns:
        (details, query_movie_id) where `details` is a list of
        (similarity, mean_rating, title, genres) tuples, or ([], 0) when the query
        did not identify a known movie.
    """
    similarity_with_index, query_movie_id = find_similar_genre(user_query)
    if query_movie_id == 0:
        return [], 0

    all_sorted_details = []
    for similarity, position in similarity_with_index:
        movie_id = int(sent_token_movie[position].split(" ")[0])
        if movie_id == query_movie_id or movie_id not in movie_ratings:
            continue
        rating_sum, rating_count = movie_ratings[movie_id]
        movie_rating = rating_sum / float(rating_count)
        all_sorted_details.append((similarity, movie_rating, movie_name[movie_id], movie_genres[movie_id]))
    all_sorted_details.sort(key=lambda x: (-x[0], -x[1]))

    # Optional minimum-rating filter; a single pass replaces repeated list.remove calls.
    rating_in_query = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", user_query)
    if rating_in_query:
        minimum_rating = float(rating_in_query[0])
        if 0 <= minimum_rating <= 5:
            all_sorted_details = [
                detail for detail in all_sorted_details if detail[1] >= minimum_rating
            ]

    # Compare whole genre words. Substring checks would let "fi" (from "sci fi")
    # match "film", and an empty token would match every movie.
    query_genre_tags = set(movie_genres[query_movie_id].split())
    all_sorted_details = [
        detail for detail in all_sorted_details
        if not query_genre_tags.isdisjoint(detail[3].split())
    ]

    # Only announce results when there are some; the caller reports the empty case.
    if all_sorted_details:
        print("ChatBot: I found these similar movies:")
        for x in range(0, min(5, len(all_sorted_details))):
            print(x + 1, " ", all_sorted_details[x][2])
    return all_sorted_details, query_movie_id

In [12]:
def chat_bot_movie(user_input):
    """Handle a movie-recommendation request.

    Delegates to `sort_found_genre`, which prints any recommendations it finds, and
    then prints a fallback message when the movie was not recognised (returned id 0)
    or when no similar movie is left after filtering.
    """
    all_sorted_details, query_movie_id = sort_found_genre(user_input)
    if query_movie_id == 0:
        # Speaker prefix corrected from "ChartBot" to match every other message.
        print("ChatBot: There is no such movie I know.")
    elif len(all_sorted_details) == 0:
        print("ChatBot: I couldn't find any similar movie.")

In [13]:
# Top-level intents for routing a user message. Each intent has a "tag", example
# "patterns" (used as TF-IDF training sentences), and canned "responses".
# The "movie_chat_bot" and "restaurant_chat_pot" tags have no canned responses:
# chat_bot() compares these tag strings verbatim and dispatches to the matching
# sub-bot, so the spelling of the tags must stay in sync with chat_bot().
initial_intent_data = {"intents": [
    {"tag": "greeting",
     "patterns": ["Hi", "Hey", "Is anyone there?", "Hello", "Hay"],
     "responses": ["Hello", "Hi", "Hi there"]
    },
    {"tag": "movie_chat_bot",
     "patterns": ["find me a movie similar to", "what are the movies I can watch if I have seen", "I have seen find me something similar", "I had seen , tell me what to watch"],
     "responses": []
    },
    # The first patterns are cuisine keywords so that a message naming a cuisine routes here.
    {"tag": "restaurant_chat_pot",
     "patterns": ["french dutch european vegetarian friendly", "gluten free options mediterranean international vegan", "contemporary asian indonesian japanese seafood", "fast food american bar central pub", "cafe british healthy indian tibetan nepali", "italian barbecue steakhouse latin argentinean", "south grill delicatessen pizza thai soups street diner","lebanese middle eastern israeli new zealand chinese belgian", "sushi spanish korean turkish vietnamese irish german", "halal gastropub swiss scandinavian fusion", "arabic balti moroccan tunisian persian wine portuguese", "mexican australian greek caribbean african ethiopian", "brew southwestern singaporean malaysian minority", "peruvian taiwanese hawaiian jamaican", "kosher brazilian pakistani swedish norwegian", "afghani colombian ecuadorean austrian danish romanian","cajun creole georgian egyptian cuban russian", "czech armenian venezuelan bangladeshi scottish azerbaijani", "hungarian filipino croatian polish yunnan cambodian", "chilean mongolian uzbek xinjiang albanian ukrainian", "sri lankan caucasian latvian salvadoran guatemalan", "native canadian slovenian polynesian puerto", "rican welsh burmese fujian", "what are the restaurant I can go to if I like", "find me a restaurant with their speciality in ", "where can I go out for lunch dinner breakfast brunch"],
     "responses": []
    },
    # chat_bot() ends its loop after answering a message matched to "goodbye".
    {"tag": "goodbye",
     "patterns": ["Bye", "See you later", "Goodbye"],
     "responses": ["See you later", "Have a nice day", "Bye! Come back again"]
    },
    {"tag": "thanks",
     "patterns": ["Thanks", "Thank you", "That's helpful", "Thanks for the help"],
     "responses": ["Happy to help!", "Any time!", "My pleasure", "You're most welcome!"]
    },
]}

# Follow-up prompts for the restaurant bot. The entries are accessed by position:
#   index 0 ("tags_not_found"): "patterns" lists the known cuisine/feature words that
#       find_tags_present() looks for; "responses" are the prompts asking for a cuisine.
#   index 1 ("city_not_found"): "patterns" lists the supported city names that
#       find_city_in_query() looks for; "responses" are the prompts asking for a city.
# NOTE(review): "perticular" in the first responses list is a typo in user-facing text.
restaurant_intent_data = {"intents": [
    {"tag": "tags_not_found",
     "patterns": ['french', 'dutch', 'european', 'vegetarian', 'friendly', 'gluten', 'free', 'options', 'mediterranean', 'international', 'vegan', 'contemporary', 'asian', 'indonesian', 'japanese', 'seafood', 'fast', 'food', 'american', 'bar', 'central', 'pub', 'cafe', 'british', 'healthy', 'indian', 'tibetan', 'nepali', 'italian', 'barbecue', 'steakhouse', 'latin', 'argentinean', 'south', 'grill', 'delicatessen', 'pizza', 'thai', 'soups', 'street', 'diner', 'lebanese', 'middle', 'eastern', 'israeli', 'new', 'zealand', 'chinese', 'belgian', 'sushi', 'spanish', 'korean', 'turkish', 'vietnamese', 'irish', 'german', 'halal', 'gastropub', 'swiss', 'scandinavian', 'fusion', 'arabic', 'balti', 'moroccan', 'tunisian', 'persian', 'wine', 'portuguese', 'mexican', 'australian', 'greek', 'caribbean', 'african', 'ethiopian', 'brew', 'southwestern', 'singaporean', 'malaysian', 'minority', 'peruvian', 'taiwanese', 'hawaiian', 'jamaican', 'kosher', 'brazilian', 'pakistani', 'swedish', 'norwegian', 'afghani', 'colombian', 'ecuadorean', 'austrian', 'danish', 'romanian', 'cajun', 'creole', 'georgian', 'egyptian', 'cuban', 'russian', 'czech', 'armenian', 'venezuelan', 'bangladeshi', 'scottish', 'azerbaijani', 'hungarian', 'filipino', 'croatian', 'polish', 'yunnan', 'cambodian', 'chilean', 'mongolian', 'uzbek', 'xinjiang', 'albanian', 'ukrainian', 'sri', 'lankan', 'caucasian', 'latvian', 'salvadoran', 'guatemalan', 'native', 'canadian', 'slovenian', 'polynesian', 'puerto', 'rican', 'welsh', 'burmese', 'fujian'],
     "responses": ["What kind of speciality you want restaurants to have?", "What kind of food would you like to have?", "I can search any perticular type of restaurants"]
    },
    {"tag": "city_not_found",
     "patterns": ['amsterdam', 'athens', 'barcelona', 'berlin', 'bratislava', 'brussels', 'budapest', 'copenhagen', 'dublin', 'edinburgh', 'geneva', 'hamburg', 'helsinki', 'krakow', 'lisbon', 'ljubljana', 'london', 'luxembourg', 'lyon', 'madrid', 'milan', 'munich', 'oporto', 'oslo', 'paris', 'prague', 'rome', 'stockholm', 'vienna', 'warsaw', 'zurich'],
     "responses": ["Tell me the city you want the restaurant to be in", "I need the city info in order to search the restaurants", "In which city you want the restaurants to be found"]
    },
]}

# <H1>Restaurant Recommendation ChatBot</H1>

In [14]:
# Build the restaurant search corpus from the CSV.
#   sent_token[i]  -> "<i> <column 3> <column 4> <rating>" for the i-th kept row, with
#                     punctuation deleted from the column 3/4 text
#   name_rating[i] -> (rating as float, name) for the same kept row, so the leading
#                     counter in sent_token[i] indexes directly into name_rating
#   city_tags      -> unique lower-cased column-3 values (recorded even for rows that
#                     are dropped later because of an empty column)
# Columns are numbered from 1: 2 = name, 3 = city, 6 = rating; 1 and 5 are ignored.
with open('TA_restaurants_curated.csv', mode='r') as file:
    csvFile = csv.reader(file)
    sent_token = []
    name_rating = []
    city_tags = []
    line_counter = 0
    for lines in csvFile:
        sentence = ""
        col_num = 0
        rating = ""
        name = ""
        for col in lines:
            # Any empty column among the first six discards the whole row.
            if col == "":
                sentence = ""
                break
            col_num = col_num + 1
            if col_num == 2:
                name = col
                continue
            if col_num == 3:
                if col.lower() not in city_tags:
                    city_tags.append(col.lower())
            if col_num == 5 or col_num == 1:
                continue
            # The rating is the last column read; everything after it is ignored.
            if col_num == 6:
                rating = col
                break
            # Only columns 3 and 4 reach this point; skip text already contained in the sentence.
            if str(col) not in sentence:
                sentence = sentence + str(col) + " "
        if sentence != "":
            # The third maketrans argument deletes punctuation (it is not replaced by spaces).
            sentence = sentence.translate(str.maketrans(' ', ' ', string.punctuation))
            sentence = str(line_counter) + " " + sentence
            line_counter = line_counter + 1
            name_rating.append((float(rating), name))
            # `sentence` still ends with a space, so the rating becomes its own last word.
            sent_token.append(sentence + rating)
            

In [15]:
# TF-IDF index over the restaurant sentences in `sent_token`; queried by response().
TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
tfidf = TfidfVec.fit_transform(sent_token)

In [16]:
def response(user_query):
    """Score every restaurant sentence against `user_query`.

    Returns:
        A list of (similarity, position) tuples, one per entry of `sent_token`,
        sorted by cosine similarity with the highest score first.
    """
    query_vector = TfidfVec.transform([user_query])
    similarities = cosine_similarity(query_vector, tfidf)
    ranked = [(score, position) for position, score in enumerate(similarities[0])]
    ranked.sort(key=sortFirst, reverse=True)
    return ranked

In [17]:
def seperate_tags():
    """Collect the distinct lower-cased words of all restaurant sentences.

    The first word (row counter) and last word (rating) of each `sent_token` entry
    are excluded. Words are returned in order of first appearance.
    """
    # A dict keeps insertion order, so its keys give the de-duplicated word list.
    first_seen = {}
    for sentence in sent_token:
        for word in sentence.lower().split(" ")[1:-1]:
            first_seen.setdefault(word, None)
    return list(first_seen)
tags = seperate_tags()

In [18]:
def query(user_query):
    """Find restaurants whose sentence contains every known tag named in `user_query`.

    The query is lower-cased and split into words (punctuation treated as spaces);
    the words that appear in the global `tags` list are the tags to look for.
    Restaurant sentences are visited in order of decreasing TF-IDF similarity and
    the scan stops at the first sentence that contains none of the tags.

    Tags are matched against whole words of the sentence. A substring test would
    let "bar" match "barcelona" or "barbecue" and "asian" match "caucasian".

    Returns:
        (indices, ratings) where `indices` is the sorted list of row counters of the
        matching restaurants (usable as indexes into `name_rating`) and `ratings` is
        the list of number strings found in the query by the rating regex.
    """
    punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    # split() without arguments also discards the empty strings left by double spaces.
    words_in_query = user_query.lower().translate(punct_to_space).split()
    rating_in_query = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", user_query)
    tags_in_query = [word for word in words_in_query if word in tags]

    found_rest_index = []
    for _, position in response(user_query):
        sent_words = sent_token[position].lower().split(" ")
        sent_word_set = set(sent_words)
        tag_found_count = sum(1 for tag_search in tags_in_query if tag_search in sent_word_set)
        if tag_found_count == 0:
            # Sentences are ranked by similarity; nothing useful is expected past this point.
            break
        if tag_found_count == len(tags_in_query):
            # The first word of every sentence is its row counter.
            found_rest_index.append(int(sent_words[0]))
    found_rest_index.sort()
    return found_rest_index, rating_in_query

In [19]:
def find_best_rests(user_input):
    """Print the best-rated restaurants matching `user_input`.

    Matching restaurants come from `query`. They are ordered by rating, highest
    first, and cut off at the first one rated below the number found in the query
    (0 when the query contains no number). The first five names are printed; if
    there are more, the user is asked on stdin whether to list the rest.
    """
    possible_rest_index, rating_in_query = query(user_input)
    if len(rating_in_query) > 0:
        rating_in_query = float(rating_in_query[0])
    else:
        rating_in_query = 0

    candidates = [name_rating[rest_index] for rest_index in possible_rest_index]
    candidates.sort(key=sortFirst, reverse=True)
    if not candidates:
        print("ChatBot: Sorry, cannot find any restaurant of that kind.")
        return

    matching_names = []
    for rest_rating, rest_name in candidates:
        # Candidates are sorted by rating, so stop at the first one below the threshold.
        if rest_rating < rating_in_query:
            break
        matching_names.append(rest_name)
    if not matching_names:
        print("ChatBot: Sorry, cannot find a restaurant with that rating")
        return

    total_found = len(matching_names)
    print("ChatBot: Found " + str(total_found) + " restaurants.")
    for position, rest_name in enumerate(matching_names[:5]):
        print(position + 1, " ", rest_name)
    if total_found > 5:
        print("ChatBot: Do you want to see all?")
        print("User:", end=" ")
        answer = input().lower()
        if answer == "yes":
            for position in range(5, total_found):
                print(position + 1, " ", matching_names[position])

In [20]:
def _contains_whole_word(text, words):
    """Return True if any entry of `words` occurs in `text` as a whole word."""
    return any(
        re.search(r"\b" + re.escape(word) + r"\b", text) is not None
        for word in words
    )


def find_city_in_query(query, cities=None):
    """Tell whether `query` names one of the supported cities.

    Args:
        query: The (lower-cased) user text to inspect.
        cities: Optional iterable of city names to look for. Defaults to the
            "patterns" of the second entry of `restaurant_intent_data`.

    Returns:
        True if a city occurs as a whole word, so "rome" is not found inside
        "chrome"; False otherwise.
    """
    if cities is None:
        cities = restaurant_intent_data['intents'][1]['patterns']
    return _contains_whole_word(query, cities)


def find_tags_present(query, known_tags=None):
    """Tell whether `query` names at least one known cuisine/feature word.

    Args:
        query: The (lower-cased) user text to inspect.
        known_tags: Optional iterable of tag words to look for. Defaults to the
            "patterns" of the first entry of `restaurant_intent_data`.

    Returns:
        True if a tag occurs as a whole word, so "bar" is not found inside
        "barcelona"; False otherwise.
    """
    if known_tags is None:
        known_tags = restaurant_intent_data['intents'][0]['patterns']
    return _contains_whole_word(query, known_tags)

In [21]:
def chat_bot_restaurant(user_input):
    """Run one restaurant-recommendation exchange.

    If no supported city is found in `user_input`, the user is asked once for a
    city and the answer is appended to the input; the exchange is abandoned if a
    city is still missing. The restaurants for the input are then printed. When
    the input named no cuisine/feature tag, the user is asked for one and the
    search is repeated with the extended input, unless the last word of the
    extended input is "no".
    """
    if not find_city_in_query(user_input):
        print("ChatBot:", np.random.choice(restaurant_intent_data['intents'][1]['responses']))
        print("User:", end=" ")
        city_reply = input().lower()
        user_input = user_input + " " + city_reply
        if not find_city_in_query(user_input):
            print("ChatBot: Sorry, I cannot search for that city")
            return

    # Decide before searching: the follow-up question depends on the input as it is now.
    has_tags = find_tags_present(user_input)
    find_best_rests(user_input)
    if has_tags:
        return

    print("ChatBot:", np.random.choice(restaurant_intent_data['intents'][0]['responses']) + " Write No if you have already found your restaurant.")
    print("User:", end=" ")
    user_input = user_input + " " + input().lower()
    if user_input.split(" ")[-1] == "no":
        return
    find_best_rests(user_input)

In [22]:
# Flattened view of `initial_intent_data`, filled by initial_intent_document():
#   initial_training_sentences[i] / initial_training_labels[i] -> pattern and its tag
#   initial_labels    -> distinct tags in order of appearance
#   initial_responses -> one responses list per intent, in intent order
initial_training_sentences = []
initial_training_labels = []
initial_labels = []
initial_responses = []
def initial_intent_document():
    """Populate the module-level training lists from `initial_intent_data`."""
    for intent in initial_intent_data['intents']:
        intent_tag = intent['tag']
        intent_patterns = intent['patterns']
        initial_training_sentences.extend(intent_patterns)
        initial_training_labels.extend([intent_tag] * len(intent_patterns))
        initial_responses.append(intent['responses'])
        if intent_tag not in initial_labels:
            initial_labels.append(intent_tag)
initial_intent_document()

In [23]:
# TF-IDF index over the intent example sentences; row i corresponds to
# initial_training_sentences[i] and therefore to initial_training_labels[i].
TfidfVec_initial_intent = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
tfidf_initial_intent = TfidfVec_initial_intent.fit_transform(initial_training_sentences)

# Intent Matching

In [24]:
def chat_bot():
    """Run the interactive chat loop until a "goodbye" intent is matched.

    Each user line is lower-cased, has its punctuation replaced by spaces, and is
    matched against the intent example sentences by TF-IDF cosine similarity.
    Movie and restaurant intents are handed to their sub-bots; every other intent
    is answered with a random canned response. A best score of 0 yields a
    "could not understand" message.
    """
    print("Start your conversation with the chat bot")
    punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    while True:
        print("User:", end=" ")
        # NOTE(review): "." is punctuation too, so "4.5" reaches the sub-bots as "4 5".
        user_query = input().lower().translate(punct_to_space)
        query_vector = TfidfVec_initial_intent.transform([user_query])
        similarities = cosine_similarity(query_vector, tfidf_initial_intent)
        # The last position of the ascending argsort is the best-matching sentence.
        best_idx = similarities.argsort()[0][-1]
        if similarities[0][best_idx] == 0:
            print("ChatBot: Sorry, could not understand you please try again.")
            continue

        matched_label = initial_training_labels[best_idx]
        if matched_label == "movie_chat_bot":
            chat_bot_movie(user_query)
        elif matched_label == "restaurant_chat_pot":
            chat_bot_restaurant(user_query)
        else:
            print("ChatBot:", np.random.choice(initial_responses[initial_labels.index(matched_label)]))
            if matched_label == "goodbye":
                break

In [None]:
# Start the interactive session; this blocks on input() until a "goodbye" intent is matched.
chat_bot()

Start your conversation with the chat bot
User: Hi
ChatBot: Hi there
User: find me a Vegan restaurant in london
ChatBot: Found 2784 restaurants.
1   R & H cafe gallery
2   Liman Restaurant
3   Holy Smoke
4   The Clink Restaurant
5   Bar 61 Restaurant
ChatBot: Do you want to see all?
User: find me a Vegan restaurant in london with rating 5
User: find me a Vegan restaurant in london with rating 5
ChatBot: Found 191 restaurants.
1   R & H cafe gallery
2   Liman Restaurant
3   Holy Smoke
4   The Clink Restaurant
5   Bar 61 Restaurant
ChatBot: Do you want to see all?
User: yes
6   Taste Of Nawab
7   Pizza Union Aldgate
8   Core by Clare Smyth
9   taNgia
10   Gastronhome
11   Zeret Kitchen
12   The Five Fields
13   Peninsula Restaurant
14   Kibele Restaurant & Bar
15   The Calabash of Culture
16   Trattoria Raffaele
17   Lorne Restaurant
18   Daphne Restaurant
19   Shahi Pakwaan
20   Lentil
21   Pizzetta Pizza
22   Zala grill
23   Rock Star Sushi Bar
24   The Lounge Cafe
25   Saka Maka cafe
