# Loading the data

In [1]:
# Import the json
import json

# Load the restaurant records. The context manager closes the file handle
# deterministically, and the explicit UTF-8 encoding keeps non-ASCII text
# from being decoded with a platform-dependent default.
# Note: despite its name, `df` is the parsed JSON object (it is used through
# `.keys()` and key lookups), not a pandas DataFrame.
with open('restaurant_details.json', encoding='utf-8') as json_file:
    df = json.load(json_file)

# Top-level keys of the loaded JSON object.
indexes = list(df.keys())

# NLP Model

In [2]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Single source of truth for the checkpoint, so the tokenizer and the model
# are always loaded from the same pretrained weights.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_NAME)

# Smoke test: classify one sentence to confirm the model loads and predicts.
inputs = tokenizer("I hate paolo", return_tensors="pt")
with torch.no_grad():  # inference only, so skip gradient tracking
    logits = model(**inputs).logits

# Pick the highest-scoring class along the last axis and map it to its label.
predicted_class_id = logits.argmax(dim=-1).item()
model.config.id2label[predicted_class_id]

'NEGATIVE'

# Pipeline

In [3]:
from tqdm import tqdm
import pandas as pd

def _count_positive_reviews(texts):
    # Count how many of the given review texts the sentiment model labels 'POSITIVE'.
    positive = 0
    for text in texts:
        # truncation=True clips over-long reviews to the tokenizer's maximum
        # length instead of letting the model fail on an oversized sequence.
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits

        predicted_class_id = logits.argmax().item()
        if model.config.id2label[predicted_class_id] == 'POSITIVE':
            positive += 1
    return positive


def pipeline(df):
    """Build a per-restaurant summary table with a sentiment-derived rating.

    Parameters
    ----------
    df : mapping
        Maps a restaurant key to a record. The record keys read here are
        'title', 'location', 'category', 'users_review' and 'dining_rating'.
        'location'[0] must contain "destination=<x>,<y>", and 'users_review'
        must be a mapping whose values are the review strings.

    Returns
    -------
    pandas.DataFrame
        One row per restaurant with the columns:
        res_name          title up to its first comma
        res_loc           first coordinate rounded to 2 decimals (float)
        res_catagory      the record's 'category' value
        total_no_reviews  number of non-empty reviews
        positive_reviews  number of reviews classified 'POSITIVE'
        res_rating        the record's 'dining_rating' value
        actual_rating     positive share scaled to 0-5, rounded to 1 decimal
                          (NaN when a restaurant has no reviews)
        bag_of_words      the categories joined with spaces
        res_loc_x         first coordinate as float (unrounded)
        res_loc_y         second coordinate as float (unrounded)

    Notes
    -----
    Relies on the module-level `tokenizer` and `model` for classification.
    """
    columns = ['res_name', 'res_loc', 'res_catagory',
               'total_no_reviews', 'positive_reviews', 'res_rating']

    # Iterate over the keys of the argument itself rather than a global list,
    # so the function works for whatever mapping it is given.
    indexes = list(df.keys())

    rows = []
    for key in tqdm(indexes):
        record = df[key]
        # Build the cleaned review list once; it feeds both the total count
        # and the positive count, so the two can never disagree.
        review_texts = [x.strip() for x in record['users_review'].values() if len(x) >= 1]
        rows.append({
            # Keep only the part of the title before the first comma.
            'res_name': record['title'].split(',')[0],
            # Raw "<x>,<y>" text following "destination=" in the first location entry.
            'res_loc': record['location'][0].split("destination=")[1],
            'res_catagory': record['category'],
            'total_no_reviews': len(review_texts),
            'positive_reviews': _count_positive_reviews(review_texts),
            'res_rating': record['dining_rating'],
        })

    # Creating a dataframe with the information collected
    data_frame = pd.DataFrame(rows, columns=columns)

    # Creating a new rating through the analysis of the NLP model:
    # the share of positive reviews expressed on a 0-5 scale.
    positive_share = data_frame['positive_reviews'] / data_frame['total_no_reviews']
    data_frame['actual_rating'] = (((positive_share * 100) * 5) / 100).round(1)

    # Creating a new column with a concise description of the restaurant category
    data_frame['bag_of_words'] = [" ".join(x) for x in data_frame['res_catagory']]

    # Split the coordinates while 'res_loc' is still the raw "<x>,<y>" string;
    # only afterwards replace it with the rounded first coordinate. Doing the
    # rounding first would leave a float that cannot be split.
    raw_location = data_frame['res_loc']
    data_frame['res_loc_x'] = raw_location.apply(lambda x: float(x.split(",")[0]))
    data_frame['res_loc_y'] = raw_location.apply(lambda x: float(x.split(",")[1]))
    data_frame['res_loc'] = raw_location.apply(lambda x: round(float(x.split(",")[0]), 2))

    return data_frame

In [4]:
# Build the per-restaurant summary table. pipeline() runs the sentiment model
# on every review, so this cell is slow.
data_frame = pipeline(df)

 76%|███████▌  | 31/41 [04:14<01:40, 10.04s/it]

In [None]:
# Display the summary table.
# NOTE(review): the saved output below includes `Unnamed: 0` and `similarity`
# columns that pipeline() does not create (`similarity` is added by
# recommendation()) — presumably it was rendered from a later or reloaded
# state of the frame; confirm.
data_frame

Unnamed: 0,res_name,res_loc,res_catagory,total_no_reviews,positive_reviews,res_rating,actual_rating,bag_of_words,similarity
0,Royal Fast Food,21.19,"[Chinese, North Indian, Fast Food, Mughlai, Si...",117,28,3.8,1.2,Chinese North Indian Fast Food Mughlai Sichuan,0.622592
1,La Pino'z Pizza,21.17,"[Pizza, Fast Food, Beverages]",13,6,3.6,2.3,Pizza Fast Food Beverages,0.0
2,Shree Khodiyar Kathiyawadi Dhaba,21.19,"[Gujarati, North Indian]",89,48,3.9,2.7,Gujarati North Indian,0.678266
3,El Sueño Bakers & Snacks House,21.14,"[Fast Food, Pizza, Beverages]",3,3,-,5.0,Fast Food Pizza Beverages,0.0
4,Jay Jalaram Thali,21.19,"[Gujarati, North Indian, Chinese]",278,89,3.7,1.6,Gujarati North Indian Chinese,0.790233
5,The Burger Company,21.19,"[Burger, Pizza, Shake]",130,113,3.8,4.3,Burger Pizza Shake,0.0
6,Radhe Dhokla,21.19,"[North Indian, Chinese, Gujarati, Sichuan]",134,75,-,2.8,North Indian Chinese Gujarati Sichuan,1.0
7,Chandan Bhojnalaya,21.2,"[Gujarati, North Indian]",336,148,3.9,2.2,Gujarati North Indian,0.678266
8,Shree Kathiyawadi Khadki,21.19,"[Gujarati, North Indian]",256,141,3.5,2.8,Gujarati North Indian,0.678266
9,Burger King,21.14,"[Burger, Fast Food, Beverages, Desserts, Finge...",264,149,4.1,2.8,Burger Fast Food Beverages Desserts Finger Food,0.0


# Recommendation based on Sentiment Analysis

In [None]:
def analyze_recommendation(data_frame, location, category, rating):
    """Return restaurants matching a location, category and minimum rating.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must provide the columns 'res_loc', 'bag_of_words' and 'actual_rating'.
    location : sequence of float
        Only the first element is used; it is rounded to 2 decimals and
        compared for equality with 'res_loc'.
    category : str
        Pattern looked up in 'bag_of_words' with `Series.str.contains`, so it
        is interpreted as a regular expression (the pandas default).
    rating : float
        Minimum 'actual_rating' (inclusive).

    Returns
    -------
    pandas.DataFrame
        The matching rows sorted by 'actual_rating', highest first. The input
        frame is not modified.
    """
    # 'res_loc' is compared against the rounded first coordinate, not the
    # whole `location` pair.
    location_x = round(location[0], 2)

    # Filtering the data
    same_area = data_frame['res_loc'] == location_x
    has_category = data_frame['bag_of_words'].str.contains(category)
    good_enough = data_frame['actual_rating'] >= rating
    matches = data_frame[same_area & has_category & good_enough]

    # Sorting the data
    return matches.sort_values(by='actual_rating', ascending=False)

Unnamed: 0,res_name,res_loc,res_catagory,total_no_reviews,positive_reviews,res_rating,actual_rating,bag_of_words,similarity
0,Royal Fast Food,"21.1891766227,72.8182196617","[Chinese, North Indian, Fast Food, Mughlai, Si...",117,28,3.8,1.2,Chinese North Indian Fast Food Mughlai Sichuan,0.622592
1,La Pino'z Pizza,"21.1660202000,72.8380596000","[Pizza, Fast Food, Beverages]",13,6,3.6,2.3,Pizza Fast Food Beverages,0.0
2,Shree Khodiyar Kathiyawadi Dhaba,"21.1900747451,72.7778721973","[Gujarati, North Indian]",89,48,3.9,2.7,Gujarati North Indian,0.678266
3,El Sueño Bakers & Snacks House,"21.1380005867,72.7743293345","[Fast Food, Pizza, Beverages]",3,3,-,5.0,Fast Food Pizza Beverages,0.0
4,Jay Jalaram Thali,"21.1934825000,72.8225117000","[Gujarati, North Indian, Chinese]",278,89,3.7,1.6,Gujarati North Indian Chinese,0.790233
5,The Burger Company,"21.1923730000,72.7871960000","[Burger, Pizza, Shake]",130,113,3.8,4.3,Burger Pizza Shake,0.0
6,Radhe Dhokla,"21.1857930000,72.8332060000","[North Indian, Chinese, Gujarati, Sichuan]",134,75,-,2.8,North Indian Chinese Gujarati Sichuan,1.0
7,Chandan Bhojnalaya,"21.1979237835,72.8238818049","[Gujarati, North Indian]",336,148,3.9,2.2,Gujarati North Indian,0.678266
8,Shree Kathiyawadi Khadki,"21.1909528553,72.7880441397","[Gujarati, North Indian]",256,141,3.5,2.8,Gujarati North Indian,0.678266
9,Burger King,"21.1442138290,72.7964709699","[Burger, Fast Food, Beverages, Desserts, Finge...",264,149,4.1,2.8,Burger Fast Food Beverages Desserts Finger Food,0.0


# Recommendation based on similarity algorithm

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommendation(title, total_result=5, threshold=0.5):
    """Print the restaurants whose category description is most similar to `title`.

    Similarity is the cosine similarity between TF-IDF vectors of the
    'bag_of_words' column of the module-level `data_frame`.

    Parameters
    ----------
    title : str
        Value of 'res_name' to find neighbours for. If several rows share the
        name, the first one is used.
    total_result : int, default 5
        Maximum number of restaurants to print.
    threshold : float, default 0.5
        Minimum similarity a restaurant needs in order to be listed.

    Raises
    ------
    IndexError
        If no row of `data_frame` has 'res_name' equal to `title`.

    Side effects
    ------------
    Writes the similarity of every row to the queried restaurant into
    `data_frame['similarity']` and prints the result list; returns None.
    """
    tfid = TfidfVectorizer()
    tfid_matrix = tfid.fit_transform(data_frame['bag_of_words'])

    # Locate the queried restaurant by row position (not index label), because
    # the TF-IDF matrix rows are positional.
    is_query = (data_frame['res_name'] == title).to_numpy()
    if not is_query.any():
        raise IndexError('no restaurant named {!r} in data_frame'.format(title))
    position = int(is_query.argmax())

    # Only the similarities of the queried row are needed, so compare that one
    # row against all rows instead of building the full pairwise matrix.
    data_frame['similarity'] = cosine_similarity(tfid_matrix[position], tfid_matrix).ravel()

    # Exclude the queried restaurant explicitly. Skipping "the first row after
    # sorting" is not reliable: other restaurants with an identical description
    # tie at similarity 1.0 and may sort ahead of it.
    other_positions = [i for i in range(len(data_frame)) if i != position]
    others = data_frame.iloc[other_positions]

    similar = (
        others[others['similarity'] >= threshold]
        .sort_values(by='similarity', ascending=False, kind='stable')
        .head(total_result)
    )
    names = similar['res_name']

    print('Similar restraunt name(s) list:')
    if len(names) != 0:
        for i, name in enumerate(names):
            print('{}. {}'.format(i+1, name))
        print()
    else:
        print('-\n')

In [None]:
recommendation('Radhe Dhokla')

Similar restraunt name(s) list:
1. Jay Jalaram Thali
2. Trishiv Chinese Corner
3. Tulsi Restaurant
4. Kasmiri Restaurant
5. Harikrushna Restaurant

