# Loading the data

In [1]:
# Import the json
import json

df = json.load(open('restaurant_details.json'))
indexes = list(df.keys())

In [2]:
df

{'https://www.zomato.com/surat/royal-fast-food-new-textile-market/order': {'imgs': ['https://b.zmtcdn.com/data/pictures/chains/3/18974473/f0836fde6e4064e73c632c5f638c1c25.jpg?output-format=webp&fit=around|771.75:416.25&crop=771.75:416.25;*,*',
   'https://b.zmtcdn.com/data/reviews_photos/b5e/a9820c510131a1fd83186075ba8e5b5e_1602186524.jpg?output-format=webp&fit=around|300:273&crop=300:273;*,*',
   'https://b.zmtcdn.com/data/reviews_photos/3f1/d45be4c0c14ab45f7d21bf67a28a93f1_1577821996.jpg?output-format=webp&fit=around|300:273&crop=300:273;*,*'],
  'title': 'Royal Fast Food',
  'dining_rating': '3.8',
  'dining_total_review': '132',
  'delivery_rating': '3.4',
  'delivery_total_review': '8,292',
  'category': ['Chinese', 'North Indian', 'Fast Food', 'Mughlai', 'Sichuan'],
  'address': 'New Textile Market, Surat',
  'time': '8am – 9pm (Today)',
  'location': ['https://www.google.com/maps/dir/?api=1&destination=21.1891766227,72.8182196617'],
  'users_review': {'Lizarani': '',
   'ILYAS S

# NLP Model

In [3]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")

inputs = tokenizer("I hate paolo", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]

  from .autonotebook import tqdm as notebook_tqdm


'NEGATIVE'

# Pipeline

In [4]:
from tqdm import tqdm
import pandas as pd

def pipeline(df):
    
    # Getting the amount of reviews
    total_reviews = []
    for k in range(len(indexes)):
        data = len([x.strip() for x in list(df[indexes[k]]['users_review'].values()) if len(x) >= 1])
        total_reviews.append(data)
    
    # Getting the positive reviews
    res_name, res_loc, res_catagory, reviews, res_rating = [], [], [], [], []
    for k in tqdm(range(len(indexes))):
        res_name.append(df[indexes[k]]['title']+", " + df[indexes[k]]['address'])
        res_loc.append(df[indexes[k]]['location'][0].split("destination=")[1])
        res_catagory.append(df[indexes[k]]['category'])
        data = [x.strip() for x in list(df[indexes[k]]['users_review'].values()) if len(x) >= 1]
        counter = 0
        for i in data:
            inputs = tokenizer(i, return_tensors="pt")
            with torch.no_grad():
                logits = model(**inputs).logits

            predicted_class_id = logits.argmax().item()
            score = model.config.id2label[predicted_class_id]
            if score == 'POSITIVE':
                counter += 1
        reviews.append(counter)
        res_rating.append(df[indexes[k]]['dining_rating'])
        
    # Creating a dataframe with information collected
    #print(len(res_name), len(res_loc), len(res_catagory), len(total_reviews), len(reviews), len(res_rating))
    data_frame = pd.DataFrame({'res_name': res_name, 'res_loc': res_loc, 'res_catagory': res_catagory, 'total_no_reviews': total_reviews, 'positive_reviews': reviews, 'res_rating': res_rating})
    
    # Creating a new rating through the analysis of the NLP model
    data_frame['actual_rating'] = round(((( data_frame['positive_reviews'] / data_frame['total_no_reviews'] ) * 100) * 5 )/100,1)
    
    # Creating a new column with a consice description of the restaurant category
    temp =  [" ".join(x) for x in data_frame['res_catagory'] ]
    data_frame['bag_of_words'] = temp
    data_frame['res_name'] = data_frame['res_name'].apply(lambda x: x.split(',')[0])

    # Creating a column for time
    data_frame['time'] = [df[x]['time'] for x in indexes]

    # Creating a column for the location link
    data_frame['location_link'] = [df[x]['location'] for x in indexes]

    # Creating a column for the zomato link
    def get_url(restaurant_name):
        for url, info in df.items():
            if info['title'] == restaurant_name:
                return url
    data_frame['zomato_link'] = data_frame['res_name'].apply(get_url)

    # Creating a column for the phone number
    data_frame['delivery_rating'] = [df[x]['delivery_rating'] for x in indexes]

    # Creating a column for the first image
    def get_images(url):
        return df[url]['imgs']
    data_frame['imagess'] = data_frame['zomato_link'].apply(get_images)

    # Creating a column for the first image
    data_frame['first_image'] = data_frame['imagess'][0][0]

    # Creating a column for the second image
    data_frame['second_image'] = data_frame['imagess'][0][1]
    
    return data_frame

In [5]:
data_frame = pipeline(df)

100%|██████████| 41/41 [05:08<00:00,  7.52s/it]


In [5]:
data_frame

Unnamed: 0.1,Unnamed: 0,res_name,res_loc,res_catagory,total_no_reviews,positive_reviews,res_rating,actual_rating,bag_of_words,time,location_link,zomato_link,delivery_rating,imagess,first_image,second_image,similarity
0,0,Royal Fast Food,"21.1891766227,72.8182196617","['Chinese', 'North Indian', 'Fast Food', 'Mugh...",117,28,3.8,1.2,Chinese North Indian Fast Food Mughlai Sichuan,8am – 9pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/royal-fast-food-n...,3.4,['https://b.zmtcdn.com/data/pictures/chains/3/...,https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.285564
1,1,La Pino'z Pizza,"21.1660202000,72.8380596000","['Pizza', 'Fast Food', 'Beverages']",13,6,3.6,2.3,Pizza Fast Food Beverages,11am – 11pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/la-pinoz-pizza-ud...,4.1,[],https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.0
2,2,Shree Khodiyar Kathiyawadi Dhaba,"21.1900747451,72.7778721973","['Gujarati', 'North Indian']",89,48,3.9,2.7,Gujarati North Indian,11am – 11pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/shree-khodiyar-ka...,3.9,[],https://b.zmtcdn.com/data/pictures/8/19037638/...,https://b.zmtcdn.com/data/pictures/chains/8/19...,1.0
3,4,Jay Jalaram Thali,"21.1934825000,72.8225117000","['Gujarati', 'North Indian', 'Chinese']",278,89,3.7,1.6,Gujarati North Indian Chinese,11am – 11pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/jay-jalaram-thali...,3.3,['https://b.zmtcdn.com/data/pictures/1/1920147...,https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.858233
4,5,The Burger Company,"21.1923730000,72.7871960000","['Burger', 'Pizza', 'Shake']",130,113,3.8,4.3,Burger Pizza Shake,11am – 4am (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/the-burger-compan...,3.6,['https://b.zmtcdn.com/data/pictures/4/2019753...,https://b.zmtcdn.com/data/pictures/4/20197534/...,https://b.zmtcdn.com/data/pictures/4/20197534/...,0.0
5,6,Radhe Dhokla,"21.1857930000,72.8332060000","['North Indian', 'Chinese', 'Gujarati', 'Sichu...",134,75,-,2.8,North Indian Chinese Gujarati Sichuan,8am – 10pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/radhe-dhokla-rust...,4.2,['https://b.zmtcdn.com/data/pictures/chains/6/...,https://b.zmtcdn.com/data/pictures/chains/6/18...,https://b.zmtcdn.com/data/pictures/chains/6/18...,0.677001
6,7,Chandan Bhojnalaya,"21.1979237835,72.8238818049","['Gujarati', 'North Indian']",336,148,3.9,2.2,Gujarati North Indian,"10am – 3pm, 6pm – 9:30pm (Today)",['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/chandan-bhojnalay...,3.8,['https://b.zmtcdn.com/data/pictures/chains/7/...,https://b.zmtcdn.com/data/pictures/chains/7/38...,https://b.zmtcdn.com/data/reviews_photos/4d4/e...,1.0
7,8,Shree Kathiyawadi Khadki,"21.1909528553,72.7880441397","['Gujarati', 'North Indian']",256,141,3.5,2.8,Gujarati North Indian,"11am – 3:30pm, 7pm – 11pm (Today)",['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/shree-kathiyawadi...,4.1,['https://b.zmtcdn.com/data/pictures/9/1979255...,https://b.zmtcdn.com/data/pictures/9/19792559/...,https://b.zmtcdn.com/data/pictures/9/19792559/...,1.0
8,9,Burger King,"21.1442138290,72.7964709699","['Burger', 'Fast Food', 'Beverages', 'Desserts...",264,149,4.1,2.8,Burger Fast Food Beverages Desserts Finger Food,8am – 2am (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/burger-king-vesu/...,4.1,['https://b.zmtcdn.com/data/pictures/9/3800929...,https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.0
9,10,Kailash Sweets & Snacks,"21.1871534046,72.8139157221","['Mithai', 'Street Food', 'Fast Food', 'Bevera...",267,171,-,3.2,Mithai Street Food Fast Food Beverages,7:30am – 10:30pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/kailash-sweets-sn...,4.3,[],https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.0


# Recommendation based on Sentiment Analysis

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def analyze_recommendation(data_frame, category):
    message = ""
    ideal_restaurants = data_frame.copy()
    
    if len(ideal_restaurants) == 0:
        message = "No restaurants found near you"
    
    # Filtering the data by the category
    category_str = ' '.join(category)
    documents = [category_str] + ideal_restaurants['bag_of_words'].tolist()
    vectorizer = TfidfVectorizer().fit(documents)

    # Transform the user and restaurant 'documents' to TF-IDF vectors
    user_vector = vectorizer.transform(category)
    restaurant_vectors = vectorizer.transform(ideal_restaurants['bag_of_words'])
    
    # Saving the files user_vector and restaurant_vectors
    user_vector1 = pd.DataFrame(user_vector.toarray())
    restaurant_vectors1 = pd.DataFrame(restaurant_vectors.toarray())
    user_vector1.to_csv('user_vector.csv')
    restaurant_vectors1.to_csv('restaurant_vectors.csv')
    
    
    similarities = cosine_similarity(user_vector, restaurant_vectors)
    
    # Sorting the data
    ideal_restaurants['similarity'] = similarities.mean(axis=0)
    ideal_restaurants = ideal_restaurants.sort_values(by='similarity', ascending=False)
    
    # Removing those not similar
    top_iqr = ideal_restaurants['similarity'].quantile(0.60)
    ideal_restaurants = ideal_restaurants[ideal_restaurants['similarity'] > top_iqr]
    
    # Using ranking to get the top 5 restaurants
    mean = ideal_restaurants['actual_rating'].mean()
    ideal_restaurants = ideal_restaurants[ideal_restaurants['actual_rating'] > mean]
    
    # Sorting by rating
    ideal_restaurants = ideal_restaurants.sort_values(by='actual_rating', ascending=False)
    
    # Getting the top 5
    ideal_restaurants = ideal_restaurants.head(10)
    
    return ideal_restaurants, message

In [15]:
aux, message = analyze_recommendation(data_frame, ['north','indian'])

In [16]:
aux

Unnamed: 0.1,Unnamed: 0,res_name,res_loc,res_catagory,total_no_reviews,positive_reviews,res_rating,actual_rating,bag_of_words,time,location_link,zomato_link,delivery_rating,imagess,first_image,second_image,similarity
18,19,Harikrushna Restaurant,"21.2191308879,72.8333315626","['Gujarati', 'North Indian']",48,35,4.0,3.6,Gujarati North Indian,Opens at 6:30pm,['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/harikrushna-resta...,4.2,[],https://b.zmtcdn.com/data/pictures/8/18780908/...,https://b.zmtcdn.com/data/pictures/chains/8/18...,0.512313
7,8,Shree Kathiyawadi Khadki,"21.1909528553,72.7880441397","['Gujarati', 'North Indian']",256,141,3.5,2.8,Gujarati North Indian,"11am – 3:30pm, 7pm – 11pm (Today)",['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/shree-kathiyawadi...,4.1,['https://b.zmtcdn.com/data/pictures/9/1979255...,https://b.zmtcdn.com/data/pictures/9/19792559/...,https://b.zmtcdn.com/data/pictures/9/19792559/...,0.512313
5,6,Radhe Dhokla,"21.1857930000,72.8332060000","['North Indian', 'Chinese', 'Gujarati', 'Sichu...",134,75,-,2.8,North Indian Chinese Gujarati Sichuan,8am – 10pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/radhe-dhokla-rust...,4.2,['https://b.zmtcdn.com/data/pictures/chains/6/...,https://b.zmtcdn.com/data/pictures/chains/6/18...,https://b.zmtcdn.com/data/pictures/chains/6/18...,0.344785
20,21,Janta Thali Restaurant,"21.1694212395,72.7793822810","['North Indian', 'Chinese', 'Fast Food', 'Biry...",293,165,2.8,2.8,North Indian Chinese Fast Food Biryani,10:30am – 12midnight (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/janta-thali-resta...,3.5,[],https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.32578
2,2,Shree Khodiyar Kathiyawadi Dhaba,"21.1900747451,72.7778721973","['Gujarati', 'North Indian']",89,48,3.9,2.7,Gujarati North Indian,11am – 11pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/shree-khodiyar-ka...,3.9,[],https://b.zmtcdn.com/data/pictures/8/19037638/...,https://b.zmtcdn.com/data/pictures/chains/8/19...,0.512313
16,17,Om Restaurant,"21.1268571255,72.8545334190","['Chinese', 'North Indian', 'South Indian']",232,125,3.7,2.7,Chinese North Indian South Indian,"10am – 3:30pm, 5:30pm – 10pm (Today)",['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/om-restaurant-bhe...,3.8,[],https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.483725
22,23,J D Restaurant,"21.2103084048,72.8513774648","['North Indian', 'South Indian', 'Gujarati', '...",207,106,4.3,2.6,North Indian South Indian Gujarati Chinese,11am – 10:39pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/j-d-restaurant-va...,4.2,[],https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.443154
14,15,Dayro - Multi Cuisine Restaurant,"21.1432097337,72.7927661687","['Gujarati', 'North Indian', 'Chinese', 'South...",268,142,3.2,2.6,Gujarati North Indian Chinese South Indian Fas...,11am – 11pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/dayro-multi-cuisi...,3.9,['https://b.zmtcdn.com/data/reviews_photos/aed...,https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.374082
10,11,Dilliwale Pahelvanjee's Cholle Bhature,"21.1910378838,72.8416470811",['North Indian'],308,154,4.5,2.5,North Indian,10am – 6:30pm (Today),['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/dilliwale-pahelva...,4.3,[],https://b.zmtcdn.com/data/pictures/chains/3/18...,https://b.zmtcdn.com/data/reviews_photos/b5e/a...,0.706855
19,20,Tulsi Restaurant,"21.1922776695,72.8541699797","['North Indian', 'Chinese', 'Biryani', 'Sichuan']",245,121,4.3,2.5,North Indian Chinese Biryani Sichuan,"10:30am – 4pm, 6pm – 11pm (Today)",['https://www.google.com/maps/dir/?api=1&desti...,https://www.zomato.com/surat/tulsi-restaurant-...,4.0,[],https://b.zmtcdn.com/data/pictures/8/18780908/...,https://b.zmtcdn.com/data/pictures/chains/8/18...,0.313615


# Recommendation based on similarity algorithm

In [118]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommendation(title, total_result=5, threshold=0.5):
    tfid = TfidfVectorizer()
    tfid_matrix = tfid.fit_transform(data_frame['bag_of_words'])
    tfid.get_feature_names_out()
    
    cosine_sim = cosine_similarity(tfid_matrix, tfid_matrix)
    
    #Saving the cosine similarity matrix
    pd.DataFrame(cosine_sim).to_csv('cosine_sim.csv')
    
    idx = data_frame[data_frame['res_name'] == title].index[0]
    print(idx)
    
    #Getting only the row of the restuarant
    cosine_sim_row = cosine_sim[idx]
    print(cosine_sim_row)
    
    data_frame['similarity'] = cosine_sim[idx]
    data_frame.to_csv('similarsss.csv')
    sort_final_df = data_frame.sort_values(by='similarity', ascending=False)[1:total_result+1]
    movies = sort_final_df['res_name']
    if len(movies) != 0:
        print('Similar restraunt name(s) list:')
        for i, movie in enumerate(movies):
            print('{}. {}'.format(i+1, movie))
        print()
    else:
        print('Similar restraunt name(s) list:')
        print('-\n')

In [120]:
recommendation('Radhe Dhokla')

6
[0.28556424 0.         1.         0.85823305 0.         0.67700056
 1.         1.         0.         0.         0.73276793 0.
 0.35971442 0.54486973 0.61893237 0.68047863 0.50207143 0.
 1.         0.33063644 0.34383405 0.30539087 0.73080618 0.3300528
 0.         0.37131166 0.39876413 1.         0.31476743 0.17670671
 0.         0.         0.         0.22827289 0.         0.
 0.         0.30019183 0.19437386 0.        ]
Similar restraunt name(s) list:
1. Harikrushna Restaurant
2. Chandan Bhojnalaya
3. Shree Kathiyawadi Khadki
4. Shree Khodiyar Kathiyawadi Dhaba
5. Jay Jalaram Thali



# GPT description

In [None]:
import openai

# Set up your API key from OpenAI
openai.api_key = 'sk-proj-r7COa68Y09pou972FtJYT3BlbkFJaLly4hpkASGx9pMLPnsW'

# Information about the restaurant
def get_restaurant_info(restaurant_name):
    info = ""
    # Adding bag of words
    info += "The restaurant is a " + data_frame[data_frame['res_name'] == restaurant_name]['bag_of_words'].values[0] + ". "
    # Adding the location
    info += "The restaurant is located at " + str(data_frame[data_frame['res_name'] == restaurant_name]['res_loc'].values[0]) + ". "
    # Adding the rating
    info += "The restaurant has a rating of " + str(data_frame[data_frame['res_name'] == restaurant_name]['actual_rating'].values[0]) + ". "
    # Adding the total number of reviews
    info += "It has a total of " + str(data_frame[data_frame['res_name'] == restaurant_name]['total_no_reviews'].values[0]) + " reviews. "
    # Adding the rating of delivery
    info += "The restaurant has a delivery rating of " + str(data_frame[data_frame['res_name'] == restaurant_name]['delivery_rating'].values[0]) + ". "
    
    return info
restaurant_info = get_restaurant_info('Radhe Dhokla')

# Using ChatGPT to generate a concise restaurant description
response = openai.ChatCompletion.create(
    model="gpt-3.5",
    messages=[
        {"role": "system", "content": "You are a restaurant describer specialist."},
        {"role": "user", "content": f"Here is some information about a restaurant: {restaurant_info} Based on this, can you give me a short description, covering the most important apsects of the restaurant in about 30 words?"}
    ],
    max_tokens=90,  # Limit the response to about 30 words
    stop=["."]
)

# Print the generated text
print(response['choices'][0]['message']['content'])

APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [None]:
import os
OPENAI_API_KEY ='sk-proj-wXOuDQab7Oawga9lFrF1T3BlbkFJpNZbr6KcdpH08xNwTfIp'

In [None]:
from openai import OpenAI
client = OpenAI()