In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# preprocessing of data (to be retrieved from postgresql restapi in prod)
data = pd.read_csv('places-data.csv')

data['mood'] = data['mood'].apply(lambda x: x.lower().split(', '))
data['mealtime'] = data['mealtime'].apply(lambda x: x.lower().split(', '))
data['cuisine'] = data['cuisine'].apply(lambda x: x.lower().split(', '))

# receive user input (to be received thru gRPC in prod)
user_mealtime = 'brunch'
user_mood = 'comfort'
user_cuisine_dont_wants = ['Italian', 'Chinese']
user_budget = 20

user_cuisine_dont_wants_lower = [x.lower() for x in user_cuisine_dont_wants]

# initial filtering of data based on user input
filtered_restaurants = data[
    (data['budget'] <= user_budget) &
    (~data['cuisine'].apply(lambda x: any(item in user_cuisine_dont_wants_lower for item in x))) &
    (data['mood'].apply(lambda arr: user_mood.lower() in arr)) &
    (data['mealtime'].apply(lambda arr: user_mealtime.lower() in arr))
]

shuffled_restaurants = filtered_restaurants.sample(frac=1)

# Output top 3 restaurant names as recommendations
recommendations = shuffled_restaurants.head(5)

ret = recommendations.to_dict(orient='records')
print(ret)

# create feature matrix for restaurants
# one hot encoding for mood and mealtime

# calculate similarity score using cosine similarity

# sort restuarants based on similarity score and shuffle for randomness

# output top 3 (to send thru grpc in prod) 


[{'name': 'B For Bagel', 'budget': 8, 'mood': ['comfort', 'healthy'], 'cuisine': ['café'], 'mealtime': ['brunch', 'lunch'], 'rating': 4}, {'name': 'Group Therapy Coffee', 'budget': 15, 'mood': ['comfort'], 'cuisine': ['café'], 'mealtime': ['brunch', 'lunch', 'dinner'], 'rating': 4}, {'name': 'Botany Café', 'budget': 20, 'mood': ['comfort'], 'cuisine': ['café'], 'mealtime': ['brunch', 'lunch', 'dinner'], 'rating': 4}, {'name': 'SYIP', 'budget': 20, 'mood': ['comfort'], 'cuisine': ['café'], 'mealtime': ['brunch', 'lunch', 'dinner'], 'rating': 4}, {'name': 'Two Bakers', 'budget': 15, 'mood': ['comfort'], 'cuisine': ['café', 'japanese'], 'mealtime': ['brunch', 'snack'], 'rating': 3}]


In [7]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def process_text(text):
    # replace multiple spaces with one
    text = str(text)
    text = ' '.join(text.split())
    # lowercase
    text = text.lower()

    return text

def index_from_title(df,title):
    return df[df['name']==title].index.values[0]


# function that returns the title of the movie from its index
def title_from_index(df,index):
    return df[df.index==index].name.values[0]


# generating recommendations for given title
def recommendations(name, df,cosine_similarity_matrix,number_of_recommendations):
    index = index_from_title(df,name)
    similarity_scores = list(enumerate(cosine_similarity_matrix[index]))
    similarity_scores_sorted = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    recommendations_indices = [t[0] for t in similarity_scores_sorted[1:]]
    recommendations_indices = list(dict.fromkeys(recommendations_indices))[:number_of_recommendations]
    print(recommendations_indices)
    # recommendations_indices = [t[0] for t in similarity_scores_sorted[1:(number_of_recommendations+1)]]

    # return df['name'].iloc[recommendations_indices]
    return df.iloc[recommendations_indices][['name', 'review']].drop_duplicates(subset='name').head(5)

user = "Porcelain Cafe"

df = pd.read_csv('reviews.csv', encoding='unicode_escape')
print(df)
selected = df[df["name"] == user]["review"]

df['review'] = df.apply(lambda x: process_text(x.review),axis=1)
tf_idf = TfidfVectorizer(stop_words='english')

tf_idf_matrix = tf_idf.fit_transform(df['review']);

cosine_similarity_matrix = cosine_similarity(tf_idf_matrix, tf_idf_matrix)


# Output the top similar restaurants
recommendations(user, df, cosine_similarity_matrix, 20)

                      name                                             review
0     Oven & Fried Chicken  Tried the original and spicy garlic chicken  S...
1     Oven & Fried Chicken  This place gets busy, but it's for a good reas...
2     Oven & Fried Chicken  If you are on the hunt for the best fried chic...
3     Oven & Fried Chicken  The age old golden combination of beer and fri...
4     Oven & Fried Chicken  Friday night super crowded Roasted chicken, or...
...                    ...                                                ...
3290         O Happi Place  Waffles with ice cream, dark chocolate, and bo...
3291         O Happi Place  Hidden gem in Everton Park. Clean & cosy envir...
3292         O Happi Place  Small and cozy ice cream place. Love the inter...
3293         O Happi Place  Found this random ice cream place on google ma...
3294         O Happi Place  This is one of my favorite gelato cafe with it...

[3295 rows x 2 columns]
[3031, 3028, 938, 1596, 3029, 1670, 235

Unnamed: 0,name,review
3031,Porcelain Cafe,i have been doing my facial treatments with em...
938,Symmetry,i just finished my braces treatment with dr eu...
1596,Genki Sushi,"long q on sun, small portion of food, good var..."
2351,Breakfast Club,"had their rosti awhile ago, and tried their ch..."
1216,The Glasshouse,service was pretty chaotic. they served only h...


In [19]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

dfplace = pd.read_csv('places-data.csv', encoding='utf-8')
dfreview = pd.read_csv('reviews.csv', encoding='unicode_escape')

class Model():
    def __init__(self):
        self.data = dfplace
        self.data['mood'] = self.data['mood'].apply(lambda x: x.lower().split(', '))
        self.data['mealtime'] = self.data['mealtime'].apply(lambda x: x.lower().split(', '))
        self.data['cuisine'] = self.data['cuisine'].apply(lambda x: x.lower().split(', '))
        self.reviews = dfreview


    def generate_recommendations(self, mealtime, mood, cuisine_dont_wants, budget):
        # receive user input (to be received thru gRPC in prod)
        print("test")
        user_mealtime = mealtime
        user_mood = mood
        user_cuisine_dont_wants = cuisine_dont_wants
        user_budget = budget

        user_cuisine_dont_wants_lower = [x.lower() for x in user_cuisine_dont_wants]

        # initial filtering of data based on user input
        filtered_restaurants = self.data[
            (self.data['budget'] <= user_budget) &
            (~self.data['cuisine'].apply(lambda x: any(item in user_cuisine_dont_wants_lower for item in x))) &
            (self.data['mood'].apply(lambda arr: user_mood.lower() in arr)) &
            (self.data['mealtime'].apply(lambda arr: user_mealtime.lower() in arr))
        ]

        shuffled_restaurants = filtered_restaurants.sample(frac=1)

        # Output top 3 restaurant names as recommendations
        recommendations = shuffled_restaurants.head(5)
        

        return recommendations.to_dict(orient="records")
    
    def process_text(self,text):
        # replace multiple spaces with one
        text = str(text)
        text = ' '.join(text.split())
        # lowercase
        text = text.lower()

        return text

    def index_from_title(self,df,title):
        return df[df['name']==title].index.values[0]


    # function that returns the title of the movie from its index
    def title_from_index(self,df,index):
        return df[df.index==index].name.values[0]
    
   
    def recommendations(self, name, df,cosine_similarity_matrix,number_of_recommendations):
        index = self.index_from_title(df,name)
        similarity_scores = list(enumerate(cosine_similarity_matrix[index]))
        similarity_scores_sorted = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
        recommendations_indices = [t[0] for t in similarity_scores_sorted[1:]]
        recommendations_indices = list(dict.fromkeys(recommendations_indices))[:number_of_recommendations]
        # recommendations_indices = [t[0] for t in similarity_scores_sorted[1:(number_of_recommendations+1)]]
        # return df['name'].iloc[recommendations_indices]

        rec_df = df.iloc[recommendations_indices][['name']].drop_duplicates(subset='name').head(5)
        rec = rec_df.to_dict(orient="records")

        for place in rec:
            entry = self.data[self.data['name'] == place['name']]
            place['budget'] = entry['budget']
            place['cuisine'] = entry['cuisine']
            place['rating'] = entry['rating']

        return rec

    def generate_personalized(self, user_choice):
        self.reviews['review'] = self.reviews.apply(lambda x: self.process_text(x.review),axis=1)
        tf_idf = TfidfVectorizer(stop_words='english')

        tf_idf_matrix = tf_idf.fit_transform(self.reviews['review'])

        cosine_similarity_matrix = cosine_similarity(tf_idf_matrix, tf_idf_matrix)

        # Output the top similar restaurants
        recommendations = self.recommendations(user_choice, self.reviews, cosine_similarity_matrix, 20)

        return recommendations
    
model = Model()
rec = model.generate_personalized("Porcelain Cafe")
print(type(rec), len(rec))

<class 'list'> 5
