In [1]:
import json 
import os
import csv
import json
import sqlite3
from pathlib import Path
from typing import Dict, Text
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.layers import Activation
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import tensorflow as tf
from sklearn.decomposition import PCA
from tensorflow.keras.models import model_from_json
import pickle
%matplotlib inline
np.random.seed=2022

## Load Model

In [3]:
json_file = open('/kaggle/input/neuralembeddingdata/NeuralEmbeddingModel.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
# load weights into new model
model.load_weights('/kaggle/input/neuralembeddingdata/NeuralEmbeddingModel.h5')
print("Loaded model from disk")

Loaded model from disk


In [4]:
#Connect to database
def connect(path):
    con=sqlite3.connect(path)
    return con

#Query To create table
def createQuery(query,connection):
    try:
        connection.cursor().execute(query)
        print('Table Created')
    except Exception as e:
        print('Exception',e)
        


#Function to perform get queries
def Query(query,connection):
    try:
        result=connection.cursor().execute(query)
        print('Query Executed')
        return result.fetchall()
    except Exception as e:
        print('Exception',e)

        
#Function to convert query(which comes in as a list of tuples to DataFrame)
def toDf(res,cols):
    try:
        return pd.DataFrame(res,columns=cols)
    except Exception as e:
        print(f'The DataFrame cannot be created because of this error: {e}')
        
### Get query Transformed ###

def getQuery(query,connection,cols):
    try:
        result=connection.cursor().execute(query)
        print('Query Executed')
        return toDf(result.fetchall(),cols)
    except Exception as e:
        print('Exception',e)

In [5]:
#Connection to database
connection=connect('../input/yelpdb/database.db')

In [6]:
# We perform query reviews table to get all the users that rated a buisness
buisnessJoined=getQuery('select business_id, user_id, stars from reviews',connection,['business_id','user_id','stars'])

Query Executed


In [7]:
#Taking Unique Users 
users = buisnessJoined.user_id.unique()
restraunts = buisnessJoined.business_id.unique()

# Creating a Reindexing Dictionary
userid2idx = {o:i for i,o in enumerate(users)}
restraunts2idx = {o:i for i,o in enumerate(restraunts)}

idx2restraunts={a:b for b,a in restraunts2idx.items()}

In [None]:
with open('userMapping.pickle','wb') as f:
    pickle.dump(userid2idx,f)

with open('restrauntsMapping.pickle','wb') as f:
    pickle.dump(restraunts2idx,f)


## Recommendation System Backend

We use the users social network to find the best matches of restraunts for them. The idea is to first get the restraunts which user have rated. In the second step we aim to get the friends of the users and all the restraunts they have rated, taking a set difference between the restraunts rated between them, we obtain all the restraunts not rated by the user. We use our model to make predictions about the ratings a user may assign to the restraunts and pick the top 5 restraunts.

Here we have only taken the first 5 friends becuase of the large overhead in I/O calls using database. The main reason we do not use dataframes for querying the data is that in real life scenarios database will be the preferred query point and not loading data into dataframes using files. Hence it is a realistic approcach towards the problem which also covers topics which we have covered under our course DS1.

In [8]:
#Get Recommendation for a user 
def recommendations(user):
    
    print('Executing the Queries, [        ]')
    userRated=Query(f'select business_id from reviews where user_id="{user}" ',connection)
    userRated=[e[0] for e in userRated]
    print('Length of user rated Resstruants list: ',len(userRated))

    print('Executing the Queries, 1st completed 100% [====    ]')
    userFriends=Query(f'select friends from user where user_id="{user}"',connection)
    userFriends=userFriends[0][0].split(',')
    print('Number of users friends are: ',len(userFriends))

    if len(userFriends)==0:
        return

    temp='('
    count=0
    for i in userFriends:
        trans=i.strip()
        temp+=f'"{trans}",'
        count+=1
        if count==5:
            break
    temp=temp[:-1]
    temp+=')'
    

    userFriendsRated=Query(f'select business_id from reviews where user_id IN {temp}',connection)
    userFriendsRated=[e[0] for e in userFriendsRated]

    print('Executing the Queries, 2nd completed 100% [========]')


    print('Preparing Data for Model Predictions')
    notUserRated=list(set(userFriendsRated).difference(set(userRated)))

    print(f'The number of restraunts not rated by the user {len(notUserRated)}')

    restrauntsIndex=np.asarray([restraunts2idx[e] for e in notUserRated]).reshape(-1,1)
    userIndex=np.asarray([userid2idx[user]]*len(restrauntsIndex)).reshape(-1,1)


    print('Data Generated Succesfully, Getting Predictions')
    predictions=model.predict([userIndex,restrauntsIndex])

    predictions=predictions.flatten()
    predictions=np.round(predictions,1)

    print('Predictions Retrieved!, Preparing Results')

    template='('
    for i in notUserRated:
        template+=f'"{i}",'

    template=template[:-1]
    template+=')'


    restrauntNames=Query(f'select name from business where business_id in {template}',connection)

    results=[(i[0],j) for i,j in zip(restrauntNames,predictions)]

    results=sorted(results,key=lambda x: -x[1])
    print('The Recommendations are successfully genrated\n\n')
    return results[:5]
              

We take a user at random and will provide recommendations to them using our model built in the previous notebook. The function above not only provides the recommendation as an ouput but also logs the journey of our recommendation system!

In [9]:
recommendations('q_QQ5kBBwlCcbL1s4NVK3g')

Executing the Queries, [        ]
Query Executed
Length of user rated Resstruants list:  13
Executing the Queries, 1st completed 100% [====    ]
Query Executed
Number of users friends are:  5813
Query Executed
Preparing Data for Model Predictions
The number of restraunts not rated by the user 311
Data Generated Succesfully, Getting Predictions


2022-02-16 18:50:35.939591: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Predictions Retrieved!, Preparing Results
Query Executed
The Recommendations are successfully genrated




[("Stash's Pizza", 5.4),
 ('New Balance Factory Store', 5.4),
 ("Pat's Pizza", 5.2),
 ('Chart House', 5.1),
 ('Bred', 5.1)]

Our model predicts the top 5 restraunts that the user would like to go in.

## Future Work

1) For a more complex model we may not randomly initalize our embeddings but provide a starting point by transforming our data hence optimimzing the latent space even more.