In [11]:
import pandas as pd
import numpy as np
from pathlib import Path
from scipy import sparse
from scipy.sparse import csr_matrix
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares
from IPython.core.interactiveshell import InteractiveShell
from typing import List
import time

# Notebook display setting: ask IPython to echo the value of every top-level
# expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

In [12]:
class DataLoader:
    """Read the user-rating and restaurant CSV files and expose lookups on them.

    Attributes:
        userData: Path of the ratings CSV (columns used here: ``userID``,
            ``restaurantID``, ``rating``).
        restaurantData: Path of the restaurant CSV (columns used here: ``id``,
            ``name``, ``score``, ``ratings``, ``price_range``, ``zip_code``).
        userDataDF: Ratings frame read in ``__init__``.
        restaurantDF: Restaurant frame. Raw after ``__init__``; replaced by the
            cleaned, ``id``-indexed frame once ``load_restaurant_data`` runs.
    """

    def __init__(self, userData: Path, restaurantData: Path) -> None:
        """Remember both paths and eagerly read both CSV files."""
        self.userData = userData
        self.restaurantData = restaurantData

        self.userDataDF = pd.read_csv(self.userData, sep=',')
        self.restaurantDF = pd.read_csv(self.restaurantData, sep=',')

    def load_user_data(self) -> csr_matrix:
        """Build the BM25-weighted interaction matrix from the ratings frame.

        Rows are indexed by ``userID`` and columns by ``restaurantID``; the
        stored values are the ``rating`` column cast to float. The matrix shape
        is inferred from the largest ids present.

        Returns:
            The weighted matrix in CSR format.
        """
        ratings = self.userDataDF.set_index(['userID', 'restaurantID'])
        coo = sparse.coo_matrix(
            (
                ratings["rating"].astype(float),
                (
                    ratings.index.get_level_values(0),
                    ratings.index.get_level_values(1),
                ),
            )
        )
        coo = bm25_weight(coo, K1=1000, B=0.75)
        return coo.tocsr()

    def get_restaurant(self, rest_id: int) -> str:
        """Return the name of the restaurant whose id is ``rest_id``.

        The lookup is by id, not by row position: it matches against the
        ``id`` column of the raw frame, or against the index once
        ``load_restaurant_data`` has moved ``id`` into the index. If several
        rows share the id, the first one wins.

        Raises:
            KeyError: If no restaurant has that id.
        """
        df = self.restaurantDF
        ids = df['id'] if 'id' in df.columns else df.index
        matches = df.loc[ids == rest_id, 'name']
        if matches.empty:
            raise KeyError(f"no restaurant with id {rest_id!r}")
        return matches.iloc[0]

    def load_restaurant_data(self) -> pd.DataFrame:
        """Re-read the restaurant CSV, clean it and store it on the loader.

        Cleaning steps: index by ``id``, fill missing ``score``, ``ratings``
        and ``price_range`` values, and keep only the part of ``zip_code``
        before the first ``-``.

        Returns:
            The cleaned frame (the same object as ``self.restaurantDF``).
        """
        df = pd.read_csv(self.restaurantData, sep=',')
        df = df.set_index('id')

        # NOTE(review): each fillna receives ONE random draw, so every missing
        # value in a column gets the same filler. Confirm this is intended
        # rather than a per-row draw.
        df['score'] = df['score'].fillna(np.random.uniform(0, 10))
        df['ratings'] = df['ratings'].fillna(np.random.randint(1, 100))
        # randint's upper bound is exclusive, so randint(1, 2) is always 1 and
        # the filler is always "$".
        df['price_range'] = df['price_range'].fillna("$" * int(np.random.randint(1, 2)))

        df['zip_code'] = df['zip_code'].str.split('-', n=1, expand=True)[0]
        self.restaurantDF = df
        return df

    def get_user_data(self, userID: int) -> pd.DataFrame:
        """Return the rows of the ratings frame that belong to ``userID``."""
        df = self.userDataDF
        return df.loc[df['userID'] == userID]



In [13]:
class Model:
    """Wrapper around an ``AlternatingLeastSquares`` recommender.

    Attributes:
        model: The ALS model (50 factors, 15 iterations, regularization 0.01).
        user_items: Interaction matrix the model is trained on; rows are
            indexed by user id when looking up a user's interactions.
        recommend_data: Restaurant table handed in by the caller; stored but
            not read by any method of this class.
    """

    def __init__(self, user_items: csr_matrix, restaurant_data: pd.DataFrame) -> None:
        """Create the untrained ALS model and keep references to the inputs."""
        self.model = AlternatingLeastSquares(factors=50, iterations=15, regularization=0.01)
        self.user_items = user_items
        self.recommend_data = restaurant_data

    def fitModel(self) -> None:
        """Train the model on ``user_items`` and print the elapsed seconds."""
        start = time.time()
        self.model.fit(self.user_items)

        print(f"Finished training the model at {time.time() - start}")

    def recommend_items(self, userID: int, n: int = 10):
        """Return the top ``n`` recommendations for ``userID``.

        The user's own interaction row (``user_items[userID]``) is passed to
        the model; ``n`` only controls how many items are returned.

        Returns:
            The ``(item ids, scores)`` pair produced by the model.
        """
        recommendations, scores = self.model.recommend(
            userID,
            self.user_items[userID],
            N=n,
            recalculate_user=True,
        )

        return recommendations, scores

    def similar_items(self, restaurant_id):
        """Return the ``(item ids, scores)`` of items similar to ``restaurant_id``."""
        item_ids, scores = self.model.similar_items(restaurant_id)

        return item_ids, scores

In [14]:
# Read both CSV files and build the inputs for the recommender.
loader = DataLoader(Path('user-data.csv'), Path('restaurants.csv'))
user_data = loader.load_user_data()

# Take the cleaned restaurant table from loader.restaurantDF, which
# load_restaurant_data() always sets, instead of relying on the call's return
# value; this guarantees `resturant_data` is a DataFrame and never None.
_load_result = loader.load_restaurant_data()
resturant_data = loader.restaurantDF


In [15]:
# Build the recommender from the interaction matrix and the restaurant table,
# then train it.
model = Model(user_items=user_data, restaurant_data=resturant_data)
model.fitModel()

  0%|          | 0/15 [00:00<?, ?it/s]

100%|██████████| 15/15 [00:13<00:00,  1.08it/s]

Finished training the model at 13.950199842453003





In [34]:
from itertools import zip_longest

# A single user id drives both columns, so the table compares a user's
# recommendations with that same user's rated restaurants.
# NOTE(review): the recommend call used user 0 while the "Original" column used
# user 10; 0 is kept here. Change this one value to inspect another user.
user_id = 0

restaurants, scores = model.recommend_items(user_id, 50)
results = [loader.get_restaurant(rest_id) for rest_id in restaurants]

# Names of the restaurants this user has already rated.
orig_rest = [
    loader.get_restaurant(rest_id)
    for rest_id in loader.get_user_data(user_id)['restaurantID']
]

# zip_longest pads the shorter columns with None so the lengths may differ.
df = pd.DataFrame(
    list(zip_longest(orig_rest, results, scores)),
    columns=["Original", "Results", "Scores"],
)

df
