In [2]:
# code
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import warnings


def get_ratings():
    """Load the ratings and food tables and print summary statistics.

    Reads ``Ratings.csv`` and ``indian_food.csv`` from the current working
    directory and prints the number of ratings, the number of distinct
    ``Food_Id`` and ``User_Id`` values, and the average number of ratings
    per user and per food.

    Returns:
        tuple: ``(ratings, movies)`` -- the two DataFrames exactly as read
        from ``Ratings.csv`` and ``indian_food.csv`` respectively.
    """
    # NOTE: simplefilter changes the process-wide warning filter, so
    # FutureWarnings stay silenced after this function returns.
    warnings.simplefilter(action='ignore', category=FutureWarning)

    ratings = pd.read_csv("Ratings.csv")
    movies = pd.read_csv("indian_food.csv")

    n_ratings = len(ratings)
    # dropna=False keeps the count equal to len(series.unique()).
    n_movies = ratings['Food_Id'].nunique(dropna=False)
    n_users = ratings['User_Id'].nunique(dropna=False)

    # A header-only ratings file has zero users/foods; report 0.0 instead of
    # raising ZeroDivisionError.
    avg_per_user = round(n_ratings / n_users, 2) if n_users else 0.0
    avg_per_movie = round(n_ratings / n_movies, 2) if n_movies else 0.0

    print(f"Number of ratings: {n_ratings}")
    print(f"Number of unique movieId's: {n_movies}")
    print(f"Number of unique users: {n_users}")
    print(f"Average ratings per user: {avg_per_user}")
    print(f"Average ratings per movie: {avg_per_movie}")

    return ratings, movies

# Next, build the food-by-user rating matrix as a SciPy CSR sparse matrix.


def create_matrix(df):
    """Build a sparse food-by-user rating matrix from a ratings table.

    Args:
        df: DataFrame with ``User_Id``, ``Food_Id`` and ``Ratings`` columns.

    Returns:
        tuple: ``(X, user_mapper, movie_mapper, user_inv_mapper,
        movie_inv_mapper)`` where ``X`` is a ``csr_matrix`` of shape
        ``(number of foods, number of users)``, the ``*_mapper`` dicts map
        an id to its row/column position, and the ``*_inv_mapper`` dicts
        map a position back to the id.
    """
    user_column = df["User_Id"]
    food_column = df["Food_Id"]

    n_users = len(user_column.unique())
    n_foods = len(food_column.unique())

    # np.unique returns the ids sorted, which fixes the position of each id.
    sorted_users = np.unique(user_column)
    sorted_foods = np.unique(food_column)

    # id -> position
    user_mapper = {uid: pos for pos, uid in zip(range(n_users), sorted_users)}
    movie_mapper = {fid: pos for pos, fid in zip(range(n_foods), sorted_foods)}

    # position -> id
    user_inv_mapper = {pos: uid for pos, uid in zip(range(n_users), sorted_users)}
    movie_inv_mapper = {pos: fid for pos, fid in zip(range(n_foods), sorted_foods)}

    # Row (food) and column (user) coordinates for every rating.
    col_positions = [user_mapper[uid] for uid in user_column]
    row_positions = [movie_mapper[fid] for fid in food_column]

    X = csr_matrix(
        (df["Ratings"], (row_positions, col_positions)),
        shape=(n_foods, n_users),
    )

    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper



def find_similar_movies(movie_id, movie_mapper, movie_inv_mapper, X, k, metric='cosine', show_distance=False):
    """Find similar movies using KNN.

    Fits a brute-force ``NearestNeighbors`` model on the rows of ``X`` and
    queries it with the row belonging to ``movie_id``.

    Args:
        movie_id: Id of the query item; must be a key of ``movie_mapper``.
        movie_mapper: Dict mapping an item id to its row position in ``X``.
        movie_inv_mapper: Dict mapping a row position in ``X`` to an item id.
        X: Rating matrix with one row per item.
        k: Number of neighbours wanted.
        metric: Distance metric passed to ``NearestNeighbors``.
        show_distance: When true, return ``(item_id, distance)`` pairs
            instead of bare ids.

    Returns:
        list: Up to ``k`` neighbour ids ordered nearest first (fewer when
        ``X`` has fewer than ``k + 1`` rows), excluding ``movie_id`` itself.
        With ``show_distance=True`` each entry is ``(item_id, distance)``.

    Raises:
        KeyError: If ``movie_id`` is not in ``movie_mapper``.
    """
    movie_ind = movie_mapper[movie_id]
    movie_vec = X[movie_ind].reshape(1, -1)

    # The query row is part of the fitted data, so ask for one extra
    # neighbour -- but never more than there are rows, which would raise.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Always fetch distances: with return_distance=True kneighbors returns a
    # (distances, indices) pair, each of shape (1, n_neighbors).
    distances, indices = kNN.kneighbors(movie_vec, return_distance=True)

    # Drop the query item by position rather than assuming it is ranked
    # first: rows tied at the same distance may be returned ahead of it.
    neighbours = [
        (movie_inv_mapper[int(ind)], float(dist))
        for ind, dist in zip(indices[0], distances[0])
        if ind != movie_ind
    ][:k]

    if show_distance:
        return neighbours
    return [item_id for item_id, _ in neighbours]


def get_recommendations(movie_id=256, k=10):
    """Print the foods most similar to a given food.

    Loads the data with ``get_ratings``, builds the rating matrix with
    ``create_matrix``, looks up the nearest neighbours of ``movie_id`` with
    ``find_similar_movies`` and prints their names.

    Args:
        movie_id: Id of the food to base recommendations on. Defaults to
            256, the value previously hard-coded here.
        k: Number of similar foods to request. Defaults to 10.

    Raises:
        KeyError: If ``movie_id`` (or a neighbour id) has no entry in the
            ``Id`` -> ``name`` lookup, or ``movie_id`` has no ratings.
    """
    ratings, movies = get_ratings()
    # Only the food mappers are needed; the user mappers are discarded.
    X, _, movie_mapper, _, movie_inv_mapper = create_matrix(ratings)
    movie_titles = dict(zip(movies['Id'], movies['name']))

    similar_ids = find_similar_movies(movie_id, movie_mapper, movie_inv_mapper, X, k=k)
    movie_title = movie_titles[movie_id]

    print(f"Since you ate {movie_title}")
    for similar_id in similar_ids:
        print(movie_titles[similar_id])
        
# Run the demo when this cell/script executes: prints the dataset summary
# followed by the recommended food names.
get_recommendations()


Number of ratings: 2200
Number of unique movieId's: 256
Number of unique users: 20
Average ratings per user: 110.0
Average ratings per movie: 8.59
Since you ate Boiled-egg
Tandoori Fish Tikka
Masor Koni
Gulab jamun
Bebinca
Mysore pak
Kutchi dabeli
Dum aloo
Bora Sawul
Pork Bharta
Upma
