In [15]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Demographic Filtering

Suggesting the users items that were well-received and are popular among the users, in general. Most trending items and items with the best rating rise to the top and get shortlisted for recommendation.

In [1]:
import pandas as pd 
import numpy as np

# Importing files of food items.
df1=pd.read_csv('./files/food.csv')
df1.columns = ['food_id','title','canteen_id','price', 'num_orders', 'category', 'avg_rating', 'num_rating', 'tags']

df1

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags
0,1,Lala Maggi,1,30,35,maggi,3.9,10,"veg, spicy"
1,2,Cheese Maggi,1,25,40,maggi,3.8,15,veg
2,3,Masala Maggi,1,25,10,maggi,3.0,10,"veg, spicy"
3,4,Veg Maggi,1,30,25,maggi,2.5,5,"veg, healthy"
4,5,Paneer Tikka,1,60,50,Punjabi,4.6,30,"veg, healthy"
5,6,Chicken Tikka,1,80,40,Punjabi,4.2,28,"nonveg, healthy, spicy"
6,7,Hyderabadi Biriyani,1,120,60,Hyderabadi,4.7,40,"nonveg, spicy"
7,8,Sheer Khurma,1,80,50,Hyderabadi,4.2,25,"veg, sweet"
8,9,Chiken Pulao,1,100,45,Hyderabadi,3.9,21,"nonveg, spicy"
9,10,Shorba,1,80,24,Hyderabadi,3.8,32,"nonveg, spicy"


In [2]:
# mean of average ratings of all items
C= df1['avg_rating'].mean()

# the minimum number of votes required to appear in recommendation list, i.e, 60th percentile among 'num_rating'
m= df1['num_rating'].quantile(0.6)

# items that qualify the criteria of minimum num of votes
q_items = df1.copy().loc[df1['num_rating'] >= m]

# Calculation of weighted rating based on the IMDB formula
def weighted_rating(x, m=m, C=C):
    v = x['num_rating']
    R = x['avg_rating']
    return (v/(v+m) * R) + (m/(m+v) * C)

# Applying weighted_rating to qualified items
q_items['score'] = q_items.apply(weighted_rating, axis=1)

# Shortlisting the top rated items and popular items
top_rated_items = q_items.sort_values('score', ascending=False)
pop_items= df1.sort_values('num_orders', ascending=False)

In [3]:
# Display results of demographic filtering
top_rated_items[['title', 'num_rating', 'avg_rating', 'score']].head()
pop_items[['title', 'num_orders']].head()

Unnamed: 0,title,num_orders
6,Hyderabadi Biriyani,60
4,Paneer Tikka,50
7,Sheer Khurma,50
8,Chiken Pulao,45
10,Dum Biryani,43


## Content Based Filtering

A bit more personalised recommendation. We will be analysing the past orders of the user and suggesting back those items which are similar.

We will be using <b>Count Vectorizer</b> from <b>Scikit-Learn</b> to find similarity between items based on their title, category and tags. To bring all these properties of each item together we create a <b>"soup"</b> of tags. <b>"Soup"</b> is a processed string correspnding to each item, formed using constituent words of tags, tile and category.

In [4]:
# TODO: clean data

# Creating soup string for each item
def create_soup(x):            
    tags = x['tags'].lower().split(', ')
    tags.extend(x['title'].lower().split())
    tags.extend(x['category'].lower().split())
    return " ".join(sorted(set(tags), key=tags.index))

df1['soup'] = df1.apply(create_soup, axis=1)
df1.tail(3)

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags,soup
10,11,Dum Biryani,1,130,43,Hyderabadi,4.1,34,"nonveg, spicy",nonveg spicy dum biryani hyderabadi
11,12,Mutton Curry,1,120,35,Hyderabadi,3.9,35,"nonveg, spicy",nonveg spicy mutton curry hyderabadi
12,13,Chana Dal Pulao,1,40,20,Hyderabadi,4.3,25,"veg, healthy",veg healthy chana dal pulao hyderabadi


In [5]:
# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import CountVectorizer
count = CountVectorizer(stop_words='english')

# df1['soup']
count_matrix = count.fit_transform(df1['soup'])

# Compute the Cosine Similarity matrix based on the count_matrix
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(count_matrix, count_matrix)

indices_from_title = pd.Series(df1.index, index=df1['title'])
indices_from_food_id = pd.Series(df1.index, index=df1['food_id'])

In [6]:
# Function that takes in food title or food id as input and outputs most similar dishes 
def get_recommendations(title="", cosine_sim=cosine_sim, idx=-1):
    # Get the index of the item that matches the title
    if idx == -1 and title != "":
        idx = indices_from_title[title]

    # Get the pairwsie similarity scores of all dishes with that dish
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the dishes based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the scores of the 10 most similar dishes
    sim_scores = sim_scores[1:3]

    # Get the food indices
    food_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar dishes
    return food_indices

In [7]:
df1.loc[get_recommendations(title="Dum Biryani")]

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags,soup
6,7,Hyderabadi Biriyani,1,120,60,Hyderabadi,4.7,40,"nonveg, spicy",nonveg spicy hyderabadi biriyani
9,10,Shorba,1,80,24,Hyderabadi,3.8,32,"nonveg, spicy",nonveg spicy shorba hyderabadi


We will now some functions, some of which are utility functions, others are actually the functions which will help get personalised recommendations for current user.

In [8]:
# fetch few past orders of a user, based on which personalized recommendations are to be made
def get_latest_user_orders(user_id, orders, num_orders=3):
    counter = num_orders
    order_indices = []
    
    for index, row in orders[['user_id']].iterrows():
        if row.user_id == user_id:
            counter = counter -1
            order_indices.append(index)
        if counter == 0:
            break
            
    return order_indices

# utility function that returns a DataFrame given the food_indices to be recommended
def get_recomms_df(food_indices, df1, columns, comment):
    row = 0
    df = pd.DataFrame(columns=columns)
    
    for i in food_indices:
        df.loc[row] = df1[['title', 'canteen_id', 'price']].loc[i]
        df.loc[row].comment = comment
        row = row+1
    return df

# return food_indices for accomplishing personalized recommendation using Count Vectorizer
def personalised_recomms(orders, df1, user_id, columns, comment="based on your past orders"):
    order_indices = get_latest_user_orders(user_id, orders)
    food_ids = []
    food_indices = []
    recomm_indices = []
    
    for i in order_indices:
        food_ids.append(orders.loc[i].food_id)
    for i in food_ids:
        food_indices.append(indices_from_food_id[i])
    for i in food_indices:
        recomm_indices.extend(get_recommendations(idx=i))
        
    return get_recomms_df(set(recomm_indices), df1, columns, comment)

# Simply fetch new items added by vendor or today's special at home canteen
def get_new_and_specials_recomms(new_and_specials, users, df1, canteen_id, columns, comment="new/today's special item  in your home canteen"):
    food_indices = []
    
    for index, row in new_and_specials[['canteen_id']].iterrows():
        if row.canteen_id == canteen_id:
            food_indices.append(indices_from_food_id[new_and_specials.loc[index].food_id])
            
    return get_recomms_df(set(food_indices), df1, columns, comment)

# utility function to get the home canteen given a user id
def get_user_home_canteen(users, user_id):
    for index, row in users[['user_id']].iterrows():
        if row.user_id == user_id:
            return users.loc[index].home_canteen
    return -1

# fetch items from previously calculated top_rated_items list
def get_top_rated_items(top_rated_items, df1, columns, comment="top rated items across canteens"):
    food_indices = []
    
    for index, row in top_rated_items.iterrows():
        food_indices.append(indices_from_food_id[top_rated_items.loc[index].food_id])
        
    return get_recomms_df(food_indices, df1, columns, comment)

# fetch items from previously calculated pop_items list
def get_popular_items(pop_items, df1, columns, comment="most popular items across canteens"):
    food_indices = []
    
    for index, row in pop_items.iterrows():
        food_indices.append(indices_from_food_id[pop_items.loc[index].food_id])
        
    return get_recomms_df(food_indices, df1, columns, comment)
    

### Recommendations

In [9]:
orders = pd.read_csv('./files/orders.csv')
new_and_specials = pd.read_csv('./files/new_and_specials.csv')
users = pd.read_csv('./files/users.csv')

columns = ['title', 'canteen_id', 'price', 'comment']
current_user = 2
current_canteen = get_user_home_canteen(users, current_user)


personalised_recomms(orders, df1, current_user, columns)
get_new_and_specials_recomms(new_and_specials, users, df1, current_canteen, columns)
get_top_rated_items(top_rated_items, df1, columns)
get_popular_items(pop_items, df1, columns).head(3)

Unnamed: 0,title,canteen_id,price,comment
0,Hyderabadi Biriyani,1,120,most popular items across canteens
1,Paneer Tikka,1,60,most popular items across canteens
2,Sheer Khurma,1,80,most popular items across canteens


We can easily use collaborative filtering or incorporate neural networks to make our prediction even better. However, these are more computationally intensive methods.

### CLI for trying Recommendation

In [17]:
input1=''
while(True):
    if input1=='exit':
        break
    print('Welcome to Menu Recommendation System!\n')
    print('Type exit to quit')
    
    input1=input('Are you new customer type y or n: ')
    if input1=='y':
        print('\nPopular Items Recommendation for new user!')
        print(get_popular_items(pop_items, df1, columns).head(3))
        break
    elif input1=='n':
        input1 = input('Enter your user id: ')
        if int(input1) in list(orders['user_id']):
            print('\nUser Based Recommendation for user_id=='+input1)
            print(personalised_recomms(orders, df1, current_user, columns))
            break
        else:
            input1 = input('Enter Valid user_id: ')
            
    else:
        print("wrong input")
        break
        

Welcome to Menu Recommendation System!

Type exit to quit
Are you new customer type y or n: n
Enter your user id: 9

User Based Recommendation for user_id==9
                 title canteen_id price                    comment
0            Veg Maggi          1    30  based on your past orders
1         Paneer Tikka          1    60  based on your past orders
2        Chicken Tikka          1    80  based on your past orders
3  Hyderabadi Biriyani          1   120  based on your past orders
