In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statistics as st
import sqlite3 as sq
from sklearn.model_selection import train_test_split
import datetime
import math

In [2]:
%matplotlib inline

In [3]:
con = sq.connect('db.sqlite3')

In [4]:
df_dish = pd.read_sql_query("Select * From home_dish", con)
dish_id = df_dish['id'].to_list()
dish = df_dish['item']

In [5]:
df_guest = pd.read_sql_query("Select * From home_guest", con)
guest_id = df_guest['id'].to_list()
guest_mobile = df_guest['mobile']

In [6]:
df_order = pd.read_sql_query("Select * From home_order", con)
guest_order_id = df_order['guest_order_id']
guest_to_attend_id = df_order['guest_to_attend_id']
df_order.head()

Unnamed: 0,id,guest_order_id,guest_to_attend_id,guest_order_ratings
0,1,72,2,4
1,2,68,2,3
2,3,33,1,4
3,4,73,1,3
4,5,67,3,5


In [19]:
class Dish(object):
    def __init__(self, dish_id, dish, pram):
        """
        dish_id: list or dataset of dish's id.
        dish: list or dataset of dish's.
        pram: list or dataset of parameter on which
            we get our recommendations like: mean,
            co-occurrence etc.
        """
        self.dish_id = dish_id
        self.dish = dish
        self.pram = pram
    
    def get_dish_id(self):
        return self.dish_id
        
    def get_dish(self):
        return self.dish
    
    def get_pram(self):
        return self.pram
    
    def __str__(self):
        return str(self.dish_id)+" " + self.dish + ": " +str(self.pram)

In [20]:
class Recommendations(object):

    def __init__(self):
        self.dish_id = None
        self.dishes = None
        self.pram = None
        self.matrix_mean = None
        self.guest_id = None
        
    
    def get_dish_id(self):
        return self.dish_id
    
    def get_dishes(self):
        return self.dish
    
    
    def get_pram(self):
        """
        Recently used parameters list for reccomendations
        """
        return self.pram
        
        
    def get_matrix_mean(self):
        return self.matrix_mean
    
    
    def get_guest_id(self):
        return self.guest_id
        
        
    def get_recommendation_by_pram(self, dish_id, dish, pram):
        """
        get_recommendation_by_pram(self, dish_id, dish, pram)
        
        Gives first five recommendations as per the descending
        order of the given params.
        
        Parameters: 
        dish_id= List/Dataframe/Dataset of id'd of dishes.
        dish= List/Dataframe/Dataset of dishes.
        pram= List/Dataframe/Dataset of parameters like mean or
            co-occurence etc.
            
        *NOTE: dish_id, dish & pram all 3 are of same sizes.
        """
        self.dish_id = dish_id
        self.dish = dish
        self.pram = pram
        
        dish_list = [] # For carrying dish details
        for i in range(len(self.dish_id)):
            dish_list.append(Dish(dish_id=self.dish_id[i], dish=self.dish[i], pram=self.pram[i]))
            
        recommendation = sorted(dish_list, key=Recommendations.get_pram, reverse=True)
        recommendation_print = [print(i) for i in recommendation[:5]]
        
        return recommendation[:5]
    
    
    def get_training_test_data(self, tr, ts, r):
        df_order_train, df_order_test = train_test_split(df_order, test_size=ts, train_size=tr, random_state=100)
        return df_order_train, df_order_test
    
    
    def get_order_and_ratings_matrix(self, orders, dish_id):
        """
        Matrix of guest's orders of size(no. of total guests attended,no. of all 
        dish served), of values 0 & 1.
        1: If guest placed an order of dish then(guest_id_index, dish_id_index)=1
        0: otherwise
        
        Matrix of ratings given by guests to the perticular dishes. If a guest gives
        multiple time ratings to a dish, it takes mean of all ratings.
        0.0 otherwise.
        
        Parameters:
        orders= Dataframe of orders query of guets, carrying guest id of particular
                guest, dish ids of the dishes he rated & the ratings he gave to the
                particular dish.
        dish_id= List/Dataframe/Dataset of id'd of dishes.
        """
        print("Please patience. Its takes saveral minutes to complete.")
        
        # list of guest id's from orders  
        guest_id = list(order['guest_to_attend_id'].unique())
        
        self.guest_id = guest_id
        self.dish_id = dish_id
        
        # initializing rating matrix
        guest_order_matrix = np.zeros((len(guests), len(dish_id)), dtype=np.int)
        guest_rating_matrix = np.zeros((len(guests), len(dish_id)), dtype=np.float)
        
        orders_copy = orders.copy # make copy to protect orders 
        
        # Initialize performance display
        for i in range(100):
            print(''.join('-'), end='')
        print('100%')
        percent = 0
        
        while len(orders_copy)>0:
            i = int(orders_copy['guest_order_id'].iloc[0])
            j = int(orders_copy['guest_to_attend_id'].iloc[0])
            guest_ratings =  orders_copy[( orders_copy['guest_order_id']==i) & ( orders_copy['guest_to_attend_id']==j)]
            x = guests.index(j)
            y = dish_id.index(i)
            guest_rating_matrix[x][y]=guest_ratings['guest_order_ratings'].mean()
            guest_order_matrix[x][y]=1
            orders_copy.drop(guest_ratings.index, inplace=True)
            if math.floor((len(orders_copy)/len(orders))*100)==percent:
                print(''.join('*'), end='') # Shows how much data has been processed
                percent += 1
        
        return guest_order_matrix, guest_rating_matrix
    
    
    def get_matrix_mean(self, matrix):
        """
        get_matrix_mean(self, ratings_matrix=matrix)
        List of column-wise means of a matrix 
        
        Parameter:
        matrix= Matrix of which means to be calculated.
        """
        matrix_mean = matrix.mean(axis=0)
        self.matrix_mean = matrix_mean
        return matrix_mean
    
    
    def get_co_occurrence_matrix(self, order_matrix):
        """
        Co-occurence matrix of orders. If any combination of dishes match
        with order adds 1 with the value, otherwise 0.
        
        Parameter:
        order_matrix= matrix of guests orders.
        """
        order_matrix_trans = np.transpose(order_matrix) #Transpose of order matrix
        co_occurrence_matrix = order_matrix_trans.dot(order_matrix) #dot product
        np.fill_diagonal(co_occurrence_matrix, 0) #setting 0 values at the diagonal
        
        # getting sum of co_occurrence matrix column wise & reshape it in n*1 matrix
        co_occurrence_matrix_sum = co_occurrence_matrix.sum(axis=0).reshape(len(co_occurrence_matrix),1)
        
        # deviding co_occurence matrix row wise to co_occurence_matrix_sum
        co_occurrence_matrix_mean=pd.DataFrame(np.array(co_occurrence_matrix)*(1/co_occurrence_matrix_sum))
        
        return co_occurrence_matrix_mean
    
    
    def get_recommendations_by_highest_ratings(self, dish_id, dish, rating_matrix):
        """
        get_recommendations_by_highest_ratings(self, dish_id=list, dish=list, rating_matrix=matrix)
        
        Top 5 recommendations of highest ratings
        
        Parameters:
        dish_id = list of all dish'ids
        dish = list of all the dishes
        rating_matrix = matrix of rating's means of the ratings given by guests to
                        the dishes they ordered.
        """
        rating_matrix_mean = get_matrix_mean(rating_matrix)
        recommendations_by_highest_ratings = get_recommendation_by_pram(dish_id, dish, rating_matrix_mean)
        
        return recommendations_by_highest_ratings
    
    
    def get_recommendations_by_popularity(self, dish_id, dish, order_matrix):
        """
        get_recommendations_by_popularity(self, dish_id=list, dish=list, order_matrix=matrix)
        
        Top 5 recommendations of highest selling dishes
        
        Parameters:
        dish_id = list of all dish'ids
        dish = list of all the dishes
        order_matrix = matrix of 1 or 0 if guest orders that perticular dish or not,
                        respectively.
        """
        order_matrix_mean = get_matrix_mean(order_matrix)
        recommendations_by_popularity = get_recommendation_by_pram(dish_id, dish, order_matrix_mean)
        
        return recommendations_by_popularity
    
    
    def get_recommendations_by_occurence(self, guest_tried_dishes, dish_id, dishes, co_occurrence_matrix):
        """
        get_recommendations_by_occurence(self, guest_tried_dishes=list, dish_id=list, dishes=list, co_occurrence_matrix=list)
        
        Get 5 recommendations on the basis of nearest neighbour method.
        
        Parameters:
        guest_tried_dishes = list id dishes's ids that user aleady tried.
        dish_id = list of all dish'ids
        dish = list of all the dishes
        co_occurrence_matrix = matrix of integers ranges(0, len(dish_id)),
                                depends number of times any combination of 
                                dishes get ordered.
        """
        co_occurrence_list = []
        for i in guest_tried_dishes:
            co_occurrence_data = co_occurrence_matrix.iloc[dish_id.index(i)]
            co_occurrence_list.append(co_occurrence_data)
        
        df_mean = pd.DataFrame(co_occurrence_list).mean()
        
        recommendations_by_occurence = get_recommendation_by_pram(dish_id, dishes, df_mean)
        
        return recommendations_by_occurence

In [7]:
# rating matrix
#df_order[(df_order['guest_order_id']==72) & (df_order['guest_to_attend_id']==2)]['guest_order_ratings'].mean()
start = datetime.datetime.now()
print("start time:", datetime.datetime.now().isoformat())
# getting samples for testing & training data   
df_order_sample_train, df_order_sample_test = train_test_split(df_order, test_size=0.1, train_size=0.2, random_state=100)
guest_sample_train = list(df_order_sample_train['guest_to_attend_id'].unique())

guest_ratings = np.zeros((len(guest_sample_train), len(dish_id)), dtype=np.float)
#percent = 1 # To check how much percent is done.
        
#for i in range(100):
#    print(''.join('-'), end='')

#print('100%')

num=0
num1=0
first=datetime.datetime.now()
'''
for i in guest_sample_train:
    for j in dish_id:
        guest_rating = df_order_sample_train[(df_order_sample_train['guest_order_id']==int(i)) & (df_order_sample_train['guest_to_attend_id']==int(j))]
        if len(guest_rating) < 1:
            continue
        else:
            x = guest_sample_train.index(i)
            y = dish_id.index(j)
            guest_ratings[x][y]=guest_rating['guest_order_ratings'].mean()
            #print("mean", guest_rating['guest_order_ratings'].mean())
            df_order_sample_train.drop(guest_rating.index, inplace=True)
    if num%100 == 0:
        r = ((len(guest_sample_train)-num)/len(guest_sample_train))*100
        print(r, "% remaining. Time taken:", str(datetime.datetime.now()-first))
        first = datetime.datetime.now()
    num+=1
'''

df4 = df_order_sample_train.copy()
#df1= df4['guest_order_id'].to_list()
#df2=df4['guest_to_attend_id'].to_list()
#df3 = df4['guest_order_ratings'].to_list()
row_start = len(df_order_sample_train)
search = True
while len(df4)>0:
    try:
        #print('i m in')
        guest_rating = df4[(df4['guest_order_id']==int(df4['guest_order_id'].iloc[0])) & (df4['guest_to_attend_id']==int(df4['guest_to_attend_id'].iloc[0]))]
        #print('guest rating', guest_rating)
        x = guest_sample_train.index(int(df4['guest_to_attend_id'].iloc[0]))
        #print('x',x)
        y = dish_id.index(int(df4['guest_order_id'].iloc[0]))
        #print('y', y)
        guest_ratings[x][y]=guest_rating['guest_order_ratings'].mean()
        #print('x,y', guest_ratings[x][y])
        df4.drop(guest_rating.index, inplace=True)
        #print('i drop')
        if num1%100==0:
            rem = (len(df4)/len(df_order_sample_train))*100
            print(rem, "% DONE in: ", str(datetime.datetime.now()-first), "ROWS PROCESSED: ", row_start-len(df4))
            first =datetime.datetime.now()
            row_start = len(df4)
        num1+=1
    except:
        search = False
        break
    
#    if (i/len(guest_id))*100 == percent:
#        print(''.join('*'), end='')
#        percent += 1
#print('Finish')
print("finish in:", str(datetime.datetime.now()-start))

start time: 2019-09-02T17:49:04.671485
99.9981366922561 % DONE in:  0:00:00.056016 ROWS PROCESSED:  1
finish in: 0:00:01.141754


In [None]:
def get_guest_orders_matrix(self, guest_id, dish_id, guest_order_id, guest_to_attend_id):
        """
        To get matrix of size(no. of total guests attended,no. of all dish served),
        of values 0 & 1.
        1: If guest placed an order of dish then(guest_id_index, dish_id_index)=1
        0: otherwise
        
        Parameters:
        guest_ids = Dataframe of all the guest's ids.
        dish_ids = Dataframe of all the dish's ids.
        guest_order_id = Dataframe of dish's if of the placed order.
        guest_to_attend_id = Dataframe of guest's id who placed the order
        """
        self.guest_id = guest_id
        self.dish_id = dish_id
        self.guest_order_id = guest_order_id
        self.guest_to_attend_id = guest_to_attend_id
        
        guest_orders = np.zeros((len(guest_id), len(dish_id)), dtype=np.int8)
        percent = 1 # To check how much percent is done.
        
        for i in range(100):
            print(''.join('-'), end='')
            
        print('100%')
        
        for i in range(len(guest_to_attend_id)):
            x = guest_ids.to_list().index(guest_to_attend_id[i])
            y = dish_id.to_list().index(guest_order_id[i])
            guest_orders[int(x)][int(y)]=1
            
            if ((len(guest_to_attend_id)-i)/len(guest_to_attend_id))*100 == percent:
                print(''.join('*'), end='')
                percent += 1
        print('Finish')

In [None]:
orders.pivot_table(values='guest_order_ratings',
    index='guest_to_attend_id',
    columns='guest_order_id',
    aggfunc='mean',
    fill_value=0,
    margins=False,
    dropna=True,
    margins_name='All',)

In [25]:
start = datetime.datetime.now()
op=df_order.pivot_table(values='guest_order_ratings',
    index='guest_to_attend_id',
    columns='guest_order_id',
    aggfunc='mean',
    fill_value=0,
    margins=False,
    dropna=True,
    margins_name='All',)
print("finish in:", str(datetime.datetime.now()-start))

finish in: 0:00:05.143680


In [26]:
op.shape

(9899, 126)

In [29]:
guests = df_order['guest_to_attend_id'].unique()

In [30]:
len(guests)

9899

In [16]:
a1 = np.array([[0,4 ,0,8],[2,1,0,0],[4,0,0,6],[0,0,3,1]])
a2 =a1>1

In [17]:
a2.astype(dtype=int)

array([[0, 1, 0, 1],
       [1, 0, 0, 0],
       [1, 0, 0, 1],
       [0, 0, 1, 0]])

In [None]:
def get_order_binary_matrix(self, orders):
        """
        Matrix of guest's orders of size(no. of total guests attended,no. of all 
        dish served), of values 0 & 1.
        1: If guest placed an order of dish then(guest_id_index, dish_id_index)=1
        0: otherwise
        
        Parameters:
        orders= Dataframe of orders query of guets, carrying guest id of particular
                guest, dish ids of the dishes he rated & the ratings he gave to the
                particular dish.
        dish_id= List/Dataframe/Dataset of id'd of dishes.
        """
        print("Please patience. Its takes saveral minutes to complete.")
        
        # list of guest id's from orders  
        guest_id = list(order['guest_to_attend_id'].unique())
        
        self.guest_id = guest_id
        self.dish_id = dish_id
        
        # initializing rating matrix
        guest_order_matrix = np.zeros((len(guests), len(dish_id)), dtype=np.int)
        guest_rating_matrix = np.zeros((len(guests), len(dish_id)), dtype=np.float)
        
        orders_copy = orders.copy # make copy to protect orders 
        
        # Initialize performance display
        for i in range(100):
            print(''.join('-'), end='')
        print('100%')
        percent = 0
        
        while len(orders_copy)>0:
            i = int(orders_copy['guest_order_id'].iloc[0])
            j = int(orders_copy['guest_to_attend_id'].iloc[0])
            guest_ratings =  orders_copy[( orders_copy['guest_order_id']==i) & ( orders_copy['guest_to_attend_id']==j)]
            x = guests.index(j)
            y = dish_id.index(i)
            guest_rating_matrix[x][y]=guest_ratings['guest_order_ratings'].mean()
            guest_order_matrix[x][y]=1
            orders_copy.drop(guest_ratings.index, inplace=True)
            if math.floor((len(orders_copy)/len(orders))*100)==percent:
                print(''.join('*'), end='') # Shows how much data has been processed
                percent += 1
        
        return guest_order_matrix, guest_rating_matrix

In [24]:
def get_ratings_matrix(orders):
        """      
        Matrix of ratings given by guests to the perticular dishes. If a guest gives
        multiple time ratings to a dish, it takes mean of all ratings.
        0.0 otherwise.
        
        Parameters:
        orders= Dataframe of orders query of guets, carrying guest id of particular
                guest, dish ids of the dishes he rated & the ratings he gave to the
                particular dish.
        dish_id= List/Dataframe/Dataset of id'd of dishes.
        """
        
        guest_rating_matrix=orders.pivot_table(values='guest_order_ratings',
                                index='guest_to_attend_id',
                                columns='guest_order_id',
                                aggfunc='mean',
                                fill_value=0,
                                margins=False,
                                dropna=True,
                                margins_name='All',)
        
        guest_order_matrix = (guest_rating_matrix>0).astype(dtype=int)
        
        return guest_rating_matrix

In [21]:
rc = Recommendations()

In [22]:
xtr, xts = rc.get_training_test_data(0.25, 0.1, 100)

In [27]:
bin_xtr=get_ratings_matrix(xtr)>0

In [29]:
bt = bin_xtr.astype(dtype=int)

In [30]:
np.dot(bt.transpose(), bt)

array([[514,  61,  40, ...,  52,   9,   6],
       [ 61, 528,  41, ...,  31,  12,  13],
       [ 40,  41, 526, ...,  35,   7,  12],
       ...,
       [ 52,  31,  35, ..., 504,   7,   7],
       [  9,  12,   7, ...,   7, 181,   3],
       [  6,  13,  12, ...,   7,   3, 197]])

In [23]:
np.eye()

False

In [34]:
xam(pd.DataFrame(a1))

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().