In [35]:
import mysql.connector as connection
import pandas as pd
import numpy as np
import pickle

In [36]:
# Establish connection with MySQL DB
mydb = connection.connect(host="localhost", port="3306", user="root", password="", database="food_ordering_db")

In [37]:
# Accessing food table
query = """Select *
						FROM `food`;"""
food = pd.read_sql(query,mydb)
print(food.head())

# Accessing user table
query = """Select *
						FROM `user`;"""
users = pd.read_sql(query,mydb)
print(users.head())

   foodID  restaurantID                      foodName  price  stock image
0       1             1  Mala Grilled Burmese Chicken  10000      2      
1       2             1             Mala French Fries   3500      2      
2       3             1         Rakhine Seafood Salad   6000      2      
3       4             1           Steamed Chicken Leg   5000      2      
4       5             1            Mala Grilled Prawn  12000      2      
   userID  userRoleID firstName lastName                   email password  \
0       1           1     James    Smith    JamesSmith@gmail.com    12345   
1       2           1   Michael    Smith  MichaelSmith@gmail.com    12345   
2       3           1    Robert    Smith   RobertSmith@gmail.com    12345   
3       4           1     David    Smith    DavidSmith@gmail.com    12345   
4       5           1     James  Johnson  JamesJohnson@gmail.com    12345   

  address  latitude  longitude  
0             1.000      1.000  
1             1.000      1.

In [38]:
# Accessing userID and foodID data with ratings
query = """Select o.foodorderID, o.foodID, o.rating, c.userID 
						FROM `foodorder` o, `cart` c
						WHERE o.cartID = c.cartID;"""
orders = pd.read_sql(query,mydb)
print(orders.head())
print(list(orders.columns))
print("No. of orders - ", len(orders))

   foodorderID  foodID  rating  userID
0            1     283       3     216
1            2    6742       4      41
2            3    3669      10     354
3            4    3910       2     310
4            5     291       3     216
['foodorderID', 'foodID', 'rating', 'userID']
No. of orders -  314


In [39]:
# Merge order and food queries
combine_food_order = pd.merge(orders, food, on="foodID")
print(combine_food_order.head())
print("No. of combine_food_order - ", len(combine_food_order))

   foodorderID  foodID  rating  userID  restaurantID  \
0            1     283       3     216             7   
1            2    6742       4      41           219   
2            3    3669      10     354           121   
3           33    3669       2     354           121   
4            4    3910       2     310           129   

                        foodName  price  stock image  
0  Sour and Spicy Crab Meat Soup   4500      2        
1                  Yakhine Salad    800      2        
2            Plain Water Spinach   1200      2        
3            Plain Water Spinach   1200      2        
4                    Kiwi Yogurt   1800      2        
No. of combine_food_order -  314


In [40]:
print(f'Amount of unique foodIDs - {len(pd.unique(combine_food_order["foodID"]))}')
print(f'Amount of unique userIDs - {len(pd.unique(combine_food_order["userID"]))}')

Amount of unique foodIDs - 250
Amount of unique userIDs - 51


In [41]:
with open("combined_food_order" + ".csv", "w") as f:
	f.write(combine_food_order.to_csv())

Getting the number of times food was ordered

In [42]:
# combine_food_order = combine_food_order.dropna(axis = 0, subset = ['bookTitle'])
food_order_count = (combine_food_order.
     groupby(by = ["foodID"]).
     count().
     reset_index().
     sort_values(by = ["foodorderID"]).
     rename(columns = {"foodorderID": "totalCount"})
     [["foodID", "totalCount"]]
     )
food_order_count.tail()

Unnamed: 0,foodID,totalCount
179,4962,4
166,4917,4
83,2281,4
32,859,5
228,6908,6


In [43]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(food_order_count["totalCount"].describe())

count   250.000
mean      1.256
std       0.716
min       1.000
25%       1.000
50%       1.000
75%       1.000
max       6.000
Name: totalCount, dtype: float64


In [44]:
order_with_total_count = combine_food_order.merge(food_order_count, left_on = 'foodID', right_on = 'foodID', how = 'left')
order_with_total_count["has_ordered"] = 5
print(order_with_total_count.loc[order_with_total_count["foodName"] == "Noodle Salad"])
# order_with_total_count.head()
print("No. of order_with_total_count - ", len(order_with_total_count))

     foodorderID  foodID  rating  userID  restaurantID      foodName  price  \
283          271    4948       6      97           167  Noodle Salad   1200   
289          282    3144       4      13           106  Noodle Salad    700   

     stock image  totalCount  has_ordered  
283      2                 1            5  
289      2                 1            5  
No. of order_with_total_count -  314


In [45]:
with open("order_with_total_count" + ".csv", "w") as f:
	f.write(order_with_total_count.to_csv())

In [46]:
from scipy.sparse import csr_matrix
order_with_total_count = order_with_total_count.drop_duplicates(['userID', 'foodName'])
# order_with_total_count_pivot = order_with_total_count.pivot(index = 'foodName', columns = 'userID', values = 'has_ordered').fillna(0)
order_with_total_count_pivot = order_with_total_count.pivot(index = 'foodName', columns = 'userID', values = 'has_ordered').fillna(0)
order_with_total_count_foodID_pivot = order_with_total_count.pivot(index = 'foodID', columns = 'userID', values = 'has_ordered').fillna(0)
order_with_total_count_matrix = csr_matrix(order_with_total_count_foodID_pivot.values)

from sklearn.neighbors import NearestNeighbors


model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(order_with_total_count_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [47]:
# Its important to use binary mode 
knnPickle = open('knnpickle_file', 'wb') 
# source, destination 
pickle.dump(model_knn, knnPickle)  
# close the file
knnPickle.close()

In [61]:
with open("order_with_total_count_pivot" + ".csv", "w") as f:
	f.write(order_with_total_count_pivot.to_csv())
with open("order_with_total_count_pivot_id" + ".csv", "w") as f:
	f.write(order_with_total_count_foodID_pivot.to_csv())
with open("query_index_to_foodID" + ".csv", "w") as f:
	f.write("query_index,foodID\n")
	pivot_foodID_list = list(order_with_total_count_foodID_pivot.index)
	for i in range(len(pivot_foodID_list)):
		f.write(f"{i},{pivot_foodID_list[i]}\n")

In [49]:
# query_index = np.random.choice(order_with_total_count_pivot.shape[0])
# print(query_index)
# query_index = 124
query_index = 32		#(points to foodID 4924 Real Milk)
distances, indices = model_knn.kneighbors(order_with_total_count_foodID_pivot.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 6)

order_with_total_count_foodID_pivot.iloc[query_index,:].values.reshape(1,-1)

array([[5., 0., 0., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 5., 5.]])

In [50]:
foodID = order_with_total_count_foodID_pivot.index[query_index]
foodName = food.foodName[foodID-1]
print(foodID)
print(foodName)

859
Red Velvet Cake


In [51]:
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(foodName))
    else:
        foodID = order_with_total_count_foodID_pivot.index[indices.flatten()[i]]
        foodName = food.foodName[foodID-1]
        print('{0}: {1} {2}, with distance of {3}:'.format(i, foodID, foodName, distances.flatten()[i]))

Recommendations for Red Velvet Cake:

1: 861 White Chocolate Iced, with distance of 0.1339745962155614:
2: 860 Powdered Blueberry Filled, with distance of 0.1339745962155614:
3: 215 Rice with Oiled Fish Curry, with distance of 0.5:
4: 220 Rice with Prawn Curry, with distance of 0.5:
5: 221 Rice with Fried Chicken, with distance of 0.5:
