In [None]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator
%matplotlib inline

In [None]:
# Load the raw order data. The file is referenced by an absolute path, so the
# path must be adjusted when running where /content/resto.zip does not exist.
data_resto = pd.read_csv("/content/resto.zip")
# Preview the first rows.
data_resto.head()

Unnamed: 0,customer_id,gender,latitude_x,longitude_x,latitude_y,longitude_y,vendor_category_en,delivery_charge,serving_distance,commission,delivery_available,discount_percentage,language,rank,restaurent_rating,restaurent_tag_name,restaurant_id
0,SZ5JI7X,male,-0.505,0.095,0.6187,0.5273,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,11,4.5,"American,Burgers,Free Delivery,Hot Dogs,Pasta",106
1,WKYG878,male,0.2096,0.281,-0.601,0.096,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,1,4.4,"American,Burgers,Desserts,Mojitos ,Pasta",90
2,UXCWXNG,male,0.1357,-78.6,0.6187,0.5273,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,11,4.5,"American,Burgers,Free Delivery,Hot Dogs,Pasta",106
3,B9HSJBN,male,-0.88,0.0755,-0.601,0.096,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,1,4.4,"American,Burgers,Desserts,Mojitos ,Pasta",90
4,K3RGL6T,male,0.3879,0.5815,-0.115,0.546,Restaurants,0.7,15.0,0.0,Yes,0.0,EN,11,4.3,"American,Burgers,Fries,Sandwiches",43


In [None]:
# Missing-value count per column.
data_resto.isna().sum()

customer_id            0
gender                 0
latitude_x             0
longitude_x            0
latitude_y             0
longitude_y            0
vendor_category_en     0
delivery_charge        0
serving_distance       0
commission             0
delivery_available     0
discount_percentage    0
language               0
rank                   0
restaurent_rating      0
restaurent_tag_name    0
restaurant_id          0
dtype: int64

In [None]:
# (number of rows, number of columns) of the loaded frame.
data_resto.shape

(100000, 17)

In [None]:
def generate_id(df, column_name, id_column):
    """Add a dense integer id column derived from the values of another column.

    Each distinct value of ``df[column_name]`` is numbered 0, 1, 2, ... in
    order of first appearance, and that number is written to
    ``df[id_column]`` for every row holding the value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify **in place**.
    column_name : str
        Existing column whose values are encoded.
    id_column : str
        Column to create (or overwrite) with the integer codes.

    Returns
    -------
    None
        The result is stored on ``df``.

    Notes
    -----
    Missing values (NaN/None) in ``column_name`` receive the code ``-1``.
    """
    # pd.factorize is hash based, so the encoding is linear in the number of
    # rows; looking every row up with list.index() was
    # O(rows * distinct values).
    codes, _ = pd.factorize(df[column_name])
    df[id_column] = codes

def similar_user_recs(user, interaction_mat, user_sim, n_neighbors=10, n_recs=5):
    """Recommend the items most often favoured by the users closest to ``user``.

    For each of the ``n_neighbors`` users with the highest similarity to
    ``user``, the item(s) carrying that neighbour's maximum interaction score
    receive one vote. Items are then ranked by vote count.

    Parameters
    ----------
    user : hashable
        Label of the target user; must be a column of ``interaction_mat``.
    interaction_mat : pandas.DataFrame
        Item x user matrix (items on the index, users on the columns).
    user_sim : pandas.DataFrame
        User x user similarity matrix labelled with the same user labels.
    n_neighbors : int, default 10
        How many most-similar users cast votes.
    n_recs : int, default 5
        Maximum number of (item, votes) pairs returned.

    Returns
    -------
    list of (item, int) or str
        Up to ``n_recs`` pairs sorted by descending vote count (ties keep
        the order in which items were first voted for). If ``user`` is not a
        column of ``interaction_mat`` a message string is returned instead.
    """
    if user not in interaction_mat.columns:
        return('No data available on user {}'.format(user))

    # Remove the target user explicitly rather than assuming they sort first:
    # any other user with an identical history also has similarity 1.0, so
    # skipping "the first row" could drop a real neighbour and keep the user.
    # Working on the single column also avoids re-sorting the whole matrix.
    neighbours = (
        user_sim[user]
        .drop(labels=user, errors='ignore')
        .nlargest(n_neighbors)
        .index
    )

    votes = {}
    for neighbour in neighbours:
        scores = interaction_mat.loc[:, neighbour]
        top_score = scores.max()
        # Every item tied for this neighbour's maximum gets one vote.
        for item in scores.index[scores == top_score].tolist():
            votes[item] = votes.get(item, 0) + 1

    ranked = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:n_recs]

In [None]:
# Encode each distinct customer_id as an integer; generate_id writes the new
# 'id_customer' column directly onto data_resto (in-place mutation).
generate_id(data_resto, 'customer_id', 'id_customer')
# Preview the frame with the added column.
data_resto.head()

Unnamed: 0,customer_id,gender,latitude_x,longitude_x,latitude_y,longitude_y,vendor_category_en,delivery_charge,serving_distance,commission,delivery_available,discount_percentage,language,rank,restaurent_rating,restaurent_tag_name,restaurant_id,id_customer
0,SZ5JI7X,male,-0.505,0.095,0.6187,0.5273,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,11,4.5,"American,Burgers,Free Delivery,Hot Dogs,Pasta",106,0
1,WKYG878,male,0.2096,0.281,-0.601,0.096,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,1,4.4,"American,Burgers,Desserts,Mojitos ,Pasta",90,1
2,UXCWXNG,male,0.1357,-78.6,0.6187,0.5273,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,11,4.5,"American,Burgers,Free Delivery,Hot Dogs,Pasta",106,2
3,B9HSJBN,male,-0.88,0.0755,-0.601,0.096,Restaurants,0.0,15.0,0.0,Yes,0.0,EN,1,4.4,"American,Burgers,Desserts,Mojitos ,Pasta",90,3
4,K3RGL6T,male,0.3879,0.5815,-0.115,0.546,Restaurants,0.7,15.0,0.0,Yes,0.0,EN,11,4.3,"American,Burgers,Fries,Sandwiches",43,4


In [None]:
# Distinct restaurant ids found in the data.
list_idrestoran = list(pd.unique(data_resto['restaurant_id']))
# Show the first ten of them.
list_idrestoran[0:10]

[106, 90, 43, 82, 189, 4, 191, 192, 157, 33]

In [None]:
# Distinct integer customer ids (the 'id_customer' codes).
list_pelanggan = list(pd.unique(data_resto['id_customer']))
# Show the first ten of them.
list_pelanggan[0:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
# Count how many rows of data_resto exist for every (customer, restaurant)
# pair. One crosstab pass replaces the nested loop that re-filtered the whole
# frame once per pair.
order_counts = pd.crosstab(data_resto['id_customer'], data_resto['restaurant_id'])
# Align rows and columns with list_pelanggan / list_idrestoran so the ordering
# is the same as the loop produced; pairs with no rows count as 0.
order_counts = order_counts.reindex(index=list_pelanggan, columns=list_idrestoran, fill_value=0)
# Keys are restaurant ids as strings; each value holds that restaurant's
# per-customer counts in list_pelanggan order.
interaction_dict = {str(id_name): order_counts[id_name].tolist() for id_name in list_idrestoran}

In [None]:
# One column per restaurant (keys of interaction_dict), one row per position
# in the per-customer count lists.
interaction_dataset = pd.DataFrame(interaction_dict)
# Display the customer x restaurant count table.
interaction_dataset

Unnamed: 0,106,90,43,82,189,4,191,192,157,33,28,44,105,289,13,265,104,148,110
0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,1
2,2,2,2,1,0,2,1,1,0,0,1,1,0,0,0,1,0,1,1
3,1,1,1,0,0,0,0,0,1,1,0,1,1,0,1,0,0,1,0
4,1,0,1,2,1,2,2,1,2,0,1,1,1,0,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15262,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
15263,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
15264,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
15265,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Build the restaurant x customer orientation directly from interaction_dict.
# Re-assigning `interaction_dataset = interaction_dataset.T` flipped the
# orientation again on every re-run of this cell; deriving it from the dict
# makes the cell idempotent while producing the same frame.
interaction_dataset = pd.DataFrame(interaction_dict).T
interaction_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,15227,15228,15229,15230,15231,15232,15233,15234,15235,15236,15237,15238,15239,15240,15241,15242,15243,15244,15245,15246,15247,15248,15249,15250,15251,15252,15253,15254,15255,15256,15257,15258,15259,15260,15261,15262,15263,15264,15265,15266
106,1,0,2,1,1,0,1,0,0,0,0,1,1,0,0,1,2,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,0,1,4,0,1,...,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
90,0,1,2,1,0,2,2,0,0,0,0,0,1,0,1,1,0,0,0,0,1,1,3,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
43,0,1,2,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,2,0,0,2,0,0,...,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
82,1,1,1,0,2,2,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,1,1,1,0,0,0,0,3,1,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
189,0,1,0,0,1,0,1,0,1,0,1,0,0,0,1,2,0,0,0,1,0,1,2,0,0,1,0,0,3,0,1,1,0,0,1,0,1,3,1,2,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
4,0,1,2,0,2,2,1,0,0,1,0,0,0,0,0,2,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,2,0,3,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
191,0,1,1,0,2,2,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,2,1,2,1,1,1,0,1,0,5,0,0,...,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0
192,0,0,1,0,1,2,0,2,0,0,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,1,1,2,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
157,1,0,0,1,2,1,0,1,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,2,1,1,0,0,2,1,0,...,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
33,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,0,2,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Store the interaction counts as a compressed sparse row matrix.
interaction_values = interaction_dataset.to_numpy()
inter_sparse_data = sp.sparse.csr_matrix(interaction_values)
inter_sparse_data

<19x15267 sparse matrix of type '<class 'numpy.longlong'>'
	with 90005 stored elements in Compressed Sparse Row format>

In [None]:
# Transpose so that each row is one customer; cosine_similarity then compares
# every pair of rows, giving a customer x customer matrix.
# NOTE(review): the result is a square matrix over all customers, so memory
# grows quadratically with the customer count — confirm it fits before scaling up.
user_similarity = cosine_similarity(inter_sparse_data.T)
# Label both axes with the customer ids (the columns of interaction_dataset).
user_sim = pd.DataFrame(user_similarity, index = interaction_dataset.columns, columns = interaction_dataset.columns)
user_sim

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,15227,15228,15229,15230,15231,15232,15233,15234,15235,15236,15237,15238,15239,15240,15241,15242,15243,15244,15245,15246,15247,15248,15249,15250,15251,15252,15253,15254,15255,15256,15257,15258,15259,15260,15261,15262,15263,15264,15265,15266
0,1.000000,0.166667,0.306186,0.333333,0.588348,0.283473,0.129099,0.433013,0.447214,0.000000,0.500000,0.250000,0.176777,0.223607,0.158114,0.464238,0.235702,0.250000,0.000000,0.333333,0.000000,0.500000,0.092848,0.377964,0.000000,0.447214,0.188982,0.250000,0.218218,0.000000,0.166667,0.121268,0.416025,0.223607,0.223607,0.000000,0.204124,0.429058,0.530330,0.182574,...,0.000000,0.353553,0.000000,0.707107,0.000000,0.000000,0.353553,0.000000,0.500000,0.000000,0.000000,0.000000,0.500000,0.500000,0.000000,0.000000,0.500000,0.0,0.000000,0.500000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.166667,1.000000,0.680414,0.333333,0.588348,0.692935,0.688530,0.384900,0.447214,0.500000,0.500000,0.166667,0.353553,0.447214,0.632456,0.618984,0.392837,0.333333,0.000000,0.666667,0.577350,0.444444,0.680883,0.251976,0.166667,0.298142,0.629941,0.166667,0.581914,0.333333,0.444444,0.565916,0.462250,0.298142,0.447214,0.623610,0.136083,0.730988,0.353553,0.547723,...,0.333333,0.000000,0.333333,0.000000,0.000000,0.235702,0.235702,0.471405,0.000000,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,0.235702,0.333333,0.0,0.000000,0.000000,0.000000,0.000000,0.333333,0.235702,0.000000,0.471405,0.333333,0.333333,0.000000,0.0,0.333333,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.000000,0.333333,0.333333
2,0.306186,0.680414,1.000000,0.544331,0.680545,0.771517,0.737865,0.412479,0.182574,0.510310,0.204124,0.510310,0.649519,0.456435,0.516398,0.492764,0.433013,0.306186,0.000000,0.476290,0.471405,0.544331,0.568574,0.385758,0.306186,0.182574,0.617213,0.408248,0.356348,0.204124,0.272166,0.544581,0.509525,0.456435,0.593366,0.600099,0.333333,0.739574,0.360844,0.447214,...,0.204124,0.433013,0.204124,0.000000,0.000000,0.288675,0.144338,0.433013,0.000000,0.204124,0.408248,0.204124,0.408248,0.000000,0.000000,0.144338,0.204124,0.0,0.204124,0.000000,0.000000,0.204124,0.408248,0.144338,0.000000,0.144338,0.204124,0.408248,0.000000,0.0,0.204124,0.204124,0.204124,0.000000,0.408248,0.408248,0.204124,0.000000,0.000000,0.000000
3,0.333333,0.333333,0.544331,1.000000,0.457604,0.440959,0.602464,0.192450,0.000000,0.166667,0.000000,0.333333,0.353553,0.298142,0.316228,0.495188,0.628539,0.000000,0.333333,0.444444,0.192450,0.444444,0.557086,0.503953,0.500000,0.298142,0.629941,0.333333,0.145479,0.166667,0.222222,0.323381,0.554700,0.298142,0.596285,0.178174,0.408248,0.603860,0.235702,0.426006,...,0.000000,0.235702,0.000000,0.235702,0.333333,0.235702,0.235702,0.235702,0.000000,0.000000,0.333333,0.000000,0.333333,0.333333,0.333333,0.000000,0.000000,0.0,0.333333,0.000000,0.000000,0.333333,0.000000,0.235702,0.333333,0.235702,0.000000,0.000000,0.333333,0.0,0.000000,0.333333,0.000000,0.333333,0.333333,0.000000,0.333333,0.333333,0.000000,0.000000
4,0.588348,0.588348,0.680545,0.457604,1.000000,0.778312,0.557007,0.679366,0.526235,0.392232,0.588348,0.490290,0.416025,0.438529,0.558156,0.691939,0.369800,0.490290,0.000000,0.653720,0.339683,0.653720,0.437014,0.444750,0.294174,0.526235,0.370625,0.294174,0.513553,0.294174,0.588348,0.570782,0.598321,0.701646,0.526235,0.419314,0.400320,0.822753,0.624038,0.465475,...,0.000000,0.277350,0.392232,0.416025,0.196116,0.277350,0.416025,0.416025,0.196116,0.196116,0.196116,0.196116,0.196116,0.392232,0.196116,0.138675,0.392232,0.0,0.196116,0.196116,0.196116,0.196116,0.392232,0.277350,0.000000,0.000000,0.392232,0.392232,0.196116,0.0,0.392232,0.196116,0.196116,0.196116,0.196116,0.392232,0.196116,0.196116,0.196116,0.196116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15262,0.000000,0.333333,0.408248,0.000000,0.392232,0.377964,0.258199,0.000000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.371391,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.185695,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.242536,0.000000,0.447214,0.223607,0.534522,0.000000,0.286039,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000
15263,0.000000,0.000000,0.204124,0.333333,0.196116,0.188982,0.000000,0.000000,0.000000,0.500000,0.000000,0.000000,0.353553,0.000000,0.316228,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.333333,0.000000,0.377964,0.000000,0.000000,0.377964,0.000000,0.000000,0.000000,0.000000,0.485071,0.554700,0.447214,0.000000,0.000000,0.000000,0.190693,0.353553,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000
15264,0.000000,0.000000,0.000000,0.333333,0.196116,0.188982,0.258199,0.288675,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.235702,0.000000,0.000000,0.000000,0.000000,0.333333,0.185695,0.377964,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.000000,0.000000,0.223607,0.000000,0.408248,0.095346,0.000000,0.182574,...,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.707107,0.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000
15265,0.000000,0.333333,0.000000,0.000000,0.196116,0.000000,0.258199,0.000000,0.447214,0.000000,0.500000,0.000000,0.000000,0.000000,0.316228,0.371391,0.000000,0.000000,0.000000,0.333333,0.000000,0.333333,0.371391,0.000000,0.000000,0.447214,0.000000,0.000000,0.654654,0.000000,0.333333,0.242536,0.000000,0.000000,0.223607,0.000000,0.408248,0.286039,0.353553,0.365148,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.000000


In [None]:
# Recommendations for customer id 0: each tuple is (restaurant id, number of
# similar customers for whom that restaurant has their highest order count).
similar_user_recs(0, interaction_dataset, user_sim)

[('82', 10), ('104', 10), ('157', 8), ('106', 7), ('105', 1)]