In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
data = pd.read_csv("../data/resturants_singapore_yelp.csv")

# retrieve wanted columns
features = ['id', 'categories']
restaurant_matrix = data[features]

# change categories data type from str -> list
restaurant_matrix['categories'] = restaurant_matrix['categories'].apply(literal_eval)

# Perform One-Hot Encoding
mlb = MultiLabelBinarizer(sparse_output=True)

restaurant_matrix = restaurant_matrix.join(
            pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(restaurant_matrix.pop('categories')),
                index=restaurant_matrix.index,
                columns=mlb.classes_))

restaurant_matrix.memory_usage()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  restaurant_matrix['categories'] = restaurant_matrix['categories'].apply(literal_eval)


Index            128
id             21160
afghani            8
african            8
arts               8
               ...  
waffles           24
wine_bars         88
wineries          16
womenscloth        8
yakitori          32
Length: 165, dtype: int64

In [3]:
type(restaurant_matrix)

pandas.core.frame.DataFrame

In [10]:
restaurant_matrix.loc[restaurant_matrix['id'] == "eCWTbl5onXRM_5z3zGTtew"].sum(axis=1)

  restaurant_matrix.loc[restaurant_matrix['id'] == "MkGYR-ijzg2lpVK_nkemQg"].sum(axis=1)
  restaurant_matrix.loc[restaurant_matrix['id'] == "MkGYR-ijzg2lpVK_nkemQg"].sum(axis=1)


3    2
dtype: int64

User Matrix

In [5]:
user_input = ['5Z0DVwlOtra6CSx_HWf04Q', 'b-q1UdGaEazAxhCtJeKRKQ', 'M-PobdAR2gJhPyLR_WzMCQ']

user_matrix = restaurant_matrix.loc[data['id'].isin(user_input)]

user_matrix

Unnamed: 0,id,afghani,african,arts,asianfusion,australian,bagels,bakeries,bars,bbq,...,turkish,vegan,vegetarian,venues,vietnamese,waffles,wine_bars,wineries,womenscloth,yakitori
2,5Z0DVwlOtra6CSx_HWf04Q,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0
13,b-q1UdGaEazAxhCtJeKRKQ,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
82,M-PobdAR2gJhPyLR_WzMCQ,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


User Profile

In [6]:
user_profile = user_matrix.sum(axis='rows')[1:]

# converting numpy.int32 -> int
user_profile = user_profile.apply(lambda x: x.item() if isinstance(x, np.int32) else x)

# adding weights to each category
total_score = user_profile.sum()
user_profile = user_profile.apply(lambda x : x/total_score if isinstance(x, int) else x)


type(user_profile)

pandas.core.series.Series

Weighted Restaurant Matrix

In [19]:
weighted_restaurant_matrix = restaurant_matrix.copy()

for alias, weight in user_profile.items():
    weighted_restaurant_matrix[alias] = weighted_restaurant_matrix[alias].apply(lambda x : x * user_profile[alias])

weighted_restaurant_matrix['weighted_average'] = weighted_restaurant_matrix.sum(axis=1)

weighted_restaurant_matrix

  weighted_restaurant_matrix['weighted_average'] = weighted_restaurant_matrix.sum(axis=1)
  weighted_restaurant_matrix['weighted_average'] = weighted_restaurant_matrix.sum(axis=1)


Unnamed: 0,id,afghani,african,arts,asianfusion,australian,bagels,bakeries,bars,bbq,...,vegan,vegetarian,venues,vietnamese,waffles,wine_bars,wineries,womenscloth,yakitori,weighted_average
0,oW2PgLgibBGo1bSVWW4tRw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,p7o1QXgRLBmba8qDb7h6DA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,5Z0DVwlOtra6CSx_HWf04Q,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714
3,MkGYR-ijzg2lpVK_nkemQg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,x4cC5VI54ZnOrkMy3Zk5xw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2640,zhwRY36CieIxV1WDv4MjFg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2641,KAZY1yUamggANWXuFG2m9w,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2642,4TkfHJ1RvMcGk-eGpFqAsg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2643,JxZweJ0T-1LTBbuPq56zXg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


Restaurant Recommendations

In [20]:
weighted_restaurant_matrix.sort_values("weighted_average", ascending=False)[['id', 'weighted_average']].head(10)

Unnamed: 0,id,weighted_average
13,b-q1UdGaEazAxhCtJeKRKQ,0.571429
2018,eCWTbl5onXRM_5z3zGTtew,0.428571
82,M-PobdAR2gJhPyLR_WzMCQ,0.428571
2028,6MIcHNUrsAS6BxHIAsKSXA,0.428571
2185,37jl3pgAjRxvr1d4gz_L3A,0.428571
763,tk2M4DzOo-KLHFFRH96g4Q,0.285714
990,D2Fn1MAv_pugKsDKjDsHnA,0.285714
1281,aFiCaAyKxBB6PI3hyV3cRQ,0.285714
474,8GqMPYFlhilRnqht7B_sdg,0.285714
2213,Zcaf2RPoQIwi7nazM7nTiQ,0.285714


In [33]:
data.iloc[1324, :]

id                                          boT6KjtpTc_4xzJ66oWPjA
alias                                        you-and-mee-singapore
name                                                     You & Mee
image_url        https://s3-media2.fl.yelpcdn.com/bphoto/BQ8lI1...
is_closed                                                    False
url              https://www.yelp.com/biz/you-and-mee-singapore...
review_count                                                     1
categories                                             ['chinese']
rating                                                         1.0
coordinates                                 [1.42942, 103.8358994]
transactions                                                    []
location                                          930 Yishun Ave 2
phone                                                 6567593153.0
display_phone                                        +65 6759 3153
distance                                               2077.24

Future Implementations:

* Features can be improved. Features only consist of alias keys within category.
* Limit recommendation to user's search area
* Only recommend restaurant that is open
* Other features such as review_count and rating can be considered