In [1]:
import pandas as pd 
import numpy as np 
import random
import math
from math import radians, cos, sin, asin, sqrt

import matplotlib.pyplot as plt # data visualization
import seaborn as sns
%matplotlib inline

# display trick to display all columns of large dataframes
from IPython.display import display
# Render every column of wide DataFrames, and switch off pandas'
# chained-assignment warning for the whole session.
pd.options.display.max_columns = None
pd.set_option('mode.chained_assignment', None)

### Loading network data

In [2]:
# Load the table that joins business, review and friend columns.
network_df = pd.read_csv('yelp_dataset/yelp_network_data.csv')
print(network_df.shape)

# Turn the comma-separated `friends` string into a list of user ids.
# Each id is stripped: when the raw field is ', '-separated, a bare split(',')
# leaves a leading space on every id after the first, and those ids can then
# never equal a `user_id`. Empty tokens are dropped as well.
network_df['friends'] = network_df['friends'].apply(
    lambda raw: [friend_id.strip() for friend_id in raw.split(',') if friend_id.strip()]
)

# NOTE: `network_df_list` is an alias of `network_df`, not a copy. Both names
# refer to the same frame, whose `friends` column now holds lists.
network_df_list = network_df
network_df_list.head()

(334326, 23)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,review_id,user_id,review_stars,useful,funny,cool,text,date,friends
0,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726648,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",EY8t3ndAZo2vWY7eeOnVLw,nKBtfZ93gPYybGEz2QOvTQ,5.0,0.0,1.0,0.0,UMGS does an amazing job serving the community...,2016-01-30 03:26:19,"[fnlLgrXzfFZhMqA5G8MeJQ, WzKaL2lws_-wSnwYU_II..."
1,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726648,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",uoujAVvyx-GLyQnByuon0w,UjVtviHTm2mgZnXCfl33CQ,5.0,0.0,0.0,0.0,I called Connie needing some stuff done on a S...,2016-03-02 20:15:09,"[cssHBZ55fJ8hHXOYfW1o5w, EBrLq4Bi2sFKkOUC80P1..."
2,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726648,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",pECK3p9w7m-_xEp--lGxHg,L498DJb5YDAtoqgv9thWCg,5.0,0.0,0.0,0.0,We are selling our home and needed our back fa...,2017-06-07 18:24:25,"[cTi5rF54rDsffJhGUjqC5Q, kuk9VQFkn42GKCcmaLBk..."
3,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726648,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",981LKlJg8emqRshseIdMrA,L498DJb5YDAtoqgv9thWCg,5.0,2.0,2.0,1.0,We are selling our home and needed our back fa...,2018-04-04 21:39:04,"[cTi5rF54rDsffJhGUjqC5Q, kuk9VQFkn42GKCcmaLBk..."
4,b8QAxQUBy14H6AJt7MUMgw,Anytime Auto Glass,"2659 W Guadalupe Rd, Ste D-202",Mesa,AZ,85202,33.363646,-111.892016,4.5,90.0,1.0,"{'ByAppointmentOnly': 'True', 'WiFi': ""u'free'...","Automotive, Home Services, Auto Glass Services...","{'Monday': '6:0-18:0', 'Tuesday': '6:0-18:0', ...",xKVw-kj0ia4XLrK6war2Ow,L498DJb5YDAtoqgv9thWCg,5.0,1.0,0.0,0.0,"Thanks to Rey and Marty, they replaced a winds...",2018-02-08 21:46:59,"[cTi5rF54rDsffJhGUjqC5Q, kuk9VQFkn42GKCcmaLBk..."


### Recommender 
For a given user and home-services category, recommends up to $k$ businesses in that category that the user's friends (or the user) have reviewed, ranked by the businesses' star ratings. Only businesses in the user's locale are considered; the locale is inferred from the location of a business the user has reviewed, and a business counts as local if it lies within 50 km of that location.

#### Distance function

In [3]:
AVG_EARTH_RADIUS = 6371  # in km

def haversine(point1, point2):
    '''
    Calculate the great-circle distance between two points on the Earth's surface.

    point1, point2: two tuples, each holding (latitude, longitude) in decimal degrees
    Returns the distance between point1 and point2 in kilometers.
    NaN coordinates propagate to a NaN result.
    '''
    # unpack latitude/longitude
    lat1, lng1 = point1
    lat2, lng2 = point2

    # convert all latitudes/longitudes from decimal degrees to radians
    lat1, lng1, lat2, lng2 = map(radians, (lat1, lng1, lat2, lng2))

    # haversine term
    lat = lat2 - lat1
    lng = lng2 - lng1
    d = sin(lat * 0.5) ** 2 + cos(lat1) * cos(lat2) * sin(lng * 0.5) ** 2

    # Rounding can push d a few ulps above 1 for (near-)antipodal points, which
    # would make asin() raise "math domain error". Written as a comparison so
    # that a NaN value is left untouched rather than replaced by the bound.
    if d > 1.0:
        d = 1.0

    h = 2 * AVG_EARTH_RADIUS * asin(sqrt(d))
    return h  # in kilometers

In [30]:
def recommend(user_id, category, k, max_distance_km=50):
    '''
    Recommend up to k businesses in a category, drawn from the businesses that
    the user or the user's friends have reviewed.

    user_id: id of the user to recommend for; must appear in network_df_list['user_id']
    category: category name, matched as a literal, case-insensitive substring of
        the `categories` column (not as a regular expression)
    k: maximum number of businesses to return
    max_distance_km: keep only businesses within this haversine distance of the
        business in the user's first review row (default 50)

    Returns a DataFrame with one row per business and the columns
    name, address, city, state, postal_code, stars, review_count,
    sorted by `stars` in descending order.
    Raises IndexError if the user has no rows in network_df_list.
    '''
    user_reviews_df = network_df_list.loc[network_df_list['user_id'] == user_id]
    if user_reviews_df.empty:
        # Same exception type that `.iloc[0]` raises on an empty frame, but with
        # a message that says what went wrong.
        raise IndexError('no reviews found for user_id {!r}'.format(user_id))

    # The user's position is taken from the business in their first review row.
    first_review = user_reviews_df.iloc[0]
    user_pos = (first_review['latitude'], first_review['longitude'])

    # Build a new collection instead of appending to the list stored in the
    # frame: appending would grow that stored list on every call.
    # Ids are stripped in case the stored ids still carry surrounding whitespace.
    reviewer_ids = {friend_id.strip() for friend_id in first_review['friends']}
    reviewer_ids.add(user_id)  # include the user's own past reviews

    # One vectorized membership test instead of one full-table scan per friend.
    network_reviews = network_df_list[network_df_list['user_id'].isin(reviewer_ids)]

    # filtering by category (literal match, so names such as "American (New)" work)
    category_mask = network_reviews['categories'].str.contains(
        category, case=False, na=False, regex=False
    )
    in_category = network_reviews[category_mask].drop_duplicates('review_id')

    # filtering by location; building the Series explicitly keeps this working
    # when no review matched the category
    distances = pd.Series(
        [haversine(user_pos, (lat, lng))
         for lat, lng in zip(in_category['latitude'], in_category['longitude'])],
        index=in_category.index,
        dtype='float64',
    )
    nearby = in_category[distances <= max_distance_km]

    # Collapse to one row per business BEFORE taking the top k. Taking head(k)
    # on review rows first returns fewer than k businesses whenever a business
    # has several reviews.
    business_cols = ['name', 'address', 'city', 'state', 'postal_code']
    per_business = nearby[business_cols + ['stars', 'review_count']].groupby(business_cols).mean()

    # Stable sort, so businesses with equal stars keep the group-key order.
    ranked = per_business.sort_values(by='stars', ascending=False, kind='mergesort')
    return ranked.head(k).reset_index()

In [31]:
# example run: up to k recommendations in one category for a single user
# other user ids to try: 'P0gWNnfLRzKaBQ5IY4GFPg', 'nKBtfZ93gPYybGEz2QOvTQ'
user_id = 'L498DJb5YDAtoqgv9thWCg'
category = 'Home Services'
k = 10
output = recommend(user_id=user_id, category=category, k=k)
print(output.shape)
output.head(k)

(6, 7)


Unnamed: 0,name,address,city,state,postal_code,stars,review_count
0,Convenient Termite & Pest Control LLC,"6747 E University Dr, Ste B",Mesa,AZ,85205,5.0,19.0
1,Anytime Auto Glass,"2659 W Guadalupe Rd, Ste D-202",Mesa,AZ,85202,4.5,90.0
2,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,4.5,26.0
3,Freedom Landscaping,21459 S 187th Way,Queen Creek,AZ,85142,4.0,9.0
4,Copper State Home Builders,2903 N Norfolk Dr,Mesa,AZ,85215,3.5,68.0
5,Guild Mortgage,"1400 N Gilbert Rd, Ste B",Gilbert,AZ,85234,3.5,3.0
