In [194]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the business table from the CSV file; the relative path is resolved
# against the notebook's working directory.
business = pd.read_csv('businessV2.csv')
# Preview the first rows to sanity-check the load.
business.head()

Unnamed: 0.1,Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,...,street.vendors,fish...chips,breweries,cheesesteaks,southern,gastropubs,caribbean,cajun.creole,american,sum
0,1,gnKjwL_1w79qoiV3IC_xQQ,Musashi Japanese Restaurant,"10110 Johnston Rd, Ste 15",charlotte,NC,28210.0,35.092564,-80.859132,4.0,...,0,0,0,0,0,0,0,0,0,2
1,2,1Dfx3zM-rW4n-31KeC8sJg,Taco Bell,2450 E Indian School Rd,phoenix,AZ,85016.0,33.495194,-112.028588,3.0,...,0,0,0,0,0,0,0,0,0,5
2,3,fweCYi8FmbJXHCqLnwuk8w,Marco's Pizza,5981 Andrews Rd,mentor on the lake,OH,44060.0,41.70852,-81.359556,4.0,...,0,0,0,0,0,0,0,0,0,3
3,4,1RHY4K3BD22FK7Cfftn8Mg,Marathon Diner,"Center Core - Food Court, Fl 3, Pittsburgh Int...",pittsburgh,PA,15231.0,40.496177,-80.246011,4.0,...,0,0,0,0,0,0,0,0,0,4
4,5,tstimHoMcYbkSC4eBA1wEg,Maria's Mexican Restaurant & Bakery,6055 E Lake Mead Blvd,las vegas,NV,89156.0,36.195615,-115.040529,4.5,...,0,0,0,0,0,0,0,0,0,2


In [3]:
# Display the column labels of the loaded frame.
business.columns

Index(['Unnamed: 0', 'business_id', 'name', 'address', 'city', 'state',
       'postal_code', 'latitude', 'longitude', 'stars', 'review_count',
       'sushi.bars', 'japanese', 'breakfast...brunch', 'mexican', 'tacos',
       'tex.mex', 'fast.food', 'italian', 'pizza', 'chicken.wings',
       'sandwiches', 'salad', 'burgers', 'comfort.food', 'bars', 'vegan',
       'chinese', 'pubs', 'sports.bars', 'cafes', 'thai', 'ethnic.food',
       'asian.fusion', 'specialty.food', 'korean', 'caterers', 'hot.dogs',
       'vegetarian', 'seafood', 'french', 'buffets', 'beer', 'wine...spirits',
       'dive.bars', 'barbeque', 'delis', 'desserts', 'bakeries',
       'coffee...tea', 'mediterranean', 'middle.eastern', 'wraps', 'hawaiian',
       'poke', 'food.delivery.services', 'diners', 'soul.food',
       'juice.bars...smoothies', 'vietnamese', 'latin.american',
       'local.flavor', 'wine.bars', 'ice.cream...frozen.yogurt',
       'convenience.stores', 'food.stands', 'indian', 'meat.shops',
      

In [4]:
import random

def randsample(dat):
    """Print the first-column value of five randomly chosen rows of ``dat``.

    Rows are drawn independently (with replacement), so the same row may be
    printed more than once. Nothing is printed for an empty frame.

    Parameters
    ----------
    dat : pandas.DataFrame
        Frame to sample from; only its first column is shown.

    Returns
    -------
    None
    """
    n = len(dat.index)
    if n == 0:
        # Nothing to sample from.
        return
    for _ in range(5):
        # randrange(n) yields 0..n-1, exactly the valid positions for iloc.
        # The previous randint(1, n) is inclusive on both ends: it never
        # picked row 0 and could return n, which is out of bounds.
        rand = random.randrange(n)
        print(dat.iloc[rand, 0])
        
# Spot-check the table: print the first-column value of a few random rows.
randsample(dat=business)

16123
117
21675
8391
16343


----
### Preprocessing the data

In [155]:
# Count missing values per column for the location and identifier fields.
business[['city','state','postal_code','business_id','name']].isna().sum()

city            0
state           0
postal_code    41
business_id     0
name            0
dtype: int64

In [156]:
# Keep only the rows that have a postal code. `.copy()` makes the result an
# independent frame, so later column assignments on `business` do not trigger
# pandas' SettingWithCopyWarning.
business = business[business['postal_code'].notna()].copy()

In [167]:
# (rows, columns) of the current frame.
business.shape

(24638, 91)

In [158]:
# Cast postal codes to integers and lower-case city names in one step.
# `assign` returns a new frame instead of writing columns into a frame that
# may be a filtered slice, which avoids pandas' SettingWithCopyWarning.
business = business.assign(
    postal_code=business['postal_code'].astype('int64'),
    city=business['city'].str.lower(),
)

----

In [190]:
def content_based_filtering(x_vec = [] ,numberOfResult = 5,city = None,state = None,postalCode = None):
    """Recommend businesses similar to the ones the user has already visited.

    Candidates are the rows of the notebook-level ``business`` frame that
    match the requested location and whose ``business_id`` is not in
    ``x_vec``. Each candidate is scored with the mean cosine similarity
    between its feature columns and those of every visited business; only
    candidates with a strictly positive score are returned.

    Parameters
    ----------
    x_vec : list of str
        ``business_id`` values of the businesses the user has visited.
        Only read, never mutated.
    numberOfResult : int
        Maximum number of rows to return.
    city, state : str or None
        Pattern matched against the ``city`` / ``state`` column with
        ``Series.str.contains`` (regular expression, case-insensitive).
        ``None`` disables that filter.
    postalCode : int or None
        Exact ``postal_code`` value to match. ``None`` disables that filter.

    Returns
    -------
    pandas.DataFrame or str
        Up to ``numberOfResult`` candidate rows with an added ``similarity``
        column, sorted by descending similarity. The frame is empty when no
        visited id is found in ``business`` or no candidate matches the
        location. When ``x_vec`` is empty, the prompt string
        "Please select the Restaurants you have visited" is returned instead.
    """
    if len(x_vec) == 0:
        return "Please select the Restaurants you have visited"

    business_visited = business[business['business_id'].isin(x_vec)]

    # Build the location mask step by step so that an omitted (None) filter
    # places no constraint; previously None crashed in str.contains and
    # `postal_code == None` matched nothing.
    in_area = pd.Series(True, index=business.index)
    if city is not None:
        in_area = in_area & business['city'].str.contains(city, case=False, na=False)
    if state is not None:
        in_area = in_area & business['state'].str.contains(state, case=False, na=False)
    if postalCode is not None:
        in_area = in_area & (business['postal_code'] == postalCode)

    # Never recommend something already visited. `.copy()` gives an
    # independent frame so the similarity column can be added safely.
    not_visited = ~business['business_id'].isin(x_vec)
    candidates = business[in_area & not_visited].copy()

    if business_visited.empty or candidates.empty:
        # cosine_similarity rejects empty inputs; there is nothing to rank.
        return candidates.iloc[0:0].assign(similarity=np.empty(0))

    # Positional slice kept from the original: columns 11..89 are used as the
    # feature vector. NOTE(review): this depends on the CSV's column order;
    # confirm it covers exactly the category indicator columns (and not
    # review_count or sum) for the file being loaded.
    # Plain ndarrays are used because np.matrix is deprecated and is rejected
    # by current scikit-learn input validation.
    visited_cat = business_visited.iloc[:, 11:90].to_numpy()
    candidate_cat = candidates.iloc[:, 11:90].to_numpy()

    # Rows = visited businesses, columns = candidates; average over the
    # visited axis to get one score per candidate.
    sim_mat = cosine_similarity(visited_cat, candidate_cat)
    candidates['similarity'] = np.asarray(sim_mat).mean(axis=0)

    candidates = candidates[candidates['similarity'] > 0]
    candidates = candidates.sort_values(by=['similarity'], ascending=False)

    return candidates.head(numberOfResult)


In [193]:
# Example: request up to 10 recommendations in Las Vegas, NV 89156 based on
# four previously visited business ids.
content_based_filtering(x_vec=['onC6LNdf3HIwjB6LqMjQxw','4v9nhZ5h-KScIsG7uXjfzg', 'M2Qdv-0LXV8RUmCH-Qo6xQ',
                             'SLVjLgWfUqgaXtmc0wmLiw' ],numberOfResult=10,city='las vegas',state='NV',postalCode=89156)

Unnamed: 0.1,Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,...,fish...chips,breweries,cheesesteaks,southern,gastropubs,caribbean,cajun.creole,american,sum,similarity
17632,17754,qA27MJtwlu0zbnDS0mO2Jw,Subway,"6520 E Lake Mead Blvd, Ste 100",las vegas,NV,89156.0,36.19675,-115.030427,3.0,...,0,0,0,0,0,0,0,0,2,0.227062
17958,18080,YoJt2bvCrYMSSyoeUO-YTg,PT's,6055 E Lake Mead Blvd,las vegas,NV,89156.0,36.195615,-115.040529,3.0,...,0,0,0,0,0,0,0,1,4,0.188982
11700,11780,efSwJYCHpQ5wz6hIzUiWIA,Taco Bell,"6540 E Lake Mead Blvd, Ste 205",las vegas,NV,89156.0,36.196068,-115.030179,3.0,...,0,0,0,0,0,0,0,0,3,0.185395
9648,9712,AkG4ozbKUgjDk8-Mmywsxw,Timbers - Lake Mead,6330 E Lake Mead Blvd,las vegas,NV,89156.0,36.196378,-115.034458,3.5,...,0,0,0,0,0,0,0,1,5,0.126773
4,5,tstimHoMcYbkSC4eBA1wEg,Maria's Mexican Restaurant & Bakery,6055 E Lake Mead Blvd,las vegas,NV,89156.0,36.195615,-115.040529,4.5,...,0,0,0,0,0,0,0,0,2,0.066815
8372,8421,MTkDsAaz-bQKrEB_czblUA,Pizza Hut,"6895 E. Lake Mead Blvd., #1",las vegas,NV,89156.0,36.194184,-115.025515,2.0,...,0,0,0,0,0,0,0,0,2,0.066815
18855,18982,pmsGwCJoMjtj-wwP78PIWQ,Tropicana Pizza,"5841 E Lake Mead Blvd, Ste C",las vegas,NV,89156.0,36.195791,-115.044815,3.5,...,0,0,0,0,0,0,0,0,2,0.066815
24109,24268,TUpyKJFqL_ySZMo54pT-Sw,La Flor De Michoacan Restaurant,6055 E Lake Mead Blvd,las vegas,NV,89156.0,36.195615,-115.040529,3.5,...,0,0,0,0,0,0,0,1,4,0.047246
