In [1]:
#import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from surprise import Reader
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import KNNBasic

import pymongo
from pymongo import MongoClient

In [2]:
# Initialize mongoDB client
client = MongoClient()
db = client.yelp

In [3]:
# Gather businesses data from DB
# create new collection with split categories
db.temp_businesses.drop()
agr = [{'$project': {'business_id':1,'name':1,'address':1,'city':1,'state':1,'postal_code':1,'latitude':1,'longitude':1,'stars':1,'review_count':1,'is_open':1,'attributes':1,'categories': {'$split':["$categories",","]},'hours':1}},{'$out': 'temp_businesses'}]
businesses_agr = db.businesses.aggregate(agr)
businesses_collection = db.temp_businesses
businesses_df = pd.DataFrame(list(businesses_collection.find()))
businesses_count = businesses_df.shape[0]

In [4]:
# Obtain random business from businesses dataframe
random_number = np.random.randint(1,businesses_count)
business = businesses_df.iloc[random_number]
business

_id                                      5e925b5825c46ba0c36ad84b
business_id                                YosCsvRg4fNCdNQ6DrREqw
name                                                         Link
address           Sheraton Café, 140 Richmond Street W, Suite 100
city                                                      Toronto
state                                                          ON
postal_code                                               M5H 3K6
latitude                                                  43.6511
longitude                                                -79.3844
stars                                                           4
review_count                                                    5
is_open                                                         1
attributes      {'Ambience': '{'touristy': False, 'hipster': F...
hours                                                        None
categories                                  [Cafes,  Restaurants]
Name: 1026

In [5]:
# Get categories array
categories = business.get('categories')

In [6]:
# Get Businesses in categories ($all for all in categories, $in for any in categories)
businesses_in_categories_df = pd.DataFrame(list(businesses_collection.find({ "categories": {"$all": categories} })))
business_ids_in_categories = businesses_in_categories_df['business_id'].tolist()

In [7]:
# Now obtain all useful (greater than zero) reviews from the group of businesses_in_categories_df
reviews_df = pd.DataFrame(list(db.reviews.find({'business_id': {'$in': business_ids_in_categories}, 'useful': {'$gt': 0}})))
reviews_df.stars = reviews_df.stars.astype('int')

In [8]:
# Create Surprise data
reader = Reader( rating_scale= (1,5))
surprise_dataset = Dataset.load_from_df(reviews_df[['business_id','user_id','stars']], reader)
train_set, test_set = train_test_split(surprise_dataset, test_size=.2)

In [9]:
# Setup algorithm using KNNBasic
sim_options = {'name': 'cosine', 'user_based': True}
algo = KNNBasic(k = 10, min_k=5, sim_options=sim_options)
algo.fit(train_set)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x113be2280>

In [10]:
# Get neighbour ids of selected business
business_inner_id = algo.trainset.to_inner_uid(business.get('business_id'))
business_neighbours_inner_ids = algo.get_neighbors(business_inner_id,k=10)
business_neighbours_ids = (algo.trainset.to_raw_uid(rid) for rid in business_neighbours_inner_ids)
business_neighbours = []
for neighbour in business_neighbours_ids:
    business_neighbours.append(neighbour)

In [11]:
# Query business ids to obtain dataframe of neighbours
businesses_neighbours_df = pd.DataFrame(list(businesses_collection.find({ "business_id": {"$in": business_neighbours} })))
businesses_neighbours_df

Unnamed: 0,_id,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,hours,categories
0,5e925b5525c46ba0c3698398,39sCYNzSYk_z8v_KoLH0KA,Juliette & Chocolat,1615 Rue Saint-Denis,Montréal,QC,H2X 3K3,45.514915,-73.56273,4.0,215,1,"{'RestaurantsAttire': 'u'casual'', 'Restaurant...","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...","[Cafes, Desserts, Chocolatiers & Shops, Spe..."
1,5e925b5625c46ba0c369d08f,VaQhUU3P4Z5A3oYPsEMbkw,Tazza D'oro,1125 N Highland Ave,Pittsburgh,PA,15206,40.474888,-79.918866,4.5,165,1,"{'GoodForKids': 'True', 'RestaurantsTakeOut': ...","{'Monday': '7:0-19:0', 'Tuesday': '7:0-19:0', ...","[Cafes, Restaurants, Delis, Food, Coffee &..."
2,5e925b5625c46ba0c36a0632,s1b39BK1WFRY9200fwFCmg,Wildflower,3111 W Chandler Blvd,Chandler,AZ,85226,33.301694,-111.897176,4.0,214,1,"{'Alcohol': 'u'none'', 'RestaurantsGoodForGrou...","{'Monday': '7:0-21:0', 'Tuesday': '7:0-21:0', ...","[Cafes, Coffee & Tea, Restaurants, Bakeries..."
3,5e925b5725c46ba0c36a516d,N5B7OBTyNGgwU_Y2fwNcyw,Phoenix Public Market Cafe,14 E Pierce St,Phoenix,AZ,85004,33.455843,-112.073526,4.0,1131,1,"{'WiFi': 'u'free'', 'GoodForKids': 'True', 'Bu...","{'Monday': '0:0-0:0', 'Tuesday': '7:0-20:0', '...","[Cafes, Restaurants, Food Trucks, Sandwiche..."
4,5e925b5725c46ba0c36aa0d4,cQK9M2JAwETQnnBoYyua5A,The Senator,249 Victoria Street,Toronto,ON,M5B 1T8,43.655749,-79.378992,4.0,471,1,"{'BikeParking': 'True', 'OutdoorSeating': 'Fal...","{'Monday': '8:0-14:0', 'Tuesday': '7:30-21:0',...","[Cafes, Diners, Desserts, Coffee & Tea, Br..."
5,5e925b5725c46ba0c36ab78c,t6WY1IrohUecqNjd9bG42Q,Red Velvet Cafe,"7875 W Sahara Ave, Ste 103",Las Vegas,NV,89117,36.143204,-115.262866,3.5,351,0,"{'Alcohol': 'u'none'', 'BikeParking': 'True', ...","{'Monday': '10:30-20:0', 'Tuesday': '10:30-20:...","[Cafes, Restaurants, American (New), Vegan,..."
6,5e925b5925c46ba0c36b4776,piHJq_NDrOh5SYk3AyQVOg,Chestnut Fine Foods & Provisions,"4350 E Camelback Rd, Bldg I-100",Phoenix,AZ,85018,33.509826,-111.987098,3.5,301,1,"{'RestaurantsTableService': 'False', 'Restaura...","{'Monday': '0:0-0:0', 'Tuesday': '7:0-14:0', '...","[Cafes, Breakfast & Brunch, Ramen, Restaura..."
7,5e925b5a25c46ba0c36bec5d,VWtT4DKxQ4YVRWT88Z5SMA,Sambalatte,"6555 S Jones Blvd, Ste 100",Las Vegas,NV,89118,36.069822,-115.224872,4.0,360,1,"{'GoodForKids': 'True', 'OutdoorSeating': 'Tru...","{'Monday': '7:0-18:0', 'Tuesday': '7:0-18:0', ...","[Cafes, Coffee & Tea, Food, Restaurants]"
8,5e925b5a25c46ba0c36bef22,frCxZS7lPhEnQRJ3UY6m7A,La Santisima Gourmet Taco Shop,1919 N 16th St,Phoenix,AZ,85006,33.469201,-112.047381,4.0,2152,1,"{'RestaurantsTakeOut': 'True', 'RestaurantsPri...","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...","[Cafes, New Mexican Cuisine, Nightlife, Veg..."
9,5e925b5b25c46ba0c36c6633,LhZsPzCOVdYobQy9b5dPFA,Estonian House Cafe,958 Broadview Avenue,East York,ON,M4K 2R6,43.681798,-79.357908,4.0,3,1,"{'GoodForKids': 'True', 'WiFi': 'u'free'', 'Re...",,"[Cafes, Restaurants]"
