In [1]:
#import seaborn as sns
import numpy as np
import pandas as pd

from surprise import Reader
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import SVD

from sklearn.metrics import pairwise_distances

import pymongo
from pymongo import MongoClient

In [2]:
# Initialize mongoDB client
client = MongoClient()
db = client.yelp

In [3]:
# Gather businesses data from DB
# create new collection with split categories
db.temp_businesses.drop()
agr = [{'$project': {'business_id':1,'name':1,'address':1,'city':1,'state':1,'postal_code':1,'latitude':1,'longitude':1,'stars':1,'review_count':1,'is_open':1,'attributes':1,'categories': {'$split':["$categories",","]},'hours':1}},{'$out': 'temp_businesses'}]
businesses_agr = db.businesses.aggregate(agr)
businesses_collection = db.temp_businesses
businesses_df = pd.DataFrame(list(businesses_collection.find()))
businesses_count = businesses_df.shape[0]

In [4]:
# Obtain random business from businesses dataframe
random_number = np.random.randint(1,businesses_count)
business = businesses_df.iloc[random_number]
business

_id                                      5e925b5925c46ba0c36b62f9
business_id                                Fuqtf6fcUpzcl4qIuohCpQ
name                                         Makeup By Sheena Zar
address                                                          
city                                                    Las Vegas
state                                                          NV
postal_code                                                 89101
latitude                                                   36.175
longitude                                                -115.136
stars                                                           5
review_count                                                   19
is_open                                                         1
attributes      {'BusinessAcceptsBitcoin': 'False', 'WiFi': 'u...
hours           {'Monday': '5:0-0:0', 'Tuesday': '5:0-0:0', 'W...
categories      [Beauty & Spas,  Makeup Artists,  Skin Care,  ...
Name: 1381

In [5]:
# Get categories array
categories = business.get('categories')

In [6]:
# Get Businesses in categories ($all for all in categories, $in for any in categories)
businesses_in_categories_df = pd.DataFrame(list(businesses_collection.find({ "categories": {"$all": categories} })))
business_ids_in_categories = businesses_in_categories_df['business_id'].tolist()

In [7]:
# Now obtain all useful (greater than zero) reviews from the group of businesses_in_categories_df
reviews_df = pd.DataFrame(list(db.reviews.find({'business_id': {'$in': business_ids_in_categories}, 'useful': {'$gt': 0}})))
reviews_df.stars = reviews_df.stars.astype('int')

In [8]:
# Create Surprise data
reader = Reader( rating_scale= (1,5))
surprise_dataset = Dataset.load_from_df(reviews_df[['business_id','user_id','stars']], reader)
train_set, test_set = train_test_split(surprise_dataset, test_size=.2)

In [9]:
# Setup algorithm using SVD
algo = SVD( n_factors = 5, n_epochs = 200, biased = True, lr_all = 0.005, reg_all = 0, init_mean = 0, init_std_dev = 0.01, verbose = False )
algo.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x10d6f8730>

In [10]:
# Obtain pu matrix and get our business vector
pu = algo.pu
business_vector_innerid = train_set.to_inner_uid(business.get('business_id'))
business_vector = pu[business_vector_innerid]

In [11]:
# Calculate the euclidean distance between our business and everything else
distances = pairwise_distances(pu,business_vector.reshape(1, -1),'euclidean')
# Obtain an indirect array with the indices of the sorted distances array
ordered_distances_index = np.argsort(distances.flatten())

In [12]:
# If number of neighbours is greater than number_of_neighbours, return first number_of_neighbours, otherwise return what we have
number_of_neighbours = 10
neighbour_indices = []
if ordered_distances_index.shape[0] > number_of_neighbours:
    neighbour_indices = ordered_distances_index[1:number_of_neighbours+1]
else:
    neighbour_indices = ordered_distances_index

In [13]:
# Get neighbour ids of selected business
business_neighbours_ids = (train_set.to_raw_uid(rid) for rid in neighbour_indices)
business_neighbours = []
for neighbour in business_neighbours_ids:
    business_neighbours.append(neighbour)

In [14]:
# Query business ids to obtain dataframe of neighbours
businesses_neighbours_df = pd.DataFrame(list(businesses_collection.find({ "business_id": {"$in": business_neighbours} })))
businesses_neighbours_df

Unnamed: 0,_id,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,hours,categories
0,5e925b5425c46ba0c3694bf1,06CZ8ZPsWCj8Hle_0AHpVQ,Mica Beauty Cosmetics,"7014 E Camelback Rd, Ste 2268",Scottsdale,AZ,85251,33.503895,-111.929438,1.0,4,0,,,"[Beauty & Spas, Cosmetics & Beauty Supply, H..."
1,5e925b5625c46ba0c36a09b5,5qc1Xv36E36DF6zCBG51Cw,Globe Salon,900 S Las Vegas Blvd,Las Vegas,NV,89101,36.159999,-115.147533,4.5,146,1,"{'BusinessAcceptsCreditCards': 'True', 'Busine...","{'Monday': '0:0-0:0', 'Tuesday': '10:0-17:0', ...","[Beauty & Spas, Nail Salons, Skin Care, Hai..."
2,5e925b5725c46ba0c36a80c4,B7n1ROadKJn-RIVYxbfIng,Beauticians On The Go,"2390 E Camelback Rd, Ste 130",Phoenix,AZ,85016,33.509687,-112.031895,4.0,7,1,"{'BikeParking': 'False', 'WheelchairAccessible...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-20:0', '...","[Beauty & Spas, Makeup Artists, Hair Extensi..."
3,5e925b5725c46ba0c36aa833,6LZMjOagYR6SVJc9OL1L9g,Tease & Snap,"2530 Saint Rose Pkwy, Ste 110",Henderson,NV,89074,36.018099,-115.101225,4.5,23,0,"{'GoodForKids': 'True', 'BusinessParking': '{'...","{'Wednesday': '9:30-17:0', 'Thursday': '9:30-1...","[Beauty & Spas, Blow Dry/Out Services, Skin ..."
4,5e925b5925c46ba0c36b62f9,Fuqtf6fcUpzcl4qIuohCpQ,Makeup By Sheena Zar,,Las Vegas,NV,89101,36.175,-115.136389,5.0,19,1,"{'BusinessAcceptsBitcoin': 'False', 'WiFi': 'u...","{'Monday': '5:0-0:0', 'Tuesday': '5:0-0:0', 'W...","[Beauty & Spas, Makeup Artists, Skin Care, ..."
5,5e925b5925c46ba0c36b6375,abkRbKUpoDcSyaRenh10UA,Xpressions Salon and Day Spa,"1371 West Warm Springs, Ste A",Henderson,NV,89014,36.055558,-115.044977,4.0,28,1,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","{'Monday': '8:0-18:0', 'Tuesday': '8:0-18:0', ...","[Beauty & Spas, Nail Salons, Hair Extensions..."
6,5e925b5a25c46ba0c36bbc96,SfQGRw4hJ1qLbDGBdQfIpg,Leidan Mitchell Salon,2177 W Queen Creek Rd,Chandler,AZ,85248,33.260613,-111.881864,4.5,51,1,"{'BusinessAcceptsCreditCards': 'True', 'Busine...","{'Monday': '10:0-19:0', 'Tuesday': '9:0-21:0',...","[Beauty & Spas, Hair Stylists, Day Spas, Ma..."
7,5e925b5a25c46ba0c36c1081,HpqHmIE9KJqaUZadF-Cb2w,Sognatore,"1100 Liberty Ave, Ste 1017",Pittsburgh,PA,15222,40.444228,-79.992233,4.0,24,1,"{'GoodForKids': 'False', 'BusinessParking': '{...","{'Tuesday': '10:0-21:0', 'Wednesday': '10:0-21...","[Beauty & Spas, Hair Stylists, Men's Hair Sa..."
