## Recommender Based on Universal Sentence Encoder

In [2]:
import pandas as pd
import numpy as np
import tensorflow_hub as hub
import ast

In [3]:
STOP = 100
embedder = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
EMBEDDED_CATEGORIES_FILE = 'data/embedded_categories.npy'
EMBEDDED_ATTRIBUTES_FILE = 'data/embedded_attributes.npy'

In [4]:
def cosine_similarity(mat, vec):
    """
    Calculate cosine similarity scores

    :param mat: 2D array of all targets
    :param vec: 1D array of vector to compare to
    :return: all cosine similarity scores to vec
    """
    vec_mag = np.linalg.norm(vec)
    mat_mag = np.linalg.norm(mat, axis=1).reshape((-1, 1))
    return (1/vec_mag) * np.dot(mat, vec).reshape((-1, 1)) / mat_mag

def return_attribute_soup(input):
    """
    Converts dictionary of inputs into list of items that are true.  Can also handle nested examples

    :param input: dictionary of attributes
    :return: list of keys
    """
    current = []
    for key in input:
        # Inner dictionaries appear to be malformed in places
        try:
            value = ast.literal_eval(input[key])
        except ValueError:
            value = input[key]
        if isinstance(value, dict):
            current.extend(return_attribute_soup(value))
        elif value not in (False, 'No', 'False'):
            current.append(key)
    return current

In [5]:
df_business = pd.read_csv('data/yelp_academic_dataset_business_filtered.csv')
df_business = df_business.dropna(subset=['categories'])
df_business['attributes'] = df_business['attributes'].fillna('{}')

In [6]:
df_business['attribute_dict'] = df_business['attributes'].apply(ast.literal_eval)
df_business['attribute_soup'] = df_business['attribute_dict'].apply(return_attribute_soup)
df_business['attribute_string'] = df_business['attribute_soup'].apply(lambda x: ' '.join(map(str, x)))

In [7]:
df_business.head()


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,attribute_dict,attribute_soup,attribute_string
0,0,0,6iYb2HFDywm3zjuRg0shjw,Oskar Blues Taproom,921 Pearl St,Boulder,CO,80302,40.017544,-105.283348,4.0,86,1,"{'RestaurantsTableService': 'True', 'WiFi': ""u...","Gastropubs, Food, Beer Gardens, Restaurants, B...","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...","{'RestaurantsTableService': 'True', 'WiFi': 'u...","[RestaurantsTableService, WiFi, BikeParking, s...",RestaurantsTableService WiFi BikeParking stree...
1,1,1,tCbdrRPZA0oiIYSmHG3J0w,Flying Elephants at PDX,7000 NE Airport Way,Portland,OR,97218,45.588906,-122.593331,4.0,126,1,"{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...","Salad, Soup, Sandwiches, Delis, Restaurants, C...","{'Monday': '5:0-18:0', 'Tuesday': '5:0-17:0', ...","{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...","[RestaurantsTakeOut, RestaurantsAttire, GoodFo...",RestaurantsTakeOut RestaurantsAttire GoodForKi...
2,5,5,D4JtQNTI4X3KcbzacDJsMw,Bob Likes Thai Food,3755 Main St,Vancouver,BC,V5V,49.251342,-123.101333,3.5,169,1,"{'GoodForKids': 'True', 'Alcohol': ""u'none'"", ...","Restaurants, Thai","{'Monday': '17:0-21:0', 'Tuesday': '17:0-21:0'...","{'GoodForKids': 'True', 'Alcohol': 'u'none'', ...","[GoodForKids, Alcohol, RestaurantsGoodForGroup...",GoodForKids Alcohol RestaurantsGoodForGroups R...
3,10,10,rYs_1pNB_RMtn5WQh55QDA,Chautauqua General Store,100 Clematis Dr,Boulder,CO,80302,39.998449,-105.281006,3.5,5,1,"{'BikeParking': 'True', 'RestaurantsTakeOut': ...","Food, Shopping, Convenience Stores, Souvenir S...","{'Monday': '8:0-20:0', 'Tuesday': '8:0-20:0', ...","{'BikeParking': 'True', 'RestaurantsTakeOut': ...","[BikeParking, RestaurantsTakeOut, street, Whee...",BikeParking RestaurantsTakeOut street Wheelcha...
4,12,12,HPA_qyMEddpAEtFof02ixg,Mr G's Pizza & Subs,474 Lowell St,Peabody,MA,01960,42.541155,-70.973438,4.0,39,1,"{'RestaurantsGoodForGroups': 'True', 'HasTV': ...","Food, Pizza, Restaurants","{'Monday': '11:0-21:0', 'Tuesday': '11:0-21:0'...","{'RestaurantsGoodForGroups': 'True', 'HasTV': ...","[RestaurantsGoodForGroups, HasTV, GoodForKids,...",RestaurantsGoodForGroups HasTV GoodForKids Res...


In [8]:
try:
    with open(EMBEDDED_CATEGORIES_FILE, 'rb') as f:
        embedded_categories = np.load(EMBEDDED_CATEGORIES_FILE, allow_pickle=True)

    with open(EMBEDDED_ATTRIBUTES_FILE, 'rb') as f:
        embedded_attributes = np.load(EMBEDDED_ATTRIBUTES_FILE, allow_pickle=True)

    print('EMBEDDINGS LOADED')

except IOError as e:
    print('ARRAY(S) NOT FOUND. EMBEDDING:')
    categories = list(df_business['categories'])
    embedded_categories = embedder(categories).numpy()
    np.save(EMBEDDED_CATEGORIES_FILE, embedded_categories)

    attributes = list(df_business['attribute_string'])
    embedded_attributes = embedder(attributes).numpy()
    np.save(EMBEDDED_ATTRIBUTES_FILE, embedded_attributes)
    print('EMBEDDINGS SAVED')


EMBEDDINGS LOADED


In [73]:
def get_knn(input, embedded_attributes, embedded_categories, df_business, k=5, category_weighting=0.75, min_rating=-1, verbose=False):
    """
    Get the most similar businesses by knn search and sort.  This function assumes recommendations are in the same state
    and that the restaurants are rated greater than or equal to 4 stars.

    :param input: input business, in the form of a Series
    :param embedded_attributes: services offered at the business
    :param embedded_categories: embedded categories (business type)
    :param df_business: DataFrame of all business info that must match embedded_attributes
    :param k: Number of top results desired
    :param category_weighting: emphasis placed on the contents of the restaurant.
    :return: DataFrame of similar businesses
    """
    assert df_business.shape[0] == embedded_categories.shape[0]
    assert df_business.shape[0] == embedded_attributes.shape[0]

    # Filter by state before continuing
    df_business_copy = df_business.copy()
    state = input['state']
    query_id = input['business_id']
    # List of all matching indices
    match_dex = df_business.index[(df_business['state'] == state) & (df_business['stars'] >= 4)].tolist()

    embedded_attributes = embedded_attributes[match_dex]
    embedded_categories = embedded_categories[match_dex]
    df_business_copy = df_business.loc[match_dex, :]


    query_category = input.loc['categories']
    query_attributes = input.loc['attribute_string']
    if verbose: print("INPUT BUSINESS:", input.loc['name'])
    embedded_query_category = embedder([query_category]).numpy().flatten()
    embedded_query_attributes = embedder([query_attributes]).numpy().flatten()

    categ_simil_scores = cosine_similarity(embedded_categories, embedded_query_category).flatten()
    attri_simil_scores = cosine_similarity(embedded_attributes, embedded_query_attributes).flatten()

    weighted_simil_scores = category_weighting * categ_simil_scores + (1 - category_weighting) + attri_simil_scores
    weighted_simil_scores *= 0.5

    df_business_copy['scores'] = weighted_simil_scores

    # After filtering, choose the smallest, account for perfect match
    available_businesses = weighted_simil_scores.shape[0]
    k = min(k + 1, available_businesses)

    indices = np.argsort(weighted_simil_scores)[-k:][::-1]
    ret = df_business_copy.iloc[indices, :]
    return ret[(ret['stars'] >= min_rating) & (ret['business_id'] != query_id)]

In [74]:
df_business.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,attribute_dict,attribute_soup,attribute_string
0,0,0,6iYb2HFDywm3zjuRg0shjw,Oskar Blues Taproom,921 Pearl St,Boulder,CO,80302,40.017544,-105.283348,4.0,86,1,"{'RestaurantsTableService': 'True', 'WiFi': ""u...","Gastropubs, Food, Beer Gardens, Restaurants, B...","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...","{'RestaurantsTableService': 'True', 'WiFi': 'u...","[RestaurantsTableService, WiFi, BikeParking, s...",RestaurantsTableService WiFi BikeParking stree...
1,1,1,tCbdrRPZA0oiIYSmHG3J0w,Flying Elephants at PDX,7000 NE Airport Way,Portland,OR,97218,45.588906,-122.593331,4.0,126,1,"{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...","Salad, Soup, Sandwiches, Delis, Restaurants, C...","{'Monday': '5:0-18:0', 'Tuesday': '5:0-17:0', ...","{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...","[RestaurantsTakeOut, RestaurantsAttire, GoodFo...",RestaurantsTakeOut RestaurantsAttire GoodForKi...
2,5,5,D4JtQNTI4X3KcbzacDJsMw,Bob Likes Thai Food,3755 Main St,Vancouver,BC,V5V,49.251342,-123.101333,3.5,169,1,"{'GoodForKids': 'True', 'Alcohol': ""u'none'"", ...","Restaurants, Thai","{'Monday': '17:0-21:0', 'Tuesday': '17:0-21:0'...","{'GoodForKids': 'True', 'Alcohol': 'u'none'', ...","[GoodForKids, Alcohol, RestaurantsGoodForGroup...",GoodForKids Alcohol RestaurantsGoodForGroups R...
3,10,10,rYs_1pNB_RMtn5WQh55QDA,Chautauqua General Store,100 Clematis Dr,Boulder,CO,80302,39.998449,-105.281006,3.5,5,1,"{'BikeParking': 'True', 'RestaurantsTakeOut': ...","Food, Shopping, Convenience Stores, Souvenir S...","{'Monday': '8:0-20:0', 'Tuesday': '8:0-20:0', ...","{'BikeParking': 'True', 'RestaurantsTakeOut': ...","[BikeParking, RestaurantsTakeOut, street, Whee...",BikeParking RestaurantsTakeOut street Wheelcha...
4,12,12,HPA_qyMEddpAEtFof02ixg,Mr G's Pizza & Subs,474 Lowell St,Peabody,MA,01960,42.541155,-70.973438,4.0,39,1,"{'RestaurantsGoodForGroups': 'True', 'HasTV': ...","Food, Pizza, Restaurants","{'Monday': '11:0-21:0', 'Tuesday': '11:0-21:0'...","{'RestaurantsGoodForGroups': 'True', 'HasTV': ...","[RestaurantsGoodForGroups, HasTV, GoodForKids,...",RestaurantsGoodForGroups HasTV GoodForKids Res...


In [75]:
%%time
results = get_knn(df_business.iloc[0, :], embedded_attributes, embedded_categories, df_business, category_weighting=0.5)
results.head()

CPU times: user 66.2 ms, sys: 288 ms, total: 354 ms
Wall time: 453 ms


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,attribute_dict,attribute_soup,attribute_string,scores
19374,59580,59580,GDmUoW3hmokCMAVys8M1PQ,The Post Brewing Co. Boulder,2027 13th St,Boulder,CO,80302,40.018848,-105.279096,4.0,179,1,"{'BikeParking': 'True', 'HappyHour': 'True', '...","Pubs, Breweries, Chicken Shop, American (Tradi...","{'Monday': '11:30-20:30', 'Tuesday': '11:30-20...","{'BikeParking': 'True', 'HappyHour': 'True', '...","[BikeParking, HappyHour, HasTV, casual, Alcoho...",BikeParking HappyHour HasTV casual Alcohol Whe...,0.856014
38669,118221,118221,_GteKUXq5Hx0ZTzzAkUW9g,Twisted Pine Brewing Company,3201 Walnut St,Boulder,CO,80301,40.020832,-105.251023,4.0,306,1,"{'HasTV': 'True', 'BusinessParking': ""{'garage...","Sports Bars, Sandwiches, Pubs, Food, Breweries...","{'Monday': '0:0-0:0', 'Friday': '16:0-19:0', '...","{'HasTV': 'True', 'BusinessParking': '{'garage...","[HasTV, street, lot, BikeParking, RestaurantsG...",HasTV street lot BikeParking RestaurantsGoodFo...,0.852223
5130,15784,15784,rrXppQgpsjFZs8TCRNcfhg,Upslope Brewing Company - Flatiron Park,1898 S Flatiron Ct,Boulder,CO,80301,40.020163,-105.218338,4.5,84,1,"{'BikeParking': 'True', 'RestaurantsPriceRange...","Beer, Wine & Spirits, Breweries, Bars, Nightli...","{'Monday': '12:0-20:0', 'Tuesday': '12:0-20:0'...","{'BikeParking': 'True', 'RestaurantsPriceRange...","[BikeParking, RestaurantsPriceRange2, Business...",BikeParking RestaurantsPriceRange2 BusinessAcc...,0.839652
48625,148328,148328,SXgLnNoWd62TEiqBYRiIkA,Peckish,1320 College Ave,Boulder,CO,80302,40.007179,-105.275761,4.0,20,1,"{'BYOB': 'False', 'Ambience': ""{'touristy': Fa...","Bars, Beer Bar, Sports Bars, Sandwiches, Chick...","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...","{'BYOB': 'False', 'Ambience': '{'touristy': Fa...","[RestaurantsTakeOut, street, WiFi, HasTV, Alco...",RestaurantsTakeOut street WiFi HasTV Alcohol W...,0.814933
37079,113409,113409,epJrb9ikQOUit-K0yuDEuA,12 Degree Brewing,"820 Main St, Ste 2",Louisville,CO,80027,39.978274,-105.131889,4.5,170,1,"{'BikeParking': 'True', 'NoiseLevel': ""u'loud'...","Pubs, Brewpubs, Food, Breweries, Belgian, Nigh...","{'Monday': '12:0-21:30', 'Tuesday': '16:0-20:0...","{'BikeParking': 'True', 'NoiseLevel': 'u'loud'...","[BikeParking, NoiseLevel, garage, street, Outd...",BikeParking NoiseLevel garage street OutdoorSe...,0.81225


### Load the users and their reviews into a train and testable form
Given a user:
- Find all reviews produced by this user
- Filter to only positive reviews (4 stars or above)
- For each positive review, find the top 5 most similar restaurants in the area specified by the review (it's unlikely the user would be willing to travel across the country to find a restaurant that might just be similar)
- Pool all of the results, and sort by similarity scores.  Therefore, the user should have a collection of potential places to visit.

By using a text embedder to calculate similarity, a training set isn't truly needed:
- Require at least 2 good reviews from each test user, otherwise skip the training instance although a larger threshold for minimum number of reviews may make more sense.
- For each user, having $$n$$ reviews, we randomly use 50% of the reviews for generating similar results.  Then, follow the above results to obtain $5n/2$ predictions.
- We examine whether the predictions and remaining $n/2$ test reviews have _any_ intersection (although we can change this later).


In [12]:
df_users = pd.read_csv('data/yelp_academic_dataset_user.csv')
df_reviews = pd.read_csv('data/yelp_academic_dataset_review.csv')
print("REVIEWS SIZE:", df_reviews.shape[0])
print("USERS SIZE:", df_users.shape[0])

In [58]:
# Set of restaurant IDs
restaurant_set = set(df_business['business_id'].values)
# Only keep restaurant reviews in the filtered dataset
df_reviews_final = df_reviews[df_reviews['business_id'].isin(restaurant_set)]
# Remove bad reviews
df_reviews_final = df_reviews_final[df_reviews_final['stars'] >= 4]


In [60]:
df_reviews_sampled = df_reviews_final.sample(n=1_000_000, replace=False)

In [61]:
user_id_set = set(df_reviews_sampled['user_id'].values)
df_users_sampled = df_users[df_users['user_id'].isin(user_id_set)]

In [62]:
df_user_and_review = pd.merge(df_reviews_sampled, df_users_sampled, on='user_id')
df_all_filtered = df_user_and_review[df_user_and_review.groupby('user_id').transform('count')['review_id'] > 5]

In [63]:
user_id_set = set(df_all_filtered['user_id'].values)

In [76]:
def get_all_user_recommendations(df_individual_reviews, df_business, embedded_attributes, embedded_categories):
    """
    Gets a DataFrame of all recommendations.  If the user reviews n items positively, return 5n items that they might enjoy.
    :param user_id: id of the queried user
    :param df_individual_reviews: reviews from a given user.
    :param df_business: DataFrame including all potential businesses
    :return: DataFrame of all recommendations
    """
    all_rec = []
    for business_id in df_individual_reviews['business_id']:
        # Retrieve first instance
        df_input_business = df_business[df_business['business_id'] == business_id].iloc[0, :]
        # Append all corresponding predictions
        all_rec.append(get_knn(df_input_business, embedded_attributes, embedded_categories, df_business, category_weighting=0.5))
    return pd.concat(all_rec)

In [77]:
# Single User Demo:

user_iter = iter(user_id_set)
user_id = next(user_iter)

data = df_all_filtered[df_all_filtered['user_id'] == user_id]
y_pred = get_all_user_recommendations(X, df_business, embedded_attributes, embedded_categories)
y_pred


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,attribute_dict,attribute_soup,attribute_string,scores
5968,18391,18391,Oi9afWDVgmmykw8kzc7LxA,Firehouse Subs,10725 E Colonial Dr,Orlando,FL,32817,28.569947,-81.229994,4.0,6,0,"{'RestaurantsDelivery': 'False', 'RestaurantsG...","Fast Food, Restaurants, Sandwiches, Delis","{'Monday': '10:30-21:0', 'Tuesday': '10:30-21:...","{'RestaurantsDelivery': 'False', 'RestaurantsG...","[RestaurantsGoodForGroups, HasTV, RestaurantsT...",RestaurantsGoodForGroups HasTV RestaurantsTake...,0.845477
6339,19616,19616,KAP9fE9BC4pEJOQa3OOy8g,Subway,"2823 S Orange Ave, Ste 140",Orlando,FL,32806,28.51134,-81.374782,5.0,5,1,"{'GoodForKids': 'True', 'RestaurantsAttire': ""...","Sandwiches, Restaurants, Fast Food","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ...","{'GoodForKids': 'True', 'RestaurantsAttire': '...","[GoodForKids, RestaurantsAttire, RestaurantsGo...",GoodForKids RestaurantsAttire RestaurantsGoodF...,0.844694
16527,50836,50836,ucNTiYiNJRh3-TZNQdNaqw,Subway,2213 Edgewater Dr,Orlando,FL,32804,28.571722,-81.389433,4.0,5,0,"{'RestaurantsPriceRange2': '1', 'GoodForKids':...","Fast Food, Sandwiches, Restaurants","{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...","{'RestaurantsPriceRange2': '1', 'GoodForKids':...","[RestaurantsPriceRange2, GoodForKids, Restaura...",RestaurantsPriceRange2 GoodForKids Restaurants...,0.835984
6479,20050,20050,vQL8hbqQfaTBcxHPPzkC3A,Five Guys,"8031 Turkey Lake Rd, Ste 100",Orlando,FL,32819,28.448157,-81.477188,4.0,32,1,"{'DriveThru': 'False', 'RestaurantsGoodForGrou...","Sandwiches, Restaurants, Fast Food, Burgers","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...","{'DriveThru': 'False', 'RestaurantsGoodForGrou...","[RestaurantsGoodForGroups, BikeParking, Restau...",RestaurantsGoodForGroups BikeParking Restauran...,0.831877
35631,109130,109130,FzIV4gsiFKZbv2OUvhNosg,Jersey Mike's Subs,450 S Hunt Club Blvd,Apopka,FL,32703,28.673476,-81.450175,4.0,22,1,"{'BusinessAcceptsCreditCards': 'True', 'Restau...","Fast Food, Delis, Restaurants, Sandwiches","{'Monday': '10:0-21:0', 'Tuesday': '10:0-21:0'...","{'BusinessAcceptsCreditCards': 'True', 'Restau...","[BusinessAcceptsCreditCards, GoodForKids, Rest...",BusinessAcceptsCreditCards GoodForKids Restaur...,0.82257
32050,98297,98297,MXPKpYA0ZEMgQ-yZsb9rDg,Subway,667 E Hwy 50,Clermont,FL,34711,28.548409,-81.747271,4.0,5,1,"{'RestaurantsTakeOut': 'True', 'RestaurantsGoo...","Restaurants, Sandwiches, Fast Food","{'Monday': '9:0-23:0', 'Tuesday': '9:0-23:0', ...","{'RestaurantsTakeOut': 'True', 'RestaurantsGoo...","[RestaurantsTakeOut, RestaurantsGoodForGroups,...",RestaurantsTakeOut RestaurantsGoodForGroups Go...,0.817526
42677,130427,130427,fWFY6h63Bvy9tZIKdSH_qw,Beijing & Tokyo,"16112 Marsh Rd, Ste 413",Winter Garden,FL,34787,28.516451,-81.622002,4.0,97,1,"{'RestaurantsAttire': ""u'casual'"", 'NoiseLevel...","Sushi Bars, Chinese, Asian Fusion, Restaurants","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...","{'RestaurantsAttire': 'u'casual'', 'NoiseLevel...","[RestaurantsAttire, NoiseLevel, GoodForKids, W...",RestaurantsAttire NoiseLevel GoodForKids Wheel...,0.878007
16488,50733,50733,qwiJHZv22ahl1Pzhkso5DA,Sus Hi Eatstation,"380 S State Rd 434, Ste 1004",Altamonte Springs,FL,32714,28.656818,-81.420208,4.0,228,1,"{'WiFi': ""'no'"", 'ByAppointmentOnly': 'False',...","Restaurants, Sushi Bars, Poke, Japanese, Asian...","{'Monday': '0:0-0:0', 'Tuesday': '11:0-22:0', ...","{'WiFi': ''no'', 'ByAppointmentOnly': 'False',...","[WiFi, RestaurantsTakeOut, RestaurantsAttire, ...",WiFi RestaurantsTakeOut RestaurantsAttire Outd...,0.872823
48942,149221,149221,W7PgsBw0QQ6CHFR8YlwiUw,Sus Hi Eatstation,"4693 Gardens Park Blvd, Ste 105",Orlando,FL,32839,28.494599,-81.423477,4.0,140,1,"{'NoiseLevel': ""u'average'"", 'WiFi': ""u'no'"", ...","Japanese, Sushi Bars, Asian Fusion, Restaurant...","{'Monday': '0:0-0:0', 'Tuesday': '11:0-22:0', ...","{'NoiseLevel': 'u'average'', 'WiFi': 'u'no'', ...","[NoiseLevel, WiFi, lot, RestaurantsGoodForGrou...",NoiseLevel WiFi lot RestaurantsGoodForGroups A...,0.87196
5350,16450,16450,1ggfHF5zecMy1W5aeGlOGA,WAVE Asian Bistro & Sushi,"301 N Baker St, Ste 106",Mount Dora,FL,32757,28.798896,-81.644134,4.5,326,1,"{'RestaurantsPriceRange2': '2', 'WiFi': ""'no'""...","Asian Fusion, Japanese, Sushi Bars, Restaurants","{'Tuesday': '11:0-20:0', 'Wednesday': '11:0-20...","{'RestaurantsPriceRange2': '2', 'WiFi': ''no''...","[RestaurantsPriceRange2, WiFi, RestaurantsRese...",RestaurantsPriceRange2 WiFi RestaurantsReserva...,0.867718


In [79]:
# TEST LOOP
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

user_iter = iter(user_id_set)
hit = 0

EXAMPLES = 1000

for dex in tqdm(range(EXAMPLES)):
    user_id = next(user_iter)
    data = df_all_filtered[df_all_filtered['user_id'] == user_id]
    X, y = train_test_split(data, train_size=0.5)
    y_pred = get_all_user_recommendations(X, df_business, embedded_attributes, embedded_categories)

    y_businesses = set(y['business_id'].values)
    y_pred_businesses = set(y_pred['business_id'].values)
    if len(y_pred_businesses.intersection(y_businesses)) > 0:
        hit += 1

print(hit/EXAMPLES)

  0%|          | 0/1000 [00:00<?, ?it/s]

0.14


0.14