# Imports

In [148]:
import random
import pandas as pd
import numpy
import tensorflow as tf

import gzip
import json

from collections import defaultdict
import heapq
from tqdm import tqdm

# Loader Functions

In [149]:
def readGz(path):
    # Open in text mode ('rt') with UTF-8 encoding for JSON lines
    path = "datasets/" + path
    with gzip.open(path, 'rt', encoding='utf-8') as f:
        for l in f:
            # Safely parse each line as JSON
            yield json.loads(l)

def load_to_dict(file_to_read):
    data = []
    try:
        for item in readGz(file_to_read):
            data.append(item)
    except EOFError as e:
        # Catching the specific EOFError indicating a corrupted file
        print(f"EOFError: Compressed file '{file_to_read}' ended prematurely. Error: {e}")
        print(f"This often indicates a corrupted or incomplete gzip file. Successfully loaded {len(data)} items before the error.")
    except Exception as e:
        # Catching other potential errors during decompression or JSON parsing
        print(f"An unexpected error occurred while reading '{file_to_read}': {e}")
        print(f"Successfully loaded {len(data)} items before the error.")
    return data

def save_likes(filename, data_dict):
    filename = "eval/"+filename
    with open(filename, "w") as fp:
        json.dump(data_dict, fp, indent=4)
    print("Saved to ", filename)

def load_user_likes(filename):
    """
    Load a user_likes JSON file back into a dict[user_id] = list of liked places.
    """
    filename = "eval/"+filename
    with open(filename, "r") as f:
        data = json.load(f)

    # Ensure values are lists, not sets or other types
    return {user_id: list(likes) for user_id, likes in data.items()}


# Exploratory Analysis

# Pre-processing

## Universal Variables

In [150]:
TOP_K = 30
RANDOM_SEED = 42
POS_THRESHOLD = 4

RATIO_FOR_REVEALED = 0.8

# Files we're checking the evaluation from
REVIEW_DATA_FNAME = "review-Oregon_10.json.gz"
METADATA_DATA_FNAME = "meta-Oregon.json.gz"

FULL_LIKES_DATA_FNAME = "users_likes_full.json"
REVEALED_LIKES_DATA_FNAME = "users_revealed_likes.json"
HIDDEN_LIKES_DATA_FNAME = "users_hidden_likes.json"
HIDDEN_LIKES_BPR_DATA_FNAME = "users_hidden_likes_BPR.json"

ITER_UPDATE_REC_FNAME = "iterative_update_recommendation_per_user.json"
BPR_REC_FNAME = "bpr_recommendation_per_user.json"

BASELINE_REC_FNAME = "baseline_recommendation_per_user.json"

## The Pre-processing of the data

In [151]:
# Loading the data
reviews = load_to_dict(REVIEW_DATA_FNAME)[:100000]
metadata = load_to_dict(METADATA_DATA_FNAME)

In [152]:
# Get each users' highly reviewed stores list
# users_likes[user_id] = [stores they rated >= 4]
users_likes = defaultdict(set)
dupe_review_count = 0
dupe_removed_count = 0

for review in reviews:
    user_id = review["user_id"]
    gmap_id = review["gmap_id"]
    rating = review["rating"]

    if gmap_id in users_likes[user_id]:
        dupe_review_count += 1
    
    # Use the most recent review, meaning if a user re-reviewed a place and they didn't like it, update our set
    if gmap_id in users_likes[user_id] and rating < 4:
        users_likes[user_id].remove(gmap_id)
        dupe_removed_count += 1

    if rating >= POS_THRESHOLD and (gmap_id not in users_likes[user_id]):
        users_likes[user_id].add(gmap_id)

print("Num dupes: ", dupe_review_count) 
print("Num dupes removed: ", dupe_removed_count) 

# Split off the users_likes to revealed and hidden
users_revealed_likes = defaultdict(list)
users_hidden_likes = defaultdict(list)
users_total_likes = defaultdict(list)

random.seed(RANDOM_SEED)
for user_id, liked_places in users_likes.items():
    # For now let's say 8:2 ratio for revealed vs hidden
    # Shuffle before splitting

    liked_list = list(liked_places)
    num_likes = len(liked_list)
    
    random.shuffle(liked_list)

    # ensures at least 1 review is hidden
    min_hidden_count = 1
    split_point = max(min_hidden_count, int(RATIO_FOR_REVEALED * num_likes))

    revealed = liked_list[split_point:]
    hidden = liked_list[:split_point]
    
    if len(hidden) >= min_hidden_count:
        users_revealed_likes[user_id] = revealed
        users_hidden_likes[user_id] = hidden
        users_total_likes[user_id] = liked_list

# Save user likes: revealed, hidden, and full
save_likes(FULL_LIKES_DATA_FNAME, users_total_likes)
save_likes(REVEALED_LIKES_DATA_FNAME, users_revealed_likes)
save_likes(HIDDEN_LIKES_DATA_FNAME, users_hidden_likes)

Num dupes:  1477
Num dupes removed:  0
Saved to  eval/users_likes_full.json
Saved to  eval/users_revealed_likes.json
Saved to  eval/users_hidden_likes.json


# Models

## Iterative Update (HW3) Model

In [155]:
# The functions we'll use for to update in the iterations

def getGlobalAverage(trainRatings):
    # Return the average rating in the training set
    res = numpy.average(trainRatings)

    return res

def alphaUpdate(ratingsTrain, alpha, betaU, betaLoc, lamb):
    # Update equation for alpha
    newAlpha = 0

    # From slide 83 of the recommendation slide
    # alpha = sum_{u,i in train} (R_u,i - (betaU + betaLoc)) / Ntrain
    Ntrain = len(ratingsTrain)

    for u, loc, rating in ratingsTrain:
        # u, loc, rating = r["user_id"], r["gmap_id"], r["rating"]

        newAlpha += rating - (betaU[u] + betaLoc[loc])
    
    newAlpha /= Ntrain

    return newAlpha

def betaUUpdate(ratingsPerUser, alpha, betaU, betaLoc, lamb):
    # Update equation for betaU
    newBetaU = {}

    # From slide 83 of the recommendation slide
    # betaU = sum_{i in I_u} (R_u,i - (alpha + betaLoc)) / (lamb + |I_u|)
    
    # Structure is ratingsPerUser[user] = [(location, rating)]
    # betaU[user] = how much does this user tend to rate things above mean

    for u in ratingsPerUser:
        curr = 0
        
        for i, r in ratingsPerUser[u]:
            curr += r - (alpha + betaLoc[i])
    
        curr /= (lamb + len(ratingsPerUser[u]))

        newBetaU[u] = curr

    return newBetaU

def betaLocUpdate(ratingsPerLocation, alpha, betaU, betaLoc, lamb):
    # Update equation for betaLoc
    newBetaLoc = {}

    # From slide 83 of the recommendation slide
    # betaU = sum_{u in U_i} (R_u,i - (alpha + betaU)) / (lamb + |U_i|)

    # ratingsPerLocation[location] = [(user, rating)]

    for i in ratingsPerLocation:
        curr = 0

        for u, r in ratingsPerLocation[i]:
            curr += r - (alpha + betaU[u])
        
        curr /= (lamb + len(ratingsPerLocation[i]))

        newBetaLoc[i] = curr

    return newBetaLoc

In [156]:
# The model we're using
def iterativeUpdateModel(ratingsTrain, ratingsPerUser, ratingsPerLocation, alpha, betaU, betaLoc):
    # Improve upon your model from the previous question (e.g. by running multiple iterations)

    # Running multiple iterations
    lamb = 0.5
    for i in range(100):
        alpha = alphaUpdate(ratingsTrain, alpha, betaU, betaLoc, lamb)
        betaU = betaUUpdate(ratingsPerUser, alpha, betaU, betaLoc, lamb)
        betaLoc = betaLocUpdate(ratingsPerLocation, alpha, betaU, betaLoc, lamb)

    return alpha, betaU, betaLoc

In [157]:
# Extra pre-processing for this specific model: get all users and locations from the test set
# Also, get all the reviews that are revealed / negative (which will be used to train)

test_users_IUM = set()
test_locations_IUM = set()

# of (user_id, gmap_id, rating)
test_revealed_and_negative_reviews = set()

for review in reviews:
    user = review["user_id"]
    test_users_IUM.add(user)

    loc = review["gmap_id"]
    test_locations_IUM.add(loc)

    rating = review["rating"]

    if loc in users_revealed_likes[user] or rating < 4:
        test_revealed_and_negative_reviews.add((user, loc, rating))

test_users_IUM = list(test_users_IUM)
test_locations_IUM = list(test_locations_IUM)

In [158]:
# Use the train set to figure out the generally ideal alpha, betaU, betaLoc of the model in this kind of task
ratingsTrain = test_revealed_and_negative_reviews
ratingsPerUser = defaultdict(list)
ratingsPerLocation = defaultdict(list)
for u, loc, r in ratingsTrain:
    ratingsPerUser[u].append((loc,r))
    ratingsPerLocation[loc].append((u,r))

trainRatings = [r for _, _, r in ratingsTrain]

betaU = {}
betaLoc = {}
for u in ratingsPerUser:
    betaU[u] = 0

for loc in ratingsPerLocation:
    betaLoc[loc] = 0

alpha = getGlobalAverage(trainRatings) # Could initialize anywhere, this is a guess

alpha, betaU, betaLoc = iterativeUpdateModel(ratingsTrain, ratingsPerUser, ratingsPerLocation, alpha, betaU, betaLoc)

In [159]:
# Go through each user and location, get the recommendations
recommendation = {}

for user_id in tqdm(test_users_IUM):
    # min-heap storing (score, gmap_id)
    heap = []

    bu = betaU.get(user_id, 0)

    revealed = users_revealed_likes[user_id]

    for gmap_id in test_locations_IUM:

        if gmap_id in revealed:
            continue

        bi = betaLoc.get(gmap_id, 0)
        score = alpha + bu + bi

        if len(heap) < TOP_K:
            # just push until full
            heapq.heappush(heap, (score, gmap_id))
        else:
            # if better than the smallest in heap → replace the smallest
            if score > heap[0][0]:
                heapq.heapreplace(heap, (score, gmap_id))

    # extract best K sorted from highest to lowest
    heap.sort(reverse=True)
    recommendation[user_id] = [gmap_id for score, gmap_id in heap]

# Export to json
save_likes(ITER_UPDATE_REC_FNAME, recommendation)

100%|██████████| 65671/65671 [01:22<00:00, 797.81it/s]


Saved to  eval/iterative_update_recommendation_per_user.json


## Bayesian Personalized Ranking (BPR) Model
Note: Codes referenced from the Professor's page Chapter 5

In [160]:
# BPR-specific variables
MIN_INTERACTIONS_PER_USER = 1
MAX_USERS = 100000
MAX_ITEMS = 100000

LATENT_DIM = 64
LEARNING_RATE = 0.01
REG_LAMBDA = 1e-5
NSAMPLES_PER_BATCH = 50000
N_TRAIN_STEPS = 1000

In [161]:
# Extra pre-processing for this model
random.seed(RANDOM_SEED)
numpy.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

df = pd.DataFrame(reviews)


df_pos = df[df["rating"] >= POS_THRESHOLD].copy()

user_counts = df_pos["user_id"].value_counts()
eligible_users = user_counts[user_counts >= MIN_INTERACTIONS_PER_USER].index

df_pos = df_pos[df_pos["user_id"].isin(eligible_users)].reset_index(drop=True)


# Limiting the unique users and items. This is done for the sake of time.
# For the record, we tried running this without the limitation and the score we got was negligibly similar to the one with this limitation.
unique_users = df_pos["user_id"].unique()
if MAX_USERS is not None:
    unique_users = unique_users[:MAX_USERS]

df_pos = df_pos[df_pos["user_id"].isin(unique_users)]

unique_items = df_pos["gmap_id"].unique()
if MAX_ITEMS is not None:
    unique_items = unique_items[:MAX_ITEMS]

df_pos = df_pos[df_pos["gmap_id"].isin(unique_items)].reset_index(drop=True)

user_id_to_idx = {u: idx for idx, u in enumerate(unique_users)}
item_id_to_idx = {i: idx for idx, i in enumerate(unique_items)}

df_pos["user_idx"] = df_pos["user_id"].map(user_id_to_idx)
df_pos["item_idx"] = df_pos["gmap_id"].map(item_id_to_idx)

num_users = len(user_id_to_idx)
num_items = len(item_id_to_idx)


# Shuffle rows (if no timestamp, just randomize)
if "time" in df_pos.columns:
    df_pos = df_pos.sort_values(["user_idx", "time"])
else:
    df_pos = df_pos.sample(frac=1.0, random_state=RANDOM_SEED)

train_rows = []
test_rows = []

# Split each user's interactions 80:20
for user_id, group in df_pos.groupby("user_id"):
    idx_list = group.index.tolist()
    random.shuffle(idx_list)
    
    split_point = max(1, int(RATIO_FOR_REVEALED * len(idx_list)))  # ensure at least 1 row in train
    train_rows.extend(idx_list[:split_point])
    test_rows.extend(idx_list[split_point:])

train_df = df_pos.loc[train_rows].reset_index(drop=True)
test_df = df_pos.loc[test_rows].reset_index(drop=True)

# Get a translation dictionary so we know the user and location id we're referring to by their idx after the fitting
idx_to_user_id = {idx: u for u, idx in user_id_to_idx.items()}
idx_to_item_id = {idx: i for i, idx in item_id_to_idx.items()}


interactions_train = list(
    zip(
        train_df["user_idx"].astype(int).tolist(),
        train_df["item_idx"].astype(int).tolist(),
        train_df["rating"].tolist()
    )
)

items_per_user_train = defaultdict(set)
for u, i, r in interactions_train:
    items_per_user_train[u].add(i)

all_items = list(range(num_items))

In [162]:
# The model we're using
class BPRbatch(tf.keras.Model):
    def __init__(self, K, lamb):
        super().__init__()
        self.lamb = lamb

        # Global item bias
        self.betaI = self.add_weight(
            name="betaI",
            shape=(num_items,),
            initializer=tf.random_normal_initializer(stddev=0.001),
            trainable=True,
        )

        # User latent factors
        self.gammaU = self.add_weight(
            name="gammaU",
            shape=(num_users, K),
            initializer=tf.random_normal_initializer(stddev=0.001),
            trainable=True,
        )

        # Item latent factors
        self.gammaI = self.add_weight(
            name="gammaI",
            shape=(num_items, K),
            initializer=tf.random_normal_initializer(stddev=0.001),
            trainable=True,
        )

    def score(self, sampleU, sampleI):
        # sampleU, sampleI are index tensors
        u = tf.cast(sampleU, tf.int32)
        i = tf.cast(sampleI, tf.int32)

        beta_i = tf.nn.embedding_lookup(self.betaI, i)
        gamma_u = tf.nn.embedding_lookup(self.gammaU, u)
        gamma_i = tf.nn.embedding_lookup(self.gammaI, i)

        x_ui = beta_i + tf.reduce_sum(gamma_u * gamma_i, axis=1)
        return x_ui

    def call(self, sampleU, sampleI, sampleJ):
        x_ui = self.score(sampleU, sampleI)
        x_uj = self.score(sampleU, sampleJ)
        # BPR loss: -log σ(x_ui - x_uj)
        loss = -tf.reduce_mean(tf.math.log_sigmoid(x_ui - x_uj))
        return loss

    def reg(self):
        return self.lamb * (
            tf.nn.l2_loss(self.betaI)
            + tf.nn.l2_loss(self.gammaU)
            + tf.nn.l2_loss(self.gammaI)
        )

In [163]:
# Training the model
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)
modelBPR = BPRbatch(LATENT_DIM, REG_LAMBDA)

def trainingStepBPR(model, interactions, items_per_user, items, Nsamples):
    sampleU, sampleI, sampleJ = [], [], []

    for _ in range(Nsamples):
        u, i, r = random.choice(interactions)
        j = random.choice(items)
        while j in items_per_user[u]:
            j = random.choice(items)

        sampleU.append(u)
        sampleI.append(i)
        sampleJ.append(j)

    # Convert lists → tensors
    sampleU_tf = tf.convert_to_tensor(sampleU, dtype=tf.int32)
    sampleI_tf = tf.convert_to_tensor(sampleI, dtype=tf.int32)
    sampleJ_tf = tf.convert_to_tensor(sampleJ, dtype=tf.int32)

    with tf.GradientTape() as tape:
        loss = model(sampleU_tf, sampleI_tf, sampleJ_tf)
        loss += model.reg()

    grads = tape.gradient(loss, model.trainable_variables)

    # Pair gradients with variables, skipping any None grads just in case
    grads_and_vars = [
        (g, v) for g, v in zip(grads, model.trainable_variables) if g is not None
    ]

    if grads_and_vars:
        optimizer.apply_gradients(grads_and_vars)
    else:
        print("Warning: no gradients to apply this step.")

    return float(loss.numpy())

# Run the training
test_loss = trainingStepBPR(
    modelBPR,
    interactions_train,
    items_per_user_train,
    all_items,
    Nsamples=NSAMPLES_PER_BATCH  # mini testing
)

print(f"One mini-batch objective: {test_loss:.4f}")

for step in range(N_TRAIN_STEPS):
    obj = trainingStepBPR(
        modelBPR,
        interactions_train,
        items_per_user_train,
        all_items,
        NSAMPLES_PER_BATCH,
    )
    if (step + 1) % 5 == 0:
        print(f"Step {step + 1}, objective = {obj:.4f}")

One mini-batch objective: 0.6932
Step 5, objective = 0.6851
Step 10, objective = 0.6778
Step 15, objective = 0.6705
Step 20, objective = 0.6644
Step 25, objective = 0.6569
Step 30, objective = 0.6494
Step 35, objective = 0.6360
Step 40, objective = 0.6144
Step 45, objective = 0.5855
Step 50, objective = 0.5536
Step 55, objective = 0.5217
Step 60, objective = 0.4951
Step 65, objective = 0.4743
Step 70, objective = 0.4634
Step 75, objective = 0.4572
Step 80, objective = 0.4539
Step 85, objective = 0.4533
Step 90, objective = 0.4502
Step 95, objective = 0.4508
Step 100, objective = 0.4492
Step 105, objective = 0.4481
Step 110, objective = 0.4489
Step 115, objective = 0.4483
Step 120, objective = 0.4482
Step 125, objective = 0.4477
Step 130, objective = 0.4471
Step 135, objective = 0.4482
Step 140, objective = 0.4469
Step 145, objective = 0.4478
Step 150, objective = 0.4470
Step 155, objective = 0.4473
Step 160, objective = 0.4471
Step 165, objective = 0.4465
Step 170, objective = 0.4469
S

In [164]:
# Getting the recommendation

test_items_per_user = defaultdict(list)
for _, row in test_df.iterrows():
    u = int(row["user_idx"])
    i = int(row["item_idx"])
    test_items_per_user[u].append(i)

def get_top_k_recommendations(model, train_items_per_user, test_items_per_user, items, k, idx_to_user_id, idx_to_item_id):
    """
    Returns: dict of user_id -> list of top-k gmap_ids, excluding training items.
    """
    recs = {}
    users = list(test_items_per_user.keys())
    all_items_array = numpy.array(items, dtype=numpy.int32)

    for idx, u in enumerate(users):
        user_id = idx_to_user_id[u]

        train_items = train_items_per_user[u]

        # Candidate items = all items not already interacted with
        candidate_mask = ~numpy.isin(all_items_array, list(train_items))
        candidate_items = all_items_array[candidate_mask]

        if len(candidate_items) == 0:
            recs[user_id] = []
            continue

        # Score all candidate items
        u_list = numpy.full(len(candidate_items), u, dtype=numpy.int32)
        scores = model.score(u_list, candidate_items).numpy()

        # Sort scores and get top-k
        top_idx = numpy.argsort(-scores)[:k]  # sorted descending
        top_item_indices_sorted = candidate_items[top_idx]

        # Map indices back to gmap_ids
        top_item_ids = [idx_to_item_id[i] for i in top_item_indices_sorted]

        recs[user_id] = top_item_ids

    return recs

# Running the function and getting the recommendations
recommendation = get_top_k_recommendations(
    modelBPR,
    items_per_user_train,
    test_items_per_user,
    all_items,
    TOP_K,
    idx_to_user_id,
    idx_to_item_id
)

In [165]:
# Exporting to json

# Save the recommendations to a file so it can be used in the evaluation function
save_likes(BPR_REC_FNAME, recommendation)

# Also save the hidden likes
users_hidden_likes = defaultdict(list)

for _, row in test_df.iterrows():
    user_id = idx_to_user_id[row["user_idx"]]
    item_id = idx_to_item_id[row["item_idx"]]
    users_hidden_likes[user_id].append(item_id)

users_hidden_likes = dict(users_hidden_likes)

# Saving the hidden likes specific for this
# This is because of the 100000 limiting of users and items as done above
save_likes(HIDDEN_LIKES_BPR_DATA_FNAME, users_hidden_likes)

Saved to  eval/bpr_recommendation_per_user.json
Saved to  eval/users_hidden_likes_BPR.json


# Unused Models

## Linear Regression

In [None]:
# # Feature vector
# def feature(datum):
#     feat = [1]

#     # The features that make sense to see:
#     # price, latitude/longitude, description length, num_of_reviews

#     feat.append(len(datum["price"]) if datum["price"] else 0)
#     # feat.append(datum["latitude"])
#     # feat.append(datum["longitude"])

#     feat.append(len(datum["description"]) if datum["description"] else 0)
#     feat.append(datum["num_of_reviews"])

#     # Days open
#     days_open = [0] * 7

#     if datum["hours"]:
#         for d in datum["hours"]:
#             if d[0] == "Monday" and str.lower(d[1]) != "closed":
#                 days_open[0] = 1
#             if d[0] == "Tuesday" and str.lower(d[1]) != "closed":
#                 days_open[1] = 1
#             if d[0] == "Wednesday" and str.lower(d[1]) != "closed":
#                 days_open[2] = 1
#             if d[0] == "Thursday" and str.lower(d[1]) != "closed":
#                 days_open[3] = 1
#             if d[0] == "Friday" and str.lower(d[1]) != "closed":
#                 days_open[4] = 1
#             if d[0] == "Saturday" and str.lower(d[1]) != "closed":
#                 days_open[5] = 1
#             if d[0] == "Sunday" and str.lower(d[1]) != "closed":
#                 days_open[6] = 1

#     feat += days_open

#     # Categories
#     # cat_one_hot = [0] * len(list(cats))
    
#     # if datum["category"]:
#     #     for c in datum["category"]:
#     #         cat_one_hot[cats[c]] = 1

#     # feat += cat_one_hot

#     # Planning
#     # plannings_one_hot = [0] * len(list(plannings))
#     # if datum["MISC"] and "Planning" in datum["MISC"] and datum["MISC"]["Planning"]:
#     #     for p in datum["MISC"]["Planning"]:
#     #         plannings_one_hot[plannings[p]] = 1
    
#     # feat += plannings_one_hot

#     # Payment
#     # payments_one_hot = [0] * len(list(payments))
#     # if datum["MISC"] and "Payments" in datum["MISC"] and datum["MISC"]["Payments"]:
#     #     for p in datum["MISC"]["Payments"]:
#     #         payments_one_hot[payments[p]] = 1
    
#     # feat += payments_one_hot

#     return feat

In [154]:
# # Implement and predict

# y = numpy.array([d["avg_rating"] for d in train_metadata])

# x = numpy.array([feature(d) for d in train_metadata])

# # Setting up the logistic regression using sklearn library (class weight is balanced)
# regr = linear_model.LinearRegression()

# # Train the model using feature = x, label = y
# regr.fit(x, y)

# # Now predict if we feed the model another feature (x)
# x_test = numpy.array([feature(d) for d in metadata])

# y_train_prediction = regr.predict(x_test)

# loc_and_pred = []

# for i in range(len(x_test)):
#     loc_and_pred.append((y_train_prediction[i], metadata[i]["gmap_id"]))

# loc_and_pred.sort(reverse=True)

# popularity_list_id = [b for a, b in loc_and_pred]

# # Building the dictionary to feed to the evaluation function (same as baseline)
# # recommendation[user_id] = [top k items the model recommend]
# recommendation = {}

# # Get each user that has reviewed
# for review in reviews:
#     user_id = review["user_id"]

#     # Recommend the top number of hidden reviews for each user
#     if user_id not in recommendation:
#         # Filter the popularity list so that the users' revealed likes isn't included here
#         filtered_popularity_list = []

#         for name in popularity_list_id:
#             if name not in users_revealed_likes[user_id]:
#                 filtered_popularity_list.append(name)

#             if len(filtered_popularity_list) == k:
#                 break

#         recommendation[user_id] = filtered_popularity_list[:k]

# loader.save_likes("linear_regression_recommendation_per_user.json", recommendation)

# Evaluation and Baseline

## Baseline Model

In [166]:
# Because the baseline doesn't need any training, we build it off of the entire set
# We’ll use a standard baseline for ranking latent factor model, which is by always recommending the top most popular places in the testing dataset
# “Popular” means aggregation of features from each places’ metadata; number of reviews * average rating per store

# Preprocessing the data; get the number of reviews per store in the metadata
locations_review_count = defaultdict(int)
locations_avg_rating = defaultdict(int)

# First get the count of all the reviews for each location
for review in reviews:
    locations_review_count[review["gmap_id"]] += 1

# Get the average rating listed in the metadata
for metadata in metadata:
    locations_avg_rating[metadata["gmap_id"]] = metadata["avg_rating"]

# Then multiply the two collected data and fill in the locations_popularity[gmap_id] = number of reviews * average rating
locations_popularity = defaultdict(int)

for gmap_id in locations_review_count:
    locations_popularity[gmap_id] = locations_review_count[gmap_id] * locations_avg_rating[gmap_id]


# Getting the resulting "most popular" list that can be used for the baseline
# Turn the locations_popularity dictionary to list of tuples that we can sort
popularity_list = [(pop, gmap_id) for gmap_id, pop in locations_popularity.items()]

# Sort in reverse order so the most popular place is at the top
popularity_list.sort(reverse=True)

# And then the gmap_id only list
popularity_list_id = [gmap_id for _, gmap_id in popularity_list]


# Building the dictionary to feed to the evaluation function
# recommendation[user_id] = [top k items the model recommend]
recommendation = {}

# Get each user that has reviewed
for review in reviews:
    user_id = review["user_id"]

    # Recommend the top number of hidden reviews for each user
    if user_id not in recommendation:
        # Filter the popularity list so that the users' revealed likes isn't included here
        filtered_popularity_list = []

        for name in popularity_list_id:
            if name not in users_revealed_likes[user_id]:
                filtered_popularity_list.append(name)

            if len(filtered_popularity_list) == TOP_K:
                break

        recommendation[user_id] = filtered_popularity_list[:TOP_K]

# Export to json
save_likes(BASELINE_REC_FNAME, recommendation)

Saved to  eval/baseline_recommendation_per_user.json


## Evaluation

In [167]:
def evalFunc(recs_fname, hidden_fname):
    recs = load_user_likes(recs_fname)
    hidden = load_user_likes(hidden_fname)

    hit_scores = {}  
    high_hit_scores = {}  

    for user_id, hidden_items in hidden.items():
        recommended_items = recs[user_id]

        # count how many hidden items appear in recommendations
        hits = sum(1 for item in hidden_items if item in recommended_items)

        # divide by number of hidden items
        #hit_rate = hits / len(hidden_items)
        
        hit_for_this_user = 1 if hits > 0 else 0
        
        hit_scores[user_id] = hit_for_this_user
        if len(hidden_items) < 2:
            high_hit_scores[user_id] = hit_for_this_user

    # average performance across all users
    overall_score = sum(hit_scores.values()) / len(hit_scores)

    print("Overall Hitrate@k score: ",overall_score)

    max_user = max(hit_scores, key=hit_scores.get)
    max_score = hit_scores[max_user]

    print("Highest score: ",max_score)

    # score when only considering high number of reviews
    overall_high_score = sum(high_hit_scores.values()) / len(high_hit_scores)

    print("Overall high hit score: ", overall_high_score)

In [168]:
# Running the evaluations

print("Evaluation for Iterative Update Model")
evalFunc(ITER_UPDATE_REC_FNAME, HIDDEN_LIKES_DATA_FNAME)

print("Evaluation for BPR Model")
evalFunc(BPR_REC_FNAME, HIDDEN_LIKES_BPR_DATA_FNAME)

print("Evaluation for Baseline Model")
evalFunc(BASELINE_REC_FNAME, HIDDEN_LIKES_DATA_FNAME)

Evaluation for Iterative Update Model
Overall Hitrate@k score:  0.006363479163064153
Highest score:  1
Overall high hit score:  0.005573714739169044
Evaluation for BPR Model
Overall Hitrate@k score:  0.08545410686286335
Highest score:  1
Overall high hit score:  0.08073110285006196
Evaluation for Baseline Model
Overall Hitrate@k score:  0.03219782120006917
Highest score:  1
Overall high hit score:  0.03045704460861186
