In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_recall_fscore_support, ndcg_score
from statistics import mean

In [2]:
ratings_csv = "../Data/predicted_ratings.csv"
test_csv = "../Data/validation_100k.csv"

In [3]:
def get_basic_recommendations(df):
    recommendation_dict = {}
    unique_users = pd.unique(df.reviewerID)
    for user_id in unique_users:
        user_data = df[df.reviewerID == user_id].sort_values(by = "overall", ascending = False)
        num_recommendations = min(len(user_data), 10)
        recommendation_dict[user_id] = user_data.head(num_recommendations).asin.values
    return recommendation_dict

In [4]:
#ratings_df = pd.read_csv(ratings_csv, low_memory = False)
ratings_df = pd.DataFrame(np.random.randint(1, 6, size=(1000, 3)), columns=["reviewerID", "asin", "overall"])
test_df = pd.DataFrame(np.random.randint(1, 6, size=(100, 3)), columns=["reviewerID", "asin", "overall"])
recommendations = get_basic_recommendations(ratings_df)


In [5]:
test_vals = {}
unique_users = pd.unique(test_df.reviewerID)
for user_id in unique_users:
        user_data = test_df[test_df.reviewerID == user_id]
        test_vals[user_id] = user_data.asin.values


In [6]:
multibinarizer = MultiLabelBinarizer()
y_pred = multibinarizer.fit(recommendations.values()).transform(recommendations.values())
y_true = multibinarizer.transform(test_vals.values())
precision, recall, fscore, support = precision_recall_fscore_support(y_true, y_pred)
ndcg = ndcg_score(y_true, y_pred)

In [7]:
mean_precision = mean(precision)
mean_recall = mean(recall)
mean_fscore = mean(fscore)
print("Precision: {}\nRecall: {}\nF1 Score: {}\nNDCG Score: {}".format(mean_precision, mean_recall, mean_fscore, ndcg))

Precision: 1.0
Recall: 0.88
F1 Score: 0.9333333333333333
NDCG Score: 0.9999999999999998
