In [1]:
import pandas as pd

import eval_fns

In [None]:
# Base directory of the KuaiRec 2.0 data, relative to this notebook.
root = '../KuaiRec 2.0/'

# Score exports from each recommender. The caption and random exports are
# read with their first column as the index; the NCF export is read as-is.
prediction_scores_caption = pd.read_csv(
    root + 'recommendations/recommendations_caption_2.csv',
    index_col=0,
)
prediction_scores_ncf = pd.read_csv(
    root + 'recommendations/ncf_predictions_combined.csv',
)
prediction_scores_random = pd.read_csv(
    root + 'recommendations/recommendations_random.csv',
    index_col=0,
)

# Train / validation interaction tables (user_id, video_id, watch_ratio are used below).
joined_train_data = pd.read_csv(
    root + 'data_exports/joined_train_data.csv',
)
joined_val_data = pd.read_csv(
    root + 'data_exports/joined_val_data.csv',
)

# Video table passed to the category-based metrics
# (presumably caption/category metadata per video -- confirm against the CSV).
video_data = pd.read_csv(
    root + 'data/kuairec_caption_category_translated.csv',
    index_col=0,
)

In [3]:
# For every model: relabel the score column as a prediction, then order the
# rows by user and video. Each frame is handled in a single chain.
prediction_scores_caption = (
    prediction_scores_caption
    .rename(columns={'watch_ratio': 'predicted_watch_ratio'})
    .sort_values(by=['user_id', 'video_id'])
)
prediction_scores_ncf = (
    prediction_scores_ncf
    .rename(columns={'watch_ratio': 'predicted_watch_ratio'})
    .sort_values(by=['user_id', 'video_id'])
)
prediction_scores_random = (
    prediction_scores_random
    .rename(columns={'watch_ratio': 'predicted_watch_ratio'})
    .sort_values(by=['user_id', 'video_id'])
)

# Display the caption-based scores as a sanity check.
prediction_scores_caption

Unnamed: 0,user_id,video_id,predicted_watch_ratio
0,14,0,4.739207
1,14,1,4.587039
2,14,2,4.758113
3,14,3,4.664595
4,14,4,4.527781
...,...,...,...
12585121,7162,10723,4.786916
12585122,7162,10724,4.605312
12585123,7162,10725,4.621752
12585124,7162,10726,4.626483


## Get user watch history

We build each user's watch history from the training data so that videos the user has already watched can be filtered out. This way, only videos that are new to the user are recommended.

In [4]:
# Build the watch history from the training interactions; it is passed to the
# ground-truth and recommendation helpers below.
# NOTE(review): the exact return structure is defined in eval_fns -- confirm there.
user_watch_history = eval_fns.get_user_watch_history(joined_train_data)

## Getting ground truth videos for each user

In [5]:
# IDs that appear in the training interactions.
users_in_train_data = set(joined_train_data['user_id'])
videos_in_train_data = set(joined_train_data['video_id'])

# Ground truth is derived from the validation interactions, given the
# training users/videos and the watch history.
# (Presumably restricts to known users/videos and unseen items -- confirm in eval_fns.)
ground_truth = eval_fns.get_ground_truth(
    joined_val_data[['user_id', 'video_id', 'watch_ratio']],
    users_in_train_data,
    videos_in_train_data,
    user_watch_history,
)

In [6]:
# Ground-truth rows belonging to user 14.
ground_truth.loc[ground_truth['user_id'] == 14]

Unnamed: 0,user_id,video_id,watch_ratio
11,14,8766,3.318871
702,14,8799,3.185954
607,14,2735,2.598506
602,14,4201,2.478148
573,14,4015,2.319912
...,...,...,...
131,14,7297,0.032396
991,14,4021,0.032293
180,14,4141,0.032250
61,14,7461,0.029277


## Getting recommendations for each user

In [7]:
# Video IDs that appear in the validation interactions.
videos_in_val_data = set(joined_val_data['video_id'])

# Build one recommendation table per model from its prediction scores.
# (Presumably limited to validation videos not in the user's watch history --
# confirm in eval_fns.get_user_recommendations.)
recommendations_caption = eval_fns.get_user_recommendations(
    prediction_scores_caption,
    videos_in_val_data,
    user_watch_history,
)
recommendations_ncf = eval_fns.get_user_recommendations(
    prediction_scores_ncf,
    videos_in_val_data,
    user_watch_history,
)
recommendations_random = eval_fns.get_user_recommendations(
    prediction_scores_random,
    videos_in_val_data,
    user_watch_history,
)

100%|██████████| 1411/1411 [00:01<00:00, 759.80it/s]
100%|██████████| 1411/1411 [00:02<00:00, 572.86it/s]
100%|██████████| 1411/1411 [00:01<00:00, 808.15it/s]


In [8]:
# Caption-based recommendation rows belonging to user 14.
recommendations_caption.loc[recommendations_caption['user_id'] == 14]

Unnamed: 0,user_id,video_id,predicted_watch_ratio
2327,14,2966,4.821256
6113,14,7485,4.812589
3407,14,4318,4.810647
6351,14,7723,4.809793
499,14,825,4.804419
...,...,...,...
6166,14,7538,4.253353
7074,14,8586,4.253353
1931,14,2546,4.248945
5883,14,7207,4.240262


In [9]:
# Group every table by user so the metric helpers can work per user.
reco_grp_caption = recommendations_caption.groupby(by='user_id')
reco_grp_ncf = recommendations_ncf.groupby(by='user_id')
reco_grp_random = recommendations_random.groupby(by='user_id')

ground_truth_grp = ground_truth.groupby(by='user_id')

In [10]:
# Cut-off used for the top-k lists built in this cell.
k = 50

# Get top 50 ground truth and recommendations for user 14
# NOTE(review): the ground-truth call receives the ungrouped `ground_truth` frame,
# while the recommendation calls receive groupby objects -- confirm that
# eval_fns.get_top_k_for_user is meant to accept both.
top_50_ground_truth_user_14 = eval_fns.get_top_k_for_user(k, 14, ground_truth)
top_50_recommendations_user_14_caption = eval_fns.get_top_k_for_user(k, 14, reco_grp_caption)
top_50_recommendations_user_14_ncf = eval_fns.get_top_k_for_user(k, 14, reco_grp_ncf)
top_50_recommendations_user_14_random = eval_fns.get_top_k_for_user(k, 14, reco_grp_random)

# Show the caption-based list; swap in top_50_ground_truth_user_14 to inspect the ground truth instead.
top_50_recommendations_user_14_caption

Unnamed: 0,user_id,video_id,predicted_watch_ratio
2327,14,2966,4.821256
6113,14,7485,4.812589
3407,14,4318,4.810647
6351,14,7723,4.809793
499,14,825,4.804419
926,14,1268,4.803011
578,14,918,4.802865
983,14,1325,4.802204
3249,14,4122,4.801659
4603,14,5743,4.799829


## Calculation of Evaluation Metrics

### Category-Aware NDCG@k

In [11]:
# Category-aware NDCG@50 for user 14, printed in the order: caption, NCF, random.
# NOTE(review): in the recorded output the random baseline scores highest --
# worth confirming that the metric behaves as intended.
for _top_50_recos in (
    top_50_recommendations_user_14_caption,
    top_50_recommendations_user_14_ncf,
    top_50_recommendations_user_14_random,
):
    print(eval_fns.get_category_ndcg_at_k(_top_50_recos, top_50_ground_truth_user_14, video_data))

0.7635716414327793
0.7618081067801695
0.9239853220909193


### Distinct Categories @ k

In [12]:
# Distinct Categories @ 50 for user 14, printed in the order: caption, NCF, random.
for _recommendations in (
    recommendations_caption,
    recommendations_ncf,
    recommendations_random,
):
    print(eval_fns.get_user_distinct_categories_at_k(50, 14, _recommendations, video_data))

16
19
22


### Average watch ratio @ k

In [13]:
# Average watch ratio @ 50 for user 14, printed in the order: caption, NCF, random.
# 'watch_ratio' names the column passed to the helper alongside the grouped ground truth.
for _reco_grp in (
    reco_grp_caption,
    reco_grp_ncf,
    reco_grp_random,
):
    print(eval_fns.get_user_avg_watch_ratio_at_k(50, 14, _reco_grp, 'watch_ratio', ground_truth_grp))

1.041527408845175
0.9537275806478508
0.99515572098966


### Precision@k, Recall@k, F1Score@k

In [14]:
# Threshold passed to the precision/recall helper, and the list length k.
# (Presumably the watch-ratio cut-off for a "relevant" video -- confirm in eval_fns.)
threshold = 0.7
k = 500

# Precision, recall, and F1 @ 500 for user 14, one triple per model.
# NOTE(review): this helper is given the ungrouped `ground_truth` frame, unlike
# the average-watch-ratio helper, which takes `ground_truth_grp` -- confirm this is intended.
precision, recall, f1 = eval_fns.get_user_precision_recall_f1_at_k(
    k, 14, reco_grp_caption, ground_truth, threshold
)
precision_ncf, recall_ncf, f1_ncf = eval_fns.get_user_precision_recall_f1_at_k(
    k, 14, reco_grp_ncf, ground_truth, threshold
)
precision_random, recall_random, f1_random = eval_fns.get_user_precision_recall_f1_at_k(
    k, 14, reco_grp_random, ground_truth, threshold
)

# One line per model: caption, NCF, random.
print(precision, recall, f1)
print(precision_ncf, recall_ncf, f1_ncf)
print(precision_random, recall_random, f1_random)

0.568 0.9562289562289562 0.7126725219573399
0.58 0.9764309764309764 0.7277289836888331
0.576 0.9696969696969697 0.7227101631116687


In [15]:
# k1 and k2 are the two cut-offs handed to get_all_metrics; in the resulting
# table the @50 columns correspond to k1 and the @500 columns to k2.
k1 = 50
k2 = 500
threshold = 0.7

# Overall (non-clustered) metrics for each model, computed from the grouped tables.
metrics_df_caption = eval_fns.get_all_metrics(
    k1, k2, ground_truth_grp, reco_grp_caption, video_data, threshold,
    by_cluster=False,
)
metrics_df_ncf = eval_fns.get_all_metrics(
    k1, k2, ground_truth_grp, reco_grp_ncf, video_data, threshold,
    by_cluster=False,
)
metrics_df_random = eval_fns.get_all_metrics(
    k1, k2, ground_truth_grp, reco_grp_random, video_data, threshold,
    by_cluster=False,
)

100%|██████████| 1411/1411 [00:31<00:00, 44.72it/s]
100%|██████████| 1411/1411 [00:15<00:00, 90.64it/s]
100%|██████████| 1411/1411 [00:01<00:00, 1370.20it/s]
100%|██████████| 1411/1411 [01:40<00:00, 13.99it/s]
100%|██████████| 1411/1411 [00:31<00:00, 45.43it/s]
100%|██████████| 1411/1411 [00:15<00:00, 91.79it/s]
100%|██████████| 1411/1411 [00:00<00:00, 1433.54it/s]
100%|██████████| 1411/1411 [01:40<00:00, 14.06it/s]
100%|██████████| 1411/1411 [00:31<00:00, 44.61it/s]
100%|██████████| 1411/1411 [00:15<00:00, 90.11it/s]
100%|██████████| 1411/1411 [00:01<00:00, 1368.31it/s]
100%|██████████| 1411/1411 [01:40<00:00, 14.00it/s]


In [16]:
# Keep an untouched copy of the caption metrics (retained from the original cell).
metrics_df_copy = metrics_df_caption.copy()

# Label each model's metrics BEFORE concatenating. Assigning a fixed-length list
# after the concat only works when every metrics frame has exactly one row; it
# raises on a length mismatch, or could mislabel rows if the total happened to match.
labelled_metrics = [
    metrics_df.assign(recommendation_type=label)
    for label, metrics_df in (
        ('caption', metrics_df_caption),
        ('ncf', metrics_df_ncf),
        ('random', metrics_df_random),
    )
]

# Stack the per-model frames row-wise (original row indices are kept).
metrics_combined = pd.concat(labelled_metrics, axis=0)

# Move the label column to the far left, selecting it by name rather than by position.
cols = ['recommendation_type'] + [
    col for col in metrics_combined.columns if col != 'recommendation_type'
]
metrics_combined = metrics_combined[cols]

metrics_combined

Unnamed: 0,recommendation_type,cluster,NDCG@50,Distinct Categories @ 50,Avg Watch Ratio @ 50,Avg Precision@500,Avg Recall@500,Avg F1@500
0,caption,Overall,0.83634,15.778172,0.877204,0.556102,0.984704,0.702779
0,ncf,Overall,0.873789,21.484054,0.87726,0.561729,0.977443,0.705414
0,random,Overall,0.880532,20.600283,0.842897,0.55613,0.984886,0.702854


In [17]:
# Same cut-offs and threshold as the overall run, restated so this cell is self-contained.
k1 = 50
k2 = 500
threshold = 0.7

# Per-cluster metrics for the NCF recommendations.
# NOTE(review): with by_cluster=True the helper is given the ungrouped frames
# (`ground_truth`, `recommendations_ncf`) rather than the groupby objects used
# for the overall run -- confirm this matches what eval_fns.get_all_metrics expects.
metrics_ncf_per_cluster = eval_fns.get_all_metrics(
    k1, k2, ground_truth, recommendations_ncf, video_data, threshold,
    by_cluster=True,
)

100%|██████████| 269/269 [00:19<00:00, 13.91it/s]
100%|██████████| 419/419 [00:30<00:00, 13.89it/s]
100%|██████████| 345/345 [00:24<00:00, 14.12it/s]
100%|██████████| 378/378 [00:26<00:00, 14.13it/s]
100%|██████████| 269/269 [00:03<00:00, 88.81it/s]
100%|██████████| 419/419 [00:04<00:00, 89.81it/s]
100%|██████████| 345/345 [00:03<00:00, 88.94it/s]
100%|██████████| 378/378 [00:04<00:00, 88.39it/s]
100%|██████████| 269/269 [00:08<00:00, 31.35it/s]
100%|██████████| 419/419 [00:13<00:00, 31.27it/s]
100%|██████████| 345/345 [00:11<00:00, 31.22it/s]
100%|██████████| 378/378 [00:12<00:00, 31.38it/s]
100%|██████████| 269/269 [00:27<00:00,  9.94it/s]
100%|██████████| 419/419 [00:43<00:00,  9.66it/s]
100%|██████████| 345/345 [00:33<00:00, 10.24it/s]
100%|██████████| 378/378 [00:37<00:00, 10.07it/s]


In [18]:
# Display the NCF metrics: one row per cluster plus an 'Overall' row.
metrics_ncf_per_cluster

Unnamed: 0,cluster,NDCG@50,Distinct Categories @ 50,Avg Watch Ratio @ 50,Avg Precision@500,Avg Recall@500,Avg F1@500
0,0,0.881977,20.349442,0.901299,0.548727,0.985713,0.696482
0,1,0.874977,22.393795,0.86303,0.574838,0.973073,0.714973
0,2,0.866198,21.136232,0.855349,0.552932,0.967328,0.696145
0,3,0.873572,21.600529,0.895924,0.56448,0.985632,0.709633
0,Overall,0.873789,21.484054,0.87726,0.561729,0.977443,0.705414
