In [69]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm
import os

import seaborn as sns
# Use seaborn's "whitegrid" theme for every plot drawn after this point.
sns.set_theme(style="whitegrid")

# Load the predictions table from CSV.
predictions_csv_path = "~/Downloads/predictions.csv"
predictions = pd.read_csv(predictions_csv_path)

In [70]:
# Add the signed prediction error and its square, then keep only the two
# policies that are compared in the rest of the notebook.
compared_policies = ['total_error_cold', 'min_past']
predictions = (
    predictions
    .assign(pred_diff=lambda frame: frame['y_pred'] - frame['y_true'])
    .assign(diff_sq=lambda frame: frame['pred_diff'] ** 2)
    .loc[lambda frame: frame['policy'].isin(compared_policies)]
)

In [71]:
# Display the filtered frame, now including the pred_diff and diff_sq columns.
predictions

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,y_true,y_pred,user_id,movie_id,timestamp,policy,updates,ts_factor,pred_diff,diff_sq
900000,0,0,3,3.715949,895,223,0,total_error_cold,0.5,60,0.715949,0.512583
900001,1,1,5,4.496678,895,2336,0,total_error_cold,0.5,60,-0.503322,0.253333
900002,2,2,4,3.805784,895,2369,0,total_error_cold,0.5,60,-0.194216,0.037720
900003,3,3,2,3.715616,895,1911,0,total_error_cold,0.5,60,1.715616,2.943338
900004,4,4,3,4.349813,895,2599,0,total_error_cold,0.5,60,1.349813,1.821995
...,...,...,...,...,...,...,...,...,...,...,...,...
4499995,99995,99995,4,3.526553,319,2671,20018,min_past,8.0,60,-0.473447,0.224152
4499996,99996,99996,3,3.518007,319,2759,20018,min_past,8.0,60,0.518007,0.268331
4499997,99997,99997,2,4.104482,326,1263,20018,min_past,8.0,60,2.104482,4.428844
4499998,99998,99998,3,4.517343,326,1193,20018,min_past,8.0,60,1.517343,2.302330


In [72]:
# Drop rows whose squared error lies 3 or more standard deviations from the mean.
# The z-score is taken over all remaining rows at once (not per policy or per
# `updates` value).
# NOTE(review): trimming the largest squared errors lowers every mean computed
# afterwards — confirm this is intended.
from scipy import stats

diff_sq_zscores = stats.zscore(predictions['diff_sq'])
within_three_sigma = np.abs(diff_sq_zscores) < 3
predictions = predictions[within_three_sigma]

In [73]:
# Display the frame again after the outlier filter.
predictions

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,y_true,y_pred,user_id,movie_id,timestamp,policy,updates,ts_factor,pred_diff,diff_sq
900000,0,0,3,3.715949,895,223,0,total_error_cold,0.5,60,0.715949,0.512583
900001,1,1,5,4.496678,895,2336,0,total_error_cold,0.5,60,-0.503322,0.253333
900002,2,2,4,3.805784,895,2369,0,total_error_cold,0.5,60,-0.194216,0.037720
900003,3,3,2,3.715616,895,1911,0,total_error_cold,0.5,60,1.715616,2.943338
900004,4,4,3,4.349813,895,2599,0,total_error_cold,0.5,60,1.349813,1.821995
...,...,...,...,...,...,...,...,...,...,...,...,...
4499995,99995,99995,4,3.526553,319,2671,20018,min_past,8.0,60,-0.473447,0.224152
4499996,99996,99996,3,3.518007,319,2759,20018,min_past,8.0,60,0.518007,0.268331
4499997,99997,99997,2,4.104482,326,1263,20018,min_past,8.0,60,2.104482,4.428844
4499998,99998,99998,3,4.517343,326,1193,20018,min_past,8.0,60,1.517343,2.302330


In [74]:
# Mean squared prediction error of the 'min_past' policy for each `updates` value.
# 'diff_sq' is selected before aggregating: averaging the whole frame would also
# try to average the string 'policy' column, which raises TypeError on
# pandas >= 2.0, and would compute means for columns that are never used.
min_past_error = (
    predictions[predictions['policy'] == 'min_past']
    .groupby('updates')['diff_sq']
    .mean()
)

In [75]:
# Mean squared prediction error of the 'total_error_cold' policy for each
# `updates` value.
# 'diff_sq' is selected before aggregating: averaging the whole frame would also
# try to average the string 'policy' column, which raises TypeError on
# pandas >= 2.0, and would compute means for columns that are never used.
error_cold_error = (
    predictions[predictions['policy'] == 'total_error_cold']
    .groupby('updates')['diff_sq']
    .mean()
)

In [76]:
# Gap in mean squared error per `updates` value over all rows:
# min_past minus total_error_cold (positive means min_past has the larger error).
# NOTE(review): despite the "per_diff" name this is an absolute difference,
# not a percentage — confirm which one is intended.
overall_dataset_per_diff = min_past_error.sub(error_cold_error)

Now we split the predictions by whether the user is in the dataset we've trained on, and look at whether that influences the percent error difference across policies.

In [77]:
# Load the pickled `past_updates` object.
# NOTE(review): unpickling can execute arbitrary code — only load this file if
# it comes from a trusted source.
past_updates_path = '~/Downloads/past_updates-2.pkl'
past_updates = pd.read_pickle(past_updates_path)

In [78]:
# Split the rows by whether their user_id is found in `past_updates`.
# NOTE(review): presumably `past_updates` is a collection of user ids — confirm.
user_in_past_updates = predictions['user_id'].isin(past_updates)
trained_predictions = predictions[user_in_past_updates]
untrained_predictions = predictions[~user_in_past_updates]

In [79]:
# Mean squared error per `updates` value for each policy, restricted to the
# "trained" rows.
# 'diff_sq' is selected before aggregating: averaging the whole frame would also
# try to average the string 'policy' column, which raises TypeError on
# pandas >= 2.0, and would compute means for columns that are never used.
min_past_train_error = (
    trained_predictions[trained_predictions['policy'] == 'min_past']
    .groupby('updates')['diff_sq']
    .mean()
)
cold_train_error = (
    trained_predictions[trained_predictions['policy'] == 'total_error_cold']
    .groupby('updates')['diff_sq']
    .mean()
)

In [80]:
# Gap in mean squared error per `updates` value on the "trained" rows:
# min_past minus total_error_cold (positive means min_past has the larger error).
# NOTE(review): despite the "per_diff" name this is an absolute difference,
# not a percentage — confirm which one is intended.
trained_dataset_per_diff = min_past_train_error.sub(cold_train_error)

In [81]:
# Mean squared error per `updates` value for each policy, restricted to the
# "untrained" rows.
# 'diff_sq' is selected before aggregating: averaging the whole frame would also
# try to average the string 'policy' column, which raises TypeError on
# pandas >= 2.0, and would compute means for columns that are never used.
min_past_untrain_error = (
    untrained_predictions[untrained_predictions['policy'] == 'min_past']
    .groupby('updates')['diff_sq']
    .mean()
)
cold_untrain_error = (
    untrained_predictions[untrained_predictions['policy'] == 'total_error_cold']
    .groupby('updates')['diff_sq']
    .mean()
)

In [82]:
# Gap in mean squared error per `updates` value on the "untrained" rows:
# min_past minus total_error_cold (positive means min_past has the larger error).
# NOTE(review): despite the "per_diff" name this is an absolute difference,
# not a percentage — confirm which one is intended.
untrained_dataset_per_diff = min_past_untrain_error.sub(cold_untrain_error)

Now we look at whether bursty periods of time influence the percent error difference. I've defined a bursty timestamp as one with more than 200 ratings, but you can change the threshold to whatever value you think is appropriate.

In [83]:
# Per-timestamp row count that a timestamp must exceed to be treated as
# "bursty"; tune as needed.
threshold = 200

In [84]:
# Count the 'min_past' rows at each timestamp, busiest timestamps first.
# `count()` reports non-null cells per column; the 'user_id' column is the one
# used for ordering here.
# NOTE(review): rows for every `updates` setting are counted together, so this
# may exceed the number of distinct ratings at a timestamp — confirm that is
# the intended meaning of "ratings per timestamp".
min_past_only = predictions[predictions['policy'] == 'min_past']
counts_by_timestamp = min_past_only.groupby('timestamp').count()
temp_table = counts_by_timestamp.sort_values('user_id', ascending=False)

In [85]:
# Lookup table: timestamp -> value of the 'user_id' count column in temp_table.
# Series.to_dict builds the same mapping (same keys, same order) directly,
# instead of materialising a full row Series per timestamp with iterrows just
# to read one cell from it.
timestamp_num = temp_table['user_id'].to_dict()

In [86]:
# Work on an independent (deep) copy so that adding columns below leaves
# `predictions` untouched; DataFrame.copy() is deep by default.
new_table = predictions.copy()

In [87]:
# Attach to every row the count stored in `timestamp_num` for its timestamp.
# Series.map performs the dictionary lookup in one vectorised pass instead of
# calling a Python lambda once per row (very slow on a frame this large).
# A timestamp with no entry in `timestamp_num` now yields NaN instead of raising
# KeyError; NaN compares False against `threshold`, so such rows are simply not
# counted as bursty.
new_table['num_per_timestamp'] = new_table['timestamp'].map(timestamp_num)

In [88]:
# Keep only rows whose timestamp count is strictly greater than `threshold`.
exceeds_threshold = new_table['num_per_timestamp'].gt(threshold)
above_threshold = new_table.loc[exceeds_threshold]

In [89]:
# Mean squared error per `updates` value for each policy, restricted to rows
# from timestamps above the burstiness threshold.
# 'diff_sq' is selected before aggregating: averaging the whole frame would also
# try to average the string 'policy' column, which raises TypeError on
# pandas >= 2.0, and would compute means for columns that are never used.
min_past_above_error = (
    above_threshold[above_threshold['policy'] == 'min_past']
    .groupby('updates')['diff_sq']
    .mean()
)
cold_above_error = (
    above_threshold[above_threshold['policy'] == 'total_error_cold']
    .groupby('updates')['diff_sq']
    .mean()
)

In [90]:
# Gap in mean squared error per `updates` value on the above-threshold rows:
# min_past minus total_error_cold (positive means min_past has the larger error).
# NOTE(review): despite the "per_diff" name this is an absolute difference,
# not a percentage — confirm which one is intended.
bursty_dataset_per_diff = min_past_above_error.sub(cold_above_error)

In [91]:
# Collect the four error-gap Series side by side, one row per `updates` value.
# The Series are aligned on their shared `updates` index; the explicit
# "updates" list is only a helper column and is dropped again right away.
# NOTE(review): that helper column looks redundant (the index already carries
# the `updates` values) — consider removing it if nothing else reads `d`.
update_levels = sorted(list(set(predictions['updates'])))
d = {
    "updates": update_levels,
    'overall': overall_dataset_per_diff,
    'trained': trained_dataset_per_diff,
    'untrained': untrained_dataset_per_diff,
    'bursty': bursty_dataset_per_diff,
}
conclusion_df = pd.DataFrame(data=d).drop(columns='updates')

In [92]:
# Display the summary: min_past minus total_error_cold mean squared error,
# per `updates` value, for each subset of the data.
conclusion_df

Unnamed: 0_level_0,overall,trained,untrained,bursty
updates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.009638115,0.2088318,0.007353174,0.004201531
0.25,0.01007647,0.2378742,0.007476865,0.005819464
0.5,0.005643904,-0.01328223,0.005857535,0.009365993
1.0,0.004871532,0.05240128,0.004323165,0.01226468
2.0,0.001984951,0.0005699049,0.002000418,0.00487617
3.0,0.0003528289,0.0004838348,0.0003513234,0.00108877
4.0,-0.0002153553,-0.001969289,-0.0001940934,-0.0006507257
5.0,-0.0002023452,0.0009489643,-0.0002156128,-0.0007240916
8.0,4.555895e-08,-1.946566e-08,4.630686e-08,3.098897e-07
