In [3]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm
import os

import seaborn as sns
# Apply seaborn's "whitegrid" theme to any plots drawn in this notebook.
sns.set_theme(style="whitegrid")
# Load the predictions table from the user's Downloads folder (machine-specific path).
# Cells below read its y_pred, y_true, policy, user_id and timestamp columns.
predictions = pd.read_csv("~/Downloads/predictions.csv")

In [4]:
# Signed prediction error, plus its magnitude relative to the true value.
policies_of_interest = ['total_error_cold', 'min_past']
signed_error = predictions['y_pred'].sub(predictions['y_true'])
predictions = predictions.assign(
    pred_diff=signed_error,
    per_error=signed_error.abs().div(predictions['y_true']),
)
# Keep only the two policies being compared in this notebook.
predictions = predictions.loc[predictions['policy'].isin(policies_of_interest)]

In [5]:
# Remove outliers: keep rows whose percent error is within 3 standard
# deviations of the mean percent error.
# NOTE: `predictions` is overwritten with the filtered frame, so re-running
# this cell recomputes z-scores on the already-trimmed data and trims again.
from scipy import stats

per_error_z = stats.zscore(predictions['per_error'])
predictions = predictions[np.abs(per_error_z) < 3]

In [6]:
# Mean percent error per policy over the whole (outlier-filtered) dataset.
# Select the column *before* aggregating: averaging every column first is
# wasted work and raises TypeError on pandas >= 2.0 if any non-numeric
# column is present.
predictions.groupby('policy')['per_error'].mean()

policy
min_past            0.256342
total_error_cold    0.255298
Name: per_error, dtype: float64

In [7]:
# Gap in mean percent error between the two policies (min_past minus
# total_error_cold) over the whole dataset. Computed from the frame rather
# than from hand-copied, rounded output so it cannot go stale when the
# input data or the filtering above changes.
overall_policy_means = predictions.groupby('policy')['per_error'].mean()
overall_dataset_per_diff = overall_policy_means['min_past'] - overall_policy_means['total_error_cold']

Next, we split the predictions by whether each user appears in the data we trained on, and check whether that influences the percent error difference across policies.

In [9]:
# Load the pickled collection that the next cell passes to `isin` on `user_id`.
# Presumably these are the ids of users seen during training — TODO confirm.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
past_updates =  pd.read_pickle('~/Downloads/past_updates-2.pkl')

In [10]:
# Partition the predictions by whether the row's user_id is in `past_updates`.
seen_in_training = predictions['user_id'].isin(past_updates)
trained_predictions = predictions[seen_in_training]
untrained_predictions = predictions[~seen_in_training]

In [11]:
# Mean percent error per policy for users found in `past_updates`.
# Select the column *before* aggregating: averaging every column first is
# wasted work and raises TypeError on pandas >= 2.0 if any non-numeric
# column is present.
trained_predictions.groupby('policy')['per_error'].mean()

policy
min_past            0.248708
total_error_cold    0.238725
Name: per_error, dtype: float64

In [12]:
# Gap in mean percent error between the two policies (min_past minus
# total_error_cold) for users found in `past_updates`. Computed from the
# frame rather than from hand-copied, rounded output so it cannot go stale.
trained_policy_means = trained_predictions.groupby('policy')['per_error'].mean()
trained_dataset_per_diff = trained_policy_means['min_past'] - trained_policy_means['total_error_cold']

In [13]:
# Mean percent error per policy for users not found in `past_updates`.
# Select the column *before* aggregating: averaging every column first is
# wasted work and raises TypeError on pandas >= 2.0 if any non-numeric
# column is present.
untrained_predictions.groupby('policy')['per_error'].mean()

policy
min_past            0.256430
total_error_cold    0.255489
Name: per_error, dtype: float64

In [14]:
# Gap in mean percent error between the two policies (min_past minus
# total_error_cold) for users not found in `past_updates`. Computed from the
# frame rather than from hand-copied, rounded output so it cannot go stale.
untrained_policy_means = untrained_predictions.groupby('policy')['per_error'].mean()
untrained_dataset_per_diff = untrained_policy_means['min_past'] - untrained_policy_means['total_error_cold']

Now we check whether bursty periods of time influence the percent error difference. I've defined a timestamp as bursty when it has more than 200 ratings, but you can change `threshold` to whatever value you think is appropriate.

In [15]:
# Burstiness cutoff: a timestamp counts as bursty when its rating count
# is strictly greater than this value.
threshold = 200

In [16]:
# Per-timestamp non-null counts over the min_past rows only, ordered so the
# timestamps with the most user_id entries come first.
min_past_rows = predictions.loc[predictions['policy'].eq('min_past')]
temp_table = (
    min_past_rows
    .groupby('timestamp')
    .count()
    .sort_values('user_id', ascending=False)
)

In [17]:
# Map each timestamp to its user_id count from `temp_table`.
# `Series.to_dict()` builds the same timestamp -> count mapping (same key
# order) without the slow row-by-row `iterrows` loop.
timestamp_num = temp_table['user_id'].to_dict()

In [18]:
# Work on an independent (deep) copy so adding columns below leaves
# `predictions` untouched; `copy()` is deep by default.
new_table = predictions.copy()

In [19]:
# Attach to every row the rating count of its timestamp.
# A vectorized `map` replaces the row-wise `apply(axis=1)` lookup, which is
# slow and raises KeyError for any timestamp missing from `timestamp_num`.
# Such timestamps have no counted rows, so they are given a count of 0.
new_table['num_per_timestamp'] = (
    new_table['timestamp']
    .map(timestamp_num)
    .fillna(0)
    .astype('int64')
)

In [20]:
# Rows that fall on bursty timestamps (count strictly greater than `threshold`).
is_bursty = new_table['num_per_timestamp'].gt(threshold)
above_threshold = new_table.loc[is_bursty]

In [21]:
# Mean percent error per policy, restricted to rows on bursty timestamps.
# Select the column *before* aggregating: averaging every column first is
# wasted work and raises TypeError on pandas >= 2.0 if any non-numeric
# column is present.
above_threshold.groupby('policy')['per_error'].mean()

policy
min_past            0.257234
total_error_cold    0.256289
Name: per_error, dtype: float64

In [22]:
# Gap in mean percent error between the two policies (min_past minus
# total_error_cold) on bursty timestamps. Computed from the frame rather
# than from hand-copied, rounded output so it follows `threshold` and the
# input data when either changes.
bursty_policy_means = above_threshold.groupby('policy')['per_error'].mean()
bursty_dataset_per_diff = bursty_policy_means['min_past'] - bursty_policy_means['total_error_cold']

In [24]:
# Report the four policy gaps (min_past minus total_error_cold), one per line,
# each rounded to four decimal places.
report_template = (
    'Overall Dataset Diff in Percent Errors: {:0.4f}\n'
    'Trained Dataset Diff in Percent Errors: {:0.4f}\n'
    'Untrained Dataset Diff in Percent Errors: {:0.4f}\n'
    'Bursty Dataset Diff in Percent Errors: {:0.4f}\n'
)
print(report_template.format(
    overall_dataset_per_diff,
    trained_dataset_per_diff,
    untrained_dataset_per_diff,
    bursty_dataset_per_diff,
))

Overall Dataset Diff in Percent Errors: 0.0010
Trained Dataset Diff in Percent Errors: 0.0100
Untrained Dataset Diff in Percent Errors: 0.0009
Bursty Dataset Diff in Percent Errors: 0.0009

