In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the market data from a Feather file (path is relative to the notebook's
# working directory). Later cells read its 'time' and 'y' columns.
bac = pd.read_feather('../input/bac_market_data.feather')

In [None]:
# Baseline "model": predict a constant 0 (column X) for every row, keeping the
# calendar date of each observation and the target y alongside it.
baseline_model = (
    pd.DataFrame({'date': bac['time'].dt.date})
    .assign(X=0, y=bac['y'])
)

In [None]:
def score_model(X, y):
    """Score predictions ``X`` against targets ``y`` on a [0, 1] scale.

    Both inputs are shifted by +1. For every positional pair the signed
    relative difference ``d = (X' - y') / (|X'| + |y'|)`` is computed, which
    lies in [-1, 1], and the score is ``1 - (d + 1) / 2``:

    * 0.5 when the shifted prediction equals the shifted target,
    * above 0.5 when the shifted prediction is below the shifted target,
    * below 0.5 when it is above.

    NOTE(review): an earlier comment described higher scores as "more
    accurate", but an exact match scores 0.5, not 1 -- confirm that this
    signed metric is what is intended.

    Parameters
    ----------
    X : array-like of numbers
        Predicted values.
    y : array-like of numbers
        Target values.

    Returns
    -------
    list
        One score per (X, y) pair. If the inputs differ in length, the
        trailing extras of the longer one are ignored.
    """
    adj_X = np.asarray(X) + 1
    adj_y = np.asarray(y) + 1

    # Pair values positionally; like zip(), stop at the shorter input.
    n_pairs = min(len(adj_X), len(adj_y))
    adj_X, adj_y = adj_X[:n_pairs], adj_y[:n_pairs]

    numer = adj_X - adj_y
    denom = np.abs(adj_X) + np.abs(adj_y)

    # denom is zero only when both shifted values are zero, i.e. the pair is
    # an exact match; treat that as a relative difference of 0 instead of
    # letting 0/0 produce NaN (and a RuntimeWarning).
    with np.errstate(divide='ignore', invalid='ignore'):
        rel_diffs = numer / denom
    rel_diffs = np.where(denom == 0, 0.0, rel_diffs)

    # Rescale the signed difference from [-1, 1] to a score in [0, 1].
    scores = 1 - ((rel_diffs + 1) / 2)
    return list(scores)

In [None]:
# Score every baseline prediction against its target and store the result
# as a new 'scores' column.
baseline_model['scores'] = score_model(
    X=baseline_model['X'].values,
    y=baseline_model['y'].values,
)

In [None]:
def plot_vs_time(data_frame, column, calculation='mean', span=10, ylim=(0, 1)):
    """Plot an exponentially smoothed per-date aggregate of ``column``.

    The frame is grouped by its 'date' column, ``column`` is reduced to one
    value per date, the resulting series is smoothed with an exponentially
    weighted mean, and the smoothed values are drawn on a new 10x3 figure.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain a 'date' column and ``column``.
    column : str
        Name of the column to aggregate and plot; also used as the y-axis
        label and in the title.
    calculation : {'mean', 'count', 'nunique'}, default 'mean'
        Per-date reduction applied to ``column``.
    span : int or float, default 10
        ``span`` passed to the exponentially weighted window.
    ylim : tuple or None, default (0, 1)
        y-axis limits. Pass ``None`` to let matplotlib autoscale, which is
        usually what is wanted for 'count' and 'nunique'.

    Raises
    ------
    ValueError
        If ``calculation`` is not one of the supported reductions.
    """
    supported = ('mean', 'count', 'nunique')
    if calculation not in supported:
        raise ValueError(
            'calculation must be one of %s, got %r' % (supported, calculation)
        )

    # One value per date, indexed by date.
    per_date = data_frame.groupby('date')[column].agg(calculation)

    # Smooth only the aggregated values. Keeping the dates in the index keeps
    # them out of the EWM computation, which pandas versions that reject
    # non-numeric columns would otherwise fail on.
    smoothed = per_date.ewm(span=span).mean()

    _, ax = plt.subplots(figsize=(10, 3))
    ax.plot(smoothed.index, smoothed.values)
    ax.set_xlabel('Time')
    ax.set_ylabel(column)
    if ylim is not None:
        ax.set_ylim(ylim)
    ax.set_title('%s versus time' % column)

In [None]:
# Smoothed per-date mean of the baseline scores over time.
plot_vs_time(data_frame=baseline_model, column='scores')

In [None]:
# Headline number: mean baseline score over all rows, to five decimal places.
print(
    'Baseline model score is {:.5f}'.format(
        baseline_model['scores'].mean()
    )
)