In [2]:
import pandas as pd
from bokeh.plotting import figure, output_file, show
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt

In [3]:
# Locations of the saved per-model prediction files (relative to this notebook).
LINEAR_PREDICTIONS_CSV = "./../predictions/linear_predictions.csv"
RF_PREDICTIONS_CSV = "./../predictions/rf_predictions.csv"
LSTM_PREDICTIONS_CSV = "./../predictions/actual_lstm_predictions.csv"

# One DataFrame per model; later cells read the 'y' column plus each model's
# prediction column ('yhat' for linear / random forest, 'y_hat' for the LSTM).
lin_df = pd.read_csv(LINEAR_PREDICTIONS_CSV)
rf_df = pd.read_csv(RF_PREDICTIONS_CSV)
lstm_df = pd.read_csv(LSTM_PREDICTIONS_CSV)

In [4]:
# Overall RMSE of each model, computed over every row of its prediction file.
# The mean squared error is taken first, then its square root.
lin_mse = mean_squared_error(lin_df['y'], lin_df['yhat'])
rf_mse = mean_squared_error(rf_df['y'], rf_df['yhat'])
# The LSTM file names its prediction column 'y_hat' rather than 'yhat'.
lstm_mse = mean_squared_error(lstm_df['y'], lstm_df['y_hat'])

lin_rmse, rf_rmse, lstm_rmse = sqrt(lin_mse), sqrt(rf_mse), sqrt(lstm_mse)
print(lin_rmse, rf_rmse, lstm_rmse)

0.03501120695267922 0.02673814413711916 0.029784398272927025


In [5]:
# Per-day RMSE of each model, collected into `days_df` (one row per day).
days = lin_df['num_days'].unique()
lin_rmse_list = []
rf_rmse_list = []
lstm_rmse_list = []

# Calendar labels for the evaluated days, generated instead of typed by hand.
# The hand-written list repeated '2015-12-10' as its final entry, so two rows
# shared one date; consecutive days starting at 2015-11-12 end on 2015-12-11.
# NOTE(review): assumes `days` holds consecutive days in chronological order,
# starting on 2015-11-12 -- confirm against the code that produced the CSVs.
last_30 = (
    pd.date_range(start='2015-11-12', periods=len(days), freq='D')
    .strftime('%Y-%m-%d')
    .tolist()
)

for day in days:
    # Per-day scores use their own names so that the overall `lin_rmse`,
    # `rf_rmse` and `lstm_rmse` values from the earlier cell are not overwritten.
    lin_day = lin_df[lin_df['num_days'] == day]
    lin_day_rmse = sqrt(mean_squared_error(lin_day['y'], lin_day['yhat']))
    lin_rmse_list.append(lin_day_rmse)

    rf_day = rf_df[rf_df['num_days'] == day]
    rf_day_rmse = sqrt(mean_squared_error(rf_day['y'], rf_day['yhat']))
    rf_rmse_list.append(rf_day_rmse)

    # NOTE(review): the LSTM file is filtered on a column literally named '14'
    # and scored on 'y_hat' -- presumably its day index and prediction columns;
    # verify against the export that wrote actual_lstm_predictions.csv.
    lstm_day = lstm_df[lstm_df['14'] == day]
    lstm_day_rmse = sqrt(mean_squared_error(lstm_day['y'], lstm_day['y_hat']))
    lstm_rmse_list.append(lstm_day_rmse)

# Build the frame in one step; a length mismatch between the date labels and
# the score lists raises here instead of producing a misaligned table.
days_df = pd.DataFrame(
    {
        'date': pd.to_datetime(last_30),
        'lin_rmse': lin_rmse_list,
        'rf_rmse': rf_rmse_list,
        'lstm_rmse': lstm_rmse_list,
    },
    columns=['date', 'lin_rmse', 'rf_rmse', 'lstm_rmse'],
)

In [6]:
# Display the per-day RMSE table (columns: date, lin_rmse, rf_rmse, lstm_rmse).
days_df
# NOTE(review): leftover debugging snippet -- `df` is not defined in any of the
# cells visible here, so this would fail if uncommented as-is.
# df[df['num_days'] == 550]

Unnamed: 0,date,lin_rmse,rf_rmse,lstm_rmse
0,2015-11-12,0.037759,0.041696,0.026072
1,2015-11-13,0.049974,0.012243,0.026031
2,2015-11-14,0.031182,0.033068,0.028072
3,2015-11-15,0.035212,0.032881,0.031839
4,2015-11-16,0.032821,0.027886,0.030471
5,2015-11-17,0.031389,0.020893,0.028213
6,2015-11-18,0.032225,0.019066,0.027191
7,2015-11-19,0.031446,0.020441,0.028561
8,2015-11-20,0.03108,0.020843,0.027737
9,2015-11-21,0.032658,0.023077,0.029304


In [54]:
from bokeh.models import Range1d, Span
output_file("linear_rmse.html")

# Overall RMSE of each model across all prediction rows, used for the dashed
# reference lines. These are recomputed from the frames rather than pasted in
# as float literals, so the lines stay correct when the prediction files change.
lin_overall_rmse = sqrt(mean_squared_error(lin_df['y'], lin_df['yhat']))
lstm_overall_rmse = sqrt(mean_squared_error(lstm_df['y'], lstm_df['y_hat']))
rf_overall_rmse = sqrt(mean_squared_error(rf_df['y'], rf_df['yhat']))

# NOTE(review): `plot_width`/`plot_height` and the `legend=` keyword are the
# older Bokeh spellings; Bokeh 3 expects `width`/`height` and `legend_label=`.
# They are kept here to match the Bokeh version this notebook was written for.
p = figure(plot_width=400, plot_height=400, x_axis_type='datetime',
           title='Daily RMSE by model')
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Root Mean Squared Error'
# Fixed y-window chosen by hand for the current results; values outside it
# are not visible.
p.y_range = Range1d(0.011,0.057)

# One solid line per model: RMSE for each day
p.line(days_df['date'], days_df['lin_rmse'], color='red', line_width=2, legend='Linear regression')
p.line(days_df['date'], days_df['lstm_rmse'], color='orange', line_width=2, legend='LSTM network')
p.line(days_df['date'], days_df['rf_rmse'], color='green', line_width=2, legend='Random Forest')

# One dashed horizontal line per model: overall RMSE, in the matching colour
lin_avg = Span(location=lin_overall_rmse, line_dash='dashed', line_color='red', dimension='width')
lstm_avg = Span(location=lstm_overall_rmse, line_dash='dashed', line_color='orange', dimension='width')
rf_avg = Span(location=rf_overall_rmse, line_dash='dashed', line_color='green', dimension='width')

p.add_layout(lin_avg)
p.add_layout(lstm_avg)
p.add_layout(rf_avg)

# Optional legend tweaks (currently disabled)
# p.legend.label_text_font_size = "8pt"
# p.legend.location = 'bottom_right'

show(p)