___
# Compute prediction errors for model comparison

In [1]:
import pandas as pd
import numpy as np
import os

In [3]:
# Read forecasts for all models stored in forecasts.csv
forecasts = pd.read_csv('forecasts/forecasts.csv').set_index('Month')

# Per model: absolute error and absolute percentage error of each forecast
# against the observed AmountConsumed. Averaging these columns later gives
# the MAE and the MAPE respectively.
for model in ['Mean','ARIMA','RNN']:
    column = model+'_Forecast'
    # The absolute deviation is shared by both metrics, so compute it once
    abs_error = (forecasts[column]-forecasts.AmountConsumed).abs()
    # MAE
    forecasts[model+'_AbsError'] = abs_error
    # MAPE (expressed in percent)
    forecasts[model+'_PerError'] = abs_error/forecasts.AmountConsumed*100

# MedCode first, then every error column in the order it appears in the frame
error_columns = ['MedCode'] + [column for column in forecasts.columns if 'Error' in column]

# Months whose rows are kept in the error table
forecast_months = ['2020-01-01','2020-02-01','2020-03-01']

# Dividing by an AmountConsumed of 0 yields +/-inf in the percentage error.
# Those values are computation artifacts rather than real errors (the original
# note attributes them to missing data), so they are stored as NaN.
errors = forecasts.loc[forecast_months, error_columns].replace([np.inf, -np.inf], np.nan)
# Order rows by medicine and then by month, keeping Month as the index
errors = errors.reset_index().sort_values(['MedCode','Month']).set_index('Month')
errors.to_csv('forecasts/prediction_errors.csv')
errors

Unnamed: 0_level_0,MedCode,Mean_AbsError,Mean_PerError,ARIMA_AbsError,ARIMA_PerError,RNN_AbsError,RNN_PerError
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-01,3300115,29.5,737.5,0.472522,11.813046,3.293327,82.333171
2020-02-01,3300115,31.5,630.0,0.566291,11.325819,5.0,100.0
2020-03-01,3300115,30.5,277.272727,2.892463,26.295114,11.0,100.0
2020-01-01,3300142,1.0,25.0,1.559689,38.992227,1.460683,36.517082
2020-02-01,3300142,2.0,40.0,1.272373,25.447465,0.875637,17.512732
2020-03-01,3300142,4.5,225.0,2.296794,114.839722,2.076986,103.849299
2020-01-01,3300663,9.5,27.142857,17.512064,50.034468,10.041333,28.689523
2020-02-01,3300663,13.0,260.0,12.034169,240.683373,12.87925,257.585001
2020-03-01,3300663,39.0,325.0,5.642763,47.023024,8.972961,74.774672
2020-01-01,3301771,39.5,54.861111,6.027266,8.371203,1.472908,2.045706


___
## Timeseries forecasts comparison between models

This section is plotted with Plotly for interactivity; the plots were later exported to the website dashboard. The code is intentionally kept in markdown cells because Plotly has bugs on some systems that prevent the plots from being displayed, so static images are shown instead. If needed, the code blocks can be converted back to code cells and executed to see the interactive plots.

```python
import plotly.express as px
import plotly.graph_objects as go

# Reload both tables from disk, indexed by their 'Month' column
forecasts = pd.read_csv('forecasts/forecasts.csv', index_col='Month')
errors = pd.read_csv('forecasts/prediction_errors.csv', index_col='Month')
```

```python
# For every medicine: a line plot of the observed consumption against each
# model's forecast, followed by bar charts of the mean error per model

models = ['Mean','ARIMA','RNN']
# One bar colour per model, in the same order as `models`
colors = ['#636efa','#ef553b','#00cc96']
# Size shared by both bar charts
bar_size = dict(autosize=False, width=500, height=450)

for MedCode in forecasts.MedCode.unique():

    # --- Timeseries line plot -------------------------------------------
    plot_df = forecasts.loc[forecasts.MedCode == MedCode]
    # Use the first description found for this medicine as the plot title
    MedDescription = plot_df.MedDescription.unique()[0]
    line_plot = go.Figure(data=[
        go.Scatter(x=plot_df.index, y=plot_df[column], mode='lines+markers', name=column)
        for column in ['Mean_Forecast','ARIMA_Forecast','RNN_Forecast','AmountConsumed']
    ])
    line_plot.update_layout(
        title=MedDescription,
        xaxis_title='Date',
        yaxis_title='Consumed Amount',
        autosize=False, width=1000, height=450,
    )
    line_plot.show()

    # --- Errors bar plots -----------------------------------------------
    # Column-wise mean over this medicine's rows of the error table
    plot_errors = errors.loc[errors.MedCode == MedCode].mean()
    mae_error = plot_errors[[model + '_AbsError' for model in models]]
    mape_error = plot_errors[[model + '_PerError' for model in models]]

    # MAE
    mae_bar = (
        go.Figure(data=[go.Bar(x=models, y=mae_error, text=mae_error,
                               name='MAE', marker_color=colors)])
        .update_traces(texttemplate='%{value:.1f}', textposition='auto')
        .update_layout(title='Forecast MAE per model', barmode='group',
                       xaxis_title='Model',
                       yaxis_title='Mean Absolute Error',
                       **bar_size)
    )
    # MAPE
    mape_bar = (
        go.Figure(data=[go.Bar(x=models, y=mape_error, text=mape_error,
                               name='MAPE', marker_color=colors)])
        .update_traces(texttemplate='%{value:.1f}%', textposition='auto')
        .update_layout(title='Forecast MAPE per model', barmode='group',
                       xaxis_title='Model',
                       yaxis_title='Mean Absolute Percentage Error [%]',
                       **bar_size)
    )

    mae_bar.show()
    mape_bar.show()
```

<img src="plotly/line_plot.png">

<img src="plotly/mae_bar.png">

<img src="plotly/mape_bar.png">

___

## Overall errors

```python
# Boxplots of the per-medicine mean errors, one box per model

# One row per MedCode holding the mean of each error column
error_overall = errors.groupby('MedCode').mean()

mae_overall = go.Figure()
mape_overall = go.Figure()

# Each figure receives one box per model, taken from the matching column suffix
for model in ['Mean','ARIMA','RNN']:
    for figure, suffix in ((mae_overall, '_AbsError'), (mape_overall, '_PerError')):
        figure.add_trace(go.Box(y=error_overall[model+suffix], name=model))

# Layout settings common to both figures
shared_layout = dict(xaxis_title='Prediction model',
                     autosize=False, width=700, height=450)

mae_overall.update_layout(title='Overall Mean Absolute Error per model',
                          yaxis_title='Mean Absolute Error',
                          **shared_layout)
mape_overall.update_layout(title='Overall Mean Absolute Percentage Error per model',
                           yaxis_title='Mean Absolute Percentage Error [%]',
                           **shared_layout)

for figure in (mae_overall, mape_overall):
    figure.show()
```

<img src="plotly/mae_overall.png">

<img src="plotly/mape_overall.png">

## Errors per month

```python
# Boxplots of the errors grouped by forecasted month, one box per model

# Month index with its last three characters removed, used as the x category
month_labels = errors.index.str[:-3]

mae_monthly = go.Figure()
mape_monthly = go.Figure()

# Each figure receives one box trace per model, taken from the matching column
for model in ['Mean','ARIMA','RNN']:
    for figure, suffix in ((mae_monthly, '_AbsError'), (mape_monthly, '_PerError')):
        figure.add_trace(go.Box(x=month_labels, y=errors[model+suffix], name=model))

# Layout settings common to both figures
shared_layout = dict(boxmode='group', xaxis_type='category',
                     xaxis_title='Forecasted month',
                     autosize=False, width=700, height=450)

mae_monthly.update_layout(title='Mean Absolute Error per month',
                          yaxis_title='Mean Absolute Error',
                          **shared_layout)
mape_monthly.update_layout(title='Mean Absolute Percentage Error per month',
                           yaxis_title='Mean Absolute Percentage Error [%]',
                           **shared_layout)

for figure in (mae_monthly, mape_monthly):
    figure.show()
```

<img src="plotly/mae_monthly.png">

<img src="plotly/mape_monthly.png">

## Average error per month

```python
# Average errors per month

# Mean of every column over all rows sharing the same Month index value.
# MedCode gets averaged as well, but that column is not plotted below.
error_avg = errors.groupby(errors.index).mean()

# Month index with its last three characters removed (e.g. '2020-01-01' ->
# '2020-01'); computed once and reused as the x axis of every trace
month_labels = error_avg.index.str[:-3]

avg_mae_monthly = go.Figure()
avg_mape_monthly = go.Figure()

for model in ['Mean','ARIMA','RNN']:
    # MAE
    avg_mae_model = error_avg[model+'_AbsError']
    avg_mae_monthly.add_trace(go.Scatter(x=month_labels, y=avg_mae_model, mode='lines+markers', name=model))
    # MAPE
    avg_mape_model = error_avg[model+'_PerError']
    avg_mape_monthly.add_trace(go.Scatter(x=month_labels, y=avg_mape_model, mode='lines+markers', name=model))

# These figures hold only scatter traces, so no box-specific layout is set
avg_mae_monthly.update_layout(title='Average MAE per month',
                              xaxis_type='category',
                              xaxis_title='Forecasted month',
                              yaxis_title='Average MAE',
                              autosize=False, width=700, height=450)
avg_mape_monthly.update_layout(title='Average MAPE per month',
                               xaxis_type='category',
                               xaxis_title='Forecasted month',
                               yaxis_title='Average MAPE [%]',
                               autosize=False, width=700, height=450)
avg_mae_monthly.show()
avg_mape_monthly.show()
```

<img src="plotly/avg_mae_monthly.png">

<img src="plotly/avg_mape_monthly.png">