### About

- This notebook imports the CSV files produced by the cross-validation runs of multiple models and then compares the performance of each model and channel.
- Plotting the model validation results against the actual values makes it possible to check the model fit visually. This is done in the last step of the notebook. JPG examples of the results are included in the repo.

In [1]:
#basics
import pandas as pd
import numpy as np
from datetime import datetime
import calendar

#graphs
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

import sys
sys.path.append('../..')
from src import funciones
from src import prophetaux

#set options
pd.set_option('display.float_format', '{:.2f}'.format)
pd.options.display.max_columns = None
pd.set_option('display.max_rows', 200)

#metrics
from math import sqrt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

pd.options.mode.chained_assignment = None  # default='warn'

import os
import glob
import pandas as pd

In [3]:
# Load every cross-validation export found in the working directory and derive
# the per-row error metrics used by the rest of the notebook.
extension = 'csv'

# glob returns files in arbitrary, file-system dependent order; sorting makes
# the row order of the combined frame reproducible between runs and machines.
# NOTE(review): every *.csv in this directory is picked up, so keep unrelated
# CSV files (e.g. exported summaries) out of it.
all_filenames = sorted(glob.glob('*.{}'.format(extension)))
if not all_filenames:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate".
    raise FileNotFoundError(
        "No *.{} files found in the working directory".format(extension)
    )

# Combine all files into one frame. ignore_index=True gives every row a unique
# label instead of repeating each file's own 0..n-1 index.
results = (
    pd.concat([pd.read_csv(f) for f in all_filenames], ignore_index=True)
    .sort_values(by=['model', 'ds'])
)

# Drop rows flagged as Saturday/Sunday in `sem_dia` (presumably the weekday
# name of `ds` - confirm against the files). The explicit copy makes the
# column assignments below operate on an independent frame, not on a slice.
is_weekend = (results.sem_dia == 'Sat') | (results.sem_dia == 'Sun')
results = results[~is_weekend].copy()

# Absolute and signed relative error of the prediction `yhat` w.r.t. `y`.
results['perc_error'] = abs(results['y'] - results['yhat']) / results['y']
results['perc_error_over'] = (results['y'] - results['yhat']) / results['y']

# SLA: absolute relative error below 10 %.
results['SLA'] = results['perc_error'] < 0.1
# SLA_over: signed error below 10 %, i.e. only under-predictions of 10 % or
# more fail; over-predictions of any size pass.
results['SLA_over'] = results['perc_error_over'] < 0.1

### Results Group by

In [4]:
# Per-model summary (sum, median and count) of the SLA flags and the absolute
# relative error.
# The columns are selected with a list: selecting with a bare tuple
# (['SLA','perc_error','SLA_over'] without the inner brackets) is deprecated
# and raises on recent pandas versions.
groupedby_model = (
    results.groupby('model')[['SLA', 'perc_error', 'SLA_over']]
    .agg(['sum', 'median', 'count'])
    .reset_index()
    # The second reset_index adds a positional 'index' column; it is kept so
    # the layout of the summary frame stays exactly as before.
    .reset_index()
)
# Displayed sorted by model; chain .to_csv('posfe_validation.csv') here to
# export the summary.
groupedby_model.sort_values(by='model')


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,index,model,SLA,SLA,SLA,perc_error,perc_error,perc_error,SLA_over,SLA_over,SLA_over
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,median,count,sum,median,count,sum,median,count
0,0,AB_1b,0.0,False,9,6.61,0.78,9,0.0,False,9
1,1,AB_2,0.0,False,9,6.06,0.63,9,0.0,False,9
2,2,AB_3,0.0,False,9,7.69,0.91,9,0.0,False,9
3,3,MB_1b,0.0,False,5,1.29,0.25,5,0.0,False,5
4,4,MB_2,2.0,False,5,0.56,0.1,5,3.0,True,5
5,5,MB_3,4.0,True,5,0.4,0.07,5,5.0,True,5


### Plotting

In [5]:
# Distinct model identifiers present in the combined results, in order of
# first appearance.
models = results['model'].unique()

In [6]:
# Map each model name (formatted as a string) to the rows of `results` that
# belong to that model.
model_dict = {
    f"{model_name}": results[results.model == model_name]
    for model_name in models
}
model_dict.keys()

dict_keys(['AB_1b', 'AB_2', 'AB_3', 'MB_1b', 'MB_2', 'MB_3'])

In [7]:
def _actuals_for_channel(tag):
    """Return one row per `ds` for the models whose name matches ``tag``.

    Rows with a missing `ds` or `y` are discarded; when several rows share the
    same `ds`, the last one in the current order of `results` is kept.
    """
    matches_tag = results['model'].str.contains(tag, na=False)
    channel_rows = results[matches_tag]
    complete_rows = channel_rows.dropna(subset=['ds', 'y'])
    return complete_rows.drop_duplicates(subset=['ds'], keep='last')


AB_trx = _actuals_for_channel("AB")
MB_trx = _actuals_for_channel("MB")

In [None]:
# Multipliers that define the tolerance band drawn around the actual values
# (actual value +/- 10 %).
BAND_UPPER_FACTOR = 1.1
BAND_LOWER_FACTOR = 0.9
BAND_FILL_COLOR = 'rgba(60,60,60,0.1)'


def _add_actuals_with_band(figure, actuals, channel):
    """Add the actual series of one channel and its tolerance band to ``figure``.

    Parameters
    ----------
    figure : plotly.graph_objects.Figure
        Figure that receives the three traces.
    actuals : pandas.DataFrame
        Frame with the columns `ds` and `y`. The columns `upper_graph` and
        `lower_graph` are (re)written on it.
    channel : str
        Channel label used in the trace names, e.g. ``'AB'``.
    """
    actuals['upper_graph'] = actuals.y * BAND_UPPER_FACTOR
    actuals['lower_graph'] = actuals.y * BAND_LOWER_FACTOR

    suffix = channel.lower()
    # The lower edge goes first and is not filled; the upper edge then fills
    # down to it ('tonexty' fills towards the previously added trace). This
    # shades the band exactly once, instead of shading the upper half twice.
    # Both edges are drawn as zero-width lines without markers so that only
    # the shaded area is visible.
    figure.add_trace(go.Scatter(x=actuals.ds, y=actuals['lower_graph'],
                                name='lower_' + suffix,
                                mode='lines', line=dict(width=0)))
    figure.add_trace(go.Scatter(x=actuals.ds, y=actuals['upper_graph'],
                                name='upper_' + suffix,
                                fill='tonexty', fillcolor=BAND_FILL_COLOR,
                                mode='lines', line=dict(width=0)))
    # Actual values on top of the band.
    figure.add_trace(go.Scatter(x=actuals.ds, y=actuals.y,
                                name='trx_reales_' + channel,
                                mode='markers+lines',
                                line=dict(color='black', width=2, dash='dot')))


fig = go.Figure()

# One smoothed line per model with its predictions.
for model_name, model_rows in model_dict.items():
    fig.add_trace(go.Scatter(x=model_rows.ds, y=model_rows.yhat,
                             name=model_name,
                             mode='markers+lines',
                             line_shape='spline'))

# Actual values and +/-10 % band for each channel.
for channel, actuals in (('AB', AB_trx), ('MB', MB_trx)):
    _add_actuals_with_band(fig, actuals, channel)

# Use the default hover content for every trace and compare values that
# share the same x position.
fig.update_traces(hovertemplate=None)
fig.update_layout(
    hovermode="x",
    legend=dict(y=0.5, traceorder='reversed', font_size=13),
    xaxis_title="ds",
    yaxis_title="y / yhat",
)

fig.write_html("plot_forecasts_s3.html")

fig.show()