In [None]:
import pandas as pd
import json
import datetime
import time
import os
import numpy as np
from lgbm_consumption_module import data_preprocessing_interventions, lgbm_regression_efecto_acumulado_con_linea_base_del_experimento
import matplotlib
from auxiliary import graph_check_cumulative_bsts, graph_check_gbm_dist, graph_check_gbm_timeseries, prepare_data_synthetic_bsts, prepare_data_control_bsts
from config import DATA_GBM_CONSUMPTION_PROCESSED_FILE_AGGREGATED
import seaborn as sns
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')
from causalimpact import CausalImpact

In [None]:
# Build the working frame with the project's preprocessing helper.
# Every later cell in this notebook reads (and some extend) `data_mean`.
data_mean = data_preprocessing_interventions()

In [None]:
# Switches forwarded unchanged to the LightGBM regression helper.
get_best_features = False
get_best_parameters = False
use_best_features = True

# Collected once so every call in the loop receives identical options.
lgbm_options = {
    "get_best_parameters": get_best_parameters,
    "get_best_features": get_best_features,
    "use_best_features": use_best_features,
}

# Run the helper once per alpha level; each call receives the frame returned
# by the previous call and its result replaces `data_mean`.
for alpha in ("0.975", "0.50", "0.025"):
    print(f"QUARTILE {alpha}")
    data_mean = lgbm_regression_efecto_acumulado_con_linea_base_del_experimento(
        alpha,
        data_mean,
        **lgbm_options,
    )

In [None]:
# Now calculate the BSTS linear model with one covariate (GBM) and check the plots.
# The three names below are the GBM prediction columns handed to the plotting
# helper (presumably added to `data_mean` by the regression loop above - confirm).
X_names = ['GBM_consumption_kWh_0.50','GBM_consumption_kWh_0.975','GBM_consumption_kWh_0.025']
graph_check_cumulative_bsts(data_mean, X_names)

In [None]:
# Root mean squared logarithmic error (RMSLE) of the 'GBM_consumption_kWh_0.50'
# prediction against the measured 'CONSUMPTION_kWh', reported per intervention
# and then averaged with equal weight per intervention.
# Both series go through log1p, so zero values map to 0 instead of -inf.
data_mean['valid1lp'] = np.log1p(data_mean['GBM_consumption_kWh_0.50'])
data_mean['measured1lp'] = np.log1p(data_mean['CONSUMPTION_kWh'])

# A missing intervention label can never satisfy the equality mask below
# (NaN != NaN); it would yield an empty slice that mean_squared_error rejects,
# so such labels are excluded up front.
interventions_list = data_mean['INTERVENTION'].dropna().unique()

total_rmsle = 0.0
for intervention in interventions_list:
    test = data_mean[data_mean['INTERVENTION'] == intervention]
    # Keep full precision in the running sum; round only when displaying,
    # otherwise the average accumulates per-group rounding error.
    rmsle = np.sqrt(mean_squared_error(test.measured1lp, test.valid1lp))
    print("the RMSLE for intervention {} is {}".format(intervention, round(rmsle, 4)))
    total_rmsle += rmsle

if len(interventions_list) > 0:
    total_rmsle = round(total_rmsle / len(interventions_list), 4)
    print("THE AVERAGE RMSLE FOR ALL INTERVENTIONS IS {}".format(total_rmsle))
else:
    # Nothing to average: report it explicitly instead of dividing by zero.
    total_rmsle = float('nan')
    print("NO INTERVENTIONS FOUND, THE AVERAGE RMSLE IS UNDEFINED")

In [None]:
# Check the behavior of the time series per group (observation vs. GBM model).
# Only the 0.50 GBM prediction column is passed to the plotting helper here.
X_names = ['GBM_consumption_kWh_0.50']
# NOTE(review): y_limits looks like fixed y-axis bounds for the plots - confirm
# the meaning and units against graph_check_gbm_timeseries in auxiliary.
graph_check_gbm_timeseries(data_mean, X_names, y_limits=[7.5,20])

In [None]:
# Check the per-group distributions (observation vs. GBM model).
# Only the 0.50 GBM prediction column is passed to the plotting helper here.
X_names = ['GBM_consumption_kWh_0.50']
graph_check_gbm_dist(data_mean, X_names)

In [None]:
# Persist the frame to the path configured in
# DATA_GBM_CONSUMPTION_PROCESSED_FILE_AGGREGATED. The file includes the
# 'valid1lp' and 'measured1lp' columns added in the RMSLE cell above.
# NOTE(review): to_csv is called with its defaults, so the index is written
# as the first column - confirm downstream readers expect that.
data_mean.to_csv(DATA_GBM_CONSUMPTION_PROCESSED_FILE_AGGREGATED)