# Analyzing the forecast confidence interval

In [1]:
import numpy as np
import pandas as pd
from code.util import calculate_confidence_interval

## Reading data

In [2]:
# Load the forecast table and convert both date columns to datetimes.
df_forecasting = pd.read_csv('data/df_forecasting.csv')
for date_column in ('max_date_to_fit', 'date'):
    df_forecasting[date_column] = pd.to_datetime(df_forecasting[date_column])

## Calculating confidence intervals

In [3]:
# Inspect the columns available in the loaded forecast table.
df_forecasting.columns

Index(['Unnamed: 0', 'forecast', 'municipality', 'municipality_id',
       'max_date_to_fit', 'date', 'execution'],
      dtype='object')

In [4]:
# Collapse the forecasts of every (municipality, date) pair into a single row
# holding their mean and the bounds returned by calculate_confidence_interval.
#
# Rows are gathered in a list and the DataFrame is built once at the end.
# Growing the frame with pd.concat inside the loop copies all earlier rows on
# every iteration and leaves every row with index label 0; building it once
# yields a regular 0..n-1 index instead.
confidence_interval_records = []

# sort=False keeps the municipalities in order of first appearance.
for municipality, df_municipality in df_forecasting.groupby('municipality', sort=False):
    df_municipality = df_municipality.sort_values('date')
    # Taken from the first row; assumes the id is constant within a
    # municipality -- TODO confirm.
    municipality_id = df_municipality['municipality_id'].values[0]

    # groupby yields the dates in ascending order and hands over each subset
    # directly, avoiding a boolean mask over the whole frame per date.
    for date, df_municipality_date in df_municipality.groupby('date'):
        # Taken from the first row; assumes a single max_date_to_fit per
        # (municipality, date) -- TODO confirm.
        max_date_to_fit = df_municipality_date['max_date_to_fit'].values[0]
        forecast = df_municipality_date['forecast'].values
        lower_bound, upper_bound = calculate_confidence_interval(forecast)
        confidence_interval_records.append({
            'municipality': municipality,
            'municipality_id': municipality_id,
            'date': date,
            'forecast': np.mean(forecast),
            'lower_bound': lower_bound,
            'upper_bound': upper_bound,
            'max_date_to_fit': max_date_to_fit,
        })

# Explicit columns keep the schema stable even when no records were produced.
df_confidence_interval = pd.DataFrame(
    confidence_interval_records,
    columns=[
        'municipality',
        'municipality_id',
        'date',
        'forecast',
        'lower_bound',
        'upper_bound',
        'max_date_to_fit',
    ],
)

## Saving data

In [5]:
# Persist the confidence-interval table as CSV.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default -- confirm downstream readers expect that column.
df_confidence_interval.to_csv('data/df_forecast_death_confidence_interval.csv')

In [6]:
# Display the resulting table (the rendered output is truncated for long frames).
df_confidence_interval

Unnamed: 0,municipality,municipality_id,date,forecast,lower_bound,upper_bound,max_date_to_fit
0,Aparecida de Goiânia,520140,2020-04-26,0.073103,0.070685,0.075141,2020-04-25
0,Aparecida de Goiânia,520140,2020-04-27,0.071621,0.069892,0.073442,2020-04-25
0,Aparecida de Goiânia,520140,2020-04-28,0.073044,0.071063,0.075218,2020-04-25
0,Aparecida de Goiânia,520140,2020-04-29,0.071074,0.068863,0.073110,2020-04-25
0,Aparecida de Goiânia,520140,2020-04-30,0.071983,0.069963,0.074003,2020-04-25
...,...,...,...,...,...,...,...
0,Uberlândia,317020,2022-05-17,0.166675,0.164570,0.168552,2022-02-26
0,Uberlândia,317020,2022-05-18,0.165722,0.164023,0.167560,2022-02-26
0,Uberlândia,317020,2022-05-19,0.164480,0.162974,0.166117,2022-02-26
0,Uberlândia,317020,2022-05-20,0.164638,0.163272,0.166209,2022-02-26


In [7]:
# Check the columns of the confidence-interval table.
df_confidence_interval.columns

Index(['municipality', 'municipality_id', 'date', 'forecast', 'lower_bound',
       'upper_bound', 'max_date_to_fit'],
      dtype='object')