# Analyzing the forecast confidence interval

In [1]:
import numpy as np
import pandas as pd
from code.util import calculate_confidence_interval

## Reading data

In [2]:
df_forecasting = pd.read_csv('data/df_deaths_forecasted.csv')
df_forecasting['max_date_to_fit'] = pd.to_datetime(df_forecasting['max_date_to_fit']) 
df_forecasting['date'] = pd.to_datetime(df_forecasting['date'])

## Calculating 

In [3]:
df_forecasting.columns

Index(['forecast', 'municipality', 'municipality_id', 'max_date_to_fit',
       'estimation', 'date'],
      dtype='object')

In [4]:
df_confidence_interval = pd.DataFrame()
for municipality in df_forecasting['municipality'].unique():    
    df_municipality = df_forecasting.loc[df_forecasting['municipality'] == municipality]
    df_municipality = df_municipality.sort_values('date')
    municipality_id = df_municipality['municipality_id'].values[0]
    for date in df_municipality['date'].unique():
        df_municipality_date = df_municipality.loc[df_municipality['date'] == date]
        max_date_to_fit = df_municipality_date['max_date_to_fit'].values[0]
        forecast = df_municipality_date['forecast'].values
        lower_bound, upper_bound = calculate_confidence_interval(forecast)
        mean = np.mean(forecast)
        entry = {'municipality': municipality, 'municipality_id': municipality_id, 'date': date, 'forecast': mean, 'lower_bound': lower_bound, 'upper_bound': upper_bound, 'max_date_to_fit': max_date_to_fit}
        df_confidence_interval = pd.concat([df_confidence_interval, pd.DataFrame.from_records([entry])])

## Saving data

In [5]:
df_confidence_interval.to_csv('data/df_forecast_death_confidence_interval.csv')

In [6]:
df_confidence_interval

Unnamed: 0,municipality,municipality_id,date,forecast,lower_bound,upper_bound,max_date_to_fit
0,Aracaju,280030.0,2020-04-26,0.027166,0.026555,0.027729,2020-04-25
0,Aracaju,280030.0,2020-04-27,0.028970,0.028350,0.029591,2020-04-25
0,Aracaju,280030.0,2020-04-28,0.030849,0.030213,0.031497,2020-04-25
0,Aracaju,280030.0,2020-04-29,0.032926,0.032265,0.033561,2020-04-25
0,Aracaju,280030.0,2020-04-30,0.035095,0.034306,0.035845,2020-04-25
...,...,...,...,...,...,...,...
0,Feira de Santana,291080.0,2022-05-17,0.032853,0.019744,0.047410,2022-02-26
0,Feira de Santana,291080.0,2022-05-18,0.033142,0.019960,0.047280,2022-02-26
0,Feira de Santana,291080.0,2022-05-19,0.033436,0.020459,0.047109,2022-02-26
0,Feira de Santana,291080.0,2022-05-20,0.033737,0.020380,0.047439,2022-02-26


In [7]:
df_confidence_interval.columns

Index(['municipality', 'municipality_id', 'date', 'forecast', 'lower_bound',
       'upper_bound', 'max_date_to_fit'],
      dtype='object')