In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

### Relação entre os municípios e as regionais 

In [2]:
# Lookup table linking each municipality geocode to its health regional and
# macro-regional (gzip compression is inferred from the file extension).
reg_muni_path = 'regional_to_muni.csv.gz'
df_reg_muni = pd.read_csv(reg_muni_path)

df_reg_muni.head()

Unnamed: 0,geocodigo,uf,id_regional,regional,macroregional_id,macroregional
0,1100189,Rondônia,11002,Cafe,1101,II – CACOAL
1,1100296,Rondônia,11005,Zona Da Mata,1101,II – CACOAL
2,1101476,Rondônia,11002,Cafe,1101,II – CACOAL
3,1301902,Amazonas,13004,Medio Amazonas,1303,LESTE
4,1302702,Amazonas,13003,Rio Madeira,1303,LESTE


### Agregando os dados para as regionais de saúde do Oeste do Paraná (41007, 41008, 41009, 41010, 41020)

In [25]:
# Health regionals of western Paraná that the dengue series is aggregated over.
WEST_PR_REGIONALS = [41007, 41008, 41009, 41010, 41020]

# Municipality-level dengue records for Paraná.
dengue_muni = pd.read_csv('PR_dengue.csv')

# Attach the regional of every municipality. This is an inner join, so
# municipalities that are absent from the lookup table are dropped.
dengue_muni = dengue_muni.merge(
    df_reg_muni, left_on='municipio_geocodigo', right_on='geocodigo'
)

# Keep only the western regionals and index by the week start date. Building the
# frame in a single chain (instead of assigning a column onto a filtered slice)
# avoids pandas' SettingWithCopyWarning.
dengue_west = (
    dengue_muni
    .loc[lambda d: d.id_regional.isin(WEST_PR_REGIONALS)]
    .assign(data_iniSE=lambda d: pd.to_datetime(d.data_iniSE))
    .set_index('data_iniSE')
)

# Weekly totals (weeks ending on Sunday) per regional. The value columns are
# selected *after* the groupby so the grouping key never shows up as a data
# column: older pandas summed `id_regional` along with the cases (which then had
# to be dropped), while newer pandas excludes it and would make that drop fail.
df = (
    dengue_west
    .groupby('id_regional')[['casos', 'casos_est']]
    .resample('W-SUN')
    .sum()
    .reset_index()
)

df.head()

Unnamed: 0,id_regional,data_iniSE,casos,casos_est
0,41007,2010-01-03,1,1.0
1,41007,2010-01-10,2,2.0
2,41007,2010-01-17,1,1.0
3,41007,2010-01-24,2,2.0
4,41007,2010-01-31,0,0.0


### Aplicando uma média móvel de 3 semanas: 

In [48]:
# Wide layout: one row per week, one `casos_<id_regional>` column per regional.
cases_wide = df.pivot(
    index='data_iniSE', columns='id_regional', values='casos'
).add_prefix('casos_')

# Trailing 3-week moving average; rows whose window is incomplete (or that
# contain any missing value) are discarded.
df_res = cases_wide.rolling(window=3).mean().dropna()
df_res.head()

id_regional,casos_41007,casos_41008,casos_41009,casos_41010,casos_41020
data_iniSE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-17,1.333333,2.333333,33.0,11.333333,19.666667
2010-01-24,1.666667,3.333333,51.666667,11.0,23.333333
2010-01-31,1.0,2.666667,76.0,10.333333,24.666667
2010-02-07,1.0,2.666667,125.333333,9.0,35.0
2010-02-14,0.666667,2.333333,219.333333,12.0,49.0


In [49]:
# Persist the smoothed weekly case series; the date index is written as the first CSV column.
df_res.to_csv('susp_data_regions_rw.csv')

### Aplicando o mesmo processo com os dados de clima:

In [5]:
# Health regionals of western Paraná that the climate series is aggregated over.
WEST_PR_REGIONALS = [41007, 41008, 41009, 41010, 41020]

# Municipality-level climate records for Paraná.
clima_muni = pd.read_csv('PR_clima.csv')

# Attach the regional of every municipality. This is an inner join, so
# municipalities that are absent from the lookup table are dropped.
clima_muni = clima_muni.merge(
    df_reg_muni, left_on='geocode', right_on='geocodigo'
)

# Keep only the western regionals and index by date. Building the frame in a
# single chain (instead of assigning a column onto a filtered slice) avoids
# pandas' SettingWithCopyWarning.
clima_west = (
    clima_muni
    .loc[lambda d: d.id_regional.isin(WEST_PR_REGIONALS)]
    .assign(date=lambda d: pd.to_datetime(d.date))
    .set_index('date')
)

# Weekly mean temperature (weeks ending on Sunday) per regional. The value
# column is selected *after* the groupby so the grouping key never shows up as a
# data column: older pandas averaged `id_regional` too (which then had to be
# dropped), while newer pandas excludes it and would make that drop fail.
df_clima = (
    clima_west
    .groupby('id_regional')[['temp_med']]
    .resample('W-SUN')
    .mean()
    .reset_index()
)

df_clima.head()

Unnamed: 0,id_regional,date,temp_med
0,41007,2010-01-03,22.695727
1,41007,2010-01-10,23.380302
2,41007,2010-01-17,21.824306
3,41007,2010-01-24,21.726857
4,41007,2010-01-31,22.127619


In [6]:
# Wide layout: one row per week, one `temp_med_<id_regional>` column per regional.
temp_wide = df_clima.pivot(
    index='date', columns='id_regional', values='temp_med'
).add_prefix('temp_med_')

# Trailing 3-week moving average; rows whose window is incomplete (or that
# contain any missing value) are discarded.
df_clima_res = temp_wide.rolling(window=3).mean().dropna()
df_clima_res.head()

id_regional,temp_med_41007,temp_med_41008,temp_med_41009,temp_med_41010,temp_med_41020
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-17,22.633445,24.262371,26.315642,24.507677,25.948526
2010-01-24,22.310488,23.83318,25.840116,24.12851,25.55422
2010-01-31,21.892927,23.336393,25.282332,23.556058,24.990313
2010-02-07,22.999331,24.577727,26.697249,24.75493,26.357255
2010-02-14,23.31041,24.88125,26.862902,24.915139,26.450872


In [7]:
# Persist the smoothed weekly temperature series; the date index is written as the first CSV column.
df_clima_res.to_csv('clima_regions_rw.csv')