# Pré-processamento dos dados meteorológicos para o estudo da ocorrência de dengue em MG

## Período: 2014 a 2021

### Bibliotecas

In [1]:
import pandas as pd
import itertools
import os
from io import StringIO

### Variáveis globais

In [2]:
# Dataset folders, relative to the current working directory.
# Built with os.path.join so the separator matches the host OS (hard-coded
# backslashes only resolve on Windows). The trailing '' keeps the trailing
# separator, so on Windows the values are identical to the previous literals.
path_meteo = os.path.join('..', 'dataset', 'meteo', '')
path_base = os.path.join('..', 'dataset', 'base', '')

### Leitura dos dados

In [36]:
def read_base(filename_var):
    """Load a base CSV file into a DataFrame.

    Args:
        filename_var: File name without the ``.csv`` extension; it is
            resolved inside the ``path_base`` folder.

    Returns:
        The DataFrame parsed by ``pd.read_csv`` using ``;`` as separator
        and latin-1 as encoding.
    """
    csv_path = '{}.csv'.format(os.path.join(path_base, filename_var))
    read_options = dict(sep=';', encoding='latin-1')
    return pd.read_csv(csv_path, **read_options)

In [37]:
def read_folder(path_data):
    """Return the names of the entries in *path_data*, sorted ascending.

    Args:
        path_data: Directory to list.

    Returns:
        A new list with the entry names (not full paths) in sorted order.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    # sorted() already returns a fresh list, so no extra copy is needed.
    return sorted(os.listdir(path_data))

In [38]:
def list_mun():
    """Return a dict mapping each MUNICIPIO value to its MACRORREGIAO value.

    The pairs come from the 'municipio' base CSV loaded through
    ``read_base``. When a MUNICIPIO value appears more than once, the last
    row wins.

    Returns:
        dict: ``{MUNICIPIO: MACRORREGIAO}``.
    """
    df = read_base('municipio')
    # Column-wise pairing avoids iterrows(), which builds one Series per row
    # and does not preserve dtypes across the row.
    return dict(zip(df['MUNICIPIO'], df['MACRORREGIAO']))

In [55]:
def break_test(s):
    """Tell whether *s* lacks the 'PRECIPITAÇÃO' marker.

    The comparison is done on the upper-cased text, so the marker is found
    regardless of the case used in *s*.

    Returns:
        bool: True when the marker is absent, False when it is present.
    """
    has_marker = s.upper().find('PRECIPITAÇÃO') >= 0
    return not has_marker

def read_file(file_path):
    """Return the lines of a file starting at the 'PRECIPITAÇÃO' header line.

    Every leading line for which ``break_test`` is true (i.e. that does not
    contain 'PRECIPITAÇÃO') is discarded; the first line that contains it
    and all lines after it are returned unchanged.

    Args:
        file_path: Path of the latin-1 encoded text file to read.

    Returns:
        list[str]: The remaining lines, including their line endings. Empty
        when no line contains the marker.
    """
    with open(file_path, encoding='latin-1') as fp:
        # dropwhile is lazy, so the list must be built while the file is
        # still open.
        return list(itertools.dropwhile(break_test, fp))

In [58]:
def clean_data(df, sentinel=-9999):
    """Drop rows flagged with the sentinel and fill missing humidity.

    Rows where any column equals *sentinel* are removed first; only then
    are the NaN values of 'UMIDADE_RELATIVA' replaced by the mean of the
    remaining values, so the sentinel never contaminates the fill value.

    Args:
        df: DataFrame that contains a 'UMIDADE_RELATIVA' column.
        sentinel: Value that marks an invalid measurement (default -9999).

    Returns:
        A new DataFrame; the input frame is left untouched.
    """
    # NaN != sentinel is True, so rows with missing values are kept here.
    valid_rows = (df != sentinel).all(axis=1)
    cleaned = df.loc[valid_rows].copy()

    humidity = cleaned['UMIDADE_RELATIVA']
    # Plain assignment instead of an in-place fillna on a column selection,
    # which would not reliably update the frame.
    cleaned['UMIDADE_RELATIVA'] = humidity.fillna(humidity.mean())
    return cleaned

In [69]:
def totalizar_mun(df):
    """Average every numeric column per (ANO, MES).

    Args:
        df: DataFrame with 'ANO' and 'MES' columns plus the measurements.

    Returns:
        DataFrame with one row per (ANO, MES) pair, the two keys restored
        as ordinary columns, and the mean of each numeric column.
        Non-numeric columns are left out of the result.
    """
    grouped = df.groupby(['ANO', 'MES'], group_keys=False)
    # numeric_only=True: averaging a text column raises TypeError on
    # pandas >= 2.0 instead of silently dropping it.
    return grouped.mean(numeric_only=True).reset_index()

def totalizar_reg(df):
    """Average every numeric column per (ANO, MES, MACRORREGIAO).

    Args:
        df: DataFrame with 'ANO', 'MES' and 'MACRORREGIAO' columns plus the
            measurements.

    Returns:
        DataFrame with one row per (ANO, MES, MACRORREGIAO) combination,
        the three keys restored as ordinary columns, and the mean of each
        numeric column. Non-numeric columns are left out of the result.
    """
    grouped = df.groupby(['ANO', 'MES', 'MACRORREGIAO'], group_keys=False)
    # numeric_only=True: averaging a text column raises TypeError on
    # pandas >= 2.0 instead of silently dropping it.
    return grouped.mean(numeric_only=True).reset_index()

In [42]:
# Sorted names of every entry found in the meteorological data folder.
lst_files = read_folder(path_meteo)
# MUNICIPIO -> MACRORREGIAO mapping built from the 'municipio' base file.
dic_munic = list_mun()

In [72]:
def create_meteo_df():
    """Build the monthly meteorological means for every municipality file.

    For each (municipality, macro-region) pair in ``dic_munic``, every name
    in ``lst_files`` that contains the municipality name is read from
    ``path_meteo``, reduced to the date and the four measurement columns,
    cleaned with ``clean_data`` and averaged per (ANO, MES) with
    ``totalizar_mun``. Each result is tagged with its MUNICIPIO and
    MACRORREGIAO.

    Returns:
        DataFrame with the concatenation of all per-file monthly means.

    Raises:
        ValueError: If no file matched any municipality.
        KeyError: If a file lacks one of the expected columns.
    """
    # Two header spellings of the date column are mapped to the same name.
    col_names = {
        'Data': 'DATA_COLETA',
        'DATA (YYYY-MM-DD)': 'DATA_COLETA',
        'PRECIPITAÇÃO TOTAL, HORÁRIO (mm)': 'PRECIPITACAO',
        'PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)': 'PRESSAO_ATM',
        'TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)': 'TEMPERATURA',
        'UMIDADE RELATIVA DO AR, HORARIA (%)': 'UMIDADE_RELATIVA',
    }
    keep_cols = ['DATA_COLETA', 'PRECIPITACAO', 'PRESSAO_ATM',
                 'TEMPERATURA', 'UMIDADE_RELATIVA']

    lst = []
    for k_mun, v_reg in dic_munic.items():
        # NOTE(review): plain substring match - a municipality whose name is
        # contained in another one's would also pick up that one's files;
        # confirm against the file naming scheme.
        files_mun = [f for f in lst_files if k_mun in f]
        for f in files_mun:
            full_name = os.path.join(path_meteo, f)
            lines = read_file(full_name)
            # The text is already decoded, so no encoding is passed here.
            df = pd.read_csv(StringIO(''.join(lines)), sep=';', decimal=',')

            # .copy() makes the selection an independent frame, so the
            # column assignments below do not write into a slice.
            df = df.rename(columns=col_names)[keep_cols].copy()

            # Year and month are sliced by position; this relies on the date
            # text starting with a 4-digit year, a 1-char separator and a
            # 2-digit month.
            df['ANO'] = df['DATA_COLETA'].str[0:4]
            df['MES'] = df['DATA_COLETA'].str[5:7]

            df = clean_data(df)
            df = totalizar_mun(df)

            df['MUNICIPIO'] = k_mun
            df['MACRORREGIAO'] = v_reg

            lst.append(df)

    if not lst:
        raise ValueError(
            'no meteorological file matched any municipality name'
        )
    return pd.concat(lst)

In [78]:
# Per-municipality monthly means, then averaged again per macro-region.
df_meteo = create_meteo_df()
df_meteo = totalizar_reg(df_meteo)
# Relative humidity is kept as a whole number (still stored as float).
df_meteo['UMIDADE_RELATIVA'] = df_meteo['UMIDADE_RELATIVA'].round(0)
full_name = os.path.join(path_base, 'meteo_mg.csv')
# The row index is written as the first (unnamed) column of the CSV.
df_meteo.to_csv(full_name, sep=';', encoding='latin-1')
# info() prints its report itself and returns None, so it is not wrapped in
# print().
df_meteo.info()
df_meteo.sample(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1344 entries, 0 to 1343
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ANO               1344 non-null   object 
 1   MES               1344 non-null   object 
 2   MACRORREGIAO      1344 non-null   int64  
 3   PRECIPITACAO      1344 non-null   float64
 4   PRESSAO_ATM       1344 non-null   float64
 5   TEMPERATURA       1344 non-null   float64
 6   UMIDADE_RELATIVA  1344 non-null   float64
dtypes: float64(4), int64(1), object(2)
memory usage: 73.6+ KB
None


Unnamed: 0,ANO,MES,MACRORREGIAO,PRECIPITACAO,PRESSAO_ATM,TEMPERATURA,UMIDADE_RELATIVA
111,2014,8,3114,0.016935,957.303495,20.650739,65.0
340,2016,1,3105,0.422524,922.32518,23.199293,81.0
308,2015,11,3101,0.32963,908.494687,22.217857,80.0
1313,2021,10,3112,0.214982,921.604923,24.735399,67.0
59,2014,5,3104,0.019758,890.575403,17.963038,76.0
1238,2021,5,3107,0.008154,950.503397,21.198119,71.0
471,2016,10,3110,0.125237,929.460748,20.922003,75.0
1110,2020,8,3105,0.015484,926.494731,18.95457,62.0
528,2017,2,3111,0.171257,968.096989,25.693084,70.0
823,2018,11,3112,0.485008,921.068784,22.705374,77.0
