# Inicialização

In [2]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

from plotly import graph_objects as go
import plotly as py

from datetime import datetime
from datetime import timedelta

import xgboost
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix, accuracy_score, f1_score, confusion_matrix, recall_score, precision_score

from sklearn.utils import resample

# Funções

In [3]:
def upsampleData(X, label):
    # Separar verdadeiro e falso
    false_label = X[X[label]==0].copy()
    true_label = X[X[label]==1].copy()
    
    # Realizar upsample para os valores verdadeiros
    label_upsampled = resample(true_label,
                            replace=True, # sample with replacement
                            n_samples=len(false_label), # match number in majority class
                            random_state=378) # reproducible results
    upsampled = pd.concat([false_label, label_upsampled])
    
    # Separar x e y
    x = upsampled[[c for c in X.columns if label not in c]]
    y = upsampled[label]
    
    return x, y

In [4]:
def trainXGB(df, cols_rem, label, verbose=True):
    xgb = xgboost.XGBClassifier()

    # Separar x e y e remover colunas desnecessárias
    x = df[[c for c in df.columns if c not in cols_rem]]
    y = df[label]
    
    # Separar dados de treinamento e teste
    x_treino, x_teste, y_treino, y_teste = train_test_split(x, y, test_size=0.3, random_state = 378, stratify=y)
    
    # Upsample
    X = pd.concat([x_treino, y_treino], axis=1)
    x_treino, y_treino = upsampleData(X, label)

    # Parâmetros do XGBClassifier
    param = {'max_depth':50, 'eta':1, 'objective':'binary:logistic', 'min_child_weight': 1, 'lambda': 1, 'alpha': 0, 'gamma': 0}

    # Gerar DMatrix com dados de treinamento e teste
    df_train = xgboost.DMatrix(data=x_treino, label=y_treino)
    df_test = xgboost.DMatrix(data=x_teste, label=y_teste)

    # Treinar modelo e predizer em cima dos dados de treinamento e teste
    bst = xgboost.train(param, df_train, 2, feval=f1_score)
    y_teste_pred = bst.predict(xgboost.DMatrix(data=x_teste, label=y_teste))
    y_teste_pred = [1 if i>0.5 else 0 for i in y_teste_pred]
    y_treino_pred = bst.predict(xgboost.DMatrix(data=x_treino, label=y_treino))
    y_treino_pred = [1 if i>0.5 else 0 for i in y_treino_pred]
    
    # Mostrar resultados se verbose é verdadeiro
    if verbose:
        print(f"Treino: {accuracy_score(y_treino, y_treino_pred)}")
        print(f"Teste: {accuracy_score(y_teste, y_teste_pred)}")
        print(f"Precisão: {precision_score(y_teste, y_teste_pred)}")
        print(f"Recall: {recall_score(y_teste, y_teste_pred)}")
        print(f"F1: {f1_score(y_teste, y_teste_pred)}")
        display(confusion_matrix(y_teste, y_teste_pred, normalize='true'))
        display(confusion_matrix(y_teste, y_teste_pred,))
        
    # Salvar resultados em um dict
    results = {
        'Features': list(x.columns),
        'Train_Acc': accuracy_score(y_treino, y_treino_pred),
        'Test_Acc': accuracy_score(y_teste, y_teste_pred),
        'Precision': precision_score(y_teste, y_teste_pred),
        'Recall': recall_score(y_teste, y_teste_pred),
        'F1': f1_score(y_teste, y_teste_pred),
        'Ver_Pos': confusion_matrix(y_teste, y_teste_pred, normalize='true')[1,1]
    }
    
    return bst, results, y_treino_pred, y_teste_pred

# Prepped Data

## Carregar dados

In [1]:
df_p = pd.read_csv('../../../data/cleandata/Info pluviometricas/Merged Data/prepped_data.csv', sep=';')
df_p.groupby('Label').count()

NameError: name 'pd' is not defined

In [10]:
df_p = df_p.sort_values(['Data_Hora', 'Local'])
# Shiftar label 6 horas para frente
df_p['Label'] = df_p['Label'].shift(-5*6, fill_value = 0)

## Treinar modelo

In [11]:
# Parâmetros
label = 'Label'
cols_rem = ['LocalMax', 'Label', 'Label_Old', 'Cluster', 'Data', 'Hora', 'Data_Hora', 'Ordens', 'Minuto'] + [c for c in df_p.columns if 'Hora_' in c]
# Conjunto de resultados
prepped_models = {}

# Gerar um modelo para cada local e um modelo geral
for l in range(6):
    if l != 0:
        df_train = df_p[df_p['Local'] == l]
    else:
        df_train = df_p.copy()
        
    print(f'----- LOCAL {l} -----')
    model, training_res, y_treino_pred, y_teste_pred = trainXGB(df_train, cols_rem, label)
    
    # Salvar modelo e resultados
    prepped_models[l] = {
        'model': model,
        'results': training_res,
        'y_treino': y_treino_pred,
        'y_teste': y_teste_pred
    }

----- LOCAL 0 -----
Treino: 0.9813408203358016
Teste: 0.9623797901792219
Precisão: 0.3252005347593583
Recall: 0.9543894065718489
F1: 0.4851053221986788


array([[0.96253097, 0.03746903],
       [0.04561059, 0.95438941]])

array([[103731,   4038],
       [    93,   1946]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9802352660177942
Teste: 0.9577451962480649
Precisão: 0.3102661596958175
Recall: 0.951048951048951
F1: 0.4678899082568807


array([[0.9578786 , 0.0421214 ],
       [0.04895105, 0.95104895]])

array([[20626,   907],
       [   21,   408]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9836959761549926
Teste: 0.9668973681814043
Precisão: 0.3473877176901925
Recall: 0.9619289340101523
F1: 0.5104377104377106


array([[0.96698813, 0.03301187],
       [0.03807107, 0.96192893]])

array([[20856,   712],
       [   15,   379]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9804565756823821
Teste: 0.9616610509061105
Precisão: 0.3028523489932886
Recall: 0.9704301075268817
F1: 0.46163682864450134


array([[0.96150996, 0.03849004],
       [0.02956989, 0.97043011]])

array([[20759,   831],
       [   11,   361]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9791069152326323
Teste: 0.9572443311173846
Precisão: 0.3181484202792065
Recall: 0.9752252252252253
F1: 0.47977839335180056


array([[0.95687332, 0.04312668],
       [0.02477477, 0.97522523]])

array([[20590,   928],
       [   11,   433]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.982269221594975
Teste: 0.9608414534195429
Precisão: 0.31295843520782396
Recall: 0.9576059850374065
F1: 0.47174447174447176


array([[0.96090163, 0.03909837],
       [0.04239401, 0.95760599]])

array([[20718,   843],
       [   17,   384]], dtype=int64)

In [12]:
prepped_models[0]['results']

{'Features': ['Mes', 'Dia', 'Local', 'Precipitacao', 'PrecSum'],
 'Train_Acc': 0.9813408203358016,
 'Test_Acc': 0.9623797901792219,
 'Precision': 0.3252005347593583,
 'Recall': 0.9543894065718489,
 'F1': 0.4851053221986788,
 'Ver_Pos': 0.9543894065718489}

# Full Data

## Carregar dados

In [13]:
df_f = pd.read_csv('../../../data/cleandata/Info pluviometricas/Merged Data/full_data.csv', sep=';')
display(df_f.head())
df_f.shape

Unnamed: 0,Data_Hora,LocalMax_d_All,LocalMax_d_ow,Local_d_Null,LocalMax_h_All,LocalMax_h_ow,Local_h_Null,Local,UmidadeRelativa,PressaoAtmosferica,...,TemperaturaInterna,PontoDeOrvalho,RadiacaoSolar,DirecaoDoVento,VelocidadeDoVento,Precipitacao,LocalMax_d,LocalMax_h,Local_d,Local_h
0,2011-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,RM,86.6,926.7,...,23.3,17.8,0.0,131.0,2.5,0.0,0.0,0.0,0.0,0.0
1,2011-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,Erasmo,88.8,920.5,...,22.9,17.7,0.0,133.0,0.7,0.0,0.0,0.0,0.0,0.0
2,2011-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,Camilopolis,84.8,920.6,...,22.8,17.5,0.0,137.0,1.5,0.0,0.0,0.0,0.0,0.0
3,2011-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,Paraiso,92.3,919.1,...,22.7,18.0,0.0,101.0,2.6,0.0,0.0,0.0,0.0,0.0
4,2011-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,Vitoria,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


(1464030, 21)

## Preparar dados

In [14]:
# Filtrar por hora -> minuto 0
df_f['Data_Hora'] = pd.to_datetime(df_f['Data_Hora'], yearfirst=True)
df_f = df_f[df_f['Data_Hora'].dt.minute == 0]
# Remover colunas desnecessárias
df_f = df_f.drop(columns = ['LocalMax_d_ow', 'LocalMax_h_All', 'LocalMax_h', 'LocalMax_h_ow', 'LocalMax_d'] + [c for c in df_f.columns if 'Local_' in c])
df_f = df_f.rename(columns = {'LocalMax_d_All': 'Label'})
# Colunas de data
df_f['Dia'] = df_f['Data_Hora'].dt.day
df_f['Mes'] = df_f['Data_Hora'].dt.month
df_f['Data'] = df_f['Data_Hora'].dt.strftime('%Y-%m-%d')
# Codificar local
df_f['Local'] = df_f['Local'].replace({'Camilopolis': 1, 'Erasmo': 2, 'Paraiso': 3, 'RM': 4, 'Vitoria': 5})

In [15]:
# Soma da precipitação do dia
df_prec_sum = df_f.groupby(['Data', 'Local']).sum().reset_index()[['Data', 'Local', 'Precipitacao']]
df_prec_sum.columns = ['Data', 'Local', 'PrecSum']
df_f = df_f.merge(df_prec_sum, on=['Data', 'Local'])
df_f.loc[(df_f['Label'] == 1) & (df_f['PrecSum'] <= 10), 'Label'] = 0

In [16]:
# Realizar OHE para colunas categóricas
cols_dummies = ['Local', 'Mes',]# 'Dia']

df_f_ohe = df_f.copy()

for c in cols_dummies:
    df_f_ohe = pd.concat([df_f_ohe, pd.get_dummies(df_f[c], prefix=c)], axis=1)
    
df_f_ohe = df_f_ohe.sort_values(['Data', 'Local'])

# Shiftar label 6 horas para frente
df_f_ohe['Label_Old'] = df_f_ohe['Label']
df_f_ohe['Label'] = df_f_ohe['Label'].shift(-5*6, fill_value = 0)

In [17]:
df_f_ohe.columns

Index(['Data_Hora', 'Local', 'UmidadeRelativa', 'PressaoAtmosferica',
       'TemperaturaDoAr', 'TemperaturaInterna', 'PontoDeOrvalho',
       'RadiacaoSolar', 'DirecaoDoVento', 'VelocidadeDoVento', 'Precipitacao',
       'Label', 'Dia', 'Mes', 'Data', 'PrecSum', 'Local_1', 'Local_2',
       'Local_3', 'Local_4', 'Local_5', 'Mes_1', 'Mes_2', 'Mes_3', 'Mes_4',
       'Mes_5', 'Mes_6', 'Mes_7', 'Mes_8', 'Mes_9', 'Mes_10', 'Mes_11',
       'Mes_12', 'Label_Old'],
      dtype='object')

## Treinar modelo

In [18]:
# Testar continuamente removendo colunas
test_cases = [
    [],
    ['DirecaoDoVento', 'VelocidadeDoVento'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr', 'Local'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr', 'PrecSum'],
    ['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr', 'Precipitacao']
]

# Salvar todos os resultados em um dataframe
df_training_result = pd.DataFrame(columns = ['Removed_Cols', 'Local', 'Features', 'Train_Acc', 'Test_Acc', 'Precision', 'Recall', 'F1', 'Ver_Pos'])
label = 'Label'

for case in test_cases:
    print(f'---------- CASE ----------')
    print(case)
    print(f'--------------------------')
    for l in range(6):
        if l != 0:
            df_train = df_f_ohe[df_f_ohe['Local'] == l].drop(columns = [c for c in df_f_ohe.columns if 'Local' in c])
        else:
            df_train = df_f_ohe.copy()

        cols_rem = ['Label', 'Label_Old', 'Data', 'Data_Hora'] + cols_dummies
        cols_rem = cols_rem + case
            
        print(f'----- LOCAL {l} -----')
        model, training_res, y_treino_pred, y_teste_pred = trainXGB(df_train, cols_rem, label)
        
        df_training_result = df_training_result.append(
            {**{'Model': model, 'Removed_Cols': case, 'Local': l}, **training_res},
            ignore_index=True
        )

---------- CASE ----------
[]
--------------------------
----- LOCAL 0 -----
Treino: 0.9968458579383808
Teste: 0.9898185924522803
Precisão: 0.1080568720379147
Recall: 0.3917525773195876
F1: 0.16939078751857356


array([[0.99140773, 0.00859227],
       [0.60824742, 0.39175258]])

array([[108576,    941],
       [   177,    114]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9979749163552407
Teste: 0.9919861579091157
Precisão: 0.13125
Recall: 0.3620689655172414
F1: 0.19266055045871558


array([[0.99365413, 0.00634587],
       [0.63793103, 0.36206897]])

array([[21765,   139],
       [   37,    21]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.995989984742381
Teste: 0.9887077679628449
Precisão: 0.09836065573770492
Recall: 0.46153846153846156
F1: 0.16216216216216217


array([[0.98995892, 0.01004108],
       [0.53846154, 0.46153846]])

array([[21690,   220],
       [   28,    24]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9982978223865704
Teste: 0.9930789545578727
Precisão: 0.13636363636363635
Recall: 0.32142857142857145
F1: 0.19148936170212766


array([[0.99479595, 0.00520405],
       [0.67857143, 0.32142857]])

array([[21792,   114],
       [   38,    18]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9978770862274746
Teste: 0.9922138238776068
Precisão: 0.12582781456953643
Recall: 0.3275862068965517
F1: 0.18181818181818185


array([[0.9939737 , 0.0060263 ],
       [0.67241379, 0.32758621]])

array([[21772,   132],
       [   39,    19]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.995791328178526
Teste: 0.9856570439850652
Precisão: 0.06968641114982578
Recall: 0.29411764705882354
F1: 0.11267605633802816


array([[0.98780488, 0.01219512],
       [0.70588235, 0.29411765]])

array([[21627,   267],
       [   48,    20]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento']
--------------------------
----- LOCAL 0 -----
Treino: 0.9966717148592963
Teste: 0.9900280489581815
Precisão: 0.12569832402234637
Recall: 0.4639175257731959
F1: 0.19780219780219782


array([[0.99142599, 0.00857401],
       [0.53608247, 0.46391753]])

array([[108578,    939],
       [   156,    135]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9983271048151989
Teste: 0.9923504234587014
Precisão: 0.1518987341772152
Recall: 0.41379310344827586
F1: 0.22222222222222224


array([[0.9938824, 0.0061176],
       [0.5862069, 0.4137931]])

array([[21770,   134],
       [   34,    24]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9956085442666562
Teste: 0.9885256351880521
Precisão: 0.109375
Recall: 0.5384615384615384
F1: 0.18181818181818182


array([[0.98959379, 0.01040621],
       [0.46153846, 0.53846154]])

array([[21682,   228],
       [   24,    28]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9980336913775899
Teste: 0.9915308259721337
Precisão: 0.09375
Recall: 0.26785714285714285
F1: 0.1388888888888889


array([[0.99338081, 0.00661919],
       [0.73214286, 0.26785714]])

array([[21761,   145],
       [   41,    15]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9977303410358254
Teste: 0.9921682906839085
Precisão: 0.125
Recall: 0.3275862068965517
F1: 0.18095238095238095


array([[0.99392805, 0.00607195],
       [0.67241379, 0.32758621]])

array([[21771,   133],
       [   39,    19]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.995654301654106
Teste: 0.9866132410527274
Precisão: 0.08759124087591241
Recall: 0.35294117647058826
F1: 0.14035087719298248


array([[0.98858135, 0.01141865],
       [0.64705882, 0.35294118]])

array([[21644,   250],
       [   44,    24]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna']
--------------------------
----- LOCAL 0 -----
Treino: 0.9971667508032105
Teste: 0.991448710476468
Precisão: 0.15086206896551724
Recall: 0.48109965635738833
F1: 0.22969647251845776


array([[0.99280477, 0.00719523],
       [0.51890034, 0.48109966]])

array([[108729,    788],
       [   151,    140]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9982097086618795
Teste: 0.9923048902650032
Precisão: 0.14193548387096774
Recall: 0.3793103448275862
F1: 0.20657276995305163


array([[0.99392805, 0.00607195],
       [0.62068966, 0.37931034]])

array([[21771,   133],
       [   36,    22]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9957063495168421
Teste: 0.9887988343502413
Precisão: 0.10569105691056911
Recall: 0.5
F1: 0.174496644295302


array([[0.98995892, 0.01004108],
       [0.5       , 0.5       ]])

array([[21690,   220],
       [   26,    26]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9977597777386472
Teste: 0.9918950915217193
Precisão: 0.1038961038961039
Recall: 0.2857142857142857
F1: 0.15238095238095237


array([[0.99370036, 0.00629964],
       [0.71428571, 0.28571429]])

array([[21768,   138],
       [   40,    16]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9978477371891448
Teste: 0.9923048902650032
Precisão: 0.15527950310559005
Recall: 0.43103448275862066
F1: 0.228310502283105


array([[0.99379109, 0.00620891],
       [0.56896552, 0.43103448]])

array([[21768,   136],
       [   33,    25]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9956151512185573
Teste: 0.9857936435661597
Precisão: 0.0821917808219178
Recall: 0.35294117647058826
F1: 0.13333333333333333


array([[0.9877592 , 0.0122408 ],
       [0.64705882, 0.35294118]])

array([[21626,   268],
       [   44,    24]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho']
--------------------------
----- LOCAL 0 -----
Treino: 0.9969671710945969
Teste: 0.99081123415416
Precisão: 0.13663967611336034
Recall: 0.4639175257731959
F1: 0.2111024237685692


array([[0.99221125, 0.00778875],
       [0.53608247, 0.46391753]])

array([[108664,    853],
       [   156,    135]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9982390577002094
Teste: 0.9927146890082871
Precisão: 0.1357142857142857
Recall: 0.3275862068965517
F1: 0.19191919191919193


array([[0.99447589, 0.00552411],
       [0.67241379, 0.32758621]])

array([[21783,   121],
       [   39,    19]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9959508626423066
Teste: 0.9894362990620162
Precisão: 0.11538461538461539
Recall: 0.5192307692307693
F1: 0.1888111888111888


array([[0.99055226, 0.00944774],
       [0.48076923, 0.51923077]])

array([[21703,   207],
       [   25,    27]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9980141261176655
Teste: 0.9920772242965121
Precisão: 0.14457831325301204
Recall: 0.42857142857142855
F1: 0.21621621621621623


array([[0.99351776, 0.00648224],
       [0.57142857, 0.42857143]])

array([[21764,   142],
       [   32,    24]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9977890391124851
Teste: 0.9915308259721337
Precisão: 0.11904761904761904
Recall: 0.3448275862068966
F1: 0.17699115044247785


array([[0.99324324, 0.00675676],
       [0.65517241, 0.34482759]])

array([[21756,   148],
       [   38,    20]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9959479299207203
Teste: 0.9880247700573718
Precisão: 0.10526315789473684
Recall: 0.38235294117647056
F1: 0.16507936507936505


array([[0.98990591, 0.01009409],
       [0.61764706, 0.38235294]])

array([[21673,   221],
       [   42,    26]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa']
--------------------------
----- LOCAL 0 -----
Treino: 0.9968145513174217
Teste: 0.9907839137403468
Precisão: 0.1441263573543929
Recall: 0.5017182130584192
F1: 0.22392638036809812


array([[0.99208342, 0.00791658],
       [0.49828179, 0.50171821]])

array([[108650,    867],
       [   145,    146]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9976716429591657
Teste: 0.9908022948729623
Precisão: 0.14
Recall: 0.4827586206896552
F1: 0.2170542635658915


array([[0.99214755, 0.00785245],
       [0.51724138, 0.48275862]])

array([[21732,   172],
       [   30,    28]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.995637885841712
Teste: 0.9883890356069575
Precisão: 0.1198501872659176
Recall: 0.6153846153846154
F1: 0.20062695924764892


array([[0.9892743 , 0.0107257 ],
       [0.38461538, 0.61538462]])

array([[21675,   235],
       [   20,    32]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9980141261176655
Teste: 0.9926691558145888
Precisão: 0.1568627450980392
Recall: 0.42857142857142855
F1: 0.22966507177033496


array([[0.9941112 , 0.0058888 ],
       [0.57142857, 0.42857143]])

array([[21777,   129],
       [   32,    24]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.997182492320335
Teste: 0.9900737637737911
Precisão: 0.11904761904761904
Recall: 0.43103448275862066
F1: 0.18656716417910446


array([[0.99155405, 0.00844595],
       [0.56896552, 0.43103448]])

array([[21719,   185],
       [   33,    25]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.995654301654106
Teste: 0.9872051725708041
Precisão: 0.09505703422053231
Recall: 0.36764705882352944
F1: 0.1510574018126888


array([[0.98912944, 0.01087056],
       [0.63235294, 0.36764706]])

array([[21656,   238],
       [   43,    25]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica']
--------------------------
----- LOCAL 0 -----
Treino: 0.9966834548421559
Teste: 0.9905471368206323
Precisão: 0.14462416745956233
Recall: 0.5223367697594502
F1: 0.2265275707898659


array([[0.99179123, 0.00820877],
       [0.47766323, 0.52233677]])

array([[108618,    899],
       [   139,    152]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9976031618697294
Teste: 0.9912576268099444
Precisão: 0.1358695652173913
Recall: 0.43103448275862066
F1: 0.2066115702479339


array([[0.99274105, 0.00725895],
       [0.56896552, 0.43103448]])

array([[21745,   159],
       [   33,    25]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9953542506161731
Teste: 0.9883435024132593
Precisão: 0.11363636363636363
Recall: 0.5769230769230769
F1: 0.18987341772151897


array([[0.98931995, 0.01068005],
       [0.42307692, 0.57692308]])

array([[21676,   234],
       [   22,    30]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9979456477079298
Teste: 0.9921682906839085
Precisão: 0.13291139240506328
Recall: 0.375
F1: 0.19626168224299062


array([[0.99374601, 0.00625399],
       [0.625     , 0.375     ]])

array([[21769,   137],
       [   35,    21]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9973683695630906
Teste: 0.9905290957107732
Precisão: 0.1323529411764706
Recall: 0.46551724137931033
F1: 0.20610687022900762


array([[0.99191928, 0.00808072],
       [0.53448276, 0.46551724]])

array([[21727,   177],
       [   31,    27]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9955466379563472
Teste: 0.9864311082779346
Precisão: 0.11148648648648649
Recall: 0.4852941176470588
F1: 0.18131868131868134


array([[0.98798758, 0.01201242],
       [0.51470588, 0.48529412]])

array([[21631,   263],
       [   35,    33]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar']
--------------------------
----- LOCAL 0 -----
Treino: 0.9964760484783025
Teste: 0.9903285735101268
Precisão: 0.1447004608294931
Recall: 0.5395189003436426
F1: 0.22819767441860464


array([[0.99152643, 0.00847357],
       [0.4604811 , 0.5395189 ]])

array([[108589,    928],
       [   134,    157]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9980140484063472
Teste: 0.9928968217830799
Precisão: 0.1643835616438356
Recall: 0.41379310344827586
F1: 0.23529411764705882


array([[0.99443024, 0.00556976],
       [0.5862069 , 0.4137931 ]])

array([[21782,   122],
       [   34,    24]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9949825906654669
Teste: 0.9876605045077862
Precisão: 0.11578947368421053
Recall: 0.6346153846153846
F1: 0.19584569732937684


array([[0.9884984 , 0.0115016 ],
       [0.36538462, 0.63461538]])

array([[21658,   252],
       [   19,    33]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.997094558901215
Teste: 0.9918950915217193
Precisão: 0.17894736842105263
Recall: 0.6071428571428571
F1: 0.2764227642276423


array([[0.99287866, 0.00712134],
       [0.39285714, 0.60714286]])

array([[21750,   156],
       [   22,    34]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9972020583458882
Teste: 0.9914852927784354
Precisão: 0.12716763005780346
Recall: 0.3793103448275862
F1: 0.1904761904761905


array([[0.99310628, 0.00689372],
       [0.62068966, 0.37931034]])

array([[21753,   151],
       [   36,    22]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9949202309875698
Teste: 0.9866132410527274
Precisão: 0.11301369863013698
Recall: 0.4852941176470588
F1: 0.18333333333333335


array([[0.98817027, 0.01182973],
       [0.51470588, 0.48529412]])

array([[21635,   259],
       [   35,    33]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr']
--------------------------
----- LOCAL 0 -----
Treino: 0.9876241014021453
Teste: 0.9750018213609208
Precisão: 0.078060522696011
Recall: 0.7800687285223368
F1: 0.1419193497968115


array([[0.97551978, 0.02448022],
       [0.21993127, 0.78006873]])

array([[106836,   2681],
       [    64,    227]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9939149660529457
Teste: 0.9848829796921956
Precisão: 0.10632183908045977
Recall: 0.6379310344827587
F1: 0.18226600985221672


array([[0.98580168, 0.01419832],
       [0.36206897, 0.63793103]])

array([[21593,   311],
       [   21,    37]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9876080748014553
Teste: 0.9734541480739459
Precisão: 0.06688417618270799
Recall: 0.7884615384615384
F1: 0.12330827067669171


array([[0.9738932 , 0.0261068 ],
       [0.21153846, 0.78846154]])

array([[21338,   572],
       [   11,    41]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9906184578662127
Teste: 0.9804207267097714
Precisão: 0.09347826086956522
Recall: 0.7678571428571429
F1: 0.16666666666666669


array([[0.98096412, 0.01903588],
       [0.23214286, 0.76785714]])

array([[21489,   417],
       [   13,    43]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9866755365982508
Teste: 0.9705855568709589
Precisão: 0.0625
Recall: 0.7241379310344828
F1: 0.11506849315068493


array([[0.97123813, 0.02876187],
       [0.27586207, 0.72413793]])

array([[21274,   630],
       [   16,    42]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9800724283057649
Teste: 0.9583826609598397
Precisão: 0.060291060291060294
Recall: 0.8529411764705882
F1: 0.11262135922330097


array([[0.95871015, 0.04128985],
       [0.14705882, 0.85294118]])

array([[20990,   904],
       [   10,    58]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr', 'Local']
--------------------------
----- LOCAL 0 -----
Treino: 0.9876241014021453
Teste: 0.9750018213609208
Precisão: 0.078060522696011
Recall: 0.7800687285223368
F1: 0.1419193497968115


array([[0.97551978, 0.02448022],
       [0.21993127, 0.78006873]])

array([[106836,   2681],
       [    64,    227]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9939149660529457
Teste: 0.9848829796921956
Precisão: 0.10632183908045977
Recall: 0.6379310344827587
F1: 0.18226600985221672


array([[0.98580168, 0.01419832],
       [0.36206897, 0.63793103]])

array([[21593,   311],
       [   21,    37]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9876080748014553
Teste: 0.9734541480739459
Precisão: 0.06688417618270799
Recall: 0.7884615384615384
F1: 0.12330827067669171


array([[0.9738932 , 0.0261068 ],
       [0.21153846, 0.78846154]])

array([[21338,   572],
       [   11,    41]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9906184578662127
Teste: 0.9804207267097714
Precisão: 0.09347826086956522
Recall: 0.7678571428571429
F1: 0.16666666666666669


array([[0.98096412, 0.01903588],
       [0.23214286, 0.76785714]])

array([[21489,   417],
       [   13,    43]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9866755365982508
Teste: 0.9705855568709589
Precisão: 0.0625
Recall: 0.7241379310344828
F1: 0.11506849315068493


array([[0.97123813, 0.02876187],
       [0.27586207, 0.72413793]])

array([[21274,   630],
       [   16,    42]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9800724283057649
Teste: 0.9583826609598397
Precisão: 0.060291060291060294
Recall: 0.8529411764705882
F1: 0.11262135922330097


array([[0.95871015, 0.04128985],
       [0.14705882, 0.85294118]])

array([[20990,   904],
       [   10,    58]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr', 'PrecSum']
--------------------------
----- LOCAL 0 -----
Treino: 0.9394216884443348
Teste: 0.8769033221623197
Precisão: 0.015673062838728578
Recall: 0.7353951890034365
F1: 0.030692004302617426


array([[0.87727933, 0.12272067],
       [0.26460481, 0.73539519]])

array([[96077, 13440],
       [   77,   214]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9419671682091216
Teste: 0.8831618249704034
Precisão: 0.012441679626749611
Recall: 0.5517241379310345
F1: 0.024334600760456272


array([[0.88403944, 0.11596056],
       [0.44827586, 0.55172414]])

array([[19364,  2540],
       [   26,    32]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.9436446148429247
Teste: 0.8851197522994263
Precisão: 0.01643835616438356
Recall: 0.8076923076923077
F1: 0.03222094361334867


array([[0.88530351, 0.11469649],
       [0.19230769, 0.80769231]])

array([[19397,  2513],
       [   10,    42]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9352389896499775
Teste: 0.8714142609962663
Precisão: 0.016083916083916083
Recall: 0.8214285714285714
F1: 0.03155006858710562


array([[0.87154204, 0.12845796],
       [0.17857143, 0.82142857]])

array([[19092,  2814],
       [   10,    46]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9319395801130916
Teste: 0.8603496949276023
Precisão: 0.012949174490126255
Recall: 0.6896551724137931
F1: 0.02542103590721322


array([[0.86080168, 0.13919832],
       [0.31034483, 0.68965517]])

array([[18855,  3049],
       [   18,    40]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.9353430556914946
Teste: 0.86827247063109
Precisão: 0.018411183088987385
Recall: 0.7941176470588235
F1: 0.03598800399866711


array([[0.86850279, 0.13149721],
       [0.20588235, 0.79411765]])

array([[19015,  2879],
       [   14,    54]], dtype=int64)

---------- CASE ----------
['DirecaoDoVento', 'VelocidadeDoVento', 'TemperaturaInterna', 'PontoDeOrvalho', 'UmidadeRelativa', 'PressaoAtmosferica', 'RadiacaoSolar', 'TemperaturaDoAr', 'Precipitacao']
--------------------------
----- LOCAL 0 -----
Treino: 0.9860078970951369
Teste: 0.9724428092670844
Precisão: 0.07727975270479134
Recall: 0.8591065292096219
F1: 0.14180374361883155


array([[0.97274396, 0.02725604],
       [0.14089347, 0.85910653]])

array([[106532,   2985],
       [    41,    250]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9923007689448042
Teste: 0.9816045897459248
Precisão: 0.10681818181818181
Recall: 0.8103448275862069
F1: 0.18875502008032127


array([[0.98205807, 0.01794193],
       [0.18965517, 0.81034483]])

array([[21511,   393],
       [   11,    47]], dtype=int64)

----- LOCAL 2 -----
Treino: 0.986444192324244
Teste: 0.971268554776432
Precisão: 0.06596701649175413
Recall: 0.8461538461538461
F1: 0.12239221140472879


array([[0.9715655 , 0.0284345 ],
       [0.15384615, 0.84615385]])

array([[21287,   623],
       [    8,    44]], dtype=int64)

----- LOCAL 3 -----
Treino: 0.9893956291209328
Teste: 0.9780530006374647
Precisão: 0.09195402298850575
Recall: 0.8571428571428571
F1: 0.1660899653979239


array([[0.97836209, 0.02163791],
       [0.14285714, 0.85714286]])

array([[21432,   474],
       [    8,    48]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9847776321195876
Teste: 0.966851834987706
Precisão: 0.06718346253229975
Recall: 0.896551724137931
F1: 0.125


array([[0.96703798, 0.03296202],
       [0.10344828, 0.89655172]])

array([[21182,   722],
       [    6,    52]], dtype=int64)

----- LOCAL 5 -----
Treino: 0.979553685034746
Teste: 0.9579273290228577
Precisão: 0.06592292089249494
Recall: 0.9558823529411765
F1: 0.12333965844402278


array([[0.95793368, 0.04206632],
       [0.04411765, 0.95588235]])

array([[20973,   921],
       [    3,    65]], dtype=int64)

In [19]:
#df_training_result.to_csv('../../../data/analysis/training_test_shift.csv', index=False, sep=';', decimal=',')
df_training_result

Unnamed: 0,Removed_Cols,Local,Features,Train_Acc,Test_Acc,Precision,Recall,F1,Ver_Pos,Model
0,[],0,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.996846,0.989819,0.108057,0.391753,0.169391,0.391753,<xgboost.core.Booster object at 0x000001D8523D...
1,[],1,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.997975,0.991986,0.131250,0.362069,0.192661,0.362069,<xgboost.core.Booster object at 0x000001D8523D...
2,[],2,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.995990,0.988708,0.098361,0.461538,0.162162,0.461538,<xgboost.core.Booster object at 0x000001D8523D...
3,[],3,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.998298,0.993079,0.136364,0.321429,0.191489,0.321429,<xgboost.core.Booster object at 0x000001D8523D...
4,[],4,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.997877,0.992214,0.125828,0.327586,0.181818,0.327586,<xgboost.core.Booster object at 0x000001D8523D...
...,...,...,...,...,...,...,...,...,...,...
61,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",1,"[Dia, PrecSum, Mes_1, Mes_2, Mes_3, Mes_4, Mes...",0.992301,0.981605,0.106818,0.810345,0.188755,0.810345,<xgboost.core.Booster object at 0x000001D83E6B...
62,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",2,"[Dia, PrecSum, Mes_1, Mes_2, Mes_3, Mes_4, Mes...",0.986444,0.971269,0.065967,0.846154,0.122392,0.846154,<xgboost.core.Booster object at 0x000001D83E6B...
63,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",3,"[Dia, PrecSum, Mes_1, Mes_2, Mes_3, Mes_4, Mes...",0.989396,0.978053,0.091954,0.857143,0.166090,0.857143,<xgboost.core.Booster object at 0x000001D83E6B...
64,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",4,"[Dia, PrecSum, Mes_1, Mes_2, Mes_3, Mes_4, Mes...",0.984778,0.966852,0.067183,0.896552,0.125000,0.896552,<xgboost.core.Booster object at 0x000001D83E6B...


### Melhores modelos para cada local

In [20]:
# Identificar melhores modelos
df_best_local = pd.DataFrame(columns = df_training_result.columns)

for l in range(6):
    df_best_local = df_best_local.append(df_training_result[(df_training_result['Local'] == l)].sort_values('F1', ascending=False).reset_index(drop=True).loc[0])
    
df_best_local

Unnamed: 0,Removed_Cols,Local,Features,Train_Acc,Test_Acc,Precision,Recall,F1,Ver_Pos,Model
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",0,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.997167,0.991449,0.150862,0.4811,0.229696,0.4811,<xgboost.core.Booster object at 0x000001D8523D...
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",1,"[TemperaturaDoAr, Precipitacao, Dia, PrecSum, ...",0.998014,0.992897,0.164384,0.413793,0.235294,0.413793,<xgboost.core.Booster object at 0x000001D83E6B...
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",2,"[PressaoAtmosferica, TemperaturaDoAr, Radiacao...",0.995638,0.988389,0.11985,0.615385,0.200627,0.615385,<xgboost.core.Booster object at 0x000001D8523D...
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",3,"[TemperaturaDoAr, Precipitacao, Dia, PrecSum, ...",0.997095,0.991895,0.178947,0.607143,0.276423,0.607143,<xgboost.core.Booster object at 0x000001D83E6B...
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",4,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.997848,0.992305,0.15528,0.431034,0.228311,0.431034,<xgboost.core.Booster object at 0x000001D8523D...
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",5,"[TemperaturaDoAr, Precipitacao, Dia, PrecSum, ...",0.99492,0.986613,0.113014,0.485294,0.183333,0.485294,<xgboost.core.Booster object at 0x000001D83E6B...


In [21]:
#df_best_local.to_csv('../../../data/analysis/best_shift_local.csv', index=False, sep=';', decimal=',')

# OWM

## Carregar dados

In [22]:
df_owm = pd.read_csv('../../../data/cleandata/Info pluviometricas/Merged Data/OpenWeatherMapSantoAndre.csv')

## Preparar dados

In [23]:
# Preparar data hora
df_owm['Data_Hora'] = pd.to_datetime(df_owm['dt_iso'].str[:-10])
df_owm['Data_Hora'] = df_owm.apply(lambda x: x['Data_Hora'] + pd.Timedelta(hours = x['timezone'] / 3600), axis = 1)
# Filtrar nas datas que temos labels
df_owm = df_owm[(datetime.strptime('2019-08-30', '%Y-%m-%d') >= df_owm['Data_Hora']) & (df_owm['Data_Hora'] >= datetime.strptime('2010-01-01', '%Y-%m-%d'))]
# Remover colunas e valores nulos
df_owm = df_owm.drop(columns = ['sea_level', 'grnd_level', 'rain_3h', 'snow_1h', 'snow_3h'])
df_owm = df_owm.fillna(0)
df_owm = df_owm.drop_duplicates(subset='Data_Hora')

In [24]:
# Carregar labels e selecionar LocalMax
df_loc = pd.read_csv('../../../data/cleandata/Ordens de serviço/labels_day.csv', sep=';')
df_loc['Data'] = pd.to_datetime(df_loc['Data'], yearfirst=True)
df_loc = df_loc[['Data', 'LocalMax']]
df_loc.columns = ['Data', 'Label']
df_loc.head()

Unnamed: 0,Data,Label
0,2010-01-07,1
1,2010-01-08,0
2,2010-01-11,0
3,2010-01-19,0
4,2010-01-20,1


In [25]:
# Juntar dados com labels
df_owm['Data'] = pd.to_datetime(df_owm['Data_Hora'].dt.strftime('%Y-%m-%d'), yearfirst=True)
df = df_owm.merge(df_loc, on='Data', how='left')
df = df.fillna(0)

In [26]:
# Soma da precipitação do dia
df_g = df.groupby('Data').sum().reset_index()[['Data', 'rain_1h']]
df_g.columns = ['Data', 'rain_sum']
df = df.merge(df_g, on='Data')

In [27]:
# Colunas de data
df['Mes'] = df['Data_Hora'].dt.month
df['Dia'] = df['Data_Hora'].dt.day
# df = df.drop(columns = ['dt', 'dt_iso', 'timezone', 'city_name', 'lat', 'lon', 'weather_icon', 'weather_id', 'weather_main',
#                         'Vitoria', 'Erasmo', 'Paraiso', 'RM', 'Null', 'Camilopolis'])
# Remover colunas desnecessárias
df = df.drop(columns = ['dt', 'dt_iso', 'timezone', 'city_name', 'lat', 'lon', 'weather_icon', 'weather_id', 'weather_main'])
# df = df.drop(columns = ['dt', 'dt_iso', 'timezone', 'city_name', 'lat', 'lon', 'weather_icon', 'weather_id', 'weather_main',
#                         'Data_Hora', 'Data'])
# df['weather_description'] = df['weather_description'].rank(method='dense', ascending=False).astype(int)

In [28]:
# Realizar OHE para colunas categóricas
cols_dummies = ['Mes', 'weather_description', 'Dia']

df_ohe = df.copy()

for c in cols_dummies:
    df_ohe = pd.concat([df_ohe, pd.get_dummies(df_ohe[c], prefix=c)], axis=1)
    
df_ohe = df_ohe.sort_values(['Data'])

# Shiftar label 6 horas para frente
df_ohe['Label_Old'] = df_ohe['Label']
df_ohe['Label'] = df_ohe['Label'].shift(-1*6, fill_value = 0)

In [29]:
df_ohe.columns

Index(['temp', 'feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity',
       'wind_speed', 'wind_deg', 'rain_1h', 'clouds_all',
       'weather_description', 'Data_Hora', 'Data', 'Label', 'rain_sum', 'Mes',
       'Dia', 'Mes_1', 'Mes_2', 'Mes_3', 'Mes_4', 'Mes_5', 'Mes_6', 'Mes_7',
       'Mes_8', 'Mes_9', 'Mes_10', 'Mes_11', 'Mes_12',
       'weather_description_broken clouds', 'weather_description_drizzle',
       'weather_description_few clouds', 'weather_description_fog',
       'weather_description_haze',
       'weather_description_heavy intensity drizzle',
       'weather_description_heavy intensity rain',
       'weather_description_heavy intesity shower rain',
       'weather_description_light intensity drizzle',
       'weather_description_light intensity drizzle rain',
       'weather_description_light intensity shower rain',
       'weather_description_light rain', 'weather_description_mist',
       'weather_description_moderate rain',
       'weather_description_ov

## Treinar modelo

In [30]:
# Remover colunas de passo em passo
test_cases = [
    [],
    ['feels_like'],
    ['feels_like', 'temp_min', 'temp_max'],
    ['feels_like', 'temp_min', 'temp_max', 'pressure'],
    ['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity'],
    ['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg'],
    ['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'clouds_all'],
    ['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'clouds_all'] + [c for c in df_ohe.columns if 'weather_description' in c],
    ['temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'clouds_all'] + [c for c in df_ohe.columns if 'weather_description' in c],
]

# Salvar todos os resultados em um dataframe
df_training_result_owm = pd.DataFrame(columns = ['Removed_Cols', 'Features', 'Train_Acc', 'Test_Acc', 'Precision', 'Recall', 'F1', 'Ver_Pos'])
label = 'Label'

for case in test_cases:
    print(f'---------- CASE ----------')
    print(case)
    print(f'--------------------------')
    
    df_train = df_ohe.copy()

    cols_rem = ['Label', 'Label_Old', 'Data', 'Data_Hora'] + cols_dummies
    cols_rem = cols_rem + case

    model, training_res, y_treino_pred, y_teste_pred = trainXGB(df_train, cols_rem, label)

    df_training_result_owm = df_training_result_owm.append(
        {**{'Removed_Cols': case}, **training_res},
        ignore_index=True
    )

---------- CASE ----------
[]
--------------------------
Treino: 0.9929923158314686
Teste: 0.9678346456692913
Precisão: 0.4446397188049209
Recall: 0.7322720694645442
F1: 0.5533078184800437


array([[0.97442228, 0.02557772],
       [0.26772793, 0.73227207]])

array([[24077,   632],
       [  185,   506]], dtype=int64)

---------- CASE ----------
['feels_like']
--------------------------
Treino: 0.9924892889975889
Teste: 0.9660629921259842
Precisão: 0.42493415276558383
Recall: 0.7004341534008683
F1: 0.5289617486338798


array([[0.97349144, 0.02650856],
       [0.29956585, 0.70043415]])

array([[24054,   655],
       [  207,   484]], dtype=int64)

---------- CASE ----------
['feels_like', 'temp_min', 'temp_max']
--------------------------
Treino: 0.9928275311789908
Teste: 0.9682283464566929
Precisão: 0.44876325088339225
Recall: 0.7351664254703328
F1: 0.5573230938014263


array([[0.97474604, 0.02525396],
       [0.26483357, 0.73516643]])

array([[24085,   624],
       [  183,   508]], dtype=int64)

---------- CASE ----------
['feels_like', 'temp_min', 'temp_max', 'pressure']
--------------------------
Treino: 0.9931744462368389
Teste: 0.9705511811023622
Precisão: 0.473972602739726
Recall: 0.7510853835021708
F1: 0.5811870100783875


array([[0.97668866, 0.02331134],
       [0.24891462, 0.75108538]])

array([[24133,   576],
       [  172,   519]], dtype=int64)

---------- CASE ----------
['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity']
--------------------------
Treino: 0.993477996912456
Teste: 0.9711811023622047
Precisão: 0.4806054872280038
Recall: 0.7351664254703328
F1: 0.5812356979405034


array([[0.97778138, 0.02221862],
       [0.26483357, 0.73516643]])

array([[24160,   549],
       [  183,   508]], dtype=int64)

---------- CASE ----------
['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg']
--------------------------
Treino: 0.992619382144282
Teste: 0.9719291338582677
Precisão: 0.48996350364963503
Recall: 0.7771345875542692
F1: 0.6010072747621713


array([[0.97737666, 0.02262334],
       [0.22286541, 0.77713459]])

array([[24150,   559],
       [  154,   537]], dtype=int64)

---------- CASE ----------
['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'clouds_all']
--------------------------
Treino: 0.9926714194029592
Teste: 0.9721653543307086
Precisão: 0.49280575539568344
Recall: 0.7930535455861071
F1: 0.6078757626178591


array([[0.97717431, 0.02282569],
       [0.20694645, 0.79305355]])

array([[24145,   564],
       [  143,   548]], dtype=int64)

---------- CASE ----------
['feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'clouds_all', 'weather_description', 'weather_description_broken clouds', 'weather_description_drizzle', 'weather_description_few clouds', 'weather_description_fog', 'weather_description_haze', 'weather_description_heavy intensity drizzle', 'weather_description_heavy intensity rain', 'weather_description_heavy intesity shower rain', 'weather_description_light intensity drizzle', 'weather_description_light intensity drizzle rain', 'weather_description_light intensity shower rain', 'weather_description_light rain', 'weather_description_mist', 'weather_description_moderate rain', 'weather_description_overcast clouds', 'weather_description_proximity shower rain', 'weather_description_proximity thunderstorm', 'weather_description_rain and drizzle', 'weather_description_scattered clouds', 'weather_description_shower rain', 'weather_description_sky is clear', 'weather_descriptio

array([[0.97551499, 0.02448501],
       [0.17800289, 0.82199711]])

array([[24104,   605],
       [  123,   568]], dtype=int64)

---------- CASE ----------
['temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'clouds_all', 'weather_description', 'weather_description_broken clouds', 'weather_description_drizzle', 'weather_description_few clouds', 'weather_description_fog', 'weather_description_haze', 'weather_description_heavy intensity drizzle', 'weather_description_heavy intensity rain', 'weather_description_heavy intesity shower rain', 'weather_description_light intensity drizzle', 'weather_description_light intensity drizzle rain', 'weather_description_light intensity shower rain', 'weather_description_light rain', 'weather_description_mist', 'weather_description_moderate rain', 'weather_description_overcast clouds', 'weather_description_proximity shower rain', 'weather_description_proximity thunderstorm', 'weather_description_rain and drizzle', 'weather_description_scattered clouds', 'weather_description_shower rain', 'weather_description_sky is clear', 'weather_description_thunderstorm

array([[0.97770043, 0.02229957],
       [0.21997106, 0.78002894]])

array([[24158,   551],
       [  152,   539]], dtype=int64)

In [31]:
df_training_result_owm

Unnamed: 0,Removed_Cols,Features,Train_Acc,Test_Acc,Precision,Recall,F1,Ver_Pos
0,[],"[temp, feels_like, temp_min, temp_max, pressur...",0.992992,0.967835,0.44464,0.732272,0.553308,0.732272
1,[feels_like],"[temp, temp_min, temp_max, pressure, humidity,...",0.992489,0.966063,0.424934,0.700434,0.528962,0.700434
2,"[feels_like, temp_min, temp_max]","[temp, pressure, humidity, wind_speed, wind_de...",0.992828,0.968228,0.448763,0.735166,0.557323,0.735166
3,"[feels_like, temp_min, temp_max, pressure]","[temp, humidity, wind_speed, wind_deg, rain_1h...",0.993174,0.970551,0.473973,0.751085,0.581187,0.751085
4,"[feels_like, temp_min, temp_max, pressure, hum...","[temp, wind_speed, wind_deg, rain_1h, clouds_a...",0.993478,0.971181,0.480605,0.735166,0.581236,0.735166
5,"[feels_like, temp_min, temp_max, pressure, hum...","[temp, rain_1h, clouds_all, rain_sum, Mes_1, M...",0.992619,0.971929,0.489964,0.777135,0.601007,0.777135
6,"[feels_like, temp_min, temp_max, pressure, hum...","[temp, rain_1h, rain_sum, Mes_1, Mes_2, Mes_3,...",0.992671,0.972165,0.492806,0.793054,0.607876,0.793054
7,"[feels_like, temp_min, temp_max, pressure, hum...","[temp, rain_1h, rain_sum, Mes_1, Mes_2, Mes_3,...",0.991145,0.971339,0.484228,0.821997,0.609442,0.821997
8,"[temp_min, temp_max, pressure, humidity, wind_...","[temp, feels_like, rain_1h, rain_sum, Mes_1, M...",0.992862,0.972323,0.494495,0.780029,0.605278,0.780029


# Teste "real"

In [32]:
df_best_local = df_best_local.reset_index(drop=True)
df_best_local

Unnamed: 0,Removed_Cols,Local,Features,Train_Acc,Test_Acc,Precision,Recall,F1,Ver_Pos,Model
0,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",0,"[TemperaturaDoAr, RadiacaoSolar, Precipitacao,...",0.996536,0.990019,0.667853,0.920059,0.773927,0.920059,<xgboost.core.Booster object at 0x0000015C4D6F...
1,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",1,"[TemperaturaDoAr, RadiacaoSolar, Precipitacao,...",0.998361,0.994582,0.788009,0.948454,0.860819,0.948454,<xgboost.core.Booster object at 0x0000015C4D6F...
2,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",2,"[PressaoAtmosferica, TemperaturaDoAr, Radiacao...",0.996801,0.993216,0.733075,0.971795,0.835722,0.971795,<xgboost.core.Booster object at 0x0000015C4D6F...
3,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",3,"[TemperaturaDoAr, Precipitacao, Dia, PrecSum, ...",0.998119,0.995173,0.823762,0.960739,0.886994,0.960739,<xgboost.core.Booster object at 0x0000015C4D6F...
4,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",4,"[PressaoAtmosferica, TemperaturaDoAr, Radiacao...",0.99661,0.986932,0.614068,0.793612,0.69239,0.793612,<xgboost.core.Booster object at 0x0000015C4D6F...
5,"[DirecaoDoVento, VelocidadeDoVento, Temperatur...",5,"[UmidadeRelativa, PressaoAtmosferica, Temperat...",0.996588,0.988526,0.659176,0.834123,0.736402,0.834123,<xgboost.core.Booster object at 0x0000015C4D6F...


In [33]:
df_m = df_f_ohe[(df_f_ohe['Label'] == 1) | (df_f_ohe['Label_Old'] == 1)].copy()
df_m['Data'] = df_m['Data_Hora'].dt.strftime('%Y-%m-%d')

In [34]:
def getPrecMomento(row):
    prec_momento = df_m.loc[(df_m['Data_Hora'] <= row['Data_Hora']) & (df_m['Local'] == row['Local']) & (df_m['Data'] == row['Data']), 'Precipitacao'].sum()
    return prec_momento

df_m['PrecMomento'] = df_m.apply(getPrecMomento, axis=1)

df_m = df_m.rename(columns = {'PrecSum': 'PrecSumOld', 'PrecMomento': 'PrecSum'})

In [35]:
df_m_2 = df_m.copy()
df_m_2['Label_Pred'] = 0

for l in range(6):
    label_pred = df_best_local.loc[l,'Model'].predict(xgboost.DMatrix(data=df_m_2.loc[df_m_2['Local'] == l, df_best_local.loc[l, 'Features']]))
    df_m_2.loc[df_m_2['Local'] == l, 'Label_Pred'] = [1 if i>0.5 else 0 for i in label_pred]

In [36]:
print(df_m_2[df_m_2['Label_Pred'] == 1].shape)
print(df_m_2.shape)

(2834, 36)
(9848, 36)


In [37]:
df_m_2[['Local', 'Data_Hora', 'Precipitacao', 'PrecSum', 'PrecSumOld', 'Label', 'Label_Pred']].sort_values(by=['Local', 'Data_Hora']
).to_csv('../../../data/analysis/labels_prediction_shift.csv', index=False, sep=';', decimal=',')

In [38]:
pd.set_option("display.max_rows", 200)
df_f_ohe[(df_f_ohe['Data_Hora'] >= datetime(2018,12,23)) & (df_f_ohe['Data_Hora'] <= datetime(2018,12,30)) & (df_f_ohe['Local'] == 4)][['Data_Hora', 'Precipitacao', 'PrecSum']]

Unnamed: 0,Data_Hora,Precipitacao,PrecSum
343418,2018-12-23 00:00:00,0.0,78.4
343419,2018-12-23 01:00:00,0.0,78.4
343420,2018-12-23 02:00:00,0.0,78.4
343421,2018-12-23 03:00:00,1.0,78.4
343422,2018-12-23 04:00:00,0.6,78.4
343423,2018-12-23 05:00:00,0.0,78.4
343424,2018-12-23 06:00:00,0.0,78.4
343425,2018-12-23 07:00:00,0.0,78.4
343426,2018-12-23 08:00:00,0.0,78.4
343427,2018-12-23 09:00:00,0.0,78.4


In [39]:
df_f_ohe['Local']

48        1
49        1
50        1
51        1
52        1
         ..
365972    5
365973    5
365974    5
365975    5
365976    5
Name: Local, Length: 366025, dtype: int64

# Usar modelo 1 para gerar novas labels

## Treinar modelo

In [117]:
df_p = pd.read_csv('../../../data/cleandata/Info pluviometricas/Merged Data/prepped_data.csv', sep=';')
df_p['Data_Hora'] = pd.to_datetime(df_p['Data_Hora'], yearfirst=True)
df_p.groupby('Label').count()

Unnamed: 0_level_0,Data_Hora,Mes,Dia,Local,Precipitacao,PrecSum
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,359227,359227,359227,359227,359227,359227
1.0,6798,6798,6798,6798,6798,6798


In [118]:
df_p = df_p.sort_values(['Data_Hora', 'Local'])

In [64]:
# Parâmetros
label = 'Label'
cols_rem = ['LocalMax', 'Label', 'Label_Old', 'Cluster', 'Data', 'Hora', 'Data_Hora', 'Ordens', 'Minuto'] + [c for c in df_p.columns if 'Hora_' in c]
# Conjunto de resultados
prepped_models = {}

# Gerar um modelo para cada local e um modelo geral
for l in range(6):
    if l != 0:
        df_train = df_p[df_p['Local'] == l]
    else:
        df_train = df_p.copy()
        
    print(f'----- LOCAL {l} -----')
    model, training_res, y_treino_pred, y_teste_pred = trainXGB(df_train, cols_rem, label)
    
    # Salvar modelo e resultados
    prepped_models[l] = {
        'model': model,
        'results': training_res,
        'y_treino': y_treino_pred,
        'y_teste': y_teste_pred
    }

----- LOCAL 0 -----
Treino: 0.9997236118954259
Teste: 0.9995173393559668
Precisão: 0.9778633301251203
Recall: 0.9965669445806769
F1: 0.9871265484576148


array([[9.99573161e-01, 4.26838887e-04],
       [3.43305542e-03, 9.96566945e-01]])

array([[107723,     46],
       [     7,   2032]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.999791007344599
Teste: 0.9994080684819233
Precisão: 0.9837209302325581
Recall: 0.986013986013986
F1: 0.9848661233993015


array([[9.99674918e-01, 3.25082432e-04],
       [1.39860140e-02, 9.86013986e-01]])

array([[21526,     7],
       [    6,   423]], dtype=int64)

----- LOCAL 2 -----
Treino: 1.0
Teste: 1.0
Precisão: 1.0
Recall: 1.0
F1: 1.0


array([[1., 0.],
       [0., 1.]])

array([[21568,     0],
       [    0,   394]], dtype=int64)

----- LOCAL 3 -----
Treino: 1.0
Teste: 1.0
Precisão: 1.0
Recall: 1.0
F1: 1.0


array([[1., 0.],
       [0., 1.]])

array([[21590,     0],
       [    0,   372]], dtype=int64)

----- LOCAL 4 -----
Treino: 0.9997609942638623
Teste: 0.9989072033512431
Precisão: 0.9487179487179487
Recall: 1.0
F1: 0.9736842105263158


array([[0.99888465, 0.00111535],
       [0.        , 1.        ]])

array([[21494,    24],
       [    0,   444]], dtype=int64)

----- LOCAL 5 -----
Treino: 1.0
Teste: 0.999772334031509
Precisão: 0.9974874371859297
Recall: 0.9900249376558603
F1: 0.9937421777221527


array([[9.99953620e-01, 4.63800380e-05],
       [9.97506234e-03, 9.90024938e-01]])

array([[21560,     1],
       [    4,   397]], dtype=int64)

In [65]:
prepped_models[0]['results']

{'Features': ['Mes', 'Dia', 'Local', 'Precipitacao', 'PrecSum'],
 'Train_Acc': 0.9997236118954259,
 'Test_Acc': 0.9995173393559668,
 'Precision': 0.9778633301251203,
 'Recall': 0.9965669445806769,
 'F1': 0.9871265484576148,
 'Ver_Pos': 0.9965669445806769}

In [66]:
prepped_models[0]['model']

<xgboost.core.Booster at 0x1d83e697a00>

## Obter precipitação até aquele momento do dia

In [119]:
df_m = df_p[df_p['Label'] == 1].copy()
df_m['Data'] = df_m['Data_Hora'].dt.strftime("%Y-%m-%d")

def getPrecMomento(row):
    prec_momento = df_m.loc[(df_m['Data_Hora'] <= row['Data_Hora']) & (df_m['Local'] == row['Local']) & (df_m['Data'] == row['Data']), 'Precipitacao'].sum()
    return prec_momento

df_m['PrecMomento'] = df_m.apply(getPrecMomento, axis=1)

df_m = df_m.rename(columns = {'PrecSum': 'PrecSumOld', 'PrecMomento': 'PrecSum'})

## Prever com acumulo do dia

In [120]:
label_pred = prepped_models[0]['model'].predict(xgboost.DMatrix(data=df_m[prepped_models[0]['results']['Features']]))
df_m['Label_Pred'] = [1 if i>0.5 else 0 for i in label_pred]

In [121]:
df_m['Label_Pred']

816       0
744       0
817       0
745       0
818       0
         ..
362471    1
362519    1
362496    1
362472    1
362520    1
Name: Label_Pred, Length: 6798, dtype: int64

Teste: se, com essa estratégia, o modelo consegue prever em pelo menos um momento do dia com label que vai ocorrer alagamento

In [122]:
df_g = df_m.groupby(['Data', 'Local']).max()
print(df_g[df_g['Label'] == df_g['Label_Pred']].shape)
print(df_g.shape)

(285, 8)
(285, 8)


Próximo passo: obter o primeiro momento em que o modelo prevê um alagamento

In [123]:
df_m.columns

Index(['Data_Hora', 'Mes', 'Dia', 'Local', 'Precipitacao', 'PrecSumOld',
       'Label', 'Data', 'PrecSum', 'Label_Pred'],
      dtype='object')

In [124]:
df_g = df_m.groupby(['Data', 'Local', 'Label_Pred']).min().reset_index()
#df_g[df_g['Label_Pred'] == 1]

df_g = df_g.loc[df_g['Label_Pred'] == 1, ['Data', 'Local', 'Data_Hora']].rename(columns={'Data_Hora':'Min_Hora'})
df_g['Min_Hora'] = df_g['Min_Hora'].dt.hour

## Treinar modelo com todos os dias

In [125]:
df_p_new = df_p.copy()
df_p_new['Data'] = df_p_new['Data_Hora'].dt.strftime('%Y-%m-%d')
df_p_new = df_p_new.merge(df_g, on=['Local', 'Data'], how='left').fillna(24)

In [126]:
df_p_new['Label_New'] = 0
df_p_new.loc[(df_p_new['Label'] == 1) & (df_p_new['Data_Hora'].dt.hour >= df_p_new['Min_Hora']), 'Label_New'] = 1
df_p_new = df_p_new.rename(columns = {'Label': 'Label_Old', 'Label_New': 'Label'})

In [127]:
df_p_new['Label'].value_counts()

0    364052
1      1973
Name: Label, dtype: int64

In [128]:
df_p_new['Label'] = df_p_new['Label'].shift(-5*6, fill_value = 0)

...

In [129]:
df_p_new[['Data_Hora', 'Local', 'Label']]

Unnamed: 0,Data_Hora,Local,Label
0,2011-01-01 00:00:00,1,0
1,2011-01-01 00:00:00,2,0
2,2011-01-01 00:00:00,3,0
3,2011-01-01 00:00:00,4,0
4,2011-01-01 00:00:00,5,0
...,...,...,...
366020,2019-06-30 23:00:00,1,0
366021,2019-06-30 23:00:00,2,0
366022,2019-06-30 23:00:00,3,0
366023,2019-06-30 23:00:00,4,0


In [133]:
# Parâmetros
label = 'Label'
cols_rem = ['LocalMax', 'Label', 'Label_Old', 'Cluster', 'Data', 'Hora', 'Data_Hora', 'Ordens', 'Minuto', 'Min_Hora'] + [c for c in df_p.columns if 'Hora_' in c]
# Conjunto de resultados
prepped_models_new = {}

# Gerar um modelo para cada local e um modelo geral
for l in range(6):
    if l != 0:
        df_train = df_p_new[df_p_new['Local'] == l]
    else:
        df_train = df_p_new.copy()
        
    print(f'----- LOCAL {l} -----')
    model, training_res, y_treino_pred, y_teste_pred = trainXGB(df_train, cols_rem, label)
    
    # Salvar modelo e resultados
    prepped_models_new[l] = {
        'model': model,
        'results': training_res,
        'y_treino': y_treino_pred,
        'y_teste': y_teste_pred
    }

----- LOCAL 0 -----
Treino: 0.997835863064873
Teste: 0.9918038758560397
Precisão: 0.3605072463768116
Recall: 0.6722972972972973
F1: 0.46933962264150936


array([[0.99353575, 0.00646425],
       [0.3277027 , 0.6722973 ]])

array([[108510,    706],
       [   194,    398]], dtype=int64)

----- LOCAL 1 -----
Treino: 0.9978422064419946
Teste: 0.9894362990620162
Precisão: 0.2601626016260163
Recall: 0.5614035087719298
F1: 0.35555555555555557


array([[0.99166972, 0.00833028],
       [0.43859649, 0.56140351]])

array([[21666,   182],
       [   50,    64]], dtype=int64)

----- LOCAL 2 -----


ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

### Treinar modelo com mais features

In [134]:
df_f_ohe.drop(columns='Label').merge(df_p_new[['Data_Hora', 'Local', 'Label']], on=['Data_Hora', 'Local'])

Unnamed: 0,Data_Hora,Local,UmidadeRelativa,PressaoAtmosferica,TemperaturaDoAr,TemperaturaInterna,PontoDeOrvalho,RadiacaoSolar,DirecaoDoVento,VelocidadeDoVento,...,Mes_5,Mes_6,Mes_7,Mes_8,Mes_9,Mes_10,Mes_11,Mes_12,Label_Old,Label
0,2011-01-01 00:00:00,1,84.8,920.6,20.2,22.800000,17.5,0.0,137.0,1.5,...,0,0,0,0,0,0,0,0,0.0,0
1,2011-01-01 01:00:00,1,84.0,919.9,20.1,22.800000,17.3,0.0,151.0,1.8,...,0,0,0,0,0,0,0,0,0.0,0
2,2011-01-01 02:00:00,1,85.8,919.2,19.8,22.935631,17.4,0.0,155.0,1.8,...,0,0,0,0,0,0,0,0,0.0,0
3,2011-01-01 03:00:00,1,87.6,918.6,19.5,22.400000,17.3,0.0,152.0,1.5,...,0,0,0,0,0,0,0,0,0.0,0
4,2011-01-01 04:00:00,1,87.4,918.4,19.4,21.815844,17.2,0.0,147.0,1.6,...,0,0,0,0,0,0,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366020,2019-06-30 19:00:00,5,26.9,925.2,23.7,30.300000,4.2,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0.0,0
366021,2019-06-30 20:00:00,5,28.6,925.7,22.9,29.500000,4.3,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0.0,0
366022,2019-06-30 21:00:00,5,30.6,925.9,22.3,29.000000,4.8,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0.0,0
366023,2019-06-30 22:00:00,5,34.4,925.9,21.4,28.300000,5.7,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0.0,0
