In [1]:
# Imports e instalações

import pandas as pd
import numpy as np

# Viz
import matplotlib.pyplot as plt
import seaborn as sns

#!pip install scikit-learn
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer


# Modelos
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Métricas
from sklearn.metrics import (accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, classification_report, f1_score)
from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay

In [2]:
# Load the dataset

# `df` receives the v6 climate dataset, read from a comma-separated CSV file
# NOTE(review): absolute, machine-specific path — this cell only runs where that file exists
df = pd.read_csv(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v6.csv', sep=',')
df

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,pressao_15h_isna,choveu_hoje,amplitude_termica,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,False,0,16.3,48.5,4.7,-35.2931,149.1269,25.17,-1.50,la_nina
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,False,1,12.9,58.0,4.0,-35.2931,149.1269,25.17,-1.50,la_nina
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,False,1,9.7,75.5,2.3,-35.2931,149.1269,25.17,-1.50,la_nina
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,False,1,2.2,59.0,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,False,1,8.5,58.5,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,False,0,12.7,54.5,3.2,-33.8678,151.2100,27.97,0.39,neutro
50155,2017-06-25,Perth,6.3,17.0,0.0,1.6,7.9,E,26.0,SE,...,False,0,10.7,62.0,2.6,-31.9559,115.8606,27.97,0.39,neutro
50156,2017-06-25,Sydney,7.6,19.3,0.0,3.4,9.4,W,35.0,W,...,False,0,11.7,52.5,3.2,-33.8678,151.2100,27.97,0.39,neutro
50157,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,False,0,11.9,56.5,2.5,-31.9559,115.8606,27.97,0.39,neutro


In [3]:
# Check the DataFrame's dimensions as (rows, columns)
df.shape

(50159, 42)

In [4]:
# Check the data type of each column
df.dtypes

data                        object
localidade                  object
temp_min                   float64
temp_max                   float64
chuva_mm                   float64
evaporacao                 float64
horas_sol                  float64
vento_rajada_direcao        object
vento_rajada_velocidade    float64
vento_direcao_9h            object
vento_direcao_15h           object
vento_veloc_9h             float64
vento_veloc_15h            float64
umidade_9h                 float64
umidade_15h                float64
pressao_9h                 float64
pressao_15h                float64
nuvem_9h                   float64
nuvem_15h                  float64
temp_9h                    float64
temp_15h                   float64
choveu_hoje_fex             object
chove_amanha_vtr             int64
ano                          int64
mes                          int64
dia                          int64
trimestre_quarter            int64
horas_sol_isna                bool
evaporacao_isna     

In [5]:
# Count the missing values in each column
df.isnull().sum()

data                       0
localidade                 0
temp_min                   0
temp_max                   0
chuva_mm                   0
evaporacao                 0
horas_sol                  0
vento_rajada_direcao       0
vento_rajada_velocidade    0
vento_direcao_9h           0
vento_direcao_15h          0
vento_veloc_9h             0
vento_veloc_15h            0
umidade_9h                 0
umidade_15h                0
pressao_9h                 0
pressao_15h                0
nuvem_9h                   0
nuvem_15h                  0
temp_9h                    0
temp_15h                   0
choveu_hoje_fex            0
chove_amanha_vtr           0
ano                        0
mes                        0
dia                        0
trimestre_quarter          0
horas_sol_isna             0
evaporacao_isna            0
nuvem_9h_isna              0
nuvem_15h_isna             0
pressao_9h_isna            0
pressao_15h_isna           0
choveu_hoje                0
amplitude_term

In [6]:
# Convert the date column to datetime and sort from oldest to newest, which is
# the ordering a temporal split relies on.
# errors='coerce' turns unparseable values into NaT instead of raising, so they
# are counted explicitly here; otherwise they would be sorted to the end of the
# frame without any notice.
df['data'] = pd.to_datetime(df['data'], errors='coerce')
datas_invalidas = int(df['data'].isna().sum())
if datas_invalidas:
    print(f"WARNING: {datas_invalidas} row(s) have an unparseable 'data' value (NaT)")

df = df.sort_values(by='data').reset_index(drop=True)

# display() returns None, so it is called on its own line and df.dtypes is left
# as the cell's result rather than combining both in a tuple expression.
display(df.head(3))
df.dtypes

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,pressao_15h_isna,choveu_hoje,amplitude_termica,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,False,0,16.3,48.5,4.7,-35.2931,149.1269,25.17,-1.5,la_nina
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,False,1,12.9,58.0,4.0,-35.2931,149.1269,25.17,-1.5,la_nina
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,False,1,9.7,75.5,2.3,-35.2931,149.1269,25.17,-1.5,la_nina


(data                       datetime64[ns]
 localidade                         object
 temp_min                          float64
 temp_max                          float64
 chuva_mm                          float64
 evaporacao                        float64
 horas_sol                         float64
 vento_rajada_direcao               object
 vento_rajada_velocidade           float64
 vento_direcao_9h                   object
 vento_direcao_15h                  object
 vento_veloc_9h                    float64
 vento_veloc_15h                   float64
 umidade_9h                        float64
 umidade_15h                       float64
 pressao_9h                        float64
 pressao_15h                       float64
 nuvem_9h                          float64
 nuvem_15h                         float64
 temp_9h                           float64
 temp_15h                          float64
 choveu_hoje_fex                    object
 chove_amanha_vtr                    int64
 ano       

In [7]:
# Summary statistics for every column; include='all' covers non-numeric columns too
df.describe(include='all')

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,pressao_15h_isna,choveu_hoje,amplitude_termica,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso
count,50159,50159,50159.0,50159.0,50159.0,50159.0,50159.0,50159,50159.0,50159,...,50159,50159.0,50159.0,50159.0,50159.0,50159.0,50159.0,50159.0,50159.0,50159
unique,,20,,,,,,16,,16,...,1,,,,,,,,,3
top,,Perth,,,,,,E,,N,...,False,,,,,,,,,neutro
freq,,5943,,,,,,4047,,4555,...,50159,,,,,,,,,23535
mean,2012-09-14 20:55:12.123447552,,13.47748,24.500787,2.114372,5.582643,7.845571,,40.859228,,...,,0.214458,11.023308,56.626089,2.50826,-30.953285,141.124346,27.107321,0.08463,
min,2007-11-01 00:00:00,,-6.7,4.1,0.0,0.0,0.0,,11.0,,...,,0.0,-3.2,0.0,-10.8,-42.8806,115.8606,25.0,-1.64,
25%,2010-07-15 00:00:00,,8.4,18.8,0.0,2.8,5.2,,31.0,,...,,0.0,7.5,45.5,1.5,-37.8142,136.8,26.33,-0.57,
50%,2012-07-26 00:00:00,,13.0,24.4,0.0,5.0,8.8,,39.0,,...,,0.0,10.4,58.0,2.8,-33.8678,144.9631,27.21,-0.14,
75%,2014-10-06 00:00:00,,18.6,30.2,0.6,7.6,10.8,,48.0,,...,,0.0,14.2,69.0,3.7,-27.4678,147.3689,27.82,0.58,
max,2017-06-25 00:00:00,,31.4,48.1,206.2,81.2,14.5,,124.0,,...,,1.0,31.4,100.0,15.1,-12.4381,153.1189,29.26,2.75,


In [8]:
# Cyclical encoding of the month.
# Projecting the month onto the unit circle (sine and cosine, both in [-1, 1])
# lets a model see that December and January are adjacent.
# Both components are derived from the same month series, taken from the
# datetime column, so the sine and cosine can never disagree with each other.
mes_do_ano = df['data'].dt.month
angulo_mes = 2 * np.pi * mes_do_ano / 12

df['mes_sin'] = np.sin(angulo_mes)
df['mes_cos'] = np.cos(angulo_mes)
df.head(3)

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,amplitude_termica,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,16.3,48.5,4.7,-35.2931,149.1269,25.17,-1.5,la_nina,-0.5,0.866025
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,12.9,58.0,4.0,-35.2931,149.1269,25.17,-1.5,la_nina,-0.5,0.866025
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,9.7,75.5,2.3,-35.2931,149.1269,25.17,-1.5,la_nina,-0.5,0.866025


In [9]:
# Encode the textual rain flag as 1/0.
# The source has to be `choveu_hoje_fex` itself: `choveu_hoje` is already
# numeric, so pushing it through string keys would fill this column with NaN.
# The identity entries (1 -> 1, 0 -> 0) keep the cell safe to run more than once.
# NOTE(review): assumes the raw labels are exactly 'Yes' / 'No' — confirm.
df['choveu_hoje_fex'] = df['choveu_hoje_fex'].map({'Yes': 1, 'No': 0, 1: 1, 0: 0})

In [10]:
# Display the current state of `df`
df

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,amplitude_termica,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,16.3,48.5,4.7,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,12.9,58.0,4.0,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,9.7,75.5,2.3,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,2.2,59.0,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,8.5,58.5,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,11.9,56.5,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.000000
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,5.7,61.5,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,8.4,65.0,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,12.7,54.5,3.2,-33.8678,151.2100,27.97,0.39,neutro,1.224647e-16,-1.000000


In [11]:
# Export of `df` as version 7 (CSV and Excel) — currently disabled.
# NOTE(review): a later cell reads australia_clima_v7.csv, so that file must already exist on disk.
#df.to_csv(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v7.csv', sep=',', index=False)
#df.to_excel(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v7.xlsx', index=False)

In [12]:
# Load the emissions workbook from the raw-data folder into `df_gas_carbo`
df_gas_carbo = pd.read_excel(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_brutos\emissao_gas_carbo.xlsx')

In [13]:
# Display the emissions table
df_gas_carbo

Unnamed: 0,ano,au_mt
0,1990,620.8589
1,1991,601.5758
2,1992,562.4539
3,1993,541.3763
4,1994,530.7538
5,1995,518.2441
6,1996,520.0307
7,1997,518.5591
8,1998,536.806
9,1999,557.462


In [14]:
# Reload the version-7 dataset from disk into `df_2`.
# NOTE(review): this reads a file instead of reusing the in-memory `df`; the only v7 export
# visible in this notebook is commented out, so confirm the file on disk is up to date.
# No parse_dates is passed, so the 'data' column is read back as text, not datetime.
df_2 = pd.read_csv(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v7.csv', sep=',')

In [15]:
# Show the last rows of the reloaded dataset
df_2.tail()

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,amplitude_termica,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,11.9,56.5,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.0
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,5.7,61.5,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.0
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,8.4,65.0,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.0
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,12.7,54.5,3.2,-33.8678,151.21,27.97,0.39,neutro,1.224647e-16,-1.0
50158,2017-06-25,Brisbane,11.0,24.2,0.0,2.2,9.8,ENE,20.0,SSW,...,13.2,60.5,3.2,-27.4678,153.0281,27.97,0.39,neutro,1.224647e-16,-1.0


In [16]:
# Attach the yearly emissions figure to every daily observation, joining on 'ano'.
# how='left' keeps every climate row. validate='many_to_one' makes pandas raise
# MergeError if a year appears more than once in the emissions table, which
# would otherwise silently duplicate climate rows.
df_merge = pd.merge(df_2, df_gas_carbo, on='ano', how='left', validate='many_to_one')

In [17]:
# Display the frame produced by the join with the emissions table
df_merge

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos,au_mt
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,48.5,4.7,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,58.0,4.0,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,75.5,2.3,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,59.0,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,58.5,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,56.5,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,61.5,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,65.0,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,54.5,3.2,-33.8678,151.2100,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209


In [18]:
# Give the emissions column a descriptive name
df_merge = df_merge.rename({'au_mt': 'emissoes_co2'}, axis='columns')

In [19]:
# Display the frame after renaming the emissions column
df_merge

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos,emissoes_co2
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,48.5,4.7,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,58.0,4.0,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,75.5,2.3,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,59.0,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,58.5,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,56.5,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,61.5,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,65.0,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,54.5,3.2,-33.8678,151.2100,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209


In [20]:
# Remove the `choveu_hoje_fex` column from the merged frame
df_merge = df_merge.drop(columns=['choveu_hoje_fex'])

In [21]:
# Display the frame after dropping `choveu_hoje_fex`
df_merge

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,umidade_media,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos,emissoes_co2
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,48.5,4.7,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,58.0,4.0,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,75.5,2.3,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,59.0,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,58.5,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,56.5,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,61.5,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,65.0,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,54.5,3.2,-33.8678,151.2100,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209


In [22]:
# Season lookup: month number -> season label, listed in calendar order
estacao_por_mes = {
    1: "verao",       # January
    2: "verao",       # February
    3: "outono",      # March
    4: "outono",      # April
    5: "outono",      # May
    6: "inverno",     # June
    7: "inverno",     # July
    8: "inverno",     # August
    9: "primavera",   # September
    10: "primavera",  # October
    11: "primavera",  # November
    12: "verao",      # December
}

# Column-oriented dict: one list with the month numbers, one with their seasons
dados_estacoes = {
    "mes": list(estacao_por_mes.keys()),
    "estacao": list(estacao_por_mes.values()),
}

# Turn the dict into a two-column DataFrame
df_estacoes = pd.DataFrame(dados_estacoes)


In [23]:
# Display the month-to-season lookup table
df_estacoes

Unnamed: 0,mes,estacao
0,1,verao
1,2,verao
2,3,outono
3,4,outono
4,5,outono
5,6,inverno
6,7,inverno
7,8,inverno
8,9,primavera
9,10,primavera


In [24]:
# Attach the season label to every observation via its month number ('mes').
# validate="many_to_one" makes pandas raise MergeError if a month appears more
# than once in the lookup table, so the left join cannot add rows.
df_merge_2 = df_merge.merge(df_estacoes, on="mes", how="left", validate="many_to_one")

In [25]:
# Display the frame produced by the join with the season lookup
df_merge_2

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos,emissoes_co2,estacao
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,4.7,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,4.0,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,2.3,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,3.2,-33.8678,151.2100,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno


In [26]:
# Export of `df_merge_2` as version 8 (CSV and Excel) — currently disabled.
#df_merge_2.to_csv(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v8.csv', sep=',', index=False)
#df_merge_2.to_excel(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v8.xlsx', index=False)

In [27]:
# Display `df_merge_2` again
df_merge_2

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,variacao_pressao,latitude,longitude,indice_total_enso,anomalia_enso,evento_enso,mes_sin,mes_cos,emissoes_co2,estacao
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,4.7,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,4.0,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,2.3,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,-1.5,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,-0.2,-35.2931,149.1269,25.17,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,2.5,-31.9559,115.8606,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,2.4,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,2.3,-37.8142,144.9631,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,3.2,-33.8678,151.2100,27.97,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno


In [28]:
# Count the missing values in each column of `df_merge_2`
df_merge_2.isna().sum()

data                       0
localidade                 0
temp_min                   0
temp_max                   0
chuva_mm                   0
evaporacao                 0
horas_sol                  0
vento_rajada_direcao       0
vento_rajada_velocidade    0
vento_direcao_9h           0
vento_direcao_15h          0
vento_veloc_9h             0
vento_veloc_15h            0
umidade_9h                 0
umidade_15h                0
pressao_9h                 0
pressao_15h                0
nuvem_9h                   0
nuvem_15h                  0
temp_9h                    0
temp_15h                   0
chove_amanha_vtr           0
ano                        0
mes                        0
dia                        0
trimestre_quarter          0
horas_sol_isna             0
evaporacao_isna            0
nuvem_9h_isna              0
nuvem_15h_isna             0
pressao_9h_isna            0
pressao_15h_isna           0
choveu_hoje                0
amplitude_termica          0
umidade_media 

In [29]:
# Per-locality geographic attributes, stored as [altitude, distance from the coast].
# NOTE(review): units are not stated in this notebook (presumably metres and km) — confirm.
mapeamento_geo = {
    'Canberra': [575, 110], 'Hobart': [13, 0], 'Melbourne': [31, 0],
    'Brisbane': [28, 0], 'Darwin': [31, 0], 'Perth': [31, 0],
    'MountGambier': [63, 15], 'AliceSprings': [545, 1100], 'Cairns': [8, 0],
    'Townsville': [10, 0], 'WaggaWagga': [147, 220], 'Sale': [10, 15],
    'CoffsHarbour': [10, 0], 'Cobar': [264, 450], 'Moree': [212, 300],
    'Sydney': [39, 0], 'Mildura': [50, 350], 'Nuriootpa': [274, 60],
    'Portland': [15, 0], 'Woomera': [167, 150]
}

# Fail early, naming the offending values, if the data contains a locality that
# has no entry in the lookup table (key=str keeps the sort safe for non-string values).
localidades_sem_mapa = sorted(
    set(df_merge_2['localidade'].unique()) - set(mapeamento_geo),
    key=str,
)
if localidades_sem_mapa:
    raise KeyError(f"Localities missing from mapeamento_geo: {localidades_sem_mapa}")

# 1. Altitude and distance-from-coast columns, looked up by locality through
#    named per-attribute dicts instead of positional list indices.
altitude_por_local = {local: valores[0] for local, valores in mapeamento_geo.items()}
distancia_costa_por_local = {local: valores[1] for local, valores in mapeamento_geo.items()}
df_merge_2['altitude'] = df_merge_2['localidade'].map(altitude_por_local)
df_merge_2['distancia_costa'] = df_merge_2['localidade'].map(distancia_costa_por_local)

# 2. Approximate dew point at 15h.
#    Rule of thumb: dew point = temperature - ((100 - relative humidity) / 5)
df_merge_2['ponto_orvalho_15h'] = df_merge_2['temp_15h'] - ((100 - df_merge_2['umidade_15h']) / 5)

# 3. Dew-point depression at 15h (how close the air is to saturation).
#    With the approximation above this equals (100 - umidade_15h) / 5, i.e. it is
#    a linear function of umidade_15h and adds no independent information.
df_merge_2['depressao_orvalho_15h'] = df_merge_2['temp_15h'] - df_merge_2['ponto_orvalho_15h']

In [30]:
# Display the frame with the new geographic and dew-point columns
df_merge_2

Unnamed: 0,data,localidade,temp_min,temp_max,chuva_mm,evaporacao,horas_sol,vento_rajada_direcao,vento_rajada_velocidade,vento_direcao_9h,...,anomalia_enso,evento_enso,mes_sin,mes_cos,emissoes_co2,estacao,altitude,distancia_costa,ponto_orvalho_15h,depressao_orvalho_15h
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,...,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera,575,110,9.4,14.2
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,...,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera,575,110,12.9,12.8
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,...,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera,575,110,14.0,6.2
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,...,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera,575,110,5.3,8.8
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,...,-1.50,la_nina,-5.000000e-01,0.866025,630.7811,primavera,575,110,5.2,10.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50154,2017-06-25,Perth,4.9,16.8,0.0,1.6,7.9,E,39.0,E,...,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno,31,0,5.0,11.2
50155,2017-06-25,Melbourne,8.6,14.3,0.0,2.8,3.8,NW,35.0,N,...,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno,31,0,4.6,8.8
50156,2017-06-25,Melbourne,5.5,13.9,0.0,2.8,3.8,NNW,44.0,N,...,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno,31,0,3.8,8.4
50157,2017-06-25,Sydney,6.8,19.5,0.0,3.4,9.4,WSW,46.0,NW,...,0.39,neutro,1.224647e-16,-1.000000,536.7209,inverno,39,0,6.3,11.6


In [31]:
# Write the enriched dataset to disk as version 9, in CSV and Excel formats (row index not written)
# NOTE(review): absolute, machine-specific paths — the target folder must exist on the machine running this
df_merge_2.to_csv(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v9.csv', sep=',', index=False)
df_merge_2.to_excel(r'C:\Users\JacyzinGuilherme(Bip\mentoria-bip\dados_editados\australia_clima_v9.xlsx', index=False)