# Algoritmo de classificação de possibilidade de chuva em Brasília

### Instalando e importando as bibliotecas necessárias

In [63]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px 
from ydata_profiling import ProfileReport

### Carregando os dados

In [65]:
path = r'C:\Users\ModalGR\Documents\Projetos\GitHub\classificacao-brasilia\dl\2nd\dados_A001_D_2001-01-01_2019-12-31.csv'

In [66]:
df = pd.read_csv(path, sep=";", encoding="latin1", skiprows=9, decimal=",")

### Analisando os dados

In [67]:
df

Unnamed: 0,Data Medicao,"PRECIPITACAO TOTAL, DIARIO (AUT)(mm)",PRESSAO ATMOSFERICA MEDIA DIARIA (AUT)(mB),TEMPERATURA DO PONTO DE ORVALHO MEDIA DIARIA (AUT)(Â°C),"TEMPERATURA MAXIMA, DIARIA (AUT)(Â°C)","TEMPERATURA MEDIA, DIARIA (AUT)(Â°C)","TEMPERATURA MINIMA, DIARIA (AUT)(Â°C)","UMIDADE RELATIVA DO AR, MEDIA DIARIA (AUT)(%)","UMIDADE RELATIVA DO AR, MINIMA DIARIA (AUT)(%)","VENTO, RAJADA MAXIMA DIARIA (AUT)(m/s)","VENTO, VELOCIDADE MEDIA DIARIA (AUT)(m/s)",Unnamed: 11
0,2001-01-01,22.4,885.4,18.8,24.6,20.7,18.7,89.3,69.0,7.4,1.7,
1,2001-01-02,37.2,885.9,18.3,24.7,20.7,18.5,87,67.0,9,2.4,
2,2001-01-03,,886.8,17.8,26.5,,18.7,77.2,55.0,9,2.8,
3,2001-01-04,,888.5,17.5,26.3,,17.4,80.2,59.0,10.1,3,
4,2001-01-05,,887.9,17,27.2,,18.5,72.1,43.0,9.6,3.4,
...,...,...,...,...,...,...,...,...,...,...,...,...
6934,2019-12-27,0,887,17,28.5,23.8,19.6,67.7,43.0,9.7,2.9,
6935,2019-12-28,.2,886.4,15.4,29,23.9,19.3,60.5,32.0,8.9,3,
6936,2019-12-29,0,885.4,15.2,29.8,24.2,18.4,59.5,34.0,7.8,2.5,
6937,2019-12-30,0,884,15.1,29.1,24,18.4,58.7,38.0,8.1,2.2,


In [68]:
def dfInfo(dataframe):
    print("Informações sobre o dataset:")
    print(dataframe.info())
    print("\n")
    print("Formato do dataset:")
    print(dataframe.shape)
    print("\n")
    print("Cabeçalho do dataset:")
    print(dataframe.head(3))

In [69]:
#ProfileReport(df, title="Profiling Report").to_file("report.html")

In [70]:
dfInfo(df)

Informações sobre o dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6939 entries, 0 to 6938
Data columns (total 12 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   Data Medicao                                             6939 non-null   object 
 1   PRECIPITACAO TOTAL, DIARIO (AUT)(mm)                     6611 non-null   object 
 2   PRESSAO ATMOSFERICA MEDIA DIARIA (AUT)(mB)               6712 non-null   object 
 3   TEMPERATURA DO PONTO DE ORVALHO MEDIA DIARIA (AUT)(Â°C)  6713 non-null   object 
 4   TEMPERATURA MAXIMA, DIARIA (AUT)(Â°C)                    6757 non-null   object 
 5   TEMPERATURA MEDIA, DIARIA (AUT)(Â°C)                     6632 non-null   object 
 6   TEMPERATURA MINIMA, DIARIA (AUT)(Â°C)                    6764 non-null   object 
 7   UMIDADE RELATIVA DO AR, MEDIA DIARIA (AUT)(%)            6797 non-null   object 
 8  

In [71]:
df.isna().sum()

Data Medicao                                                  0
PRECIPITACAO TOTAL, DIARIO (AUT)(mm)                        328
PRESSAO ATMOSFERICA MEDIA DIARIA (AUT)(mB)                  227
TEMPERATURA DO PONTO DE ORVALHO MEDIA DIARIA (AUT)(Â°C)     226
TEMPERATURA MAXIMA, DIARIA (AUT)(Â°C)                       182
TEMPERATURA MEDIA, DIARIA (AUT)(Â°C)                        307
TEMPERATURA MINIMA, DIARIA (AUT)(Â°C)                       175
UMIDADE RELATIVA DO AR, MEDIA DIARIA (AUT)(%)               142
UMIDADE RELATIVA DO AR, MINIMA DIARIA (AUT)(%)               62
VENTO, RAJADA MAXIMA DIARIA (AUT)(m/s)                       84
VENTO, VELOCIDADE MEDIA DIARIA (AUT)(m/s)                   251
Unnamed: 11                                                6939
dtype: int64

### Limpando os dados

In [72]:
df.drop(['Unnamed: 11', 'TEMPERATURA MAXIMA, DIARIA (AUT)(Â°C)', 'TEMPERATURA MINIMA, DIARIA (AUT)(Â°C)', 'UMIDADE RELATIVA DO AR, MINIMA DIARIA (AUT)(%)', 'VENTO, RAJADA MAXIMA DIARIA (AUT)(m/s)'], axis=1, inplace=True)

In [73]:
df

Unnamed: 0,Data Medicao,"PRECIPITACAO TOTAL, DIARIO (AUT)(mm)",PRESSAO ATMOSFERICA MEDIA DIARIA (AUT)(mB),TEMPERATURA DO PONTO DE ORVALHO MEDIA DIARIA (AUT)(Â°C),"TEMPERATURA MEDIA, DIARIA (AUT)(Â°C)","UMIDADE RELATIVA DO AR, MEDIA DIARIA (AUT)(%)","VENTO, VELOCIDADE MEDIA DIARIA (AUT)(m/s)"
0,2001-01-01,22.4,885.4,18.8,20.7,89.3,1.7
1,2001-01-02,37.2,885.9,18.3,20.7,87,2.4
2,2001-01-03,,886.8,17.8,,77.2,2.8
3,2001-01-04,,888.5,17.5,,80.2,3
4,2001-01-05,,887.9,17,,72.1,3.4
...,...,...,...,...,...,...,...
6934,2019-12-27,0,887,17,23.8,67.7,2.9
6935,2019-12-28,.2,886.4,15.4,23.9,60.5,3
6936,2019-12-29,0,885.4,15.2,24.2,59.5,2.5
6937,2019-12-30,0,884,15.1,24,58.7,2.2


In [74]:
df = df.apply(pd.to_numeric, errors='coerce')

In [75]:
df

Unnamed: 0,Data Medicao,"PRECIPITACAO TOTAL, DIARIO (AUT)(mm)",PRESSAO ATMOSFERICA MEDIA DIARIA (AUT)(mB),TEMPERATURA DO PONTO DE ORVALHO MEDIA DIARIA (AUT)(Â°C),"TEMPERATURA MEDIA, DIARIA (AUT)(Â°C)","UMIDADE RELATIVA DO AR, MEDIA DIARIA (AUT)(%)","VENTO, VELOCIDADE MEDIA DIARIA (AUT)(m/s)"
0,,22.4,885.4,18.8,20.7,89.3,1.7
1,,37.2,885.9,18.3,20.7,87.0,2.4
2,,,886.8,17.8,,77.2,2.8
3,,,888.5,17.5,,80.2,3.0
4,,,887.9,17.0,,72.1,3.4
...,...,...,...,...,...,...,...
6934,,0.0,887.0,17.0,23.8,67.7,2.9
6935,,0.2,886.4,15.4,23.9,60.5,3.0
6936,,0.0,885.4,15.2,24.2,59.5,2.5
6937,,0.0,884.0,15.1,24.0,58.7,2.2


In [76]:
df = df[df >= 0].dropna()

In [77]:
df

Unnamed: 0,Data Medicao,"PRECIPITACAO TOTAL, DIARIO (AUT)(mm)",PRESSAO ATMOSFERICA MEDIA DIARIA (AUT)(mB),TEMPERATURA DO PONTO DE ORVALHO MEDIA DIARIA (AUT)(Â°C),"TEMPERATURA MEDIA, DIARIA (AUT)(Â°C)","UMIDADE RELATIVA DO AR, MEDIA DIARIA (AUT)(%)","VENTO, VELOCIDADE MEDIA DIARIA (AUT)(m/s)"
