In [1]:
import glob, os
import pandas as pd
import zipfile, requests
from io import BytesIO

In [2]:
# Dados Históricos para o ano de 2022
def download_serieHistorica_inmet(ano:int, chunk_size=1024):
    url = f'https://portal.inmet.gov.br/uploads/dadoshistoricos/{ano}.zip'
    extension_file = os.path.splitext(url)[1]
    save_path = './dados/inmet/'
    filename = url.split('/')[-1]
    if os.path.isfile(os.path.join(save_path,str(ano)+extension_file)):
        print("Arquivo solicitado já existe e não será sobrescrevido.")
    else:
        print("Arquivo não exite.")
        print("Iniciando o download...")
        r = requests.get(url, stream=True)
        z = zipfile.ZipFile(BytesIO(r.content))
        print("Downloading completed.")
        print("Iniciando a extração dos arquivos CSV...")
        z.extractall(save_path)
        print("Extração Finalizada.")

In [4]:
pi_2022 = download_serieHistorica_inmet(2022)

Arquivo solicitado já existe e não será sobrescrevido.


In [5]:
# INMET_NE_PI_A308_PARNAIBA_01-01-2022_A_31-12-2022.CSV
path_dados = ".\\dados\\inmet"
files = glob.glob(os.path.join(path_dados, "INMET_NE_PI_*.CSV"))
print(f"Contém {len(files)} arquivos")
files

Contém 21 arquivos


['.\\dados\\inmet\\INMET_NE_PI_A308_PARNAIBA_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A311_FLORIANO_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A312_TERESINA_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A326_BOM JESUS DO PIAUI_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A330_PAULISTANA_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A331_SAO JOAO DO PIAUI_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A335_PIRIPIRI_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A336_ALVORADA DO GURGUEIA_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A337_CARACOL_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A343_PICOS_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A345_SAO RAIMUNDO NONATO_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A346_URUCUI_01-01-2022_A_31-12-2022.CSV',
 '.\\dados\\inmet\\INMET_NE_PI_A354_OEIRAS_01-01-2022_A_31-12-

In [20]:
pd.read_csv(files[0], sep=';', skiprows=8, decimal=',', encoding='ANSI', parse_dates=True).dtypes

Data                                                      object
Hora UTC                                                  object
PRECIPITAÇÃO TOTAL, HORÁRIO (mm)                         float64
PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)    float64
PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB)          float64
PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB)         float64
RADIACAO GLOBAL (Kj/m²)                                  float64
TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)             float64
TEMPERATURA DO PONTO DE ORVALHO (°C)                     float64
TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C)               float64
TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C)               float64
TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C)         float64
TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C)         float64
UMIDADE REL. MAX. NA HORA ANT. (AUT) (%)                 float64
UMIDADE REL. MIN. NA HORA ANT. (AUT) (%)                 float64
UMIDADE RELATIVA DO AR, H

In [7]:
new_df = pd.DataFrame()
for file in files:
    # Criando um dataframe a partir da 9 linha
    df_file = pd.read_csv(file, sep=';', skiprows=8, decimal=',', encoding='ANSI', infer_datetime_format=True)
    cols4rename1 = {
        'Data':'data',
        'Hora UTC':'hora_utc',
        'PRECIPITAÇÃO TOTAL, HORÁRIO (mm)':'precip_tt_mm'}
    df_file = df_file.drop(columns=df_file.columns[3:]).rename(columns=cols4rename1)
    
    # Abrindo o CSV e extraindo as 8 primerias linhas
    with open(file, 'r') as f:
        lines = [next(f).strip() for _ in range(8)]
    dados_lines_1_8 = {}
    for linha in lines:
        chave, valor = linha.split(';')
        dados_lines_1_8[chave] = [valor]
        
    # Criando um dataframe das 8primeiras linhas
    df_1_8 = pd.DataFrame(dados_lines_1_8).drop(columns=['DATA DE FUNDACAO:'])
    cols4rename2 = {
        'REGIAO:':'regiao',
        'UF:':'uf',
        'ESTACAO:':'estacao',
        'CODIGO (WMO):':'codEst',
        'LATITUDE:':'latitude',
        'LONGITUDE:':'longitude',
        'ALTITUDE:':'altitude'}
    dados = pd.concat([df_1_8] * len(df_file), ignore_index=True).ffill().fillna(df_file).rename(columns=cols4rename2)
    
    if new_df.empty:
        new_df = pd.concat([dados, df_file], axis=1)
    else:
        new_df = pd.concat([new_df, pd.concat([dados, df_file], axis=1)])

In [8]:
new_df.shape

(183960, 10)

In [9]:
new_df.head()

Unnamed: 0,regiao,uf,estacao,codEst,latitude,longitude,altitude,data,hora_utc,precip_tt_mm
0,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0000 UTC,
1,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0100 UTC,0.0
2,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0200 UTC,
3,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0300 UTC,0.0
4,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0400 UTC,0.0


In [15]:
new_df.dtypes

regiao           object
uf               object
estacao          object
codEst           object
latitude         object
longitude        object
altitude         object
data             object
hora_utc         object
precip_tt_mm    float64
dtype: object

In [84]:
new_df.insert(loc=8, column='mes', value=new_df['data'].dt.month)

AttributeError: Can only use .dt accessor with datetimelike values

In [81]:
new_df.loc[new_df.estacao == 'PARNAIBA']

Unnamed: 0,regiao,uf,estacao,codEst,latitude,longitude,altitude,data,hora_utc,precip_tt_mm
0,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0000 UTC,
1,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0100 UTC,0.0
2,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0200 UTC,
3,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0300 UTC,0.0
4,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/01/01,0400 UTC,0.0
...,...,...,...,...,...,...,...,...,...,...
8755,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/12/31,1900 UTC,0.0
8756,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/12/31,2000 UTC,0.0
8757,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/12/31,2100 UTC,0.0
8758,NE,PI,PARNAIBA,A308,-308666666,-4178305554,5211,2022/12/31,2200 UTC,0.0
