# Brazilian Hydrometric Data

The goal of this notebook is to collect Brazilian hydrometric data from different stations and then convert it to CSV format so that it can be used for data analysis.

## 1. Import the necessary libraries

In [1]:
import requests
import time
import csv
import pandas as pd
import json

## 2. Retrieve the list of all telemetric stations

In [2]:
# Download the telemetric-station catalogue page by page.
# Each page is retried a bounded number of times, with a pause between attempts
# and a request timeout, so a persistent outage cannot hang the notebook or
# flood the server with back-to-back requests.
STATIONS_URL = 'https://www.snirh.gov.br/hidroweb/rest/api/estacaotelemetrica?tipoEstacao=F&size=5000&page={page}'
STATIONS_MAX_ATTEMPTS = 15   # requests per page before giving up
STATIONS_RETRY_DELAY = 2     # seconds to wait between failed attempts
STATIONS_TIMEOUT = 60        # seconds before a single request is abandoned

json_info = []
for page in range(4):
    print(f'Loading page {page}')
    for attempt in range(1, STATIONS_MAX_ATTEMPTS + 1):
        try:
            all_stations = requests.get(STATIONS_URL.format(page=page), timeout=STATIONS_TIMEOUT)
            failure = None if all_stations.status_code == 200 else all_stations.status_code
        except requests.RequestException as exc:
            # Connection errors and timeouts are retried like a bad status code
            failure = exc

        if failure is None:
            print('Successful')
            break

        print(f'FAILED! error: {failure}')
        if attempt < STATIONS_MAX_ATTEMPTS:
            time.sleep(STATIONS_RETRY_DELAY)
    else:
        # Every attempt failed: stop here instead of continuing with missing data
        raise RuntimeError(f'Could not load page {page} after {STATIONS_MAX_ATTEMPTS} attempts')

    byte_string = all_stations.content
    json_data = json.loads(byte_string.decode('utf-8'))
    # An empty page means there is nothing further to collect
    if len(json_data['content']) == 0:
        break
    json_info.append(json_data)


Loading page 0
Successful
Loading page 1
Successful
Loading page 2
Successful
Loading page 3
Successful


In [12]:
# Obtain all the stations: one flat record per entry of every downloaded page
stations_records = [
    {**station}
    for page_payload in json_info
    for station in page_payload['content']
]

# Convert to a pandas DataFrame, discarding rows that are exact duplicates
df_stations = pd.DataFrame(stations_records).drop_duplicates()

# Persist the catalogue: the next section reloads it from 'stations.csv',
# which would not exist on a fresh run unless it is written here.
df_stations.to_csv('stations.csv', sep=',', index=False)

## 3. Collect data from the stations and convert to CSV format

In [40]:
# Reload the station catalogue from disk and parse the last-update column
# into timezone-aware timestamps in a single chained expression.
df_stations = pd.read_csv('stations.csv', sep=',').assign(
    ultimaAtualizacao=lambda frame: pd.to_datetime(
        frame['ultimaAtualizacao'],
        format='%Y-%m-%dT%H:%M:%S.%f%z',
    )
)

Find the five rivers with the most stations updated after the query date (2024-01-01).

In [59]:
# Cut-off timestamp: only stations updated after it are considered
query_date = pd.to_datetime(
    '2024-01-01T00:00:00.000+0000',
    format='%Y-%m-%dT%H:%M:%S.%f%z',
)

# Per river, count the non-null values of every column among the recently
# updated stations, then show the five rivers with the highest 'id' count.
(
    df_stations
    .loc[lambda frame: frame.ultimaAtualizacao > query_date]
    .groupby('nomeRio')
    .count()
    .sort_values(by='id', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,id,codigoAdicional,aneelPlu,aneelFlu,nome,latitude,longitude,altitude,ultimaAtualizacao,baciaCodigo,...,rioCodigo,estadoCodigo,municipioCodigo,responsavelCodigo,operadoraCodigo,menorDataPeriodo,maiorDataPeriodo,responsavelSigla,operadoraSigla,tipoEstacao
nomeRio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
RIO PARAÍBA DO SUL,5,5,4,3,5,5,5,5,5,5,...,5,5,5,5,5,3,3,5,5,5
RIO PARAMIRIM,5,5,5,0,5,5,5,0,5,5,...,5,5,5,5,5,5,5,5,5,5
RIO CHAPECOZINHO,4,4,0,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
RIO PAJEÚ,4,4,3,1,4,4,4,3,4,4,...,4,4,4,4,4,3,3,4,4,4
RIACHO QUIXERE,4,4,3,1,4,4,4,4,4,4,...,4,4,4,4,4,3,3,4,4,4


In [65]:
# Ids of the stations on 'RIO PARAMIRIM' that were updated after query_date
updated_after_query = df_stations.ultimaAtualizacao > query_date
on_selected_river = df_stations.nomeRio == 'RIO PARAMIRIM'
arr_stations = df_stations.loc[updated_after_query & on_selected_river, 'id'].values

In [80]:
# Download the measurements of every selected station for the chosen period
# and save each station to its own CSV file.
# NOTE: saveMetrisRecord is defined in a later cell of this notebook; that cell
# must be executed before this one.
periodoInicial = '2023-01-01T04:00:00.000Z'
periodoFinal = '2024-01-01T04:00:00.000Z'
count_try = 15                 # maximum number of requests per station
DOWNLOAD_RETRY_DELAY = 2       # seconds to wait between failed attempts
DOWNLOAD_TIMEOUT = 120         # seconds before a single request is abandoned

for stat in arr_stations:
    print(f'STATION: {stat}')
    success = False
    # range(1, count_try + 1) so that exactly `count_try` attempts are made
    for count in range(1, count_try + 1):
        try:
            x = requests.get(
                f'https://www.snirh.gov.br/hidroweb/rest/api/documento/gerarTelemetricas?codigosEstacoes={stat}&tipoArquivo=3&periodoInicial={periodoInicial}&periodoFinal={periodoFinal}',
                timeout=DOWNLOAD_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Connection errors and timeouts count as a failed attempt
            # instead of aborting the remaining stations
            print(f'Try: {count}   Response: {exc}')
        else:
            if x.status_code == 200:
                success = True
                break
            print(f'Try: {count}   Response: {x.status_code}')
        if count < count_try:
            time.sleep(DOWNLOAD_RETRY_DELAY)

    if success:
        saveMetrisRecord(x.content, stat)
        print('Successful\n')
    else:
        print('Failed\n')

STATION: 132642130
Successful

STATION: 132642120
Successful

STATION: 130242350
Successful

STATION: 132542090
Successful

STATION: 130042330
Try: 1   Response: 504
Successful



In [79]:
def saveMetrisRecord(byte_string, station_number):
    """Flatten a JSON measurement payload and save it as '<station_number>.csv'.

    The payload is decoded as UTF-8 JSON and iterated as a sequence of records.
    Each entry of a record's 'medicoes' list becomes one CSV row made of the
    record's remaining fields followed by the fields of that entry.

    Parameters
    ----------
    byte_string : bytes
        UTF-8 encoded JSON document (e.g. the body of an HTTP response).
    station_number : int or str
        Used to build the output file name '<station_number>.csv'.

    Returns
    -------
    pandas.DataFrame
        The flattened table that was written to disk.
    """
    json_data = json.loads(byte_string.decode('utf-8'))
    medicoes_records = []
    for record in json_data:
        # Fields shared by every measurement of this record
        shared_fields = {key: value for key, value in record.items() if key != 'medicoes'}
        # `or []` covers both a missing key and an explicit JSON null
        for medicoes in record.get('medicoes') or []:
            medicoes_records.append({**shared_fields, **medicoes})

    # Create a DataFrame from the expanded records
    medicoes_df = pd.DataFrame(medicoes_records)
    medicoes_df.to_csv(f'{station_number}.csv', sep=',', index=False)
    return medicoes_df


In [None]:
# `medicoes_df` is only a local variable of saveMetrisRecord, so it is not
# available here on a fresh kernel. Reload one of the saved CSV files instead.
# NOTE(review): the first selected station is an arbitrary choice — confirm
# which station this preview is meant to show.
medicoes_df = pd.read_csv(f'{arr_stations[0]}.csv', sep=',')

# Preview the columns from position 36 onwards, then report the number of rows
display(medicoes_df.iloc[:, 36:].head())
len(medicoes_df)

Unnamed: 0,horQChuva,horChuva,horQNivelAdotado,horNivelAdotado,horQVazao,horVazao
0,0.0,,0.0,25709.0,,
1,0.0,,0.0,25709.0,,
2,0.0,,0.0,25709.0,,
3,0.0,,0.0,25709.0,,
4,0.0,,0.0,25710.0,,


8747

In [85]:
# Read back the CSV saved for station 132642120 and show its columns
# from position 36 onwards.
test = pd.read_csv('132642120.csv', sep=',')
test.loc[:, test.columns[36:]]

Unnamed: 0,horQChuva,horChuva,horQNivelAdotado,horNivelAdotado,horQVazao,horVazao
0,,,0.0,67010.0,,
1,,,0.0,67009.0,,
2,,,0.0,67007.0,,
3,,,0.0,67006.0,,
4,,,0.0,67011.0,,
...,...,...,...,...,...,...
626,,,0.0,66428.0,,
627,,,0.0,66427.0,,
628,,,0.0,66425.0,,
629,,,0.0,66423.0,,
