# Brazilian Hydrometric Data

The goal of this notebook is to collect Brazilian hydrometric data from different telemetric stations and then convert that information to CSV format for data analysis.

## 1. Import the necessary libraries

In [2]:
import requests
import time
import csv
import pandas as pd
import json

## 2. Search for all stations for a specific river

In [26]:
# Request a single large page (size=5000) of the telemetric station listing
# filtered by tipoEstacao=F.
all_stations = requests.get(
    'https://www.snirh.gov.br/hidroweb/rest/api/estacaotelemetrica?tipoEstacao=F&size=5000&page=1',
    timeout=60,  # seconds; raise instead of hanging forever on a stalled server
)
# Report exactly one outcome: the success message must not be printed when
# the status check has just failed.
if all_stations.status_code != 200:
    print(f'FAILED! error: {all_stations.status_code}')
else:
    print('Successful')
# Preview only the beginning of the payload; the full body is very large and
# would bloat the notebook output.
all_stations.content[:500]

Successful


b'{"content":[{"id":74137050,"codigoAdicional":"38851400","aneelPlu":"38851400","aneelFlu":null,"nome":"A\xc3\x87UDE PRATA II","latitude":-7.6936,"longitude":-37.0881,"altitude":null,"ultimaAtualizacao":"2024-02-09T15:22:51.107+0000","baciaCodigo":3,"codigoNomeBacia":"3 - Atl\xc3\xa2ntico, Trecho Norte/Nordeste","codigoNomeSubBacia":"38 - RIOS PARAIBA,POTENJI E OUTROS","nomeRio":"RIO DA PRATA","nomeEstado":"PARA\xc3\x8dBA","nomeMunicipio":"PRATA","subBaciaCodigo":38,"rioCodigo":38231000,"estadoCodigo":11,"municipioCodigo":11122000,"responsavelCodigo":1,"operadoraCodigo":1,"menorDataPeriodo":"2018-02-15T10:00:00.000+0000","maiorDataPeriodo":"2024-05-29T10:00:00.000+0000","responsavelSigla":"ANA","operadoraSigla":"AESA-PB","tipoEstacao":"Fluviom\xc3\xa9trica"},{"id":64835340,"codigoAdicional":"38630000","aneelPlu":"38630000","aneelFlu":null,"nome":"A\xc3\x87UDE PIRPIRITUBA","latitude":-6.8111,"longitude":-35.5703,"altitude":null,"ultimaAtualizacao":"2024-02-09T15:22:49.237+0000","baciaCo

In [30]:
# Decode the raw UTF-8 response body into Python objects.
byte_string = all_stations.content
json_data = json.loads(byte_string.decode('utf-8'))

# The station entries are stored under the 'content' key; copy each one so the
# records list does not alias the parsed payload.
stations_records = [dict(stat) for stat in json_data.get('content', [])]

# Fail early with an explicit message instead of an opaque IndexError/KeyError
# later on when the listing request returned no stations.
if not stations_records:
    raise ValueError("Station listing returned no records under 'content'")

# One row per station, one column per field reported by the API.
df_stations = pd.DataFrame(stations_records)

Unnamed: 0,id,codigoAdicional,aneelPlu,aneelFlu,nome,latitude,longitude,altitude,ultimaAtualizacao,baciaCodigo,...,rioCodigo,estadoCodigo,municipioCodigo,responsavelCodigo,operadoraCodigo,menorDataPeriodo,maiorDataPeriodo,responsavelSigla,operadoraSigla,tipoEstacao
300,230949220,64219080,,64219080,UHE PIRAJU BARRAMENTO,-23.1544,-49.38,,2014-11-19T03:00:00.000+0000,6,...,64100000.0,21,21389000,137,137,1970-01-01T00:00:00.000+0000,2024-05-28T13:00:00.000+0000,CBA,CBA,Fluviométrica
301,231149230,64221000,,64221000,UHE PARANAPANEMA BARRAMENTO,-23.1869,-49.3842,505.0,2016-07-06T03:00:00.000+0000,6,...,64100000.0,21,21389000,1056,1056,2015-03-01T01:00:00.000+0000,2024-05-28T13:00:00.000+0000,ENEL GREEN,ENEL GREEN,Fluviométrica
305,230449500,64278080,,64278080,UHE OURINHOS BARRAMENTO,-23.0678,-49.8375,400.0,2014-08-05T03:00:00.000+0000,6,...,64100000.0,21,21350000,137,137,1970-01-01T00:00:00.000+0000,2024-05-28T13:00:00.000+0000,CBA,CBA,Fluviométrica


In [31]:
# Ids of every station whose river name is exactly 'RIO PARANAPANEMA'.
is_paranapanema = df_stations['nomeRio'] == 'RIO PARANAPANEMA'
arr_stations = df_stations.loc[is_paranapanema, 'id'].values

## 3. Collect data from the stations and convert to CSV format

In [34]:
# Query window sent to the telemetry export endpoint.
periodoInicial = '2023-05-04T04:00:00.000Z'
periodoFinal = '2024-05-05T04:00:00.000Z'
count_try = 15         # maximum number of attempts per station
request_timeout = 60   # seconds before a stalled request is abandoned
retry_delay = 2        # seconds to wait between two attempts

# Successful responses keyed by station id, so that every station is kept
# instead of each request overwriting the previous one.
station_responses = {}
# First successful response; kept under the name `x` because the following
# cell parses `x.content`.
x = None

for stat in arr_stations:
    print(f'STATION: {stat}')
    success = False
    # `count_try + 1` so that exactly `count_try` attempts are made
    # (range(1, count_try) would stop one attempt short).
    for count in range(1, count_try + 1):
        try:
            response = requests.get(
                f'https://www.snirh.gov.br/hidroweb/rest/api/documento/gerarTelemetricas?codigosEstacoes={stat}&tipoArquivo=3&periodoInicial={periodoInicial}&periodoFinal={periodoFinal}',
                timeout=request_timeout,
            )
        except requests.exceptions.RequestException as exc:
            # Timeouts and connection errors count as a failed attempt rather
            # than aborting the whole collection.
            print(f'Try: {count}   Error: {exc}')
        else:
            if response.status_code == 200:
                station_responses[stat] = response
                if x is None:
                    x = response
                success = True
                break
            elif response.status_code == 500:
                # A server-side error is not retried for this station.
                print('error 500 FAILED')
                break
            print(f'Try: {count}   Response: {response.status_code}')
        # Pause before retrying, but not after the final attempt.
        if count < count_try:
            time.sleep(retry_delay)
    # No `break` here: the outer loop must continue with the next station
    # after a success, otherwise only one station would ever be collected.
    if success:
        print('Successful\n')
    else:
        print('Failed\n')

STATION: 230949220


KeyboardInterrupt: 

In [32]:
# Decode the response body held in `x` and parse it as JSON.
byte_string = x.content
json_data = json.loads(byte_string.decode('utf-8'))

# Flatten the payload: every entry of a record's 'medicoes' list becomes one
# row that also carries the remaining fields of its parent record. The
# 'medicoes' key is popped first so the nested list is not repeated per row.
medicoes_records = [
    {**parent, **medicao}
    for parent in json_data
    for medicao in parent.pop('medicoes', [])
]

# Build the DataFrame in one go from the flattened rows.
medicoes_df = pd.DataFrame(medicoes_records)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Preview the first rows, restricted to the columns from position 36 onward.
display(medicoes_df.head().iloc[:, 36:])
# Total number of rows; shown as the cell's output.
medicoes_df.shape[0]

Unnamed: 0,horQChuva,horChuva,horQNivelAdotado,horNivelAdotado,horQVazao,horVazao
0,0.0,,0.0,25709.0,,
1,0.0,,0.0,25709.0,,
2,0.0,,0.0,25709.0,,
3,0.0,,0.0,25709.0,,
4,0.0,,0.0,25710.0,,


8747