# Brazilian Hydrometric Data

The goal of this notebook is to collect Brazilian Hydrometric Data from different stations. After this, we want to convert the information in CSV format to perform data analysis.

## 1. Import the necessary libraries

In [160]:
import requests
import time
import csv
import pandas as pd
import json

## 2. Search for all stations for a specific river

In [148]:
json_info = []
for page in range(20):    
    print(f'Loading page {page}')
    while(True):
        all_stations = requests.get(f'https://www.snirh.gov.br/hidroweb/rest/api/dadosHistoricos?tipoEstacao=F&size=5000&page={page}')
        if(all_stations.status_code != 200):
            print(f'FAILED! error: {all_stations.status_code}')
        else:
            print('Successful')
            break

    byte_string = all_stations.content 
    json_data = json.loads(byte_string.decode('utf-8'))
    if(len(json_data['content']) == 0):
        break
    json_info.append(json_data)


Loading page 0
Successful
Loading page 1
Successful
Loading page 2
Successful
Loading page 3
Successful
Loading page 4
Successful
Loading page 5
Successful
Loading page 6
Successful
Loading page 7
Successful
Loading page 8
Successful
Loading page 9
Successful
Loading page 10
Successful
Loading page 11
FAILED! error: 504
Successful
Loading page 12
Successful
Loading page 13
FAILED! error: 504
FAILED! error: 504
Successful
Loading page 14
Successful
Loading page 15
Successful
Loading page 16
Successful
Loading page 17
Successful


In [149]:
# Obtain all the stations
stations_records = []
for js in json_info:
    for stat in js['content']:
        stations_records.append({**stat})

# Convert to a pandas Dataframe
df_stations = pd.DataFrame(stations_records).drop_duplicates()

In [201]:
df_stations = pd.read_csv('dados_convencionais.csv', sep=',')
df_stations[df_stations.codigoNomeSubBacia == '86 - RIO TAQUARI'][['nome','latitude','longitude']].to_csv('taquari.csv', sep=',', index=False)

## 3. Collect data from the stations and convert to CSV format

In [210]:
with open('../teste2.txt', 'r') as txt:
    string = txt.read()

arr = []
for i in string.split(' - '):
    if(i.isnumeric() and len(i) > 3):
        arr.append(i)

for i in arr:
    if i in df_stations.id.values:
        print(i)

In [154]:
df_stations[['nome', 'latitude', 'longitude']].to_csv('stations_located.csv', sep=',', index=False)

In [214]:
df_stations[df_stations.nome == 'PCH MORRO GRANDE BARRAMENTO']

Unnamed: 0,id,nome,tipoEstacao,codigoAdicional,codigoNomeBacia,codigoNomeSubBacia,nomeRio,nomeEstado,nomeMunicipio,responsavelSigla,operadoraSigla,areaDrenagem,latitude,longitude,altitude
15882,86125100,PCH MORRO GRANDE BARRAMENTO,Fluviométrica,,"8 - ATLÂNTICO, TRECHO SUDESTE",86 - RIO TAQUARI,RIO ITUIM,RIO GRANDE DO SUL,MUITOS CAPÕES,MORRO GRANDE,MORRO GRANDE,640.0,-28.5589,-51.3111,631.0


In [122]:
df_stations[df_stations.nomeRio == 'RIO PARANÁ'].sort_values(by='ultimaAtualizacao')

Unnamed: 0,id,codigoAdicional,aneelPlu,aneelFlu,nome,latitude,longitude,altitude,ultimaAtualizacao,baciaCodigo,...,rioCodigo,estadoCodigo,municipioCodigo,responsavelCodigo,operadoraCodigo,menorDataPeriodo,maiorDataPeriodo,responsavelSigla,operadoraSigla,tipoEstacao
905,252454356,64902900,,64902900.0,JUSANTE ENSECADEIRA (LP-RM),-25.4158,-54.595,148.0,2011-06-02 03:00:00+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
902,252454354,64902750,,64902750.0,MONTANTE ENSECADEIRA (CD-RM),-25.4122,-54.5883,122.0,2011-06-02 03:00:00+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
900,252454352,64902650,,64902650.0,PARAMENTO DE JUSANTE (U13-U14),-25.4083,-54.5878,120.0,2014-09-12 03:00:00+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
17,222851580,63995080,,63995080.0,UHE PORTO PRIMAVERA BARRAMENTO,-22.4817,-52.9564,280.0,2015-03-11 03:00:00+00:00,6,...,60001000.0,21,21443800,6,6,2008-10-21T00:00:00.000+0000,2024-05-28T12:00:00.000+0000,CESP,CESP,Fluviométrica
896,250454240,64896200,,64896200.0,SOL DE MAIO,-25.07917,-54.4,,2018-12-11 19:16:41.033000+00:00,6,...,60001000.0,22,22160500,90,90,,,ITAIPU,ITAIPU,Fluviométrica
904,252554350,64902850,,64902850.0,JUSANTE ENSECADEIRA (CD-RM),-25.4169,-54.59,148.0,2018-12-11 19:39:59.610000+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
903,252454355,64902800,,64902800.0,MONTANTE ENSECADEIRA (LP-RM),-25.4114,-54.5931,148.0,2018-12-11 19:40:37.080000+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
901,252454353,64902700,,64902700.0,PARAMENTO DE JUSANTE (U3-U4TE),-25.4078,-54.5917,120.0,2018-12-11 19:41:05.550000+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
899,252454351,64902600,,64902600.0,PARAMENTO DE JUSANTE (U16-U17),-25.4086,-54.5861,120.0,2018-12-11 19:42:04.543000+00:00,6,...,60001000.0,22,22082000,90,90,,,ITAIPU,ITAIPU,Fluviométrica
897,252454360,64902000,,64902000.0,UHE ITAIPU PEDRO ORTELLADO,-25.4131,-54.6153,225.0,2018-12-18 18:17:26.153000+00:00,6,...,60001000.0,63,63000000,90,90,2015-11-01T03:00:00.000+0000,2020-07-21T18:00:00.000+0000,ITAIPU,ITAIPU,Fluviométrica


In [40]:
df_stations = pd.read_csv('stations.csv', sep=',')
df_stations['ultimaAtualizacao'] = pd.to_datetime(df_stations['ultimaAtualizacao'], format='%Y-%m-%dT%H:%M:%S.%f%z')

Collect rivers with the most recent stations updates

In [103]:
rivers = df_stations[df_stations.ultimaAtualizacao > query_date].groupby('nomeRio').count().sort_values(by='id', ascending=False).head(15).index

In [120]:
for year in range(2010, 2024):
    query_date = pd.to_datetime(f'{year+1}-01-01T00:00:00.000+0000', format='%Y-%m-%dT%H:%M:%S.%f%z')
    periodoInicial = f'{year}-01-01T04:00:00.000Z'
    periodoFinal = f'{year+1}-01-01T04:00:00.000Z'
    count_try = 10
    rivers = ['RIO PARANÁ', 'RIO PARANAPANEMA', 'RIO PARAMIRIM', 'RIO JOANES', 'RIO CHAPECOZINHO']
    for river in rivers:
        arr_stations = df_stations[df_stations.nomeRio == river].id.values
        for stat in arr_stations:
            print(f'STATION: {stat}')
            success = False
            for count in range(1, count_try):
                x = requests.get(f'https://www.snirh.gov.br/hidroweb/rest/api/documento/gerarTelemetricas?codigosEstacoes={stat}&tipoArquivo=3&periodoInicial={periodoInicial}&periodoFinal={periodoFinal}')
                if(x.status_code == 200):
                    success = True
                    break
                elif(x.status_code == 500):
                    print('Date is probably out of range')
                    break
                print(f'Try: {count}   Response: {x.status_code}')
            if(success):
                filename = f'{river}-{stat}-{year}'
                saveMetrisRecord(x.content, stat, filename)
                print('Successful\n')
            else:
                print('Failed\n')

STATION: 222851580
Try: 1   Response: 504
Try: 2   Response: 504
Try: 3   Response: 504
Try: 4   Response: 504
Try: 5   Response: 504
Try: 6   Response: 504
Try: 7   Response: 504
Try: 8   Response: 504
Try: 9   Response: 504
Failed

STATION: 202351221
Try: 1   Response: 504
Try: 2   Response: 504
Try: 3   Response: 504
Try: 4   Response: 504
Try: 5   Response: 503
Try: 6   Response: 503
Try: 7   Response: 503
Try: 8   Response: 503
Try: 9   Response: 503
Failed

STATION: 224353101
Try: 1   Response: 503
Try: 2   Response: 503
Try: 3   Response: 503
Try: 4   Response: 503
Try: 5   Response: 503
Try: 6   Response: 503
Try: 7   Response: 503
Try: 8   Response: 503
Try: 9   Response: 503
Failed

STATION: 243054190
Try: 1   Response: 503
Try: 2   Response: 503
Try: 3   Response: 503
Try: 4   Response: 503
Try: 5   Response: 503
Try: 6   Response: 503
Try: 7   Response: 503
Try: 8   Response: 503
Try: 9   Response: 503
Failed

STATION: 205251380
Try: 1   Response: 503
Try: 2   Response: 503

In [89]:
def saveMetrisRecord(byte_string, station_number, filename):
    json_data = json.loads(byte_string.decode('utf-8'))
    medicoes_records = []
    for record in json_data:
        for medicoes in record.pop('medicoes', []):
            medicoes_records.append({**record, **medicoes})

    # Create a DataFrame from the expanded records
    medicoes_df = pd.DataFrame(medicoes_records)
    medicoes_df.to_csv(f'{filename}.csv', sep=',', index=False)


In [None]:
display(medicoes_df.iloc[:,36:].head())
len(medicoes_df)

Unnamed: 0,horQChuva,horChuva,horQNivelAdotado,horNivelAdotado,horQVazao,horVazao
0,0.0,,0.0,25709.0,,
1,0.0,,0.0,25709.0,,
2,0.0,,0.0,25709.0,,
3,0.0,,0.0,25709.0,,
4,0.0,,0.0,25710.0,,


8747

In [111]:
test = pd.read_csv('./Data/2023/RIO JOANES-123638260-2023.csv', sep=',')
test.iloc[:,36:]

Unnamed: 0,horQChuva,horChuva,horQNivelAdotado,horNivelAdotado,horQVazao,horVazao
0,,,0.0,222.0,,
1,,,0.0,222.0,,
2,,,0.0,222.0,,
3,,,0.0,222.0,,
4,,,0.0,222.0,,
...,...,...,...,...,...,...
497,,,0.0,220.0,,
498,,,0.0,220.0,,
499,,,0.0,220.0,,
500,,,0.0,220.0,,
