# ResOpsBR: download data from the Sistema Interligado Nacional
***

***Author:** Chus Casado Rodríguez*<br>
***Date:** 17-07-2025*<br>

**Introduction:**<br>

This notebook downloads the list of reservoirs in the Sistema Interligado Nacional (SIN) and their time series from the Agência Nacional de Águas (ANA).

In [15]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import urllib.parse
from io import StringIO
from tqdm.auto import tqdm
from pathlib import Path

from lisfloodreservoirs.utils import DatasetConfig

### Configuration

In [None]:
# identifier of the reservoir system and the web page that is queried for it
SYSTEM = 'SIN'
URL = 'https://www.ana.gov.br/sar0/MedicaoSin'

# dataset configuration loaded from the YAML file
# (the cells below read `PATH_RESOPS`, `START` and `END` from it)
cfg = DatasetConfig('config_ResOpsBR_v11.yml')

### List of reservoirs

In [9]:
# request the page that contains the drop-down list of reservoirs;
# the timeout keeps the cell from hanging forever on an unresponsive server
response = requests.get(URL, timeout=60)
# fail right here on an HTTP error: continuing would either raise a NameError
# below (fresh kernel) or silently export a stale `reservoirs` variable
response.raise_for_status()

# parse HTML response
soup = BeautifulSoup(response.text, 'html.parser')

# find list of reservoirs
reservoir_list = soup.find('select', {'name': 'dropDownListReservatorios'})
if reservoir_list is None:
    raise ValueError(f"Drop-down list 'dropDownListReservatorios' not found in {URL}")

# map reservoir ID -> name; options whose `value` attribute is missing or is
# not an integer (e.g. a placeholder entry) are skipped
reservoir_names = {}
for option in reservoir_list.find_all('option'):
    try:
        reservoir_names[int(option['value'])] = option.text.strip()
    except (KeyError, ValueError):
        continue
print('{0} reservoirs'.format(len(reservoir_names)))

# convert to DataFrame: index 'ID', single column 'name'
reservoirs = pd.Series(reservoir_names, name='name').rename_axis('ID').to_frame()

# export
PATH_ATTRS = cfg.PATH_RESOPS / 'raw' / 'attributes' / SYSTEM
PATH_ATTRS.mkdir(parents=True, exist_ok=True)
reservoirs.to_csv(PATH_ATTRS / f'reservoirs_{SYSTEM}.csv')

162 reservoirs


### Time series

In [14]:
PATH_TS = cfg.PATH_RESOPS / 'raw' / 'time_series' / SYSTEM
PATH_TS.mkdir(parents=True, exist_ok=True)

# seconds to wait for the server before giving up on a single request
REQUEST_TIMEOUT = 60

# translation of the Portuguese column names of the HTML table
rename_cols = {
    'Código do Reservatório': 'ID',
    'Reservatório': 'name',
    'Cota (m)': 'level_m',
    'Afluência (m³/s)': 'inflow_cms',
    'Defluência (m³/s)': 'outflow_cms',
    'Vazão Vertida (m³/s)': 'outflow_spillway_cms',
    'Vazão Turbinada (m³/s)': 'outflow_turbine_cms',
    'Vazão Natural (m³/s)': 'outflow_natural_cms',
    'Volume Útil (%)': 'volume_pct',
    'Vazão Incremental (m³/s)': 'outflow_increment_cms',
    'Data da Medição': 'date'
}

# the query string takes dates as dd/mm/YYYY, percent-encoded
start_url = urllib.parse.quote(cfg.START.strftime('%d/%m/%Y'), safe='')
end_url = urllib.parse.quote(cfg.END.strftime('%d/%m/%Y'), safe='')

n_reservoirs = reservoirs.shape[0]
for ID in tqdm(reservoirs.index, total=n_reservoirs):

    # an existing file marks the reservoir as already downloaded
    output_file = PATH_TS / f'{ID}.csv'
    if output_file.is_file():
        continue

    # request info from the URL
    url_reservoir = f'{URL}?dropDownListEstados=&dropDownListReservatorios={ID}&dataInicial={start_url}&dataFinal={end_url}&button=Buscar'
    try:
        response = requests.get(url_reservoir, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        # a connection problem with one reservoir must not abort the whole loop
        print(f'Data for reservoir {ID} could not be retrieved: {url_reservoir} ({error})')
        continue
    if not response.ok:
        print(f'Data for reservoir {ID} could not be retrieved: {url_reservoir}')
        continue

    # parse HTML response
    soup = BeautifulSoup(response.text, 'html.parser')
    html_table = soup.find('table')
    if html_table is None:
        # without this guard `str(None)` would be passed to `pd.read_html`,
        # which raises and stops the download of the remaining reservoirs
        print(f'No data table found for reservoir {ID}: {url_reservoir}')
        continue

    # convert to pandas.DataFrame ('.' is the thousands separator, ',' the decimal mark)
    data = pd.read_html(StringIO(str(html_table)), thousands='.', decimal=',')[0]

    # translate columns and remove redundant info (the ID is already in the file name)
    data = data.rename(columns=rename_cols).drop(columns=['ID', 'name'])

    # set a date index
    data['date'] = pd.to_datetime(data['date'], format='%d/%m/%Y')
    data = data.set_index('date')

    # export: write to a temporary file and rename it afterwards, so that an
    # interrupted write never leaves a truncated CSV that later runs would skip
    tmp_file = output_file.with_name(output_file.name + '.tmp')
    data.to_csv(tmp_file)
    tmp_file.replace(output_file)

  0%|          | 0/162 [00:00<?, ?it/s]