# ResOpsMX: download data
***

**Author:** Chus Casado Rodríguez<br>
**Date:** 01-08-2024<br>

**Introduction:**<br>

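This notebook downloads the reservoir data published on the [Conagua](https://sih.conagua.gob.mx/basedatos/Presas/) website (Mexico): the catalogue of dams, which is saved both as a CSV file and as a shapefile, and the time series of every dam in that catalogue, which are saved as one CSV file per dam.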

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import requests
from pathlib import Path
from io import StringIO
from tqdm.notebook import tqdm
import yaml

## Configuration

In [2]:
# load the configuration from the YAML file
# NOTE(review): `yaml.FullLoader` accepts more than plain YAML types; if the file only
# holds scalars, lists and mappings, `yaml.safe_load` would be enough — confirm before switching
with open('config_dataset.yml', mode='r', encoding='utf8') as config_file:
    cfg = yaml.load(config_file, Loader=yaml.FullLoader)

# version read from the configuration, and base URL to which the file names are appended
VERSION = cfg['version']
URL = cfg['url']

# directories where the raw attributes and the raw time series are saved;
# they are created (together with missing parents) if they do not exist yet
PATH_DATASET = Path(cfg['paths']['dataset']['root'])
PATH_ATTR = PATH_DATASET.joinpath('raw', 'attributes')
PATH_ATTR.mkdir(parents=True, exist_ok=True)
PATH_TS = PATH_DATASET.joinpath('raw', 'time_series')
PATH_TS.mkdir(parents=True, exist_ok=True)

## Catalogue of dams

In [6]:
# download catalogue of dams
# the timeout (seconds) keeps the cell from hanging forever if the server does not answer
response = requests.get(URL + '0_Catalogo_de_presas.csv', timeout=60)

# stop here if the download failed: otherwise the following cells would run
# without `dams`, or with a stale `dams` left in the kernel by a previous run
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve the file. HTTP status code: {response.status_code}")

# format table of attributes (the body of the response is decoded as latin-1)
data_str = response.content.decode('latin-1')
dams = pd.read_csv(StringIO(data_str))

# remove leading/trailing blanks from the headers (e.g. 'Clave '), so that the
# renaming below does not depend on stray whitespace in the source file
dams.columns = [col.strip() for col in dams.columns]

# rename columns
rename_cols = {'Número': 'dam_ID',
               'Clave': 'key',
               'Nombre de la presa': 'name',
               'Latitud': 'lat',
               'Longitud': 'lon',
               'Altitud': 'Z',
               'Estado': 'state',
               'Municipio': 'city',
               'Identificador de la \ncuenca de disponibilidad': 'cat_ID',
               'Cuenca de disponibilidad': 'catchment',
               'Número de la \nregión hidrológica': 'reg_ID',
               'Región hidrológica': 'region'}
dams = dams.rename(columns=rename_cols).set_index('dam_ID', drop=True)

# convert into GeoDataFrame: one point per dam built from longitude (x) and latitude (y)
dams = gpd.GeoDataFrame(dams,
                        geometry=gpd.points_from_xy(dams['lon'], dams['lat']),
                        crs='epsg:4326')

# export with columns renamed: CSV without the geometry column, and shapefile with it
dams.drop(columns='geometry').to_csv(PATH_ATTR / 'dams.csv', encoding='utf-8')
dams.to_file(PATH_ATTR / 'dams.shp', encoding='utf-8')
print("File downloaded successfully!")

File downloaded successfully!


## Time series

In [None]:
# Spanish headers of the time series files -> column names used in the exported files
# (defined once, outside the loop, because the mapping is the same for every dam)
rename_cols = {'Fecha': 'date',
               'Elevación(msnm)': 'Z_MASL',
               'Almacenamiento(hm³)': 'V_MCM',
               'Area(ha)': 'A_HA',
               'Extracciones por Obra de Toma(m³/s)': 'SLUICE_CMS',
               'Extracciones por Vertedor(m³/s)': 'SPILL_CMS',
               'Evaporación(mm)': 'EVAP_MM',
               'Precipitación(mm)': 'PRECIP_MM'}

# keys of the dams to download: missing keys are skipped (they would request 'nan.csv')
# and repeated keys are requested only once
keys = dams['key'].dropna().drop_duplicates().sort_values()

# keys whose file could not be downloaded, with the error that was raised
failed = {}

for key in tqdm(keys):

    # download data; the first 7 rows of the file are skipped
    # NOTE(review): no `encoding` is passed, so pandas assumes UTF-8, whereas the
    # catalogue is decoded as latin-1 — confirm the encoding of the time series files
    try:
        data = pd.read_csv(URL + f'{key}.csv', skiprows=7)
    except OSError as error:
        # network and HTTP errors (urllib's URLError/HTTPError derive from OSError):
        # keep track of the failure and carry on, so that one missing file does not
        # abort the download of all the remaining dams
        failed[key] = error
        continue
    data.columns = [col.strip() for col in data.columns]

    # rename columns and set date as index
    data = data.rename(columns=rename_cols)
    data['date'] = pd.to_datetime(data['date'], format='%Y/%m/%d')
    data = data.set_index('date', drop=True)

    # convert data to float; a lone '-' in a cell is treated as a missing value
    data = data.replace('-', np.nan).astype(float)

    # export
    data.to_csv(PATH_TS / f'{key}.csv')

# report the dams that could not be downloaded, so that failures do not go unnoticed
if failed:
    print(f'{len(failed)} time series could not be downloaded:')
    for failed_key, error in failed.items():
        print(f'  {failed_key}: {error}')