In [1]:
import pandas as pd
import geopandas as geopd

import numpy as np

import os
import pathlib

from tqdm.notebook import tqdm

from timeseries_utils import read_daily_timeseries_csv

In [2]:
# Column order of the per-gauge output files; each entry is expected to match
# the stem of one attribute file found under src_root.
order_list = [
    'discharge_vol',
    'discharge_spec',
    'precipitation',
    'pet',
    'pe_era5_land',
    'pet_fmi',
    'snow_evaporation',
    'swe',
    'swe_cci3-1',
    'snow_depth',
    'temperature_gmin',
    'temperature_min',
    'temperature_mean',
    'temperature_max',
    'humidity_rel',
    'radiation_global',
]

# When True, the combined 'pet' series is rebuilt from its source attributes
# before the per-gauge files are written.
calculate_pet = False

# Reduced column set for the alternative ("bilal") dataset:
#order_list = ['discharge_vol', 'discharge_spec', 'precipitation', 'snow_depth', 'temperature_gmin', 'temperature_min', 'temperature_mean', 'temperature_max', 'humidity_rel', 'radiation_global']

# Input catchment layer, per-attribute source directory and per-gauge
# destination directory.
catchments_path = "/media/iielse/T9/CAMELS-FI/data/CAMELS_FI_catchments.gpkg"
src_root = "/media/iielse/T9/CAMELS-FI/data/timeseries_by_attribute"
dst_root = "/media/iielse/T9/CAMELS-FI/data/timeseries"
catchments = geopd.read_file(catchments_path, layer='catchments')

# The bare string below is never assigned or used; it only serves as a
# block comment holding the alternative ("bilal") configuration.
"""
# for bilal
catchments_path = "/home/iiro/Documents/DIWA/bilal/watersheds.gpkg"
src_root = "/home/iiro/Documents/DIWA/bilal/timeseries_by_attribute"
dst_root = "/home/iiro/Documents/DIWA/bilal/timeseries"
catchments = geopd.read_file(catchments_path, layer='watersheds')
"""

root_path = pathlib.Path(src_root)
# Collect every regular file directly inside the source directory
# (sub-directories are skipped, nothing is searched recursively).
files = list(filter(pathlib.Path.is_file, root_path.iterdir()))

### Combining snow evaporation, FMI PET and ERA5-Land PET depending on whether snow depth > 0

In [3]:
# Some of the attribute files contain extra catchments (ones whose flow time
# series are too short), so the gauge list is taken from the catchments layer,
# which is the definitely correct source.
gauges = list(catchments['gauge_id'])

if calculate_pet:
    # Map attribute name (file stem) -> path. If a stem occurs more than once
    # the later file wins, exactly as when scanning the list sequentially.
    files_by_attribute = {file.stem: file for file in files}

    def _read_for_gauges(*attribute_names):
        """Read the first available attribute file, restricted to the gauge columns.

        The given stems are tried in order. Raises FileNotFoundError when none
        of them exists among ``files``, instead of leaving the variable
        unassigned and failing later with an unrelated NameError.
        """
        for attribute in attribute_names:
            if attribute in files_by_attribute:
                return read_daily_timeseries_csv(files_by_attribute[attribute])[gauges]
        raise FileNotFoundError(
            f"no attribute file with stem in {attribute_names} found in {src_root}"
        )

    pet_fmi = _read_for_gauges('pet_fmi')
    # The ERA5-Land series is listed as 'pe_era5_land' in order_list, while this
    # cell historically looked for 'pet_era5_land'; accept both spellings,
    # preferring the historical one.
    pet_era5 = _read_for_gauges('pet_era5_land', 'pe_era5_land')
    snow_e = _read_for_gauges('snow_evaporation')
    # snow depth has some extra gauges; they are dropped by the gauge selection
    snow_depth = _read_for_gauges('snow_depth')

    # np.where below works positionally, so these three frames must line up
    # exactly. pet_fmi is only used through fillna, which aligns on labels.
    assert pet_era5.columns.equals(snow_e.columns), "columns don't match"
    assert pet_era5.columns.equals(snow_depth.columns), "columns don't match"
    assert pet_era5.index.equals(snow_e.index), "indices don't match"
    assert pet_era5.index.equals(snow_depth.index), "indices don't match"

    # Snow evaporation is used for snowy days (snow depth > 0); every other
    # cell, including those with missing snow depth, starts out as NaN.
    pet = pd.DataFrame(np.where(snow_depth > 0, snow_e, np.nan), index=pet_era5.index, columns=pet_era5.columns)
    # Filling non-snowy observations with FMI pet,
    pet = pet.fillna(pet_fmi)
    # then the remaining gaps with era5-land pet
    pet = pet.loc[:, :].fillna(pet_era5)

    # Limiting the valid range to 1981-2023. The original note only says that
    # ERA5 has very different values (presumably from the other sources before
    # 1981 - TODO confirm).
    pet = pet.loc["1981":"2023"]

    # Store the combined series next to the other attribute files and make sure
    # it is part of the file list used for the per-gauge export.
    pet_path = pathlib.Path(src_root) / 'pet.csv'
    pet.to_csv(pet_path)
    if pet_path not in files:
        files.append(pet_path)
    

In [5]:
# Read every attribute file once, up front, instead of once per gauge: the
# parsed table does not depend on the gauge, so re-reading it inside the loop
# multiplied the I/O and parsing cost by the number of gauges.
# Trade-off: all attribute tables (limited to 1961-2023) are held in memory
# at the same time.
attribute_frames = [
    (file.stem, read_daily_timeseries_csv(file).loc["1961":"2023"])
    for file in files
]

# Make sure the destination directory exists before writing into it.
os.makedirs(dst_root, exist_ok=True)

for gauge in tqdm(gauges):
    # One single-column frame per attribute, named after the attribute
    # (the stem of the file it came from).
    columns = [
        frame[[gauge]].rename({gauge: attribute}, axis=1)
        for attribute, frame in attribute_frames
    ]
    output = pd.concat(columns, axis=1)
    # reordering the columns; attributes not listed in order_list are dropped,
    # and a listed attribute without a source file raises a KeyError
    output = output[order_list]
    dst_path = os.path.join(dst_root, f"CAMELS_FI_hydromet_timeseries_{gauge}_19610101-20231231.csv")
    output.to_csv(dst_path)

  0%|          | 0/320 [00:00<?, ?it/s]