In [2]:
import pandas as pd

import os
import pathlib

# This notebook adds hydrological (discharge) data to the meteorological data of each catchment.
# It reads meteorological data from CSV files and hydrological data from a Parquet file,
# selects the discharge column matching the catchment ID, and joins it to the meteorological data on the date index.

In [6]:
# Path to the Parquet file holding the hydrological (discharge) data
flow_path = '/path/to/flows_from_1961.parquet'

# Root directory containing the meteorological data CSV files
root = "/path/to/timeseries"
root_path = pathlib.Path(root)

# Collect the regular files directly inside the root directory (no recursion).
# iterdir() yields entries in arbitrary order, so sort them to get a
# reproducible processing order across runs and machines.
files = sorted(item for item in root_path.iterdir() if item.is_file())

In [None]:
# Verbose mode for printing messages about skipped files
verbose = True

# Read the hydrological data.
# NOTE(review): assumes one column per catchment ID and an index that lines up
# with the parsed 'date' index of the meteorological files -- confirm.
flows = pd.read_parquet(flow_path)

# Process each meteorological data file
for file in files:
    meteo = pd.read_csv(file)
    meteo['date'] = pd.to_datetime(meteo['date'])
    meteo = meteo.set_index('date')

    # Skip files that already have a discharge column, so that re-running the
    # cell does not try to merge the same file twice
    if 'discharge_vol' in meteo:
        if verbose:
            print(f"Skipped {file} because it already has a discharge column")
        continue

    # Extract the catchment ID from the file name.
    # NOTE(review): fixed-position slice; assumes the ID is the 4 characters
    # that precede a 22-character file-name tail -- confirm the naming scheme.
    place_id = str(file)[-26:-22]

    # Without a matching flow column there is nothing to merge; report it and
    # move on instead of aborting the whole batch with a KeyError
    if place_id not in flows.columns:
        print(f"Skipped {file} because catchment {place_id!r} is not in the flow data")
        continue

    # Get the hydrological data for the catchment
    flow = flows[[place_id]].rename(columns={place_id: 'discharge_vol'})

    # Merge the meteorological and hydrological data on their indexes;
    # validate raises if either side has duplicate index values
    hydrometeo = meteo.join(flow, validate='one_to_one')

    # Save the merged data back to the CSV file (overwrites the input file)
    hydrometeo.to_csv(file)

In [None]:
# Display the meteorological and merged data tables.
# A cell only renders its final bare expression, so both tables are passed to
# display() (a built-in in IPython/Jupyter kernels) to show each of them.
display(meteo, hydrometeo)