# Metadata

Author: Thiago Nascimento (thiago.nascimento@eawag.ch)

This notebook is used to derive all needed metadata for CAMELS-CH-Chem.

## Requirements
**Python:**

* Python>=3.6
* Jupyter
* geopandas=0.10.2
* numpy
* os
* pandas=2.1.3
* tqdm

Check the Github repository for an environment.yml (for conda environments) or requirements.txt (pip) file.

**Files:**

* All original files. 


**Directory:**

* Clone the GitHub directory locally
* Place any third-party data in their respective directories.
* ONLY update the "PATH" variable in the section "Configurations" with the relative path to your local copy of the CAMELS-CH-Chem repository directory.


## References
* 
## Observations
* None

# Import modules

In [None]:
import pandas as pd
import numpy as np
import tqdm as tqdm
import os
import glob
import warnings
import geopandas as gpd
import os
from pathlib import Path

# Configurations

In [None]:
# Only editable variables:
# Relative path to your local directory
PATH = "../.."

# Suppress all warnings
# NOTE(review): this silences every warning category for the whole session,
# including pandas/geopandas deprecation and chained-assignment warnings.
warnings.filterwarnings("ignore")

# Path to where the data are stored
# NOTE(review): both paths below are absolute, machine-specific Windows paths, so
# they have to be edited as well before the notebook can run on another machine.
path_data = r"C:\Users\nascimth\Documents\data\CAMELS_CH_Chem\data"
path_isot = Path(r"C:\Users\nascimth\Documents\data\CAMELS_CH_Chem\data\CH_IRP\isotopes_streamflow\isotopes_streamflow\\")


* #### The users should NOT change anything in the code below here. 

In [None]:
# Non-editable variables:
# Set the directory:
# Every relative path used further down (e.g. the "results\..." outputs) is
# resolved against this working directory.
os.chdir(PATH)

# Import data

In [None]:
# CAMELS-CH-Chem station network: sheet "all_5" of the stations workbook.
# The gauge id is kept as a regular column here; the index is set in a later cell.
network_camels_ch_chem = pd.read_excel(
    f"{path_data}/CAMELS_CH_chem_stations_short_v3.xlsx",
    sheet_name="all_5",
)
network_camels_ch_chem.columns

In [None]:
# CAMELS-CH topographic attributes (first file line skipped, ";"-separated,
# latin-1 encoded), indexed by gauge_id.
network_camels_ch = pd.read_csv(
    f"{path_data}/CAMELS_CH_topographic_attributes.csv",
    sep=";",
    skiprows=1,
    encoding="latin-1",
).set_index("gauge_id")
network_camels_ch

In [None]:
# CAMELS-CH-Chem station network with the Q weighting: sheet "all_3" of the same
# workbook, indexed by hydro_id.
network_camels_ch_chem_withq = pd.read_excel(
    f"{path_data}/CAMELS_CH_chem_stations_short_v3.xlsx",
    sheet_name="all_3",
).set_index("hydro_id")
network_camels_ch_chem_withq.columns

In [None]:
# Display the area_camels and area_bafu columns of the station table side by side.
network_camels_ch_chem[["area_camels", "area_bafu"]]

In [None]:
# Load the world shapefile dataset provided by GeoPandas
# NOTE(review): `gpd.datasets` was deprecated in later GeoPandas releases and is no
# longer available in GeoPandas 1.0, so this call depends on the pinned older version.
gdf = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

In [None]:
# Read the GVE catchments shapefile.
# The path suffix is a raw string: "\s" and "\G" are not valid escape sequences,
# so a plain literal triggers an invalid-escape warning on recent Python versions.
shapefiles_gve = gpd.read_file(path_data + r'\shapefile_gve\GVE_Catchments.shp')
shapefiles_gve

In [None]:
# Read the countries shapefile.
# The path suffix is a raw string: "\s" and "\c" are not valid escape sequences,
# so a plain literal triggers an invalid-escape warning on recent Python versions.
countries = gpd.read_file(path_data + r'\shps\countries.shp')
countries

In [None]:
# Read the shapefile of Switzerland.
# The path suffix is a raw string: "\s" is not a valid escape sequence, so a
# plain literal triggers an invalid-escape warning on recent Python versions.
ch_shapefile = gpd.read_file(path_data + r'\shps\switzerland.shp')
ch_shapefile

In [None]:
# Display the current column names of the station table before they are renamed.
network_camels_ch_chem.columns

In [None]:
# Replace all 31 column labels at once. The labels are applied by position, so
# the list has to follow the column order of the "all_5" sheet exactly.
network_camels_ch_chem = network_camels_ch_chem.set_axis(
    [
        'gauge_id', 'sensor_id', 'nawaf_id', 'nawat_id', 'isot_id',
        'gauge_name', 'water_body_name', 'gauge_easting', 'gauge_northing', 'area',
        'area_bafu', 'Q', 'level', 'remarks', 'temperature', 'pH',
        'conductivity', 'oxygen concentration', ' hydrogen-2', 'oxygen-18',
        'gauge_name_nawaf', 'gauge_easting_nawaf', 'gauge_northing_nawaf', 'area_nawaf',
        'foen_nawaf_dist', 'gauge_name_nawat', 'gauge_easting_nawat', 'gauge_northing_nawat',
        'area_nawat', 'foen_nawat_dist', 'remarks.1',
    ],
    axis=1,
)

In [None]:
# Keep only the station metadata columns (identifiers, names, coordinates, areas,
# distances and both remarks columns); the other columns are left out.
network_camels_ch_chem = network_camels_ch_chem[[
    'gauge_id', 'sensor_id', 'nawaf_id', 'nawat_id', 'isot_id',
    'gauge_name', 'water_body_name',
    'gauge_easting', 'gauge_northing', 'area',
    'Q', 'level', 'remarks',
    'gauge_name_nawaf', 'gauge_easting_nawaf', 'gauge_northing_nawaf',
    'area_nawaf', 'foen_nawaf_dist',
    'gauge_name_nawat', 'gauge_easting_nawat', 'gauge_northing_nawat',
    'area_nawat', 'foen_nawat_dist',
    'remarks.1',
]]
network_camels_ch_chem

In [None]:
# Merge the two remarks columns: gaps in "remarks" are filled from "remarks.1",
# after which the helper column "remarks.1" is discarded.
network_camels_ch_chem = (
    network_camels_ch_chem
    .assign(remarks=lambda table: table['remarks'].fillna(table['remarks.1']))
    .drop(columns=['remarks.1'])
)

network_camels_ch_chem

In [None]:
# Move "remarks" to the last position and use gauge_id as the index from here on.
network_camels_ch_chem = network_camels_ch_chem.loc[:, [
    'gauge_id', 'sensor_id', 'nawaf_id', 'nawat_id', 'isot_id',
    'gauge_name', 'water_body_name', 'gauge_easting', 'gauge_northing',
    'area', 'Q', 'level',
    'gauge_name_nawaf', 'gauge_easting_nawaf', 'gauge_northing_nawaf',
    'area_nawaf', 'foen_nawaf_dist',
    'gauge_name_nawat', 'gauge_easting_nawat', 'gauge_northing_nawat',
    'area_nawat', 'foen_nawat_dist',
    'remarks',
]].set_index("gauge_id")

In [None]:
# Add the gauge longitude/latitude from the CAMELS-CH attributes and the Q
# corrector factor for NAWA TREND from the Q-weighted sheet. Each source series
# is matched to the station table through its index.
network_camels_ch_chem = network_camels_ch_chem.assign(
    gauge_lon=network_camels_ch['gauge_lon'],
    gauge_lat=network_camels_ch['gauge_lat'],
    q_nawat_corrector=network_camels_ch_chem_withq["Q_weighting (catchment areas from CAMELS_CH)"],
)

# Show the table to check the result
network_camels_ch_chem

In [None]:
# Display the column names of the Q-weighted station sheet.
network_camels_ch_chem_withq.columns

In [None]:
# Reorder the columns so that lon/lat follow the easting/northing pair and the
# Q corrector sits just before "remarks".
network_camels_ch_chem = network_camels_ch_chem.loc[:, [
    'sensor_id', 'nawaf_id', 'nawat_id', 'isot_id',
    'gauge_name', 'water_body_name',
    'gauge_easting', 'gauge_northing', "gauge_lon", "gauge_lat",
    'area', 'Q', 'level',
    'gauge_name_nawaf', 'gauge_easting_nawaf', 'gauge_northing_nawaf',
    'area_nawaf', 'foen_nawaf_dist',
    'gauge_name_nawat', 'gauge_easting_nawat', 'gauge_northing_nawat',
    'area_nawat', 'foen_nawat_dist',
    'q_nawat_corrector', 'remarks',
]]

network_camels_ch_chem

### Add the information about CH-IRP gauges

In [None]:
# Integer gauge ids taken from the "<id>.isoStrm" files in the isotope folder
# (files whose stem is not purely numeric are ignored).
gauge_ids_list = [
    int(iso_file.stem)
    for iso_file in path_isot.glob("*.isoStrm")
    if iso_file.stem.isdigit()
]
# Identity lookup (id -> id): gauges without an isotope file map to NaN.
gauge_series = pd.Series(data=gauge_ids_list, index=gauge_ids_list)
network_camels_ch_chem["chirp_id"] = network_camels_ch_chem.index.map(gauge_series)

In [None]:
# Display only the stations whose gauge id was found among the CH-IRP isotope files.
network_camels_ch_chem[~network_camels_ch_chem.index.map(gauge_series).isna()]

In [None]:
# Display the gauge ids collected from the isotope file names.
gauge_ids_list

In [None]:
# Not included in this version of CAMELS-CH-Chem
# (bare tuple expression: it only echoes the three ids as the cell output)
2319, 2409, 2491

In [None]:
# Reorder the columns so that chirp_id sits with the other identifier columns.
network_camels_ch_chem = network_camels_ch_chem.loc[:, [
    'sensor_id', 'nawaf_id', 'nawat_id', 'isot_id', 'chirp_id',
    'gauge_name', 'water_body_name',
    'gauge_easting', 'gauge_northing', "gauge_lon", "gauge_lat",
    'area', 'Q', 'level',
    'gauge_name_nawaf', 'gauge_easting_nawaf', 'gauge_northing_nawaf',
    'area_nawaf', 'foen_nawaf_dist',
    'gauge_name_nawat', 'gauge_easting_nawat', 'gauge_northing_nawat',
    'area_nawat', 'foen_nawat_dist',
    'q_nawat_corrector', 'remarks',
]]

network_camels_ch_chem

In [None]:
# Write the gauge metadata table (gauge_id index included) as UTF-8 CSV; the
# relative path is resolved against the working directory set via PATH.
network_camels_ch_chem.to_csv(r"results\Dataset\gauges_metadata\camels_ch_chem_gauges_metadata.csv", encoding='utf-8')

## Adjust the shapefile

In [None]:
# Catchment polygons from the original CAMELS-CH dataset
CAMELS_CH_catchments = gpd.read_file(f"{path_data}/shps/CAMELS_CH_catchments.shp")
CAMELS_CH_catchments

In [None]:
# Keep only the CAMELS-CH catchments whose gauge is part of CAMELS-CH-Chem.
# The subset is indexed by its OWN gauge_id (cast to int) rather than by
# overwriting the index with network_camels_ch_chem.index: a positional
# overwrite pairs polygons with the wrong gauge as soon as the shapefile row
# order differs from the order of the metadata table.
catchment_gauge_ids = CAMELS_CH_catchments['gauge_id'].astype(int)
CAMELS_CH_Chem_catchments = (
    CAMELS_CH_catchments
    .assign(gauge_id=catchment_gauge_ids)
    .loc[catchment_gauge_ids.isin(network_camels_ch_chem.index.tolist())]
    .set_index("gauge_id")
)

# Copy the gauge attributes from the metadata table; every assignment is
# matched on the gauge_id index, so each polygon receives its own gauge's values.
for attribute in ["sensor_id", "nawaf_id", "nawat_id", "isot_id", "chirp_id",
                  "gauge_name", "water_body_name",
                  "gauge_easting", "gauge_northing", "gauge_lon",
                  "gauge_lat", "area"]:
    CAMELS_CH_Chem_catchments[attribute] = network_camels_ch_chem[attribute]

# Final column order; "geometry" is kept last.
CAMELS_CH_Chem_catchments = CAMELS_CH_Chem_catchments[["sensor_id", "nawaf_id", "nawat_id", "isot_id", "chirp_id",
                                                       "gauge_name", "water_body_name", 
                                                       "gauge_easting", "gauge_northing", "gauge_lon",
                                                       "gauge_lat", "area", "geometry"
                                                       ]]

## Compute the area outside Switzerland

In [None]:
# Swiss projected CRS used for the area computation
swiss_crs = "EPSG:2056"

# Switzerland boundary taken from the Natural Earth low-resolution dataset that
# ships with GeoPandas, reprojected to the Swiss CRS
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
switzerland = world[world["name"] == "Switzerland"].to_crs(swiss_crs)

# Catchments reprojected to the same CRS so both layers share the same units
catchment_camels = CAMELS_CH_Chem_catchments.to_crs(swiss_crs)

def compute_percentage_inside(row):
    """Return the percentage of a catchment's area that lies inside Switzerland.

    Parameters
    ----------
    row : object with a ``geometry`` attribute
        One catchment record; its geometry is intersected with the first
        geometry of the module-level ``switzerland`` layer.

    Returns
    -------
    int or float
        0 when the intersection is empty, otherwise
        ``intersection area / catchment area * 100``.
    """
    swiss_polygon = switzerland.geometry.iloc[0]
    part_inside = row.geometry.intersection(swiss_polygon)
    return 0 if part_inside.is_empty else (part_inside.area / row.geometry.area) * 100

# Apply function to each row in the dataframe
# (axis=1 hands every catchment row to compute_percentage_inside; the returned
# values are percentages and are stored in a new column)
catchment_camels["percentage_inside_Switzerland"] = catchment_camels.apply(compute_percentage_inside, axis=1)

In [None]:
# Display the CAMELS-CH-Chem catchment table.
CAMELS_CH_Chem_catchments

In [None]:
# Attach the percentage of each catchment inside Switzerland to both the
# metadata table and the catchment layer (matched on the index).
network_camels_ch_chem["area_swiss_perc"] = catchment_camels.percentage_inside_Switzerland
CAMELS_CH_Chem_catchments["area_swiss_perc"] = catchment_camels.percentage_inside_Switzerland

# Place area_swiss_perc right after "area" in both tables.
network_camels_ch_chem = network_camels_ch_chem[['sensor_id', 'nawaf_id', 'nawat_id', 'isot_id', 'chirp_id', 'gauge_name',
       'water_body_name', 'gauge_easting', 'gauge_northing', 'gauge_lon',
       'gauge_lat', 'area', 'area_swiss_perc', 'Q', 'level', 'gauge_name_nawaf',
       'gauge_easting_nawaf', 'gauge_northing_nawaf', 'area_nawaf',
       'foen_nawaf_dist', 'gauge_name_nawat', 'gauge_easting_nawat',
       'gauge_northing_nawat', 'area_nawat', 'foen_nawat_dist',
       'q_nawat_corrector', 'remarks']]

CAMELS_CH_Chem_catchments = CAMELS_CH_Chem_catchments[['sensor_id', 'nawaf_id', 'nawat_id', 'isot_id', 'chirp_id', 'gauge_name',
       'water_body_name', 'gauge_easting', 'gauge_northing', 'gauge_lon',
       'gauge_lat', 'area', 'area_swiss_perc', 'geometry']]

# Export both products. The shapefile path is a raw string: "\D", "\s" and "\c"
# are not valid escape sequences, so a plain literal triggers an invalid-escape
# warning on recent Python versions.
network_camels_ch_chem.to_csv(r"results\Dataset\gauges_metadata\camels_ch_chem_gauges_metadata.csv", encoding='utf-8')
CAMELS_CH_Chem_catchments.to_file(r"results\Dataset\shapefiles\camels_ch_del\camels_ch_chem_catchment_boundaries.shp")

## Different catchment boundaries

In [None]:
# Reload the exported CAMELS-CH-Chem catchment boundaries, indexed by gauge_id.
camels_ch_chem_catchment_boundaries = gpd.read_file(
    r"results\Dataset\shapefiles\camels_ch_del\camels_ch_chem_catchment_boundaries.shp"
).set_index("gauge_id")
camels_ch_chem_catchment_boundaries

- NAWA FRACHT

In [None]:
# Start from a copy of the Chem_naduf_EZG_1 polygons, indexed by gauge_id.
Chem_naduf_EZG_1 = gpd.read_file(path_data+r"\shps\Chem_naduf_EZG_1.shp")
camels_ch_chem_catchment_boundaries_nawaf = Chem_naduf_EZG_1.copy().set_index("gauge_id")

# Take over the attribute fields from the CAMELS-CH-Chem boundaries (matched on
# gauge_id), in the same order as they appear in the final layer.
nawaf_attribute_fields = ['sensor_id', 'nawaf_id', 'nawat_id', 'gauge_name',
                          'water_body', 'gauge_east', 'gauge_nort', 'gauge_lon',
                          'gauge_lat', 'area', 'area_swiss']
for field in nawaf_attribute_fields:
    camels_ch_chem_catchment_boundaries_nawaf[field] = camels_ch_chem_catchment_boundaries[field]

# Keep the attribute fields plus the geometry, nothing else.
camels_ch_chem_catchment_boundaries_nawaf = camels_ch_chem_catchment_boundaries_nawaf[
    nawaf_attribute_fields + ['geometry']
]

In [None]:
# Gauges that have a nawaf_id, with gauge 2243 left out of the selection.
valid_indices = (
    camels_ch_chem_catchment_boundaries
    .loc[camels_ch_chem_catchment_boundaries.nawaf_id.notna()]
    .index
    .difference([2243])
)

# Restrict the NAWA FRACHT layer to those gauges.
camels_ch_chem_catchment_boundaries_nawaf = camels_ch_chem_catchment_boundaries_nawaf.loc[valid_indices]

In [None]:
# Display the CAMELS-CH-Chem catchment boundaries table.
camels_ch_chem_catchment_boundaries

In [None]:
# Gauge 2243 was excluded from the subset above; add it back using its row from
# the CAMELS-CH-Chem boundaries, i.e. with that layer's geometry and attributes.
camels_ch_chem_catchment_boundaries_nawaf.loc[2243] = camels_ch_chem_catchment_boundaries.loc[2243]

camels_ch_chem_catchment_boundaries_nawaf

In [None]:
# Export the NAWA FRACHT boundaries. Raw string: the original literal mixed
# escaped backslashes with an invalid "\s" escape, which triggers an
# invalid-escape warning on recent Python versions. The resulting path is identical.
camels_ch_chem_catchment_boundaries_nawaf.to_file(r"results\Dataset\shapefiles\nawa_fracht_del\camels_ch_chem_catchment_boundaries_nawaf.shp")

## NAWA TREND catchment boundaries

In [None]:
# Start from a copy of the Chem_nawa_EZG_1 polygons, indexed by gauge_id.
Chem_nawa_EZG_1 = gpd.read_file(path_data+r"\shps\Chem_nawa_EZG_1.shp")
camels_ch_chem_catchment_boundaries_nawat = Chem_nawa_EZG_1.copy().set_index("gauge_id")

# Take over the attribute fields from the CAMELS-CH-Chem boundaries (matched on
# gauge_id), in the same order as they appear in the final layer.
nawat_attribute_fields = ['sensor_id', 'nawaf_id', 'nawat_id', 'gauge_name',
                          'water_body', 'gauge_east', 'gauge_nort', 'gauge_lon',
                          'gauge_lat', 'area', 'area_swiss']
for field in nawat_attribute_fields:
    camels_ch_chem_catchment_boundaries_nawat[field] = camels_ch_chem_catchment_boundaries[field]

# Keep the attribute fields plus the geometry, nothing else.
camels_ch_chem_catchment_boundaries_nawat = camels_ch_chem_catchment_boundaries_nawat[
    nawat_attribute_fields + ['geometry']
]

In [None]:
# Gauges that have a nawat_id, with gauges 2215 and 2634 left out of the selection.
valid_indices = (
    camels_ch_chem_catchment_boundaries
    .loc[camels_ch_chem_catchment_boundaries.nawat_id.notna()]
    .index
    .difference([2215, 2634])
)

# Restrict the NAWA TREND layer to those gauges.
camels_ch_chem_catchment_boundaries_nawat = camels_ch_chem_catchment_boundaries_nawat.loc[valid_indices]

In [None]:
# Gauges 2215 and 2634 were excluded from the subset above; add them back using
# their rows from the CAMELS-CH-Chem boundaries, i.e. with that layer's geometry.
camels_ch_chem_catchment_boundaries_nawat.loc[2215] = camels_ch_chem_catchment_boundaries.loc[2215]
camels_ch_chem_catchment_boundaries_nawat.loc[2634] = camels_ch_chem_catchment_boundaries.loc[2634]

camels_ch_chem_catchment_boundaries_nawat

In [None]:
# Export the NAWA TREND boundaries. Raw string: the original literal mixed
# escaped backslashes with an invalid "\s" escape, which triggers an
# invalid-escape warning on recent Python versions. The resulting path is identical.
camels_ch_chem_catchment_boundaries_nawat.to_file(r"results\Dataset\shapefiles\nawa_trend_del\camels_ch_chem_catchment_boundaries_nawat.shp")

## Adjust the coordinates

In [None]:
# Network CAMELS-CH
network_camels_ch_chem = pd.read_csv(r"results\Dataset\gauges_metadata\camels_ch_chem_gauges_metadata.csv", encoding='utf-8', sep= ",")
network_camels_ch_chem.set_index("gauge_id", inplace=True)
network_camels_ch_chem

In [None]:
# Easting columns to convert
columns_to_modify = [
    "gauge_easting",
    "gauge_easting_nawat",
    "gauge_easting_nawaf"
]

# Add 2,000,000 to every easting (this equals the LV03 -> LV95 easting offset;
# for 6-digit values it gives the same result as putting a "2" in front).
# The shift is done arithmetically and only for values still below the offset,
# so re-running the cell does not shift already converted values again.
LV95_EASTING_OFFSET = 2_000_000

for col in columns_to_modify:
    eastings = network_camels_ch_chem[col].round()
    needs_shift = eastings.notna() & (eastings < LV95_EASTING_OFFSET)
    shifted = eastings.where(~needs_shift, eastings + LV95_EASTING_OFFSET)
    # Columns without missing values are stored as integers; columns with
    # missing values stay float so that NaN can be represented.
    if shifted.notna().all():
        shifted = shifted.astype("int64")
    network_camels_ch_chem[col] = shifted

network_camels_ch_chem

In [None]:
# Northing columns to convert
columns_to_modify = [
    "gauge_northing",
    "gauge_northing_nawat",
    "gauge_northing_nawaf"
]

# Add 1,000,000 to every northing (this equals the LV03 -> LV95 northing offset).
# Putting a "1" in front of the number is only equivalent for 6-digit values: a
# 5-digit northing such as 92145 would become 192145 instead of 1092145.
# The shift is only applied to values still below the offset, so re-running the
# cell does not shift already converted values again.
LV95_NORTHING_OFFSET = 1_000_000

for col in columns_to_modify:
    northings = network_camels_ch_chem[col].round()
    needs_shift = northings.notna() & (northings < LV95_NORTHING_OFFSET)
    shifted = northings.where(~needs_shift, northings + LV95_NORTHING_OFFSET)
    # Columns without missing values are stored as integers; columns with
    # missing values stay float so that NaN can be represented.
    if shifted.notna().all():
        shifted = shifted.astype("int64")
    network_camels_ch_chem[col] = shifted

network_camels_ch_chem

The northing of station 2167 is still wrong after the transformation above, so it is inspected and corrected manually in the following cells.

In [None]:
# Display the full metadata row of station 2167.
network_camels_ch_chem.loc[[2167]]

In [None]:
# Display the current gauge_northing value of station 2167.
network_camels_ch_chem.loc[[2167], "gauge_northing"]

In [None]:
# Manually overwrite the gauge northing of station 2167, then display it.
# NOTE(review): if the source northing of this station is the 5-digit value 92145,
# adding the 1,000,000 offset gives 1092145, not 1192145 — confirm this hard-coded
# value against the official station coordinates.
network_camels_ch_chem.loc[[2167], "gauge_northing"]=1192145
network_camels_ch_chem.loc[[2167]]["gauge_northing"]

In [None]:
# Manually overwrite the NAWA TREND northing of station 2167 with the same
# hard-coded value, then display it.
# NOTE(review): confirm 1192145 against the official station coordinates.
network_camels_ch_chem.loc[[2167], "gauge_northing_nawat"]=1192145
network_camels_ch_chem.loc[[2167]]["gauge_northing_nawat"]

In [None]:
# Display the full metadata row of station 2167 after the manual correction.
network_camels_ch_chem.loc[[2167]]

In [None]:
# Overwrite the gauge metadata CSV with the table holding the adjusted coordinates.
network_camels_ch_chem.to_csv(r"results\Dataset\gauges_metadata\camels_ch_chem_gauges_metadata.csv", encoding='utf-8')

## Adjust the coordinates for the shapefiles

In [None]:
# Reload the CAMELS-CH delineation and overwrite its gauge_east / gauge_nort
# fields with the adjusted gauge_easting / gauge_northing from the metadata
# table (rows are matched on the gauge_id index).
# The path is a raw string: "\D", "\s" and "\c" are not valid escape sequences,
# so a plain literal triggers an invalid-escape warning on recent Python versions.
camels_ch_chem_catchment_boundaries = gpd.read_file(r"results\Dataset\shapefiles\camels_ch_del\camels_ch_chem_catchment_boundaries.shp")
camels_ch_chem_catchment_boundaries.set_index("gauge_id", inplace=True)
camels_ch_chem_catchment_boundaries[["gauge_east", "gauge_nort"]] = network_camels_ch_chem[["gauge_easting", "gauge_northing"]]

camels_ch_chem_catchment_boundaries

In [None]:
# Reload the NAWA FRACHT delineation and overwrite its gauge_east / gauge_nort
# fields with the adjusted gauge_easting / gauge_northing from the metadata
# table (rows are matched on the gauge_id index).
# The path is a raw string: the original literal contained invalid escape
# sequences ("\D", "\s", "\c"), which trigger a warning on recent Python versions.
camels_ch_chem_catchment_boundaries_nawaf = gpd.read_file(r"results\Dataset\shapefiles\nawa_fracht_del\camels_ch_chem_catchment_boundaries_nawaf.shp")
camels_ch_chem_catchment_boundaries_nawaf.set_index("gauge_id", inplace=True)
camels_ch_chem_catchment_boundaries_nawaf[["gauge_east", "gauge_nort"]] = network_camels_ch_chem[["gauge_easting", "gauge_northing"]]
camels_ch_chem_catchment_boundaries_nawaf

In [None]:
# Reload the NAWA TREND delineation and overwrite its gauge_east / gauge_nort
# fields with the adjusted gauge_easting / gauge_northing from the metadata
# table (rows are matched on the gauge_id index).
# The path is a raw string: the original literal contained invalid escape
# sequences ("\D", "\s", "\c"), which trigger a warning on recent Python versions.
camels_ch_chem_catchment_boundaries_nawat = gpd.read_file(r"results\Dataset\shapefiles\nawa_trend_del\camels_ch_chem_catchment_boundaries_nawat.shp")
camels_ch_chem_catchment_boundaries_nawat.set_index("gauge_id", inplace=True)
camels_ch_chem_catchment_boundaries_nawat[["gauge_east", "gauge_nort"]] = network_camels_ch_chem[["gauge_easting", "gauge_northing"]]
camels_ch_chem_catchment_boundaries_nawat

In [None]:
# Write the three delineations back to disk with the adjusted coordinates.
# Raw strings replace literals that mixed escaped backslashes with an invalid
# "\s" escape; the resulting paths are identical.
camels_ch_chem_catchment_boundaries.to_file(r"results\Dataset\shapefiles\camels_ch_del\camels_ch_chem_catchment_boundaries.shp")
camels_ch_chem_catchment_boundaries_nawaf.to_file(r"results\Dataset\shapefiles\nawa_fracht_del\camels_ch_chem_catchment_boundaries_nawaf.shp")
camels_ch_chem_catchment_boundaries_nawat.to_file(r"results\Dataset\shapefiles\nawa_trend_del\camels_ch_chem_catchment_boundaries_nawat.shp")

In [None]:
# List the gauge ids that have a sensor_id, skipping the first 80 of them.
network_camels_ch_chem[~network_camels_ch_chem.sensor_id.isna()].index.tolist()[80:]

# End