In [1]:
import pandas as pd
import os
import numpy as np


In [2]:
# Remote CSV sources read by the cells below.
# Glacier datasets served from doi.glamos.ch:
url_glaciers_list = "https://doi.glamos.ch/data/glacier_list/glacier_list.csv"
url_length_change = "https://doi.glamos.ch/data/lengthchange/lengthchange.csv"
url_volume_change = "https://doi.glamos.ch/data/volumechange/volumechange.csv"
# Four mass balance variants: observation period vs. hydrological year (fixed date),
# each with and without elevation bins.
url_mass_balance = "https://doi.glamos.ch/data/massbalance/massbalance_observation.csv" # observation period
url_mass_balance2 = "https://doi.glamos.ch/data/massbalance/massbalance_observation_elevationbins.csv" # observation period, per elevation bin
url_mass_balance3 = "https://doi.glamos.ch/data/massbalance/massbalance_fixdate.csv" # hydrological year
url_mass_balance4 = "https://doi.glamos.ch/data/massbalance/massbalance_fixdate_elevationbins.csv" # hydrological year, per elevation bin
# Weather station files (Davos, Sion) and the parameter metadata table from data.geo.admin.ch.
# The station files are exported further down as "monthly" weather data.
url_davos = "https://data.geo.admin.ch/ch.meteoschweiz.ogd-nbcn/dav/ogd-nbcn_dav_m.csv"
url_sion = "https://data.geo.admin.ch/ch.meteoschweiz.ogd-nbcn/sio/ogd-nbcn_sio_m.csv"
url_metadata = "https://data.geo.admin.ch/ch.meteoschweiz.ogd-nbcn/ogd-nbcn_meta_parameters.csv"

In [3]:
# Load the file as one text column: splitting on tabs keeps each comma-separated line
# intact, and the first 4 lines of the file are skipped.
initial_list_df = pd.read_csv(url_glaciers_list, sep='\t', skiprows=4)

# Discard the rows labelled 1 and 2, then break every remaining line into fields at the commas.
glaciers_list_df = (
    initial_list_df
    .drop(index=[1, 2])
    .loc[:, 'SWISS GLACIER LIST (AVAILABLE DATA)']
    .str.split(',', expand=True)
    .reset_index(drop=True)
)

# Promote the first remaining row to column labels and keep only the rows below it.
new_headers_c = glaciers_list_df.iloc[0]
glaciers_list_df = glaciers_list_df.iloc[1:]
glaciers_list_df.columns = new_headers_c

# Append the measurement unit to the affected column labels in a single pass.
glaciers_list_df = glaciers_list_df.rename(columns={
    'glacier area': 'glacier area (km2)',
    'survey year for glacier area': 'survey year for glacier area (yyyy)',
    'coordx': 'coordx (X_LV95)',
    'coordy': 'coordy (Y_LV95)',
})

In [4]:
# Extract and clean the length change data

# Load the file as one text column (tab separator, first 4 lines skipped).
initial_length_df = pd.read_csv(url_length_change, sep='\t', skiprows=4)

# Discard the rows labelled 1 and 2, then split every remaining line at the commas.
length_change_df = (
    initial_length_df
    .drop(index=[1, 2])
    .loc[:, 'SWISS GLACIER LENGTH CHANGE']
    .str.split(',', expand=True)
    .reset_index(drop=True)
)

# Promote the first remaining row to column labels and keep only the rows below it.
new_headers = length_change_df.iloc[0]
length_change_df = length_change_df.iloc[1:]
length_change_df.columns = new_headers

# Append the unit / date format to the column labels in a single pass.
length_change_df = length_change_df.rename(columns={
    'start date of observation': 'start date of observation (yyyy-mm-dd)',
    'end date of observation': 'end date of observation (yyyy-mm-dd)',
    'length change': 'length change (m)',
    'elevation of glacier tongue': 'elevation of glacier tongue (m asl.)',
})


In [5]:
# Volume change

# This file is parsed on runs of whitespace instead of commas.
initial_volume_df = pd.read_csv(url_volume_change, sep=r'\s+', engine='python')

# Drop the rows labelled 0, 1, 2 and 4; the first row left over carries the column labels.
volume_change_df = (
    initial_volume_df
    .drop(index=[0, 1, 2, 4])
    .reset_index(drop=True)
)
volume_change_df.columns = volume_change_df.iloc[0]
volume_change_df = volume_change_df.drop(0).drop(columns=[';'])

# Whitespace splitting spread some text over columns 15..25: glue those pieces back together.
volume_change_df['merged_15-16'] = (
    volume_change_df.iloc[:, 15:26]
    .astype(str)
    .apply(lambda pieces: ' '.join(pieces), axis=1)
)

# Remove the fragment columns (labelled None / NaN / 'Name'); the merged column takes the 'Name' label.
fragment_labels = [
    label for label in volume_change_df.columns
    if str(label) in ['None', 'NaN', 'Name'] or pd.isna(label)
]
volume_change_df = volume_change_df.drop(columns=fragment_labels)
volume_change_df = volume_change_df.rename(columns={'merged_15-16': 'Name'})

# Strip the 'None' / 'nan' placeholders that the merge wrote into the text.
volume_change_df['Name'] = (
    volume_change_df['Name']
    .str.replace('None', '', regex=False)
    .str.replace('nan', '', regex=False)
    .str.replace(' - ', ' -', regex=False)
    .str.strip()
)

# Move the merged column (currently last) to the front.
cols = volume_change_df.columns.tolist()
cols = [cols[-1], *cols[:-1]]
volume_change_df = volume_change_df[cols]

# Append units / date formats to the column labels in a single pass.
volume_change_df = volume_change_df.rename(columns={
    'date_start': 'date_start (yyyymmdd)',
    'date_end': 'date_end (yyyymmdd)',
    'A_start': 'A_start (km2)',
    'outline_start': 'outline_start (yyyy)',
    'A_end': 'A_end (km2)',
    'outline_end': 'outline_end (yyyy)',
    'dV': 'dV (km3)',
    'dh_mean': 'dh_mean (m)',
    'Bgeod': 'Bgeod (mw.e.a-1)',
    'sigma': 'sigma (mw.e.)',
    'covered': 'covered (%)',
    'rho_dv': 'rho_dv (kgm-3)',
    'Name': 'glacier name',
})



In [6]:
# Mass balance, observation period

# Load the file as one text column (tab separator, first 4 lines skipped).
initial_mass_balance_df = pd.read_csv(url_mass_balance, sep='\t', skiprows=4)

# Discard the rows labelled 1 and 2, then split every remaining line at the commas.
mass_balance_df = (
    initial_mass_balance_df
    .drop(index=[1, 2])
    .loc[:, 'SWISS GLACIER MASS BALANCE (OBSERVATION PERIOD)']
    .str.split(',', expand=True)
    .reset_index(drop=True)
)

# Columns 13..16 are joined with single spaces into one text column and then removed.
observer_labels = [mass_balance_df.columns[position] for position in (13, 14, 15, 16)]
observer_text = mass_balance_df[observer_labels[0]].astype(str)
for extra_label in observer_labels[1:]:
    observer_text = observer_text + ' ' + mass_balance_df[extra_label].astype(str)
mass_balance_df['merged columns 13,14,15,16'] = observer_text
mass_balance_df = mass_balance_df.drop(columns=observer_labels)

# Promote the first row to column labels and keep only the rows below it.
new_headers_mb = mass_balance_df.iloc[0]
mass_balance_df = mass_balance_df.iloc[1:]
mass_balance_df.columns = new_headers_mb

# Fix the label of the merged column and append units / date formats to the others.
mass_balance_df = mass_balance_df.rename(columns={
    'observer None None None': 'observer',
    'start date of observation': 'start date of observation (yyyy-mm-dd)',
    'end date of winter observation': 'end date of winter observation (yyyy-mm-dd)',
    'end date of observation': 'end date of observation (yyyy-mm-dd)',
    'winter mass balance': 'winter mass balance (mm w.e.)',
    'summer mass balance': 'summer mass balance (mm w.e.)',
    'annual mass balance': 'annual mass balance (mm w.e.)',
    'equilibrium line altitude': 'equilibrium line altitude (m asl.)',
    'accumulation area ratio': 'accumulation area ratio (%)',
    'glacier area': 'glacier area (km2)',
    'minimum elevation of glacier': 'minimum elevation of glacier (m asl.)',
    'maximum elevation of glacier': 'maximum elevation of glacier (m asl.)',
})

# Strip the 'None' placeholders that the merge wrote into the observer text.
mass_balance_df['observer'] = (
    mass_balance_df['observer']
    .str.replace('None', '', regex=False)
    .str.replace(' - ', ' -', regex=False)
    .str.strip()
)


In [7]:
# Mass balance, observation period, per elevation bin

# Load the file as one text column (tab separator, first 4 lines skipped).
initial_mass_balance_eb_df = pd.read_csv(url_mass_balance2, sep='\t', skiprows=4)

# Discard the rows labelled 1 and 2, then split every remaining line at the commas.
mass_balance_eb_df = (
    initial_mass_balance_eb_df
    .drop(index=[1, 2])
    .loc[:, 'SWISS GLACIER MASS BALANCE (OBSERVATION PERIOD) ELEVATION BINS']
    .str.split(',', expand=True)
    .reset_index(drop=True)
)

# Promote the first row to column labels and keep only the rows below it.
new_headers_mb_eb = mass_balance_eb_df.iloc[0]
mass_balance_eb_df = mass_balance_eb_df.iloc[1:]
mass_balance_eb_df.columns = new_headers_mb_eb

# Append units / date formats to the column labels in a single pass.
mass_balance_eb_df = mass_balance_eb_df.rename(columns={
    'start date of observation': 'start date of observation (yyyy-mm-dd)',
    'end date of winter observation': 'end date of winter observation (yyyy-mm-dd)',
    'end date of observation': 'end date of observation (yyyy-mm-dd)',
    'winter mass balance': 'winter mass balance (mm w.e.)',
    'summer mass balance': 'summer mass balance (mm w.e.)',
    'annual mass balance': 'annual mass balance (mm w.e.)',
    'area of elevation bin': 'area of elevation bin (km2)',
    'lower elevation of bin': 'lower elevation of bin (m asl.)',
    'upper elevation of bin': 'upper elevation of bin (m asl.)',
})


In [8]:
# Mass balance, hydrological year

# Load the file as one text column (tab separator, first 4 lines skipped).
initial_mass_balance_hy_df = pd.read_csv(url_mass_balance3, sep='\t', skiprows=4)

# Discard the rows labelled 1 and 2, then split every remaining line at the commas.
mass_balance_hy_df = (
    initial_mass_balance_hy_df
    .drop(index=[1, 2])
    .loc[:, 'SWISS GLACIER MASS BALANCE (HYDROLOGICAL YEAR)']
    .str.split(',', expand=True)
    .reset_index(drop=True)
)

# Columns 13..16 are joined with single spaces into one text column and then removed,
# which leaves fewer, cleaner columns.
observer_labels_hy = [mass_balance_hy_df.columns[position] for position in (13, 14, 15, 16)]
observer_text_hy = mass_balance_hy_df[observer_labels_hy[0]].astype(str)
for extra_label_hy in observer_labels_hy[1:]:
    observer_text_hy = observer_text_hy + ' ' + mass_balance_hy_df[extra_label_hy].astype(str)
mass_balance_hy_df['merged columns 13,14,15,16'] = observer_text_hy
mass_balance_hy_df = mass_balance_hy_df.drop(columns=observer_labels_hy)

# Promote the first row to column labels and keep only the rows below it.
new_headers_mb_hy = mass_balance_hy_df.iloc[0]
mass_balance_hy_df = mass_balance_hy_df.iloc[1:]
mass_balance_hy_df.columns = new_headers_mb_hy

# Fix the label of the merged column and append units / date formats to the others.
mass_balance_hy_df = mass_balance_hy_df.rename(columns={
    'observer None None None': 'observer',
    'start date of observation': 'start date of observation (yyyy-mm-dd)',
    'end date of winter observation': 'end date of winter observation (yyyy-mm-dd)',
    'end date of observation': 'end date of observation (yyyy-mm-dd)',
    'winter mass balance': 'winter mass balance (mm w.e.)',
    'summer mass balance': 'summer mass balance (mm w.e.)',
    'annual mass balance': 'annual mass balance (mm w.e.)',
    'equilibrium line altitude': 'equilibrium line altitude (m asl.)',
    'accumulation area ratio': 'accumulation area ratio (%)',
    'glacier area': 'glacier area (km2)',
    'minimum elevation of glacier': 'minimum elevation of glacier (m asl.)',
    'maximum elevation of glacier': 'maximum elevation of glacier (m asl.)',
})

# Strip the 'None' placeholders that the merge wrote into the observer text.
mass_balance_hy_df['observer'] = (
    mass_balance_hy_df['observer']
    .str.replace('None', '', regex=False)
    .str.replace(' - ', ' -', regex=False)
    .str.strip()
)


In [9]:
# Mass balance, hydrological year, per elevation bin

# Load the file as one text column (tab separator, first 4 lines skipped).
initial_mass_balance_hy_eb_df = pd.read_csv(url_mass_balance4, sep='\t', skiprows=4)

# Discard the rows labelled 1 and 2, then split every remaining line at the commas.
mass_balance_hy_eb_df = (
    initial_mass_balance_hy_eb_df
    .drop(index=[1, 2])
    .loc[:, 'SWISS GLACIER MASS BALANCE (HYDROLOGICAL YEAR) ELEVATION BINS']
    .str.split(',', expand=True)
    .reset_index(drop=True)
)

# Columns 11..14 are joined with single spaces into one text column and then removed,
# which leaves fewer, cleaner columns.
observer_labels_hy_eb = [mass_balance_hy_eb_df.columns[position] for position in (11, 12, 13, 14)]
observer_text_hy_eb = mass_balance_hy_eb_df[observer_labels_hy_eb[0]].astype(str)
for extra_label_hy_eb in observer_labels_hy_eb[1:]:
    observer_text_hy_eb = observer_text_hy_eb + ' ' + mass_balance_hy_eb_df[extra_label_hy_eb].astype(str)
mass_balance_hy_eb_df['merged columns 11,12,13,14'] = observer_text_hy_eb
mass_balance_hy_eb_df = mass_balance_hy_eb_df.drop(columns=observer_labels_hy_eb)

# Promote the first row to column labels and keep only the rows below it.
new_headers_mb_hy_eb = mass_balance_hy_eb_df.iloc[0]
mass_balance_hy_eb_df = mass_balance_hy_eb_df.iloc[1:]
mass_balance_hy_eb_df.columns = new_headers_mb_hy_eb

# Fix the label of the merged column and append units / date formats to the others.
mass_balance_hy_eb_df = mass_balance_hy_eb_df.rename(columns={
    'observer None None None': 'observer',
    'start date of observation': 'start date of observation (yyyy-mm-dd)',
    'end date of winter observation': 'end date of winter observation (yyyy-mm-dd)',
    'end date of observation': 'end date of observation (yyyy-mm-dd)',
    'winter mass balance': 'winter mass balance (mm w.e.)',
    'summer mass balance': 'summer mass balance (mm w.e.)',
    'annual mass balance': 'annual mass balance (mm w.e.)',
    'area of elevation bin': 'area of elevation bin (km2)',
    'lower elevation of bin': 'lower elevation of bin (m asl.)',
    'upper elevation of bin': 'upper elevation of bin (m asl.)',
})

# Strip the 'None' placeholders that the merge wrote into the observer text.
mass_balance_hy_eb_df['observer'] = (
    mass_balance_hy_eb_df['observer']
    .str.replace('None', '', regex=False)
    .str.replace(' - ', ' -', regex=False)
    .str.strip()
)


In [10]:
# Persist the cleaned glacier tables as CSV files for the later analysis steps

folder_path = "project-glaciers/data"
os.makedirs(folder_path, exist_ok=True)

# Output file name -> cleaned table; written in this order, without the row index.
cleaned_glacier_tables = {
    "glaciers_list.csv": glaciers_list_df,
    "length_change.csv": length_change_df,
    "volume_change.csv": volume_change_df,
    "mass_balance_op.csv": mass_balance_df,
    "mass_balance_op_eb.csv": mass_balance_eb_df,
    "mass_balance_hy.csv": mass_balance_hy_df,
    "mass_balance_hy_eb.csv": mass_balance_hy_eb_df,
}
for csv_name, cleaned_table in cleaned_glacier_tables.items():
    cleaned_table.to_csv(os.path.join(folder_path, csv_name), index=False)

In [11]:
# Read the semicolon-separated parameter metadata table.
# cp1252 is attempted first because it rejects the byte values it does not define; latin1 is
# the fallback because it maps every byte to a character and so can never raise a
# UnicodeDecodeError. (With latin1 tried first, no fallback encoding was ever reachable.)
# NOTE(review): the file is presumably Windows-1252 encoded — confirm against the provider docs.
# Any other failure (network, parsing) propagates, so `metadata` is never left undefined
# for the column selection below.
try:
    metadata = pd.read_csv(url_metadata, delimiter=';', encoding='cp1252')
except UnicodeDecodeError:
    metadata = pd.read_csv(url_metadata, delimiter=';', encoding='latin1')

# Keep only the columns that identify and describe each parameter.
metadata = metadata[['parameter_shortname',
                     'parameter_description_en',
                     'parameter_unit']]


In [17]:
def _read_monthly_station(url):
    """Read one semicolon-separated station file and index it by the parsed timestamp.

    The 'reference_timestamp' text (day.month.year hour:minute) is parsed into a new
    'date' column that becomes the index; the 'station_abbr' column is removed.
    """
    station = pd.read_csv(url, sep=';')
    station['date'] = pd.to_datetime(station['reference_timestamp'], format='%d.%m.%Y %H:%M')
    return station.drop(columns=['station_abbr']).set_index('date')


def _select_study_period(station):
    """Keep rows with 1914-10-01 <= date < 2025-10-01 and only the columns without gaps.

    Columns containing any missing value inside that window are dropped, then the raw
    'reference_timestamp' column is removed.
    """
    in_period = (station.index >= '1914-10-01') & (station.index < '2025-10-01')
    return station[in_period].dropna(axis=1).drop('reference_timestamp', axis=1)


sion_weather = _read_monthly_station(url_sion)
davos_weather = _read_monthly_station(url_davos)

sion_1914 = _select_study_period(sion_weather)
davos_1914 = _select_study_period(davos_weather)



In [28]:
# Keep only the metadata rows whose parameter appears as a column of the Davos table.
is_davos_parameter = metadata['parameter_shortname'].isin(davos_1914.columns)
metadata = metadata.loc[is_davos_parameter].reset_index(drop=True)

# (file name, table, write index?) — the station tables keep their date index in the file.
monthly_weather_exports = (
    ("weather_metadata.csv", metadata, False),
    ("weather_data_sion_monthly.csv", sion_1914, True),
    ("weather_data_davos_monthly.csv", davos_1914, True),
)
for csv_name, weather_table, write_index in monthly_weather_exports:
    weather_table.to_csv(os.path.join(folder_path, csv_name), index=write_index)

In [20]:
# Parameter short name -> readable column label (with unit).
weather_column_labels = {
    'rhs150m0': 'precipitation (mm)',
    'shs000m0': 'sunshine (min)',
    'ths200m0': 'temperature (°C)',
    'ths2dymx': 'max_temperature (°C)',
    'ths2dymn': 'min_temperature (°C)',
}

# Davos keeps all five parameters; Sion only the first three.
davos = (
    davos_1914[['rhs150m0', 'shs000m0', 'ths200m0', 'ths2dymx', 'ths2dymn']]
    .rename(columns=weather_column_labels)
)
sion = (
    sion_1914[['rhs150m0', 'shs000m0', 'ths200m0']]
    .rename(columns=weather_column_labels)
)

Unnamed: 0_level_0,precipitation (mm),sunshine (min),temperature (°C),max_temperature (°C),min_temperature (°C)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1914-10-01,73.9,8267.0,3.2,8.8,-1.5
1914-11-01,42.1,5888.0,-2.6,2.5,-7.0
1914-12-01,21.7,4349.0,-3.4,1.1,-8.6
1915-01-01,101.3,3537.0,-7.1,-2.4,-12.1
1915-02-01,40.8,5433.0,-5.4,0.3,-11.2
...,...,...,...,...,...
2025-05-01,157.8,7027.0,7.1,12.2,2.8
2025-06-01,126.6,13901.0,14.6,21.2,8.6
2025-07-01,192.9,7651.0,11.9,17.2,7.5
2025-08-01,139.3,12149.0,13.5,19.4,8.4


In [26]:

# Aggregate the monthly Sion data per hydrological year (October to September).
#
# Every monthly row is labelled with the 1 October that opens its hydrological year:
# Oct-Dec belong to the year starting in the same calendar year, Jan-Sep to the one that
# started the previous October. Grouping on this label, rather than on positional chunks of
# 12 rows paired with a hard-coded date range, keeps each row in the right year even if a
# month is missing, and needs no manual update when the data grows.
# A year with fewer than 12 months is aggregated from the months that are present.
sion_hy_start_year = np.where(sion.index.month >= 10, sion.index.year, sion.index.year - 1)
sion_hy_label = pd.to_datetime([f"{year}-10-01" for year in sion_hy_start_year])

# Yearly sums of precipitation and sunshine
sion_p_s = sion[['precipitation (mm)', 'sunshine (min)']].copy()
hy_data_sion_p_s = sion_p_s.groupby(sion_hy_label).sum()

# Start dates of the hydrological years present in the data
hy_dates = hy_data_sion_p_s.index

# Yearly mean temperature: monthly means weighted by the number of days in each month
sion_days_in_month = pd.Series(np.asarray(sion.index.days_in_month), index=sion.index)
sion_temp_times_days = sion['temperature (°C)'] * sion_days_in_month
sion_temp = (
    sion_temp_times_days.groupby(sion_hy_label).sum()
    / sion_days_in_month.groupby(sion_hy_label).sum()
).round(1).to_frame(name='mean temperature (°C)')

hy_data_sion = pd.concat([hy_data_sion_p_s, sion_temp], axis=1)
hy_data_sion = hy_data_sion.round(1)
hy_data_sion["date"] = hy_data_sion.index


Unnamed: 0,precipitation (mm),sunshine (min),mean temperature (°C),date
1914-10-01,523.6,120215.0,8.5,1914-10-01
1915-10-01,565.3,110469.0,8.4,1915-10-01
1916-10-01,514.9,121406.0,8.2,1916-10-01
1917-10-01,543.7,129935.0,8.2,1917-10-01
1918-10-01,622.7,132887.0,8.6,1918-10-01
...,...,...,...,...
2020-10-01,663.5,126887.0,10.6,2020-10-01
2021-10-01,403.5,147039.0,11.4,2021-10-01
2022-10-01,689.7,132301.0,12.0,2022-10-01
2023-10-01,787.5,115172.0,11.9,2023-10-01


In [25]:
# Aggregate the monthly Davos data per hydrological year (October to September).
#
# Every monthly row is labelled with the 1 October that opens its hydrological year:
# Oct-Dec belong to the year starting in the same calendar year, Jan-Sep to the one that
# started the previous October. Grouping on this label, rather than on positional chunks of
# 12 rows paired with a hard-coded date range, keeps each row in the right year even if a
# month is missing, and needs no manual update when the data grows.
# A year with fewer than 12 months is aggregated from the months that are present.
davos_hy_start_year = np.where(davos.index.month >= 10, davos.index.year, davos.index.year - 1)
davos_hy_label = pd.to_datetime([f"{year}-10-01" for year in davos_hy_start_year])

# Get sums of precipitation and sunshine for the hydrological years
davos_p_s = davos[['precipitation (mm)', 'sunshine (min)']].copy()
hy_data_davos_p_s = davos_p_s.groupby(davos_hy_label).sum()

# Start dates of the hydrological years present in the data
hy_dates = hy_data_davos_p_s.index

# Number of days per month and per hydrological year, used as weights for the means
davos_days_in_month = pd.Series(np.asarray(davos.index.days_in_month), index=davos.index)
davos_days_per_hy = davos_days_in_month.groupby(davos_hy_label).sum()


def _davos_hy_weighted_mean(monthly_column, output_name):
    """Return the day-weighted yearly mean of one monthly Davos column as a one-column frame.

    Each monthly value is weighted by the number of days in its month; the result is
    rounded to one decimal and indexed by the hydrological-year start date.
    """
    weighted_sum = (davos[monthly_column] * davos_days_in_month).groupby(davos_hy_label).sum()
    return (weighted_sum / davos_days_per_hy).round(1).to_frame(name=output_name)


# Average temperature, average daily max and average daily min for the hydrological years
davos_temp = _davos_hy_weighted_mean('temperature (°C)', 'mean temperature (°C)')
davos_temp_max = _davos_hy_weighted_mean('max_temperature (°C)', 'mean max daily temperature (°C)')
davos_temp_min = _davos_hy_weighted_mean('min_temperature (°C)', 'mean min daily temperature (°C)')

hy_data_davos = pd.concat([hy_data_davos_p_s, davos_temp, davos_temp_max, davos_temp_min], axis=1)
hy_data_davos = hy_data_davos.round(1)
hy_data_davos['date'] = hy_data_davos.index
hy_data_davos = hy_data_davos.reset_index(drop=True)


Unnamed: 0,precipitation (mm),sunshine (min),mean temperature (°C),mean max daily temperature (°C),mean min daily temperature (°C),date
0,945.1,99753.0,2.3,7.6,-2.9,1914-10-01
1,1048.3,97771.0,2.2,7.3,-2.5,1915-10-01
2,988.9,111156.0,2.2,7.4,-2.9,1916-10-01
3,825.3,106304.0,1.8,7.1,-3.1,1917-10-01
4,953.5,105488.0,2.0,7.6,-2.8,1918-10-01
...,...,...,...,...,...,...
106,989.6,105430.0,3.9,9.2,-0.6,2020-10-01
107,810.9,117173.0,4.8,10.4,-0.0,2021-10-01
108,974.3,105650.0,5.4,10.9,0.9,2022-10-01
109,1269.0,93040.0,5.5,10.5,1.1,2023-10-01


In [27]:
# Write the hydrological-year tables without the row index.
for csv_name, hy_table in (
    ("weather_data_sion_hy.csv", hy_data_sion),
    ("weather_data_davos_hy.csv", hy_data_davos),
):
    hy_table.to_csv(os.path.join(folder_path, csv_name), index=False)

In [56]:
# Month numbers that make up each season
winter_months = [10, 11, 12, 1, 2, 3, 4]   # Oct–Apr
summer_months = [5, 6, 7, 8, 9]            # May–Sep

def classify_season(date):
    """Return 'winter' when the month of `date` is in `winter_months`, otherwise 'summer'."""
    return 'winter' if date.month in winter_months else 'summer'

def season_year(date):
    """Return the year a month is attributed to for the seasonal grouping.

    October–December count towards the following calendar year, so that a winter is
    labelled by the year of its January (e.g. winter 1914-15 -> 1915). All other months
    keep their own calendar year.
    """
    return date.year + 1 if date.month >= 10 else date.year

# Seasonal (winter / summer) aggregates for Sion.
sion_seasonal = sion.copy()
sion_seasonal['season'] = sion_seasonal.index.to_series().apply(classify_season)
sion_seasonal['season_year'] = sion_seasonal.index.to_series().apply(season_year)

# Weights for the seasonal mean temperature: days in each month
sion_seasonal['days_in_month'] = sion_seasonal.index.days_in_month
sion_seasonal['temp*days'] = sion_seasonal['temperature (°C)'] * sion_seasonal['days_in_month']

sion_by_season = sion_seasonal.groupby(['season_year', 'season'])

# Seasonal sums of precipitation and sunshine
seasonal_sum = sion_by_season[['precipitation (mm)', 'sunshine (min)']].sum()

# Day-weighted seasonal mean temperature, computed as a ratio of grouped sums.
# This replaces groupby(...).apply(lambda ...), which emits a pandas deprecation warning
# because the lambda is also handed the grouping columns.
seasonal_temp = (
    sion_by_season['temp*days'].sum() / sion_by_season['days_in_month'].sum()
).to_frame(name='mean temperature (°C)')

seasonal_data = pd.concat([seasonal_sum, seasonal_temp], axis=1)
seasonal_data = seasonal_data.round(1)

def compute_season_date(row):
    """Return the start date of the season identified by `row.name`.

    `row.name` is the (season_year, season) pair of the MultiIndex. A winter starts on
    1 October of the year before its label; any other season starts on 1 May of the
    labelled year.
    """
    year_label, season_label = row.name
    starts_in_october = season_label == "winter"
    return (
        pd.Timestamp(year_label - 1, 10, 1)
        if starts_in_october
        else pd.Timestamp(year_label, 5, 1)
    )

# Replace the (season_year, season) MultiIndex by the season start date and
# order the rows chronologically (important!).
seasonal_data['date'] = seasonal_data.apply(compute_season_date, axis=1)
seasonal_data = seasonal_data.set_index('date').sort_index().reset_index()

# compute_season_date dates every winter row 1 October and every summer row 1 May, so the
# month of 'date' identifies the season. Selecting on it, instead of taking even/odd row
# positions, stays correct when a season is missing or the data does not start with a winter.
season_start_month = seasonal_data['date'].dt.month
weather_sion_winter = seasonal_data[season_start_month == 10].reset_index(drop=True)
weather_sion_summer = seasonal_data[season_start_month == 5].reset_index(drop=True)
weather_sion_summer

  .apply(lambda df: df['temp*days'].sum() / df['days_in_month'].sum())


Unnamed: 0,date,precipitation (mm),sunshine (min),mean temperature (°C)
0,1915-05-01,213.3,70774.0,15.7
1,1916-05-01,210.4,67496.0,14.8
2,1917-05-01,246.4,74086.0,16.7
3,1918-05-01,271.1,77064.0,15.8
4,1919-05-01,124.4,83162.0,16.4
...,...,...,...,...
106,2021-05-01,376.1,70743.0,17.6
107,2022-05-01,194.6,83436.0,20.2
108,2023-05-01,231.8,77947.0,19.9
109,2024-05-01,279.3,67714.0,18.5


In [57]:

# Seasonal (winter / summer) aggregates for Davos.
davos_seasonal = davos.copy()
davos_seasonal['season'] = davos_seasonal.index.to_series().apply(classify_season)
davos_seasonal['season_year'] = davos_seasonal.index.to_series().apply(season_year)

# Weights for the seasonal mean temperatures: days in each month.
# The weights are taken from the Davos frame itself. Using the Sion frame here made this
# cell depend on the Sion cell having run and would introduce NaNs if the two stations
# ever covered different dates.
davos_seasonal['days_in_month'] = davos_seasonal.index.days_in_month
davos_seasonal['temp*days'] = davos_seasonal['temperature (°C)'] * davos_seasonal['days_in_month']
davos_seasonal['temp_max*days'] = davos_seasonal['max_temperature (°C)'] * davos_seasonal['days_in_month']
davos_seasonal['temp_min*days'] = davos_seasonal['min_temperature (°C)'] * davos_seasonal['days_in_month']

davos_by_season = davos_seasonal.groupby(['season_year', 'season'])
davos_days_per_season = davos_by_season['days_in_month'].sum()

# Seasonal sums of precipitation and sunshine
seasonal_sum = davos_by_season[['precipitation (mm)', 'sunshine (min)']].sum()

# Day-weighted seasonal means, computed as ratios of grouped sums. This replaces
# groupby(...).apply(lambda ...), which emits a pandas deprecation warning because the
# lambda is also handed the grouping columns.
seasonal_temp_d = (
    davos_by_season['temp*days'].sum() / davos_days_per_season
).to_frame(name='mean temperature (°C)')

seasonal_temp_d_max = (
    davos_by_season['temp_max*days'].sum() / davos_days_per_season
).to_frame(name='mean max daily temperature (°C)')

seasonal_temp_d_min = (
    davos_by_season['temp_min*days'].sum() / davos_days_per_season
).to_frame(name='mean min daily temperature (°C)')

seasonal_data_d = pd.concat([seasonal_sum, seasonal_temp_d, seasonal_temp_d_max, seasonal_temp_d_min], axis=1)
seasonal_data_d = seasonal_data_d.round(1)

def compute_season_date(row):
    """Return the start date of the season identified by `row.name`.

    `row.name` is the (season_year, season) pair of the MultiIndex: winters start on
    1 October of the year before their label, every other season on 1 May of the
    labelled year.
    """
    year_label, season_label = row.name
    if season_label != "winter":
        # summer
        return pd.Timestamp(year_label, 5, 1)
    return pd.Timestamp(year_label - 1, 10, 1)

# Replace the (season_year, season) MultiIndex by the season start date and
# order the rows chronologically (important!).
seasonal_data_d['date'] = seasonal_data_d.apply(compute_season_date, axis=1)
seasonal_data_d = seasonal_data_d.set_index('date').sort_index().reset_index()

# compute_season_date dates every winter row 1 October and every summer row 1 May, so the
# month of 'date' identifies the season. Selecting on it, instead of taking even/odd row
# positions, stays correct when a season is missing or the data does not start with a winter.
season_start_month_d = seasonal_data_d['date'].dt.month
weather_davos_winter = seasonal_data_d[season_start_month_d == 10].reset_index(drop=True)
weather_davos_summer = seasonal_data_d[season_start_month_d == 5].reset_index(drop=True)
weather_davos_summer

  .apply(lambda df: df['temp*days'].sum() / df['days_in_month'].sum())
  .apply(lambda df: df['temp_max*days'].sum() / df['days_in_month'].sum())
  .apply(lambda df: df['temp_min*days'].sum() / df['days_in_month'].sum())


Unnamed: 0,date,precipitation (mm),sunshine (min),mean temperature (°C),mean max daily temperature (°C),mean min daily temperature (°C)
0,1915-05-01,547.7,54361.0,9.0,14.6,3.6
1,1916-05-01,595.3,51709.0,7.9,13.1,3.0
2,1917-05-01,505.0,63540.0,10.1,15.7,4.5
3,1918-05-01,457.1,54944.0,8.6,14.0,3.5
4,1919-05-01,448.2,61665.0,8.4,14.4,3.0
...,...,...,...,...,...,...
106,2021-05-01,533.8,50780.0,10.5,16.1,5.7
107,2022-05-01,491.4,56842.0,12.1,17.9,6.9
108,2023-05-01,595.9,52639.0,12.2,18.0,7.1
109,2024-05-01,579.0,47106.0,11.6,17.1,6.9


In [58]:
# Write the seasonal tables without the row index, Sion first, then Davos.
seasonal_weather_exports = (
    ("weather_data_sion_summer.csv", weather_sion_summer),
    ("weather_data_sion_winter.csv", weather_sion_winter),
    ("weather_data_davos_summer.csv", weather_davos_summer),
    ("weather_data_davos_winter.csv", weather_davos_winter),
)
for csv_name, seasonal_table in seasonal_weather_exports:
    seasonal_table.to_csv(os.path.join(folder_path, csv_name), index=False)
