In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor


In [2]:
# --- Load raw inputs (paths are relative to the notebook's working directory) ---
mass_balance_df = pd.read_csv('project-glaciers/data/mass_balance_hy.csv')
davos_dev = pd.read_csv('project-glaciers/data/weather_dev_davos_temp.csv')
davos_dev_p = pd.read_csv('project-glaciers/data/weather_dev_davos_prec.csv')
sion_dev_t = pd.read_csv('project-glaciers/data/weather_dev_sion_temp.csv')
sion_dev_p = pd.read_csv('project-glaciers/data/weather_dev_sion_prec.csv')
jungfraujoch = pd.read_csv('project-glaciers/data/weather_dev_jungfraujoch.csv')
mass_balance_hy_eb_df = pd.read_csv('project-glaciers/data/mass_balance_hy_eb.csv')

# Remove the 'hydrological year' column from both Davos weather frames.
davos_dev = davos_dev.drop(columns = 'hydrological year')
davos_dev_p = davos_dev_p.drop(columns = 'hydrological year')

# --- Silvrettagletscher: annual / winter / summer balance as one-column frames ---
# NOTE(review): these keep the row labels of mass_balance_df (no reset_index),
# unlike the Aletsch frames below. Later cells merge on the index, so confirm the
# Silvretta rows line up with the weather rows as intended.
silvretta_mb = mass_balance_df[mass_balance_df['glacier name'] == 'Silvrettagletscher']
silvretta_mb_hy = silvretta_mb[['annual mass balance (mm w.e.)']]
silvretta_mb_w = silvretta_mb[['winter mass balance (mm w.e.)']]
silvretta_mb_s = silvretta_mb[['summer mass balance (mm w.e.)']]

# --- Grosser Aletschgletscher: same three series, re-indexed from 0 ---
aletsch_mb = mass_balance_df[mass_balance_df['glacier name'] == 'Grosser Aletschgletscher']
aletsch_mb_hy = aletsch_mb[['annual mass balance (mm w.e.)']].reset_index(drop = True)
aletsch_mb_s = aletsch_mb[['summer mass balance (mm w.e.)']].reset_index(drop = True)
aletsch_mb_w = aletsch_mb[['winter mass balance (mm w.e.)']].reset_index(drop = True)

# --- Allalin and Gries ---
# Previously these were copy-pasted from the two glaciers above without changing
# the filter or the source frame, so allalin_* held Silvretta data, gries_* held
# Aletsch data, and gries_mb_w was never defined (the line overwrote aletsch_mb_w).
# NOTE(review): 'Allalingletscher' and 'Griesgletscher' are assumed to be the
# spellings used in the 'glacier name' column - confirm with
# mass_balance_df['glacier name'].unique().
allalin_mb = mass_balance_df[mass_balance_df['glacier name'] == 'Allalingletscher']
allalin_mb_hy = allalin_mb[['annual mass balance (mm w.e.)']].reset_index(drop = True)
allalin_mb_w = allalin_mb[['winter mass balance (mm w.e.)']].reset_index(drop = True)
allalin_mb_s = allalin_mb[['summer mass balance (mm w.e.)']].reset_index(drop = True)

gries_mb = mass_balance_df[mass_balance_df['glacier name'] == 'Griesgletscher']
gries_mb_hy = gries_mb[['annual mass balance (mm w.e.)']].reset_index(drop = True)
gries_mb_s = gries_mb[['summer mass balance (mm w.e.)']].reset_index(drop = True)
gries_mb_w = gries_mb[['winter mass balance (mm w.e.)']].reset_index(drop = True)


In [3]:
# OLS of the Silvretta annual mass balance on the monthly *_td and *_pd columns
# of the two Davos weather frames.

# Merge on index: rows are paired by index label, not by a year column.
silvretta_reg_t = silvretta_mb_hy.merge(davos_dev, left_index=True, right_index=True)
silvretta_reg = silvretta_reg_t.merge(davos_dev_p, left_index=True, right_index=True)

# Define predictors and target
X = silvretta_reg[['april_td', 'may_td', 'june_td', 'july_td',
                       'august_td', 'september_td', 
                       'october_pd', 'november_pd', 
                       'december_pd', 'january_pd',
                       'february_pd', 'march_pd',
                       'april_pd', 'may_pd', 
                       'june_pd', 'july_pd',
                       'august_pd', 'september_pd'
                  ]] 
# Take the target from the same merged frame as X. It was previously read from
# silvretta_reg_t, which only lines up with X if the second merge drops no rows.
Y = silvretta_reg['annual mass balance (mm w.e.)']

# Add intercept for statsmodels
X = sm.add_constant(X)

# Fit regression
model = sm.OLS(Y, X).fit()

print(model.summary())


                                  OLS Regression Results                                 
Dep. Variable:     annual mass balance (mm w.e.)   R-squared:                       0.780
Model:                                       OLS   Adj. R-squared:                  0.736
Method:                            Least Squares   F-statistic:                     18.07
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           9.32e-23
Time:                                   18:55:09   Log-Likelihood:                -829.88
No. Observations:                            111   AIC:                             1698.
Df Residuals:                                 92   BIC:                             1749.
Df Model:                                     18                                         
Covariance Type:                       nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
-------------------

In [4]:
# Silvretta summer balance joined, index label to index label, with the Davos
# temperature frame.
silvretta_reg_t_s = pd.merge(
    silvretta_mb_s, davos_dev, left_index=True, right_index=True
)

# Response, then the design matrix: the April-September *_td columns plus a
# constant column so the OLS fit includes an intercept.
Y = silvretta_reg_t_s['summer mass balance (mm w.e.)']
X = sm.add_constant(
    silvretta_reg_t_s[
        ['april_td', 'may_td', 'june_td', 'july_td', 'august_td', 'september_td']
    ]
)

# Ordinary least squares fit and its summary table.
model = sm.OLS(endog=Y, exog=X).fit()
print(model.summary())

                                  OLS Regression Results                                 
Dep. Variable:     summer mass balance (mm w.e.)   R-squared:                       0.723
Model:                                       OLS   Adj. R-squared:                  0.707
Method:                            Least Squares   F-statistic:                     45.22
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           7.87e-27
Time:                                   18:55:52   Log-Likelihood:                -836.63
No. Observations:                            111   AIC:                             1687.
Df Residuals:                                104   BIC:                             1706.
Df Model:                                      6                                         
Covariance Type:                       nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
-------------------

In [5]:
# Silvretta winter balance joined, index label to index label, with the Davos
# precipitation frame.
silvretta_reg_t_w = pd.merge(
    silvretta_mb_w, davos_dev_p, left_index=True, right_index=True
)

# Response, then the design matrix: the October-March *_pd columns plus a
# constant column so the OLS fit includes an intercept.
y = silvretta_reg_t_w['winter mass balance (mm w.e.)']
x = sm.add_constant(
    silvretta_reg_t_w[
        ['october_pd', 'november_pd', 'december_pd', 'january_pd', 'february_pd', 'march_pd']
    ]
)

# Ordinary least squares fit and its summary table.
model = sm.OLS(endog=y, exog=x).fit()
print(model.summary())

                                  OLS Regression Results                                 
Dep. Variable:     winter mass balance (mm w.e.)   R-squared:                       0.519
Model:                                       OLS   Adj. R-squared:                  0.491
Method:                            Least Squares   F-statistic:                     18.67
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           1.23e-14
Time:                                   18:55:56   Log-Likelihood:                -790.75
No. Observations:                            111   AIC:                             1595.
Df Residuals:                                104   BIC:                             1614.
Df Model:                                      6                                         
Covariance Type:                       nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
--------------------

In [6]:
# Hold-out check of a linear model for the Silvretta summer balance.
# Features: every column of the merged frame except the target, as NumPy arrays.
y = silvretta_reg_t_s['summer mass balance (mm w.e.)'].values
x = silvretta_reg_t_s.drop(columns = 'summer mass balance (mm w.e.)').values

# Deterministic split: rows 3, 7, 11, ... (i % 4 == 3) are held out for testing,
# all remaining rows are used for training.
n_rows = len(silvretta_reg_t_s)
test_indices = list(range(3, n_rows, 4))
train_indices = [row for row in range(n_rows) if row % 4 != 3]

x_train, y_train = x[train_indices], y[train_indices]
x_test, y_test = x[test_indices], y[test_indices]

# Fit on the training rows, predict the held-out rows.
model = LinearRegression().fit(x_train, y_train)
y_pred = model.predict(x_test)

# Root-mean-square error on the held-out rows.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE:", rmse)

RMSE: 473.8834909803635


In [41]:
# Summer mass balance of Grosser Aletschgletscher for the elevation bin whose
# upper edge is 3600 m, restricted to observations starting after the cutoff.
obs_start_col = 'start date of observation (yyyy-mm-dd)'
summer_mb_col = 'summer mass balance (mm w.e.)'
cutoff = pd.to_datetime("1933-09-30")

# Row mask: the right glacier and the right elevation bin.
in_aletsch_3600_bin = (
    mass_balance_hy_eb_df['glacier name'].eq('Grosser Aletschgletscher')
    & mass_balance_hy_eb_df['upper elevation of bin (m asl.)'].eq(3600)
)
bin_obs = mass_balance_hy_eb_df.loc[in_aletsch_3600_bin, [obs_start_col, summer_mb_col]]

# Parse the start dates and keep only rows strictly later than the cutoff;
# the result is a one-column frame re-indexed from 0.
starts_after_cutoff = pd.to_datetime(bin_obs[obs_start_col]) > cutoff
aletsch_mb_eb_3600_s = (
    bin_obs.loc[starts_after_cutoff, [summer_mb_col]].reset_index(drop=True)
)

# Jungfraujoch weather frame without its 'hydrological year' column.
jun_t = jungfraujoch.drop(columns=['hydrological year'])



In [42]:
# 3600 m-bin summer balance joined, index label to index label, with the
# Jungfraujoch weather frame.
aletsch_reg_t_s_3600 = pd.merge(
    aletsch_mb_eb_3600_s, jun_t, left_index=True, right_index=True
)

# Response, then the design matrix: the May-September *_td columns plus a
# constant column so the OLS fit includes an intercept.
y = aletsch_reg_t_s_3600['summer mass balance (mm w.e.)']
x = sm.add_constant(
    aletsch_reg_t_s_3600[
        ['may_td', 'june_td', 'july_td', 'august_td', 'september_td']
    ]
)

# Ordinary least squares fit and its summary table.
model = sm.OLS(endog=y, exog=x).fit()
print(model.summary())

                                  OLS Regression Results                                 
Dep. Variable:     summer mass balance (mm w.e.)   R-squared:                       0.388
Model:                                       OLS   Adj. R-squared:                  0.352
Method:                            Least Squares   F-statistic:                     10.89
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           3.88e-08
Time:                                   17:54:55   Log-Likelihood:                -669.38
No. Observations:                             92   AIC:                             1351.
Df Residuals:                                 86   BIC:                             1366.
Df Model:                                      5                                         
Covariance Type:                       nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
-------------------

In [43]:
# Merge on index
aletsch_reg_t = aletsch_mb_hy.merge(sion_dev_t, left_index=True, right_index=True)
aletsch_reg = aletsch_reg_t.merge(sion_dev_p, left_index=True, right_index=True)

# Define predictors and target
x = aletsch_reg[['april_pd', 'may_td', 'june_td', 'july_td',
                     'august_td', 'september_td',
                     'october_pd', 'november_pd', 
                     'december_pd', 'january_pd', 
                     'february_pd', 'march_pd'
                ]] 

y = aletsch_reg['annual mass balance (mm w.e.)']

# Add intercept for statsmodels
x = sm.add_constant(x)

# Fit regression
model = sm.OLS(y, x).fit()

print(model.summary())


                                  OLS Regression Results                                 
Dep. Variable:     annual mass balance (mm w.e.)   R-squared:                       0.760
Model:                                       OLS   Adj. R-squared:                  0.731
Method:                            Least Squares   F-statistic:                     25.86
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           3.55e-25
Time:                                   17:54:56   Log-Likelihood:                -810.49
No. Observations:                            111   AIC:                             1647.
Df Residuals:                                 98   BIC:                             1682.
Df Model:                                     12                                         
Covariance Type:                       nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
-------------------

In [44]:
# Merge on index
aletsch_reg_w = aletsch_mb_w.merge(sion_dev_p, left_index=True, right_index=True)
aletsch_reg_w_60 = aletsch_reg_w[:60]
aletsch_reg_w_last50 = aletsch_reg_w[-50:]



# Define predictors and target
x = aletsch_reg_w[['october_pd', 'november_pd', 
                              'december_pd', 'january_pd', 
                              'february_pd', 'march_pd', 'april_pd'
                         ]] 

y = aletsch_reg_w['winter mass balance (mm w.e.)']

# Add intercept for statsmodels
x = sm.add_constant(x)

# Fit regression
model = sm.OLS(y, x).fit()

print(model.summary())


                                  OLS Regression Results                                 
Dep. Variable:     winter mass balance (mm w.e.)   R-squared:                       0.331
Model:                                       OLS   Adj. R-squared:                  0.286
Method:                            Least Squares   F-statistic:                     7.283
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           4.39e-07
Time:                                   17:54:56   Log-Likelihood:                -776.52
No. Observations:                            111   AIC:                             1569.
Df Residuals:                                103   BIC:                             1591.
Df Model:                                      7                                         
Covariance Type:                       nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
--------------------

In [45]:
# Merge on index
aletsch_reg_s = aletsch_mb_s.merge(sion_dev_t, left_index=True, right_index=True)

# Define predictors and target
x = aletsch_reg_s[['may_td', 'june_td', 'july_td',
                       'august_td', 'september_td',
                  ]] 

y = aletsch_reg_s['summer mass balance (mm w.e.)']

# Add intercept for statsmodels
x = sm.add_constant(x)

# Fit regression
model = sm.OLS(y, x).fit()

print(model.summary())

                                  OLS Regression Results                                 
Dep. Variable:     summer mass balance (mm w.e.)   R-squared:                       0.716
Model:                                       OLS   Adj. R-squared:                  0.702
Method:                            Least Squares   F-statistic:                     52.89
Date:                           Thu, 27 Nov 2025   Prob (F-statistic):           3.76e-27
Time:                                   17:54:57   Log-Likelihood:                -817.28
No. Observations:                            111   AIC:                             1647.
Df Residuals:                                105   BIC:                             1663.
Df Model:                                      5                                         
Covariance Type:                       nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
-------------------

In [46]:
# Random-forest model of the 3600 m-bin summer balance using "effective"
# monthly predictors: the deviation column is kept only for rows where the
# month's actual temperature column is above 0, otherwise the predictor is 0.
df = aletsch_reg_t_s_3600.copy()

# Summer months
summer_months = ["may", "june", "july", "august", "september"]

# Fail fast if any input column is absent. Previously a missing column was only
# printed, and the cell then died with an opaque KeyError on the "<month>_eff"
# column that had never been created.
required_cols = [f"{m}_{suffix}" for m in summer_months for suffix in ("td", "t")]
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"Cannot build effective-temperature predictors; missing columns: {missing_cols}"
    )

for m in summer_months:
    td_col = f"{m}_td"  # deviation column
    T_col  = f"{m}_t"   # actual temperature column
    eff_col = f"{m}_eff"  # new effective predictor

    # Rows where T_col is not > 0 (including NaN) get 0.
    df[eff_col] = np.where(df[T_col] > 0, df[td_col], 0)

x = df[[f"{m}_eff" for m in summer_months]]
y = df["summer mass balance (mm w.e.)"]

# Random 80/20 split with a fixed seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
model = RandomForestRegressor(
    n_estimators=600,
    max_depth=None,
    random_state=42
)

model.fit(x_train, y_train)
pred = model.predict(x_test)

# Scores on the held-out 20 %.
print("R² =", r2_score(y_test, pred))
print("MSE =", mean_squared_error(y_test, pred))

R² = 0.38082506871568966
MSE = 93497.9153359802
