In [1]:
import numpy as np
import xarray as xr
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS
from sklearn.metrics import mean_squared_error, r2_score

data_home = "/mnt/g/My Drive/GTC/solodoch_data_minimal"
lats = ["26N", "30S", "55S", "60S"]

In [2]:
lat = lats[0]
data = xr.open_dataset(f"{data_home}/{lat}.nc")

data_np = data.to_array().values
# time x lon x feature
data_np = data_np.transpose((2, 1, 3, 0)).squeeze(0)

# dummy strength data
strength = np.random.rand(*(data_np.shape[:-1]))
X = data_np.mean(axis=1); y = strength.mean(axis=-1)

In [3]:
# linear regression and variants

# Assuming X and y are your numpy arrays
X = sm.add_constant(X) 

# linear regression
model_ols = OLS(y, X).fit()

# robust linear regression
model_robust = sm.RLM(y, X).fit()

# ridge regression
model_ridge = OLS(y, X).fit_regularized(alpha=0.1, L1_wt=0)

def eval(model, X, y):
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    r2 = r2_score(y, predictions)
    return r2, mse

r2_ols, mse_ols = eval(model_ols, X, y)
r2_robust, mse_robust = eval(model_robust, X, y)
r2_ridge, mse_ridge = eval(model_ridge, X, y)

# Print performance metrics
print(f"OLS: R-squared = {r2_ols}, MSE = {mse_ols}")
print(f"Robust: R-squared = {r2_robust}, MSE = {mse_robust}")
print(f"Ridge: R-squared = {r2_ridge}, MSE = {mse_ridge}")

OLS: R-squared = 0.04120167667950103, MSE = 0.00045430303326965464
Robust: R-squared = 0.040675550302787244, MSE = 0.00045455232532931996
Ridge: R-squared = 0.0038845441528403413, MSE = 0.000471984839847158
