In [3]:
import numpy as np
from typing import List
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import os 


Create a X Design matrix made in shape (400*600, 24)
when each row is a grid point and each feature is a model.

In [4]:
import numpy as np
from typing import List

def flatten_models_to_grid_matrix(interpolated_precip_list: List[np.ndarray]) -> np.ndarray:
    """
    Create a design matrix X where each row represents a grid point, and each column 
    (feature) represents the flattened precipitation data from a different model.

    Parameters:
    -----------
    interpolated_precip_list : List[np.ndarray]
        A list of 2D arrays (matrices) with dimensions (400, 600) representing 
        precipitation data from different models. Each matrix corresponds to a different 
        model.

    Returns:
    --------
    np.ndarray
        A 2D array (design matrix X) of shape (240000, num_models), where:
        - Each row corresponds to a specific grid point across all models.
        - Each column corresponds to the precipitation data for that grid point from 
          a particular model.

    Notes:
    ------
    - The output design matrix is of shape (240000, num_models), where:
        - 240,000 = 400 * 600 represents all grid points.
        - num_models is the number of models, i.e., the length of the input list.
    - Each row in the matrix corresponds to a unique grid point from the (400, 600) 
      grid, and each feature (column) corresponds to one of the models.
    """
    
    # Number of models and number of grid points
    num_models = len(interpolated_precip_list)
    num_grid_points = 400 * 600

    # Initialize an empty matrix to store the reshaped data
    X = np.zeros((num_grid_points, num_models))

    # Loop over each model's matrix and reshape it into a vector
    for i, precip_matrix in enumerate(interpolated_precip_list):
        # Reshape each (400, 600) matrix to (240000,) and assign it to the ith column
        X[:, i] = precip_matrix.reshape(-1)

    return X


# Flatening the Data and Training the Models

In [5]:
# get correlation and precipitation data in 24-sized list 
models_precipitation : list[np.ndarray] = []
models_correlation : list[np.ndarray] = []
# TODO we need to see how to separate to test and train so it make sense
X_precipitation : np.ndarray = flatten_models_to_grid_matrix(models_precipitation)
X_correlation : np.ndarray = flatten_models_to_grid_matrix(models_correlation)

y_precipitation : np.ndarray = np.mean(X_precipitation, axis=1)
y_correlation : np.ndarray = np.mean(X_correlation, axis=1)

reg_model_precipitation = LinearRegression()
reg_model_correlation = LinearRegression()

reg_model_precipitation.fit(X_precipitation, y_precipitation)
reg_model_correlation.fit(X_correlation, y_correlation)

    

# coefficient evaluetion

In [6]:

def plot_coefficients(coefficients: np.ndarray, model_names: List[str], title: str):
    """
    Plot the coefficients of the regression model.

    Parameters:
    -----------
    coefficients : np.ndarray
        The regression coefficients for each model.
        
    model_names : List[str]
        The names or labels of the models corresponding to the coefficients.

    title : str
        Title of the plot.
    """
    plt.figure(figsize=(10, 6))
    plt.barh(model_names, coefficients, color='skyblue')
    plt.xlabel('Coefficient Value')
    plt.title(title)
    plt.show()

In [18]:
CMIP_PATH = os.path.join('..','Data','CMIP6','Precipitation')
model_names = [f'Model {model_name}' for model_name in os.listdir(CMIP_PATH)]  # Replace with actual model names if available
plot_coefficients(precipitation_coefficients, model_names, 'Precipitation Model Coefficients')
plot_coefficients(correlation_coefficients, model_names, 'Correlation Model Coefficients')


NameError: name 'precipitation_coefficients' is not defined