In [None]:
import os
import pandas as pd
import numpy as np
import re
import threading
import time
import pickle  # Add this import
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import xgboost as xgb
import sys
sys.stdout.reconfigure(line_buffering=True)

In [None]:
# 2. Define the column-name cleaning function
def clean_column_names(df):
    """
    Normalise the column labels of ``df`` in place and return the same frame.

    Each label is processed as follows:
    - It is converted to ``str`` (so integer or other non-string headers are
      accepted), every run of non-word characters is replaced with a single
      underscore, and leading/trailing underscores are stripped.
    - If the result does not start with an ASCII letter it is prefixed with
      ``f_``.
    - Duplicates are made unique by appending ``_1``, ``_2``, ... to the later
      occurrences. A suffix that would collide with a name already in use
      (for example columns ``a``, ``a``, ``a_1``) is skipped, so the returned
      labels are guaranteed to be unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` attribute is reassigned.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with cleaned column labels.
    """
    cleaned = []
    for col in df.columns:
        # re.sub only accepts str/bytes; headers read from files may be ints.
        name = re.sub(r'\W+', '_', str(col)).strip('_')
        if not re.match(r'^[A-Za-z]', name):
            name = f'f_{name}'
        cleaned.append(name)

    used = set()          # every label already emitted
    last_suffix = {}      # base name -> highest numeric suffix handed out so far
    unique_columns = []
    for name in cleaned:
        candidate = name
        if candidate in used:
            suffix = last_suffix.get(name, 0)
            # Keep counting until the suffixed name is genuinely unused; a
            # plain counter can collide with a pre-existing "name_1" column.
            while candidate in used:
                suffix += 1
                candidate = f"{name}_{suffix}"
            last_suffix[name] = suffix
        used.add(candidate)
        unique_columns.append(candidate)

    df.columns = unique_columns
    return df

In [None]:
# 3. Define the model training function
def train_model(model_name, model, X_train, y_train, X_test, y_test, results, training_threshold, dataset_name):
    """
    Fit ``model`` in a worker thread, evaluate it on the test split, pickle it
    and append one metrics row to ``results``.

    Parameters
    ----------
    model_name : str
        Display name used in log messages and in the result row; with spaces
        replaced by underscores it is also the pickle file name.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. When it is a
        ``GridSearchCV`` instance its ``best_params_`` are logged and recorded.
    X_train, y_train
        Training features and target passed to ``model.fit``.
    X_test, y_test
        Evaluation features and target. ``X_test.shape[1]`` is used as the
        number of predictors in the adjusted R² formula.
    results : list
        Mutated in place: a dict of metrics is appended on success.
    training_threshold : float
        Maximum number of seconds to wait for fit + predict.
    dataset_name : str
        Recorded in the result row and used as the sub-directory of
        ``models/`` where the pickle is written.

    Returns
    -------
    None
        On timeout or on an exception raised by fit/predict, nothing is
        appended to ``results`` and nothing is written to disk.

    Notes
    -----
    Python threads cannot be cancelled. After a timeout the worker keeps
    running in the background and keeps mutating ``model``; it is started as a
    daemon thread so that it cannot keep the interpreter alive at shutdown.
    """
    # Mutable slots shared with the worker thread.
    outcome = {'y_pred': None, 'training_time': None, 'error': None}

    def train():
        start_time = time.time()
        try:
            print(f"Starting training for {model_name}...")
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            outcome['training_time'] = time.time() - start_time
            # Published last: a non-None prediction marks a completed run.
            outcome['y_pred'] = predictions
            print(f"Completed training for {model_name} in {outcome['training_time']:.2f} seconds.")
        except Exception as e:
            outcome['error'] = e
            print(f"Error training model {model_name}: {e}")

    thread = threading.Thread(target=train, daemon=True)
    thread.start()
    thread.join(timeout=training_threshold)

    # Report the two failure modes separately so the log says what happened.
    if outcome['error'] is not None:
        print(f"Model {model_name} encountered an error during training; no results recorded.")
        return
    if outcome['y_pred'] is None:
        print(f"Model {model_name} exceeded training time ({training_threshold} seconds); no results recorded.")
        return

    y_pred = outcome['y_pred']
    training_time = outcome['training_time']

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r_squared = r2_score(y_test, y_pred)

    # Adjusted R² is only defined when there are more samples than
    # predictors + 1; otherwise fall back to the plain R².
    n = len(y_test)
    p = X_test.shape[1]
    if n > p + 1 and p > 0:
        adjusted_r_squared = 1 - (1 - r_squared) * ((n - 1) / (n - p - 1))
    else:
        adjusted_r_squared = r_squared

    print(f"Model {model_name} trained successfully in {training_time:.2f} seconds.")
    print(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, R²: {r_squared}, Adjusted R²: {adjusted_r_squared}")

    best_params = None
    if isinstance(model, GridSearchCV):
        best_params = model.best_params_
        print(f"Best parameters for {model_name}: {best_params}")

    # Persist the fitted estimator under models/<dataset_name>/<Model_Name>.pkl
    models_dir = os.path.join("models", dataset_name)
    os.makedirs(models_dir, exist_ok=True)
    model_filename = f"{model_name.replace(' ', '_')}.pkl"
    model_filepath = os.path.join(models_dir, model_filename)
    with open(model_filepath, 'wb') as f:
        pickle.dump(model, f)
    print(f"Model {model_name} saved to {model_filepath}")

    result = {
        'Model': model_name,
        'Dataset': dataset_name,
        'Training Time (s)': training_time,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2 Score': r_squared,
        'Adjusted R2 Score': adjusted_r_squared
    }
    if best_params:
        result['Best Params'] = str(best_params)
    results.append(result)

In [None]:
# 4. Main Script
def main():
    """
    Train and evaluate every configured regressor on each raw dataset CSV.

    For each file in the dataset list the function:
    1. reads the CSV and cleans its column names with ``clean_column_names``;
    2. splits the rows into train/test (and, for the grouped split, a
       validation set);
    3. label-encodes object-typed feature columns and fills missing values
       with 0;
    4. calls ``train_model`` for every model and writes the collected metrics
       to ``model_results/<dataset stem>/model_results.csv``.

    NOTE(review): ``verify_column_names`` and ``split_data_by_group`` are
    called below but are not defined in the visible notebook cells. Confirm
    they are defined before this cell runs on a fresh kernel.
    """
    # Hyper-parameter grids, keyed by model display name. Parameter names are
    # prefixed with the pipeline step name (``<step>__<param>``).
    # The SVR grid is kept although the SVR pipeline is currently disabled.
    param_grids = {
        'Ridge Regression': {
            'ridge__alpha': [0.1, 1.0, 10.0, 100.0]
        },
        'Lasso Regression': {
            'lasso__alpha': [0.01, 0.1, 1.0, 10.0]
        },
        'Elastic Net Regression': {
            'elasticnet__alpha': [0.01, 0.1, 1.0, 10.0],
            'elasticnet__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
        },
        'LightGBM Regression': {
            'lightgbm__num_leaves': [31, 50, 70],
            'lightgbm__learning_rate': [0.01, 0.05, 0.1],
            'lightgbm__n_estimators': [100, 200, 500]
        },
        'Random Forest Regression': {
            'randomforest__n_estimators': [100, 200, 500],
            'randomforest__max_depth': [10, 20, None],
            'randomforest__min_samples_split': [2, 5, 10]
        },
        'Support Vector Regression': {
            'svr__C': [0.1, 1, 10],
            'svr__epsilon': [0.1, 0.2, 0.5],
            'svr__kernel': ['linear', 'rbf']
        },
        'XGBoost Regression': {
            'xgboost__learning_rate': [0.01, 0.05, 0.1],
            'xgboost__max_depth': [3, 5, 7],
            'xgboost__n_estimators': [100, 200, 500]
        }
    }

    def build_models():
        """Return a fresh ``{name: estimator}`` mapping.

        Called once per dataset so that estimator objects are never shared
        between datasets: a fit that timed out in ``train_model`` may still be
        running in a background thread and mutating its estimator.
        """
        pipelines = {
            'Linear Regression': Pipeline([
                ('linearregression', LinearRegression())
            ]),
            'Ridge Regression': Pipeline([
                ('ridge', Ridge())
            ]),
            'Lasso Regression': Pipeline([
                ('lasso', Lasso())
            ]),
            'Elastic Net Regression': Pipeline([
                ('elasticnet', ElasticNet())
            ]),
            # Stochastic learners are seeded so repeated runs are comparable.
            'LightGBM Regression': Pipeline([
                ('lightgbm', LGBMRegressor(random_state=42))
            ]),
            'Random Forest Regression': Pipeline([
                ('randomforest', RandomForestRegressor(random_state=42))
            ]),
            # 'Support Vector Regression': Pipeline([
            #     ('svr', SVR())
            # ]),
            # ``use_label_encoder`` only ever applied to XGBoost classifiers
            # and is deprecated, so it is not passed to the regressor.
            'XGBoost Regression': Pipeline([
                ('xgboost', xgb.XGBRegressor(eval_metric='rmse', random_state=42))
            ]),
        }

        models = {}
        for name, pipeline in pipelines.items():
            if name in param_grids:
                models[name] = GridSearchCV(
                    estimator=pipeline,
                    param_grid=param_grids[name],
                    cv=5,
                    scoring='r2',
                    n_jobs=-1,
                    verbose=1
                )
            else:
                models[name] = pipeline  # Linear Regression has no hyperparameters
        return models

    # Maximum wall-clock time allowed per model (fit + predict)
    training_threshold = 7200  # seconds

    # List of preprocessed datasets
    datasets = ['dataset5.csv', 'dataset4.csv', 'dataset3.csv', 'dataset2.csv', 'dataset1.csv']

    # Results folder
    results_dir = "model_results"
    os.makedirs(results_dir, exist_ok=True)

    # Models directory
    models_dir = "models"
    os.makedirs(models_dir, exist_ok=True)

    target_variable = 'diem_hp'
    group_column = 'hocky_monhoc_count'

    # Process each dataset
    for dataset_name in datasets:
        print(f"\nProcessing {dataset_name}")

        # Read dataset
        try:
            data = pd.read_csv(dataset_name)
            print(f"Successfully read {dataset_name}")
        except Exception as e:
            print(f"Error reading {dataset_name}: {e}")
            continue

        # Clean column names
        data = clean_column_names(data)
        print("Column names after cleaning:", data.columns.tolist())

        # Verify column names
        problematic_columns = verify_column_names(data)
        if problematic_columns:
            print(f"Problematic columns after cleaning: {problematic_columns}")
            print("Further cleaning or renaming may be required.")
        else:
            print("All column names are clean and compatible with models.")

        # Identify numerical and categorical columns
        numerical = data.select_dtypes(include=['int64', 'float64']).columns.tolist()
        categorical = data.select_dtypes(include=['object']).columns.tolist()

        print(f"Numerical columns: {numerical}")
        print(f"Categorical columns: {categorical}")

        # The target must exist and must not be treated as a feature
        if target_variable not in data.columns:
            print(f"Target variable '{target_variable}' not found in dataset '{dataset_name}'. Skipping this dataset.")
            continue
        if target_variable in numerical:
            numerical.remove(target_variable)
        if target_variable in categorical:
            categorical.remove(target_variable)

        # Define features and target
        X = data.drop(columns=[target_variable])
        y = data[target_variable]

        # Reset per dataset: a validation split left over from a previous
        # iteration must never be reused for a different dataset.
        X_val = None
        y_val = None

        if group_column not in data.columns:
            print(f"Group column '{group_column}' not found in dataset '{dataset_name}'.")
            print("Proceeding with a standard train-test split without grouping.")
            try:
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.2, random_state=42, shuffle=True
                )
                print(f"Data split into Training ({len(X_train)} samples) and Testing ({len(X_test)} samples) sets.")
            except ValueError as e:
                print(f"Error splitting data for dataset '{dataset_name}': {e}")
                continue
        else:
            print(f"\nGroup column '{group_column}' found. Proceeding with grouped split.")
            print(f"Initial data shape: {data.shape}")

            # Split data using split_data_by_group
            train_set, val_set, test_set = split_data_by_group(
                data,
                group_col=group_column,
                train_ratio=0.8,
                val_ratio=0.2,
                test_ratio=None
            )

            if len(train_set) == 0 or len(test_set) == 0:
                print("\nFallback to standard train-test split due to empty sets...")
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.2, random_state=42, shuffle=True
                )
            else:
                # Define features and targets
                X_train = train_set.drop(columns=[target_variable])
                y_train = train_set[target_variable]
                X_val = val_set.drop(columns=[target_variable])
                y_val = val_set[target_variable]
                X_test = test_set.drop(columns=[target_variable])
                y_test = test_set[target_variable]

                print(f"\nFinal shapes:")
                print(f"Training set: {X_train.shape}")
                print(f"Validation set: {X_val.shape}")
                print(f"Testing set: {X_test.shape}")

        # The group column is deliberately excluded from label encoding.
        # NOTE(review): if it is object-typed it therefore stays non-numeric
        # in the feature matrix; confirm that is intended.
        if group_column in categorical:
            categorical.remove(group_column)

        use_validation = X_val is not None

        # Work on copies so the column assignments below never write into a
        # view of ``data``.
        X_train = X_train.copy()
        X_test = X_test.copy()
        if use_validation:
            X_val = X_val.copy()
            feature_splits = [X_train, X_val, X_test]
        else:
            feature_splits = [X_train, X_test]

        # Encode categorical variables with ONE mapping per column, learned
        # from the training split, so a given value gets the same integer code
        # in every split. Values unseen in training (and NaN) become -1.
        for col in categorical:
            if col in X_train.columns:
                train_categories = pd.Categorical(X_train[col]).categories
            else:
                train_categories = None
            for split in feature_splits:
                if col not in split.columns:
                    continue
                if train_categories is None:
                    split[col] = split[col].astype('category').cat.codes
                else:
                    split[col] = pd.Categorical(split[col], categories=train_categories).codes
            print(f"Label encoded categorical column: {col}")

        print("All categorical features are now label encoded.")

        # Handle missing values.
        # NOTE(review): missing *target* values are also replaced with 0,
        # which the models will treat as real labels; confirm this is wanted.
        X_train = X_train.fillna(0)
        X_test = X_test.fillna(0)
        y_train = y_train.fillna(0)
        y_test = y_test.fillna(0)
        if use_validation:
            X_val = X_val.fillna(0)
            y_val = y_val.fillna(0)

        print("All features are now numeric and missing values are handled.")

        # Prepare results storage
        results = []

        # Train and evaluate each model. The validation split is not passed
        # on: train_model only takes train and test data.
        for model_name, model in build_models().items():
            print(f"\nTraining model: {model_name}")
            train_model(
                model_name=model_name,
                model=model,
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                results=results,
                training_threshold=training_threshold,
                dataset_name=dataset_name
            )

        # Save results to CSV
        results_df = pd.DataFrame(results)
        dataset_results_dir = os.path.join(results_dir, os.path.splitext(dataset_name)[0])
        os.makedirs(dataset_results_dir, exist_ok=True)
        results_file = os.path.join(dataset_results_dir, 'model_results.csv')
        results_df.to_csv(results_file, index=False)
        print(f"Results for {dataset_name} saved to {results_file}")

if __name__ == "__main__":
    main()

In [None]:
# 4b. Main Script (unfinished draft that loads pre-split train/test/val CSVs)
def main():
    """
    Draft entry point: build the model search objects and load pre-split data.

    The function assembles the hyper-parameter grids, wraps each estimator in
    a single-step ``Pipeline``, creates a ``GridSearchCV`` for every pipeline
    that has a grid, reads the train/test/val CSVs of dataset1 and dataset5,
    and makes sure the ``model_results`` and ``models`` directories exist.

    The loaded frames and the search objects are not used any further here;
    the per-dataset processing step has not been written in this cell.
    Running this cell rebinds the name ``main`` in the notebook namespace.
    """
    # Search spaces, keyed by display name; keys follow "<step>__<param>".
    param_grids = {
        'Ridge Regression': {'ridge__alpha': [0.1, 1.0, 10.0, 100.0]},
        'Lasso Regression': {'lasso__alpha': [0.01, 0.1, 1.0, 10.0]},
        'Elastic Net Regression': {
            'elasticnet__alpha': [0.01, 0.1, 1.0, 10.0],
            'elasticnet__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9],
        },
        'LightGBM Regression': {
            'lightgbm__num_leaves': [31, 50, 70],
            'lightgbm__learning_rate': [0.01, 0.05, 0.1],
            'lightgbm__n_estimators': [100, 200, 500],
        },
        'Random Forest Regression': {
            'randomforest__n_estimators': [100, 200, 500],
            'randomforest__max_depth': [10, 20, None],
            'randomforest__min_samples_split': [2, 5, 10],
        },
        'Support Vector Regression': {
            'svr__C': [0.1, 1, 10],
            'svr__epsilon': [0.1, 0.2, 0.5],
            'svr__kernel': ['linear', 'rbf'],
        },
        'XGBoost Regression': {
            'xgboost__learning_rate': [0.01, 0.05, 0.1],
            'xgboost__max_depth': [3, 5, 7],
            'xgboost__n_estimators': [100, 200, 500],
        },
    }

    # (display name, pipeline step name, estimator) in registration order.
    # Support Vector Regression is intentionally left out of this list.
    estimator_specs = [
        ('Linear Regression', 'linearregression', LinearRegression()),
        ('Ridge Regression', 'ridge', Ridge()),
        ('Lasso Regression', 'lasso', Lasso()),
        ('Elastic Net Regression', 'elasticnet', ElasticNet()),
        ('LightGBM Regression', 'lightgbm', LGBMRegressor()),
        ('Random Forest Regression', 'randomforest', RandomForestRegressor()),
        ('XGBoost Regression', 'xgboost', xgb.XGBRegressor(use_label_encoder=False, eval_metric='rmse')),
    ]
    pipelines = {
        display_name: Pipeline([(step_name, estimator)])
        for display_name, step_name, estimator in estimator_specs
    }

    # Pipelines with a grid are tuned by 5-fold grid search scored on R²;
    # the rest (Linear Regression) are used as they are.
    models = {
        display_name: (
            GridSearchCV(
                estimator=pipe,
                param_grid=param_grids[display_name],
                cv=5,
                scoring='r2',
                n_jobs=-1,
                verbose=1,
            )
            if display_name in param_grids
            else pipe
        )
        for display_name, pipe in pipelines.items()
    }

    # Per-model time limit
    training_threshold = 7200

    # Pre-split data for dataset1 and dataset5, read in train/test/val order
    split_paths = (
        '/results/dataset1/train.csv',
        '/results/dataset1/test.csv',
        '/results/dataset1/val.csv',
        '/results/dataset5/train.csv',
        '/results/dataset5/test.csv',
        '/results/dataset5/val.csv',
    )
    split_frames = {csv_path: pd.read_csv(csv_path) for csv_path in split_paths}

    # Output locations for metrics and for pickled models
    results_dir = "model_results"
    models_dir = "models"
    for out_dir in (results_dir, models_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Process each dataset (not implemented in this draft)

In [None]:

import os
import pandas as pd
import numpy as np
import threading
import time
import pickle  # Add this import
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from lightgbm import LGBMRegressor

# 1. Define the model training function
def train_model(model_name, model, X_train, y_train, X_test, y_test, results, training_threshold, dataset_name):
    """
    Fit ``model`` in a worker thread, evaluate it on the test split, pickle it
    and append one metrics row to ``results``.

    Parameters
    ----------
    model_name : str
        Display name used in log messages and in the result row; with spaces
        replaced by underscores it is also the pickle file name.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. When it is a
        ``GridSearchCV`` instance its ``best_params_`` are logged and recorded.
    X_train, y_train
        Training features and target passed to ``model.fit``.
    X_test, y_test
        Evaluation features and target. ``X_test.shape[1]`` is used as the
        number of predictors in the adjusted R² formula.
    results : list
        Mutated in place: a dict of metrics is appended on success.
    training_threshold : float
        Maximum number of seconds to wait for fit + predict.
    dataset_name : str
        Recorded in the result row and used as the sub-directory of
        ``models/`` where the pickle is written.

    Returns
    -------
    None
        On timeout or on an exception raised by fit/predict, nothing is
        appended to ``results`` and nothing is written to disk.

    Notes
    -----
    Python threads cannot be cancelled. After a timeout the worker keeps
    running in the background and keeps mutating ``model``; it is started as a
    daemon thread so that it cannot keep the interpreter alive at shutdown.
    """
    # Mutable slots shared with the worker thread.
    outcome = {'y_pred': None, 'training_time': None, 'error': None}

    def train():
        start_time = time.time()
        try:
            print(f"Starting training for {model_name}...")
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            outcome['training_time'] = time.time() - start_time
            # Published last: a non-None prediction marks a completed run.
            outcome['y_pred'] = predictions
            print(f"Completed training for {model_name} in {outcome['training_time']:.2f} seconds.")
        except Exception as e:
            outcome['error'] = e
            print(f"Error training model {model_name}: {e}")

    thread = threading.Thread(target=train, daemon=True)
    thread.start()
    thread.join(timeout=training_threshold)

    # Report the two failure modes separately so the log says what happened.
    if outcome['error'] is not None:
        print(f"Model {model_name} encountered an error during training; no results recorded.")
        return
    if outcome['y_pred'] is None:
        print(f"Model {model_name} exceeded training time ({training_threshold} seconds); no results recorded.")
        return

    y_pred = outcome['y_pred']
    training_time = outcome['training_time']

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r_squared = r2_score(y_test, y_pred)

    # Adjusted R² is only defined when there are more samples than
    # predictors + 1; otherwise fall back to the plain R².
    n = len(y_test)
    p = X_test.shape[1]
    if n > p + 1 and p > 0:
        adjusted_r_squared = 1 - (1 - r_squared) * ((n - 1) / (n - p - 1))
    else:
        adjusted_r_squared = r_squared

    print(f"Model {model_name} trained successfully in {training_time:.2f} seconds.")
    print(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, R²: {r_squared}, Adjusted R²: {adjusted_r_squared}")

    best_params = None
    if isinstance(model, GridSearchCV):
        best_params = model.best_params_
        print(f"Best parameters for {model_name}: {best_params}")

    # Persist the fitted estimator under models/<dataset_name>/<Model_Name>.pkl
    models_dir = os.path.join("models", dataset_name)
    os.makedirs(models_dir, exist_ok=True)
    model_filename = f"{model_name.replace(' ', '_')}.pkl"
    model_filepath = os.path.join(models_dir, model_filename)
    with open(model_filepath, 'wb') as f:
        pickle.dump(model, f)
    print(f"Model {model_name} saved to {model_filepath}")

    result = {
        'Model': model_name,
        'Dataset': dataset_name,
        'Training Time (s)': training_time,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2 Score': r_squared,
        'Adjusted R2 Score': adjusted_r_squared
    }
    if best_params:
        result['Best Params'] = str(best_params)
    results.append(result)

# 2. Main Script
def main(data_root="/home/dev/project/modelling/preprocessing/results"):
    """
    Train and evaluate every configured regressor on pre-split datasets.

    For each dataset name the function reads ``train.csv``, ``test.csv`` and
    ``val.csv`` from ``<data_root>/<dataset_name>/``, separates the target
    column, fills missing values with 0, calls ``train_model`` for every
    model, and writes the collected metrics to
    ``model_results/<dataset_name>/model_results.csv``.

    Parameters
    ----------
    data_root : str, optional
        Directory that contains one sub-directory per dataset. Defaults to
        the location that was previously hard-coded, so ``main()`` behaves as
        before.
    """
    # Hyper-parameter grids, keyed by model display name. Parameter names are
    # prefixed with the pipeline step name (``<step>__<param>``).
    # The SVR grid has no matching pipeline below and is therefore unused.
    param_grids = {
        'Ridge Regression': {
            'ridge__alpha': [0.1, 1.0, 10.0, 100.0]
        },
        'Lasso Regression': {
            'lasso__alpha': [0.01, 0.1, 1.0, 10.0]
        },
        'Elastic Net Regression': {
            'elasticnet__alpha': [0.01, 0.1, 1.0, 10.0],
            'elasticnet__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
        },
        'LightGBM Regression': {
            'lightgbm__num_leaves': [31, 50, 70],
            'lightgbm__learning_rate': [0.01, 0.05, 0.1],
            'lightgbm__n_estimators': [100, 200, 500]
        },
        'Random Forest Regression': {
            'randomforest__n_estimators': [100, 200, 500],
            'randomforest__max_depth': [10, 20, None],
            'randomforest__min_samples_split': [2, 5, 10]
        },
        'Support Vector Regression': {
            'svr__C': [0.1, 1, 10],
            'svr__epsilon': [0.1, 0.2, 0.5],
            'svr__kernel': ['linear', 'rbf']
        },
        'XGBoost Regression': {
            'xgboost__learning_rate': [0.01, 0.05, 0.1],
            'xgboost__max_depth': [3, 5, 7],
            'xgboost__n_estimators': [100, 200, 500]
        }
    }

    def build_models():
        """Return a fresh ``{name: estimator}`` mapping.

        Called once per dataset so that estimator objects are never shared
        between datasets: a fit that timed out in ``train_model`` may still be
        running in a background thread and mutating its estimator.
        """
        pipelines = {
            'Linear Regression': Pipeline([
                ('linearregression', LinearRegression())
            ]),
            'Ridge Regression': Pipeline([
                ('ridge', Ridge())
            ]),
            'Lasso Regression': Pipeline([
                ('lasso', Lasso())
            ]),
            'Elastic Net Regression': Pipeline([
                ('elasticnet', ElasticNet())
            ]),
            # Stochastic learners are seeded so repeated runs are comparable.
            'LightGBM Regression': Pipeline([
                ('lightgbm', LGBMRegressor(random_state=42))
            ]),
            'Random Forest Regression': Pipeline([
                ('randomforest', RandomForestRegressor(random_state=42))
            ]),
            # ``use_label_encoder`` only ever applied to XGBoost classifiers
            # and is deprecated, so it is not passed to the regressor.
            'XGBoost Regression': Pipeline([
                ('xgboost', xgb.XGBRegressor(eval_metric='rmse', random_state=42))
            ]),
        }

        models = {}
        for name, pipeline in pipelines.items():
            if name in param_grids:
                models[name] = GridSearchCV(
                    estimator=pipeline,
                    param_grid=param_grids[name],
                    cv=5,
                    scoring='r2',
                    n_jobs=-1,
                    verbose=1
                )
            else:
                models[name] = pipeline  # Linear Regression has no hyperparameters
        return models

    # Maximum wall-clock time allowed per model (fit + predict)
    training_threshold = 7200  # seconds

    # List of datasets
    datasets = ['dataset1', 'dataset5']

    # Results folder
    results_dir = "model_results"
    os.makedirs(results_dir, exist_ok=True)

    # Models directory
    models_dir = "models"
    os.makedirs(models_dir, exist_ok=True)

    target_variable = 'diem_hp'

    # Process each dataset
    for dataset_name in datasets:
        print(f"\nProcessing {dataset_name}")

        # Load pre-split data. val.csv is still read so that an incomplete
        # pre-split directory is skipped, but the validation split is not used
        # below: GridSearchCV runs its own 5-fold CV on the training split.
        data_path = os.path.join(data_root, dataset_name)
        try:
            train_data = pd.read_csv(os.path.join(data_path, "train.csv"))
            test_data = pd.read_csv(os.path.join(data_path, "test.csv"))
            val_data = pd.read_csv(os.path.join(data_path, "val.csv"))
            print(f"Successfully loaded pre-split data for {dataset_name}")
        except Exception as e:
            print(f"Error loading data for {dataset_name}: {e}")
            continue

        # The target must be present in every split that is used below;
        # otherwise the drop()/indexing would raise KeyError mid-run.
        missing_target = [
            split_name
            for split_name, frame in (('training', train_data), ('test', test_data))
            if target_variable not in frame.columns
        ]
        if missing_target:
            print(
                f"Target variable '{target_variable}' not found in {' and '.join(missing_target)} data "
                f"for dataset '{dataset_name}'. Skipping this dataset."
            )
            continue

        # Separate features and target
        X_train = train_data.drop(columns=[target_variable])
        y_train = train_data[target_variable]
        X_test = test_data.drop(columns=[target_variable])
        y_test = test_data[target_variable]

        # Handle missing values.
        # NOTE(review): missing *target* values are also replaced with 0,
        # which the models will treat as real labels; confirm this is wanted.
        X_train = X_train.fillna(0)
        X_test = X_test.fillna(0)
        y_train = y_train.fillna(0)
        y_test = y_test.fillna(0)

        # No encoding happens in this function, so only claim "numeric" after
        # actually checking the dtypes.
        non_numeric = X_train.select_dtypes(exclude=['number', 'bool']).columns.tolist()
        if non_numeric:
            print(f"Warning: non-numeric feature columns present (models may fail to fit): {non_numeric}")
        else:
            print("All features are now numeric and missing values are handled.")

        # Prepare results storage
        results = []

        # Train and evaluate each model
        for model_name, model in build_models().items():
            print(f"\nTraining model: {model_name}")
            train_model(
                model_name=model_name,
                model=model,
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                results=results,
                training_threshold=training_threshold,
                dataset_name=dataset_name
            )

        # Save results to CSV
        results_df = pd.DataFrame(results)
        dataset_results_dir = os.path.join(results_dir, dataset_name)
        os.makedirs(dataset_results_dir, exist_ok=True)
        results_file = os.path.join(dataset_results_dir, 'model_results.csv')
        results_df.to_csv(results_file, index=False)
        print(f"Results for {dataset_name} saved to {results_file}")

if __name__ == "__main__":
    main()


Processing dataset1
Successfully loaded pre-split data for dataset1
All features are now numeric and missing values are handled.

Training model: Linear Regression
Starting training for Linear Regression...
Completed training for Linear Regression in 0.61 seconds.
Model Linear Regression trained successfully in 0.61 seconds.
MSE: 3.2919359280973275, RMSE: 1.8143692920950045, MAE: 1.2694315353712364, R²: 0.5187536134032071, Adjusted R²: 0.5179535260530577
Model Linear Regression saved to models/dataset1/Linear_Regression.pkl

Training model: Ridge Regression
Starting training for Ridge Regression...
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Completed training for Ridge Regression in 2.69 seconds.
Model Ridge Regression trained successfully in 2.69 seconds.
MSE: 3.2918223839399534, RMSE: 1.8143380015697057, MAE: 1.269440415049705, R²: 0.518770212364016, Adjusted R²: 0.5179701526101643
Best parameters for Ridge Regression: {'ridge__alpha': 1.0}
Model Ridge Regression sa