# REGRESSION MODELS: PCA IMPUTATION

## Description of this notebook

This notebook evaluates the performance of several regression models on datasets whose missing values were imputed with Principal Component Analysis (PCA).

In [None]:
# Import all the necessary libraries 

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import joblib
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic, WhiteKernel, ExpSineSquared, DotProduct
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, KFold, train_test_split, cross_val_score
from sklearn.metrics import make_scorer
from sklearn.preprocessing import StandardScaler
import os
from sklearn.neural_network import MLPRegressor
from GPy import models
from GPy import kern
from sklearn.gaussian_process import GaussianProcessRegressor
import warnings
warnings.filterwarnings("ignore")
from sklearn.base import clone

In [None]:
#os.chdir('..') # move to the general directory

In [None]:
def scale_data(input, target):
    """Drop rows without an ADAS13 score and standardise the numeric features.

    Parameters
    ----------
    input : pandas.DataFrame
        Feature table. Must contain the categorical columns ``PTGENDER`` and
        ``APOE4``; every other column is treated as numeric and scaled.
    target : pandas.DataFrame or pandas.Series
        Object that supplies the ``ADAS13`` column once concatenated
        column-wise with ``input`` (rows are matched on the index).

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The feature table (scaled numeric columns first, then ``PTGENDER`` and
        ``APOE4`` untouched) and the ``ADAS13`` series. Both share the same
        fresh ``0..n-1`` index, so they stay aligned for positional *and*
        label-based operations.
    """
    # Join features and target so that rows with a missing ADAS13 value are
    # removed from both at once, then renumber the surviving rows. Resetting
    # here (rather than only on the features later) keeps X and y on one index.
    all_data = (
        pd.concat([input, target], axis=1)
        .dropna(subset=['ADAS13'])
        .reset_index(drop=True)
    )

    # Divide again in input and target
    features = all_data.drop(columns=['ADAS13'])
    target = all_data['ADAS13']

    # Divide numerical and categorical variables
    categorical_cols = ['PTGENDER', 'APOE4']
    input_num = features.drop(columns=categorical_cols)
    input_cat = features[categorical_cols]

    # Scale the numeric columns to zero mean / unit variance.
    # NOTE(review): the scaler is fitted on every row passed in; if the result
    # is later cross-validated, the scaling statistics include the held-out
    # rows. Confirm this is acceptable for the reported scores.
    scaler = StandardScaler()
    input_scaled = pd.DataFrame(
        scaler.fit_transform(input_num),
        columns=input_num.columns,
        index=input_num.index,
    )

    # Concatenate scaled data with categorical variables
    scaled_features = pd.concat([input_scaled, input_cat], axis=1)

    return scaled_features, target

## **1.** Model Definitions

### **1.1** Random Forest Model

In [None]:
def random_forest(X, y, imp='None'):
    """Nested cross-validation of a random forest regressor.

    The outer loop is a shuffled 10-fold split (seed 42). Inside each outer
    training fold a 3-fold grid search picks the hyper-parameters with the
    lowest mean absolute error; the refitted winner is then scored with R² on
    the outer test fold.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    # Candidate values explored by the inner grid search.
    search_space = {
        'n_estimators': [30, 40, 50, 55],
        'max_depth': [1, 5, 10, 20, 30],
        'min_samples_split': [2, 5, 10, 15, 20],
        'min_samples_leaf': [1, 2, 4, 6, 8, 10],
        'max_features': [15, 16, 17, 18, 19, 20]
    }

    base_model = RandomForestRegressor(random_state=42)

    # greater_is_better=False negates MAE so that "higher is better" holds.
    neg_mae = make_scorer(mean_absolute_error, greater_is_better=False)

    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        print(f"Fold {fold_no}:")

        tuner = GridSearchCV(
            estimator=base_model,
            param_grid=search_space,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=neg_mae,
            verbose=0,
            n_jobs=-1,
        )
        tuner.fit(X.iloc[train_idx], y.iloc[train_idx])

        # Evaluate the refitted best configuration on the untouched outer fold.
        predictions = tuner.best_estimator_.predict(X.iloc[test_idx])
        fold_r2 = r2_score(y.iloc[test_idx], predictions)

        print(f"Fold R²: {fold_r2:.3f}")
        fold_scores.append(fold_r2)

    mean_r2 = np.mean(fold_scores)
    std_r2 = np.std(fold_scores)
    return f"\nRandom Forest 10-Fold CV R² Score {imp}: {mean_r2:.3f} ± {std_r2:.3f}"


### **1.2** Gradient Boosting Regressor

In [None]:
def gradient_boosting(X, y, imp='None'):
    """Nested cross-validation of a gradient boosting regressor.

    Outer loop: shuffled 10-fold split (seed 42). Inner loop: 3-fold grid
    search that selects hyper-parameters by (negated) mean absolute error.
    The best refitted model of each outer fold is scored with R² on that
    fold's held-out rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    # Candidate values explored by the inner grid search.
    search_space = {
        'n_estimators': [500, 700, 900],
        'learning_rate': [0.0001, 0.001, 0.01],
        'max_depth': [1, 2, 3],
        'min_samples_split': [3, 4, 5],
        'min_samples_leaf': [1, 2, 3]
    }

    booster = GradientBoostingRegressor(random_state=42)

    # greater_is_better=False negates MAE so that "higher is better" holds.
    neg_mae = make_scorer(mean_absolute_error, greater_is_better=False)

    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        print(f"Fold {fold_no}:")

        features_train, features_test = X.iloc[train_idx], X.iloc[test_idx]
        target_train, target_test = y.iloc[train_idx], y.iloc[test_idx]

        tuner = GridSearchCV(
            estimator=booster,
            param_grid=search_space,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=neg_mae,
            verbose=0,
            n_jobs=-1,
        )
        tuner.fit(features_train, target_train)

        # Score the refitted winner on the outer fold it has never seen.
        fold_r2 = r2_score(target_test, tuner.best_estimator_.predict(features_test))

        print(f"  Fold R²: {fold_r2:.3f}")
        fold_scores.append(fold_r2)

    mean_r2 = np.mean(fold_scores)
    std_r2 = np.std(fold_scores)
    return f"\nGradient Boosting 10-Fold CV R² Score {imp}: {mean_r2:.3f} ± {std_r2:.3f}"


### **1.3** XGBRegressor

In [None]:
def xgb_regressor(X, y, imp='None'):
    """Nested cross-validation of an XGBoost regressor.

    Outer loop: shuffled 10-fold split (seed 42). Inner loop: 3-fold grid
    search that selects hyper-parameters by (negated) mean absolute error.
    The best refitted model of each outer fold is scored with R² on that
    fold's held-out rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    # Candidate values explored by the inner grid search.
    search_space = {
        'n_estimators': [100, 500, 1000],
        'learning_rate': [1e-4, 1e-3, 1e-2, 1e-1],
        'max_depth': [1, 3, 5, 7]
    }

    booster = XGBRegressor(random_state=42)

    # greater_is_better=False negates MAE so that "higher is better" holds.
    neg_mae = make_scorer(mean_absolute_error, greater_is_better=False)

    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        print(f"Fold {fold_no}:")

        tuner = GridSearchCV(
            estimator=booster,
            param_grid=search_space,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=neg_mae,
            verbose=0,
            n_jobs=-1,
        )
        tuner.fit(X.iloc[train_idx], y.iloc[train_idx])

        # Score the refitted winner on the outer fold it has never seen.
        held_out_pred = tuner.best_estimator_.predict(X.iloc[test_idx])
        fold_r2 = r2_score(y.iloc[test_idx], held_out_pred)

        print(f"  Fold R²: {fold_r2:.3f}")
        fold_scores.append(fold_r2)

    mean_r2 = np.mean(fold_scores)
    std_r2 = np.std(fold_scores)
    return f"\nXGB Regressor 10-Fold CV R² Score {imp}: {mean_r2:.3f} ± {std_r2:.3f}"


### **1.4** Support Vector Regressor

In [None]:
def svr(X, y, imp='None'):
    """Nested cross-validation of a support vector regressor.

    Outer loop: shuffled 10-fold split (seed 42). Inner loop: 3-fold grid
    search over kernel, C, gamma and epsilon that selects by (negated) mean
    absolute error. The best refitted model of each outer fold is scored with
    R² on that fold's held-out rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    # Candidate values explored by the inner grid search.
    search_space = {
        'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
        'C': [1e-3, 1e-2, 1e-1, 1, 10],
        'gamma': [1e-4, 1e-3, 1e-2, 1, 10],
        'epsilon': [1e-4, 1e-3, 1e-2, 1, 10]
    }

    regressor = SVR()

    # greater_is_better=False negates MAE so that "higher is better" holds.
    neg_mae = make_scorer(mean_absolute_error, greater_is_better=False)

    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        print(f"Fold {fold_no}:")

        features_train, features_test = X.iloc[train_idx], X.iloc[test_idx]
        target_train, target_test = y.iloc[train_idx], y.iloc[test_idx]

        tuner = GridSearchCV(
            estimator=regressor,
            param_grid=search_space,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=neg_mae,
            verbose=0,
            n_jobs=-1,
        )
        tuner.fit(features_train, target_train)

        # Score the refitted winner on the outer fold it has never seen.
        held_out_pred = tuner.best_estimator_.predict(features_test)
        fold_r2 = r2_score(target_test, held_out_pred)

        print(f"  Fold R²: {fold_r2:.3f}")
        fold_scores.append(fold_r2)

    mean_r2 = np.mean(fold_scores)
    std_r2 = np.std(fold_scores)
    return f"\nSVR 10-Fold CV R² Score {imp}: {mean_r2:.3f} ± {std_r2:.3f}"


### **1.5** Ridge Regression Model

In [None]:
def ridge_regression(X, y, imp='None'):
    """Nested cross-validation of a ridge regression model.

    Outer loop: shuffled 10-fold split (seed 42). Inner loop: 3-fold grid
    search that selects hyper-parameters by (negated) mean absolute error.
    The best refitted model of each outer fold is scored with R² on that
    fold's held-out rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    # Candidate values explored by the inner grid search.
    search_space = {
        'alpha': [1e-10, 1e-5, 1e-4, 1e-3, 1e-2],
        'fit_intercept': [True, False],
        'tol': [1e-5, 1e-4, 1e-3, 1e-2],
        'max_iter': [100, 500, 1000, 2000, 3000],
        'solver': ['auto', 'saga'],
    }

    linear_model = Ridge(random_state=42)

    # greater_is_better=False negates MAE so that "higher is better" holds.
    neg_mae = make_scorer(mean_absolute_error, greater_is_better=False)

    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        print(f"Fold {fold_no}:")

        tuner = GridSearchCV(
            estimator=linear_model,
            param_grid=search_space,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=neg_mae,
            verbose=0,
            n_jobs=-1,
        )
        tuner.fit(X.iloc[train_idx], y.iloc[train_idx])

        # Score the refitted winner on the outer fold it has never seen.
        held_out_pred = tuner.best_estimator_.predict(X.iloc[test_idx])
        fold_r2 = r2_score(y.iloc[test_idx], held_out_pred)

        print(f"  Fold R²: {fold_r2:.3f}")
        fold_scores.append(fold_r2)

    mean_r2 = np.mean(fold_scores)
    std_r2 = np.std(fold_scores)
    return f"\nRidge Regression 10-Fold CV R² Score {imp}: {mean_r2:.3f} ± {std_r2:.3f}"


### **1.6** MultiLayer Perceptron (MLP)

In [None]:
def mlp(X, y, imp='None'):
    """Nested cross-validation of a multi-layer perceptron regressor.

    Outer loop: shuffled 10-fold split (seed 42). Inner loop: 3-fold grid
    search over layer sizes, activation, solver and L2 penalty that selects by
    (negated) mean absolute error. The best refitted model of each outer fold
    is scored with R² on that fold's held-out rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    # Candidate values explored by the inner grid search.
    search_space = {
        'hidden_layer_sizes': [(2,), (5,), (10,), (2, 2), (5, 2), (10, 2)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'lbfgs'],
        'alpha': [1e-10, 1e-5, 1e-4, 1e-3, 1e-2]
    }

    # Very high iteration cap so the optimiser is not cut short by max_iter.
    network = MLPRegressor(random_state=42, max_iter=1000000)

    # greater_is_better=False negates MAE so that "higher is better" holds.
    neg_mae = make_scorer(mean_absolute_error, greater_is_better=False)

    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    fold_scores = []

    for fold_no, (train_idx, test_idx) in enumerate(outer_cv.split(X), start=1):
        print(f"Fold {fold_no}:")

        features_train, features_test = X.iloc[train_idx], X.iloc[test_idx]
        target_train, target_test = y.iloc[train_idx], y.iloc[test_idx]

        tuner = GridSearchCV(
            estimator=network,
            param_grid=search_space,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=neg_mae,
            verbose=0,
            n_jobs=-1,
        )
        tuner.fit(features_train, target_train)

        # Score the refitted winner on the outer fold it has never seen.
        fold_r2 = r2_score(target_test, tuner.best_estimator_.predict(features_test))

        print(f"  Fold R²: {fold_r2:.3f}")
        fold_scores.append(fold_r2)

    mean_r2 = np.mean(fold_scores)
    std_r2 = np.std(fold_scores)
    return f"\nMLP 10-Fold CV R² Score {imp}: {mean_r2:.3f} ± {std_r2:.3f}"

### **1.7** GP Regressor

In [None]:
def gp_regressor(X, y, imp='None'):
    """Nested cross-validation of a Gaussian process regressor.

    Outer loop: shuffled 10-fold split (seed 42). Inner loop: 3-fold grid
    search over a list of candidate kernels that selects by (negated) mean
    absolute error. The best refitted model of each outer fold is scored with
    R² on that fold's held-out rows.

    The candidate kernels are:

    * every unordered pair of the five base kernels (RBF, Matern,
      RationalQuadratic, ExpSineSquared, DotProduct) summed together with a
      WhiteKernel noise term -- 10 kernels;
    * every single base kernel summed with a WhiteKernel -- 5 kernels.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, indexed positionally via ``.iloc``.
    y : pandas.Series
        Target values, row-aligned with ``X`` by position.
    imp : str, optional
        Label of the imputation strategy, inserted into the summary text.

    Returns
    -------
    str
        Summary line with the mean and standard deviation of the ten R² scores.
    """
    from itertools import combinations

    # Factories (rather than shared instances) so every composite kernel owns
    # fresh component objects with the same starting hyper-parameters/bounds.
    def white():
        return WhiteKernel(noise_level=1.0, noise_level_bounds=(1e-10, 1e1))

    def rbf():
        return RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))

    def matern():
        return Matern(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))

    def rational_quadratic():
        return RationalQuadratic(alpha=1.0, alpha_bounds=(1e-2, 1e3),
                                 length_scale=1.0, length_scale_bounds=(1e-2, 1e3))

    def exp_sine_squared():
        return ExpSineSquared(length_scale=1.0, length_scale_bounds=(1e-2, 1e3),
                              periodicity=3.0, periodicity_bounds=(1e-2, 10.0))

    def dot_product():
        return DotProduct(sigma_0=1.0, sigma_0_bounds=(1e-2, 1e3))

    base_kernels = [rbf, matern, rational_quadratic, exp_sine_squared, dot_product]

    # Pairwise sums "A + White + B". Building them in a loop guarantees each
    # list entry is one complete sum; a stray comma in a hand-written list
    # would silently split a composite into two unrelated candidates.
    kernel_options = [
        first() + white() + second()
        for first, second in combinations(base_kernels, 2)
    ]

    # Single base kernel plus noise.
    kernel_options += [
        single() + white()
        for single in (dot_product, rbf, matern, rational_quadratic, exp_sine_squared)
    ]

    param_grid = {'kernel': kernel_options}

    gpr = GaussianProcessRegressor(random_state=42, normalize_y=True)

    # greater_is_better=False negates MAE so that "higher is better" holds.
    scorer = make_scorer(mean_absolute_error, greater_is_better=False)

    kf_outer = KFold(n_splits=10, shuffle=True, random_state=42)

    r2_scores = []

    for fold, (train_index, test_index) in enumerate(kf_outer.split(X), start=1):
        print(f"Fold {fold}:")

        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        grid_search = GridSearchCV(
            estimator=gpr,
            param_grid=param_grid,
            cv=KFold(n_splits=3, shuffle=True, random_state=42),
            scoring=scorer,
            verbose=0,
            n_jobs=-1
        )

        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_

        # Only the predictive mean is needed for R²; the predictive standard
        # deviation was never used, so it is no longer requested.
        y_pred = best_model.predict(X_test)

        r2 = r2_score(y_test, y_pred)

        print(f"  Fold R²: {r2:.3f}")

        r2_scores.append(r2)

    return (f"\nGPR 10-Fold CV R² Score {imp}: {np.mean(r2_scores):.3f} ± {np.std(r2_scores):.3f}")
    


## **2.** PCA Imputed Datasets

### **2.1** Mean Pre-Imputation

In [None]:
# Load the PCA-imputed features (mean pre-imputation) and the target table.
# Both paths are relative to the current working directory.
input = pd.read_csv('PCA IMPUTATION/imputed_pca_mean_df.csv')
target = pd.read_csv('DATA/target_COMPLETE_DATA.csv')

# Drop rows with a missing ADAS13 value and standardise the numeric features.
scaled_input, y = scale_data(input, target)

##### Random Forest

In [None]:
r2_score_rf = random_forest(scaled_input, y, imp='PCA Mean')

Fold 1:
Fold R²: 0.770
Fold 2:
Fold R²: 0.720
Fold 3:
Fold R²: 0.744
Fold 4:
Fold R²: 0.734
Fold 5:
Fold R²: 0.717
Fold 6:
Fold R²: 0.727
Fold 7:
Fold R²: 0.684
Fold 8:
Fold R²: 0.688
Fold 9:
Fold R²: 0.674
Fold 10:
Fold R²: 0.628


In [None]:
print(r2_score_rf)


Random Forest 10-Fold CV R² Score PCA Mean: 0.709 ± 0.039


##### Gradient Boosting

In [None]:
r2_score_grad = gradient_boosting(scaled_input, y, imp='PCA Mean')

Fold 1:
  Fold R²: 0.777
Fold 2:
  Fold R²: 0.726
Fold 3:
  Fold R²: 0.751
Fold 4:
  Fold R²: 0.736
Fold 5:
  Fold R²: 0.741
Fold 6:
  Fold R²: 0.717
Fold 7:
  Fold R²: 0.709
Fold 8:
  Fold R²: 0.700
Fold 9:
  Fold R²: 0.712
Fold 10:
  Fold R²: 0.641


In [None]:
print(r2_score_grad)


Gradient Boosting 10-Fold CV R² Score PCA Mean: 0.721 ± 0.034


##### Extreme Gradient Boosting

In [None]:
r2_score_xgb = xgb_regressor(scaled_input, y, imp='PCA Mean')

Fold 1:
  Fold R²: 0.769
Fold 2:
  Fold R²: 0.726
Fold 3:
  Fold R²: 0.744
Fold 4:
  Fold R²: 0.724
Fold 5:
  Fold R²: 0.742
Fold 6:
  Fold R²: 0.717
Fold 7:
  Fold R²: 0.709
Fold 8:
  Fold R²: 0.693
Fold 9:
  Fold R²: 0.712
Fold 10:
  Fold R²: 0.639


In [None]:
print(r2_score_xgb)


XGB Regressor 10-Fold CV R² Score PCA Mean: 0.717 ± 0.033


##### SVR

In [None]:
r2_score_svr = svr(scaled_input, y, imp='PCA Mean')

Fold 1:
  Fold R²: 0.775
Fold 2:
  Fold R²: 0.728
Fold 3:
  Fold R²: 0.764
Fold 4:
  Fold R²: 0.745
Fold 5:
  Fold R²: 0.744
Fold 6:
  Fold R²: 0.748
Fold 7:
  Fold R²: 0.715
Fold 8:
  Fold R²: 0.684
Fold 9:
  Fold R²: 0.739
Fold 10:
  Fold R²: 0.656


In [None]:
print(r2_score_svr)


SVR 10-Fold CV R² Score PCA Mean: 0.730 ± 0.034


##### Ridge Regression

In [None]:
r2_score_ridge = ridge_regression(scaled_input, y, imp='PCA Mean')

In [None]:
print(r2_score_ridge)


Ridge Regression 10-Fold CV R² Score PCA Mean: 0.730 ± 0.035


##### MLP

In [None]:
r2_score_mlp = mlp(scaled_input, y, imp='PCA Mean')

In [None]:
print(r2_score_mlp)


MLP 10-Fold CV R² Score PCA Mean: 0.729 ± 0.038


##### GP Regression

In [None]:
r2_score_gpr = gp_regressor(scaled_input, y, imp='PCA Mean')

In [None]:
print(r2_score_gpr)


GPR 10-Fold CV R² Score PCA Mean: 0.730 ± 0.037


### **2.2** Median Pre-Imputation

In [None]:
# Load the PCA-imputed features (median pre-imputation) and the target table.
# Both paths are relative to the current working directory.
input = pd.read_csv('PCA IMPUTATION/imputed_pca_median_df.csv')
target = pd.read_csv('DATA/target_COMPLETE_DATA.csv')

# Drop rows with a missing ADAS13 value and standardise the numeric features.
scaled_input, y = scale_data(input, target)

##### Random Forest

In [None]:
r2_score_rf = random_forest(scaled_input, y, imp='PCA Median')

Fold 1:
Fold R²: 0.763
Fold 2:
Fold R²: 0.729
Fold 3:
Fold R²: 0.748
Fold 4:
Fold R²: 0.741
Fold 5:
Fold R²: 0.739
Fold 6:
Fold R²: 0.720
Fold 7:
Fold R²: 0.705
Fold 8:
Fold R²: 0.684
Fold 9:
Fold R²: 0.703
Fold 10:
Fold R²: 0.626


In [None]:
print(r2_score_rf)


Random Forest 10-Fold CV R² Score PCA Median: 0.716 ± 0.038


##### Gradient Boosting

In [None]:
r2_score_grad = gradient_boosting(scaled_input, y, imp='PCA Median')

Fold 1:
  Fold R²: 0.774
Fold 2:
  Fold R²: 0.723
Fold 3:
  Fold R²: 0.750
Fold 4:
  Fold R²: 0.735
Fold 5:
  Fold R²: 0.742
Fold 6:
  Fold R²: 0.716
Fold 7:
  Fold R²: 0.708
Fold 8:
  Fold R²: 0.700
Fold 9:
  Fold R²: 0.714
Fold 10:
  Fold R²: 0.640


In [None]:
print(r2_score_grad)


Gradient Boosting 10-Fold CV R² Score PCA Median: 0.720 ± 0.034


##### Extreme Gradient Boosting

In [None]:
r2_score_xgb = xgb_regressor(scaled_input, y, imp='PCA Median')

Fold 1:
  Fold R²: 0.768
Fold 2:
  Fold R²: 0.721
Fold 3:
  Fold R²: 0.752
Fold 4:
  Fold R²: 0.735
Fold 5:
  Fold R²: 0.745
Fold 6:
  Fold R²: 0.718
Fold 7:
  Fold R²: 0.717
Fold 8:
  Fold R²: 0.701
Fold 9:
  Fold R²: 0.714
Fold 10:
  Fold R²: 0.638


In [None]:
print(r2_score_xgb)


XGB Regressor 10-Fold CV R² Score PCA Median: 0.721 ± 0.034


##### SVR

In [None]:
r2_score_svr = svr(scaled_input, y, imp='PCA Median')

Fold 1:
  Fold R²: 0.774
Fold 2:
  Fold R²: 0.728
Fold 3:
  Fold R²: 0.766
Fold 4:
  Fold R²: 0.747
Fold 5:
  Fold R²: 0.747
Fold 6:
  Fold R²: 0.748
Fold 7:
  Fold R²: 0.716
Fold 8:
  Fold R²: 0.678
Fold 9:
  Fold R²: 0.740
Fold 10:
  Fold R²: 0.658


In [None]:
print(r2_score_svr)


SVR 10-Fold CV R² Score PCA Median: 0.730 ± 0.035


##### Ridge Regression

In [None]:
r2_score_ridge = ridge_regression(scaled_input, y, imp='PCA Median')

In [None]:
print(r2_score_ridge)


Ridge Regression 10-Fold CV R² Score PCA Median: 0.731 ± 0.035


##### MLP

In [None]:
r2_score_mlp = mlp(scaled_input, y, imp='PCA Median')

In [None]:
print(r2_score_mlp)


MLP 10-Fold CV R² Score PCA Median: 0.726 ± 0.037


##### GP Regression

In [None]:
r2_score_gpr = gp_regressor(scaled_input, y, imp='PCA Median')

In [None]:
print(r2_score_gpr)


GPR 10-Fold CV R² Score PCA Median: 0.730 ± 0.037


### **2.3** Mode Pre-Imputation

In [None]:
# Load the PCA-imputed features (mode pre-imputation) and the target table.
# Both paths are relative to the current working directory.
input = pd.read_csv('PCA IMPUTATION/imputed_pca_mode_df.csv')
target = pd.read_csv('DATA/target_COMPLETE_DATA.csv')

# Drop rows with a missing ADAS13 value and standardise the numeric features.
scaled_input, y = scale_data(input, target)

#X_train, X_test, y_train, y_test = train_test_split(scaled_input, y, test_size=0.2, random_state=42)

##### Random Forest

In [None]:
r2_score_rf = random_forest(scaled_input, y, imp='PCA Mode')

Fold 1:
Fold R²: 0.770
Fold 2:
Fold R²: 0.724
Fold 3:
Fold R²: 0.745
Fold 4:
Fold R²: 0.730
Fold 5:
Fold R²: 0.721
Fold 6:
Fold R²: 0.716
Fold 7:
Fold R²: 0.708
Fold 8:
Fold R²: 0.697
Fold 9:
Fold R²: 0.698
Fold 10:
Fold R²: 0.621


In [None]:
print(r2_score_rf)


Random Forest 10-Fold CV R² Score PCA Mode: 0.713 ± 0.037


##### Gradient Boosting

In [None]:
r2_score_grad = gradient_boosting(scaled_input, y, imp='PCA Mode')

Fold 1:
  Fold R²: 0.780
Fold 2:
  Fold R²: 0.728
Fold 3:
  Fold R²: 0.749
Fold 4:
  Fold R²: 0.731
Fold 5:
  Fold R²: 0.750
Fold 6:
  Fold R²: 0.712
Fold 7:
  Fold R²: 0.706
Fold 8:
  Fold R²: 0.706
Fold 9:
  Fold R²: 0.714
Fold 10:
  Fold R²: 0.640


In [None]:
print(r2_score_grad)


Gradient Boosting 10-Fold CV R² Score PCA Mode: 0.722 ± 0.035


##### Extreme Gradient Boosting

In [None]:
r2_score_xgb = xgb_regressor(scaled_input, y, imp='PCA Mode')

Fold 1:
  Fold R²: 0.770
Fold 2:
  Fold R²: 0.719
Fold 3:
  Fold R²: 0.741
Fold 4:
  Fold R²: 0.725
Fold 5:
  Fold R²: 0.738
Fold 6:
  Fold R²: 0.717
Fold 7:
  Fold R²: 0.707
Fold 8:
  Fold R²: 0.697
Fold 9:
  Fold R²: 0.715
Fold 10:
  Fold R²: 0.643


In [None]:
print(r2_score_xgb)


XGB Regressor 10-Fold CV R² Score PCA Mode: 0.717 ± 0.031


##### SVR

In [None]:
r2_score_svr = svr(scaled_input, y, imp='PCA Mode')
print(r2_score_svr)

Fold 1:
  Fold R²: 0.776
Fold 2:
  Fold R²: 0.728
Fold 3:
  Fold R²: 0.768
Fold 4:
  Fold R²: 0.746
Fold 5:
  Fold R²: 0.752
Fold 6:
  Fold R²: 0.752
Fold 7:
  Fold R²: 0.720
Fold 8:
  Fold R²: 0.682
Fold 9:
  Fold R²: 0.735
Fold 10:
  Fold R²: 0.659

SVR 10-Fold CV R² Score PCA Mode: 0.732 ± 0.035


##### Ridge Regression

In [None]:
r2_score_ridge = ridge_regression(scaled_input, y, imp='PCA Mode')
print(r2_score_ridge)

##### MLP

In [None]:
r2_score_mlp = mlp(scaled_input, y, imp='PCA Mode')
print(r2_score_mlp)

##### GP Regression

In [None]:
r2_score_gpr = gp_regressor(scaled_input, y, imp='PCA Mode')
print(r2_score_gpr)

### **2.4** KNN Pre-Imputation

In [None]:
# Load the PCA-imputed features (KNN pre-imputation) and the target table.
# Both paths are relative to the current working directory.
input = pd.read_csv('PCA IMPUTATION/imputed_pca_knn_df.csv')
target = pd.read_csv('DATA/target_COMPLETE_DATA.csv')

# Drop rows with a missing ADAS13 value and standardise the numeric features.
scaled_input, y = scale_data(input, target)

##### Random Forest

In [None]:
r2_score_rf = random_forest(scaled_input, y, imp='PCA KNN')

Fold 1:
Fold R²: 0.773
Fold 2:
Fold R²: 0.723
Fold 3:
Fold R²: 0.742
Fold 4:
Fold R²: 0.735
Fold 5:
Fold R²: 0.727
Fold 6:
Fold R²: 0.710
Fold 7:
Fold R²: 0.704
Fold 8:
Fold R²: 0.682
Fold 9:
Fold R²: 0.689
Fold 10:
Fold R²: 0.617


In [None]:
print(r2_score_rf)


Random Forest 10-Fold CV R² Score PCA KNN: 0.710 ± 0.040


##### Gradient Boosting

In [None]:
r2_score_grad = gradient_boosting(scaled_input, y, imp='PCA KNN')

Fold 1:
  Fold R²: 0.776
Fold 2:
  Fold R²: 0.723
Fold 3:
  Fold R²: 0.750
Fold 4:
  Fold R²: 0.735
Fold 5:
  Fold R²: 0.746
Fold 6:
  Fold R²: 0.715
Fold 7:
  Fold R²: 0.710
Fold 8:
  Fold R²: 0.698
Fold 9:
  Fold R²: 0.707
Fold 10:
  Fold R²: 0.639


In [None]:
print(r2_score_grad)


Gradient Boosting 10-Fold CV R² Score PCA KNN: 0.720 ± 0.035


##### Extreme Gradient Boosting

In [None]:
r2_score_xgb = xgb_regressor(scaled_input, y, imp='PCA KNN')

Fold 1:
  Fold R²: 0.769
Fold 2:
  Fold R²: 0.721
Fold 3:
  Fold R²: 0.754
Fold 4:
  Fold R²: 0.731
Fold 5:
  Fold R²: 0.734
Fold 6:
  Fold R²: 0.714
Fold 7:
  Fold R²: 0.707
Fold 8:
  Fold R²: 0.690
Fold 9:
  Fold R²: 0.703
Fold 10:
  Fold R²: 0.639


In [None]:
print(r2_score_xgb)


XGB Regressor 10-Fold CV R² Score PCA KNN: 0.716 ± 0.034


##### SVR

In [None]:
r2_score_svr = svr(scaled_input, y, imp='PCA KNN')

Fold 1:
  Fold R²: 0.775
Fold 2:
  Fold R²: 0.729
Fold 3:
  Fold R²: 0.765
Fold 4:
  Fold R²: 0.745
Fold 5:
  Fold R²: 0.743
Fold 6:
  Fold R²: 0.750
Fold 7:
  Fold R²: 0.716
Fold 8:
  Fold R²: 0.681
Fold 9:
  Fold R²: 0.740
Fold 10:
  Fold R²: 0.657


In [None]:
print(r2_score_svr)


SVR 10-Fold CV R² Score PCA KNN: 0.730 ± 0.035


##### Ridge Regression

In [None]:
r2_score_ridge = ridge_regression(scaled_input, y, imp='PCA KNN')

Fold 1:




  Fold R²: 0.772
Fold 2:




  Fold R²: 0.729
Fold 3:




  Fold R²: 0.765
Fold 4:




  Fold R²: 0.751
Fold 5:




  Fold R²: 0.744
Fold 6:




  Fold R²: 0.752
Fold 7:




  Fold R²: 0.717
Fold 8:




  Fold R²: 0.680
Fold 9:




  Fold R²: 0.732
Fold 10:




  Fold R²: 0.659


In [None]:
print(r2_score_ridge)


Ridge Regression 10-Fold CV R² Score PCA KNN: 0.730 ± 0.035


##### MLP

In [None]:
r2_score_mlp = mlp(scaled_input, y, imp='PCA KNN')

Fold 1:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.782
Fold 2:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.736
Fold 3:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.768
Fold 4:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.753
Fold 5:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.727
Fold 6:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.747
Fold 7:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.706
Fold 8:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.676
Fold 9:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.743
Fold 10:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.652


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
# Show the MLP summary string for the KNN pre-imputed data (mean ± std of the fold R² scores).
print(r2_score_mlp)


MLP 10-Fold CV R² Score PCA KNN: 0.729 ± 0.038


##### GP Regression

In [None]:
# 10-fold CV of the Gaussian-process regressor; `imp` is the label embedded in the summary string.
# NOTE(review): the saved output reports ABNORMAL_TERMINATION_IN_LNSRCH from lbfgs in fold 1 --
# check the optimiser settings inside gp_regressor if this fold's score matters.
r2_score_gpr = gp_regressor(scaled_input, y, imp='PCA KNN')

Fold 1:


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


  Fold R²: 0.772
Fold 2:




  Fold R²: 0.726
Fold 3:




  Fold R²: 0.771
Fold 4:




  Fold R²: 0.756
Fold 5:




  Fold R²: 0.741
Fold 6:




  Fold R²: 0.747
Fold 7:




  Fold R²: 0.718
Fold 8:




  Fold R²: 0.673
Fold 9:




  Fold R²: 0.733
Fold 10:




  Fold R²: 0.657


In [None]:
# Show the GPR summary string for the KNN pre-imputed data (mean ± std of the fold R² scores).
print(r2_score_gpr)


GPR 10-Fold CV R² Score PCA KNN: 0.730 ± 0.036


### **2.5** Iterative Pre-Imputation

In [None]:
# Read the PCA-imputed predictors of the iterative pre-imputation variant and the target table.
# Paths are relative to the current working directory.
# NOTE(review): `input` shadows the Python builtin; the name is kept because scale_data's
# signature and the other loading cells use it.
input = pd.read_csv('PCA IMPUTATION/imputed_pca_iter_df.csv')
target = pd.read_csv('DATA/target_COMPLETE_DATA.csv')

# scale_data drops rows whose ADAS13 is missing and standardises every column except
# PTGENDER and APOE4; `scaled_input` and `y` feed all model cells of this section.
scaled_input, y = scale_data(input, target)

##### Random Forest

In [None]:
# 10-fold CV of the Random Forest on the iterative variant; prints each fold's R² and
# returns the summary string, in which `imp` appears as the dataset label.
r2_score_rf = random_forest(scaled_input, y, imp='PCA Iterative')

Fold 1:
Fold R²: 0.763
Fold 2:
Fold R²: 0.723
Fold 3:
Fold R²: 0.741
Fold 4:
Fold R²: 0.727
Fold 5:
Fold R²: 0.725
Fold 6:
Fold R²: 0.722
Fold 7:
Fold R²: 0.700
Fold 8:
Fold R²: 0.694
Fold 9:
Fold R²: 0.693
Fold 10:
Fold R²: 0.628


In [None]:
# Show the Random Forest summary string (mean ± std of the fold R² scores).
print(r2_score_rf)


Random Forest 10-Fold CV R² Score PCA Iterative: 0.712 ± 0.035


##### Gradient Boosting

In [None]:
# 10-fold CV of Gradient Boosting on the iterative variant; `imp` labels the summary string.
r2_score_grad = gradient_boosting(scaled_input, y, imp='PCA Iterative')

Fold 1:
  Fold R²: 0.776
Fold 2:
  Fold R²: 0.724
Fold 3:
  Fold R²: 0.752
Fold 4:
  Fold R²: 0.736
Fold 5:
  Fold R²: 0.743
Fold 6:
  Fold R²: 0.717
Fold 7:
  Fold R²: 0.708
Fold 8:
  Fold R²: 0.700
Fold 9:
  Fold R²: 0.710
Fold 10:
  Fold R²: 0.639


In [None]:
# Show the Gradient Boosting summary string (mean ± std of the fold R² scores).
print(r2_score_grad)


Gradient Boosting 10-Fold CV R² Score PCA Iterative: 0.721 ± 0.035


##### Extreme Gradient Boosting

In [None]:
# 10-fold CV of the XGBoost regressor on the iterative variant; `imp` labels the summary string.
r2_score_xgb = xgb_regressor(scaled_input, y, imp='PCA Iterative')

Fold 1:
  Fold R²: 0.771
Fold 2:
  Fold R²: 0.723
Fold 3:
  Fold R²: 0.742
Fold 4:
  Fold R²: 0.724
Fold 5:
  Fold R²: 0.738
Fold 6:
  Fold R²: 0.715
Fold 7:
  Fold R²: 0.705
Fold 8:
  Fold R²: 0.693
Fold 9:
  Fold R²: 0.711
Fold 10:
  Fold R²: 0.639


In [None]:
# Show the XGBoost summary string (mean ± std of the fold R² scores).
print(r2_score_xgb)


XGB Regressor 10-Fold CV R² Score PCA Iterative: 0.716 ± 0.033


##### SVR

In [None]:
# 10-fold CV of the support-vector regressor on the iterative variant; `imp` labels the summary string.
r2_score_svr = svr(scaled_input, y, imp='PCA Iterative')

Fold 1:
  Fold R²: 0.778
Fold 2:
  Fold R²: 0.725
Fold 3:
  Fold R²: 0.765
Fold 4:
  Fold R²: 0.743
Fold 5:
  Fold R²: 0.742
Fold 6:
  Fold R²: 0.746
Fold 7:
  Fold R²: 0.716
Fold 8:
  Fold R²: 0.689
Fold 9:
  Fold R²: 0.734
Fold 10:
  Fold R²: 0.654


In [None]:
# Show the SVR summary string (mean ± std of the fold R² scores).
print(r2_score_svr)


SVR 10-Fold CV R² Score PCA Iterative: 0.729 ± 0.034


##### Ridge Regression

In [None]:
# 10-fold CV of Ridge regression on the iterative variant; `imp` labels the summary string.
r2_score_ridge = ridge_regression(scaled_input, y, imp='PCA Iterative')

Fold 1:




  Fold R²: 0.772
Fold 2:




  Fold R²: 0.728
Fold 3:




  Fold R²: 0.765
Fold 4:




  Fold R²: 0.751
Fold 5:




  Fold R²: 0.742
Fold 6:




  Fold R²: 0.750
Fold 7:




  Fold R²: 0.717
Fold 8:




  Fold R²: 0.683
Fold 9:




  Fold R²: 0.732
Fold 10:




  Fold R²: 0.655


In [None]:
# Show the Ridge summary string (mean ± std of the fold R² scores).
print(r2_score_ridge)


Ridge Regression 10-Fold CV R² Score PCA Iterative: 0.729 ± 0.035


##### MLP

In [None]:
# 10-fold CV of the MLP regressor on the iterative variant; `imp` labels the summary string.
# NOTE(review): in the saved output lbfgs stops on its evaluation limit in every fold
# ("TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT"), so the networks are not converged;
# consider raising max_iter / max_fun inside mlp() before trusting small score differences.
r2_score_mlp = mlp(scaled_input, y, imp='PCA Iterative')

Fold 1:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.783
Fold 2:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.741
Fold 3:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.773
Fold 4:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.757
Fold 5:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.724
Fold 6:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.740
Fold 7:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.715
Fold 8:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.680
Fold 9:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.739
Fold 10:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.649


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
# Show the MLP summary string (mean ± std of the fold R² scores).
print(r2_score_mlp)


MLP 10-Fold CV R² Score PCA Iterative: 0.730 ± 0.039


##### GP Regression

In [None]:
# 10-fold CV of the Gaussian-process regressor on the iterative variant; `imp` labels the summary string.
r2_score_gpr = gp_regressor(scaled_input, y, imp='PCA Iterative')

Fold 1:




  Fold R²: 0.779
Fold 2:




  Fold R²: 0.727
Fold 3:




  Fold R²: 0.774
Fold 4:




  Fold R²: 0.755
Fold 5:




  Fold R²: 0.741
Fold 6:




  Fold R²: 0.742
Fold 7:




  Fold R²: 0.718
Fold 8:




  Fold R²: 0.674
Fold 9:




  Fold R²: 0.742
Fold 10:




  Fold R²: 0.656


In [None]:
# Show the GPR summary string (mean ± std of the fold R² scores).
print(r2_score_gpr)


GPR 10-Fold CV R² Score PCA Iterative: 0.731 ± 0.037


### **2.6** MICE Pre-Imputation

In [None]:
# Read the PCA-imputed predictors of the MICE pre-imputation variant and the target table.
# Paths are relative to the current working directory.
# NOTE(review): `input` shadows the Python builtin; the name is kept because scale_data's
# signature and the other loading cells use it.
input = pd.read_csv('PCA IMPUTATION/imputed_pca_mice_df.csv')
target = pd.read_csv('DATA/target_COMPLETE_DATA.csv')

# scale_data drops rows whose ADAS13 is missing and standardises every column except
# PTGENDER and APOE4; `scaled_input` and `y` feed all model cells of this section.
scaled_input, y = scale_data(input, target)

##### Random Forest

In [None]:
# 10-fold CV of the Random Forest on the MICE variant; prints each fold's R² and
# returns the summary string, in which `imp` appears as the dataset label.
r2_score_rf = random_forest(scaled_input, y, imp='PCA MICE')

Fold 1:
Fold R²: 0.772
Fold 2:
Fold R²: 0.727
Fold 3:
Fold R²: 0.745
Fold 4:
Fold R²: 0.736
Fold 5:
Fold R²: 0.731
Fold 6:
Fold R²: 0.714
Fold 7:
Fold R²: 0.702
Fold 8:
Fold R²: 0.696
Fold 9:
Fold R²: 0.698
Fold 10:
Fold R²: 0.615


In [None]:
# Show the Random Forest summary string (mean ± std of the fold R² scores).
print(r2_score_rf)


Random Forest 10-Fold CV R² Score PCA MICE: 0.714 ± 0.040


##### Gradient Boosting

In [None]:
# 10-fold CV of Gradient Boosting on the MICE variant; `imp` labels the summary string.
r2_score_grad = gradient_boosting(scaled_input, y, imp='PCA MICE')

Fold 1:
  Fold R²: 0.776
Fold 2:
  Fold R²: 0.725
Fold 3:
  Fold R²: 0.752
Fold 4:
  Fold R²: 0.729
Fold 5:
  Fold R²: 0.743
Fold 6:
  Fold R²: 0.718
Fold 7:
  Fold R²: 0.706
Fold 8:
  Fold R²: 0.699
Fold 9:
  Fold R²: 0.710
Fold 10:
  Fold R²: 0.636


In [None]:
# Show the Gradient Boosting summary string (mean ± std of the fold R² scores).
print(r2_score_grad)


Gradient Boosting 10-Fold CV R² Score PCA MICE: 0.719 ± 0.035


##### Extreme Gradient Boosting

In [None]:
# 10-fold CV of the XGBoost regressor on the MICE variant; `imp` labels the summary string.
r2_score_xgb = xgb_regressor(scaled_input, y, imp='PCA MICE')

Fold 1:
  Fold R²: 0.771
Fold 2:
  Fold R²: 0.721
Fold 3:
  Fold R²: 0.747
Fold 4:
  Fold R²: 0.721
Fold 5:
  Fold R²: 0.740
Fold 6:
  Fold R²: 0.715
Fold 7:
  Fold R²: 0.704
Fold 8:
  Fold R²: 0.703
Fold 9:
  Fold R²: 0.711
Fold 10:
  Fold R²: 0.636


In [None]:
# Show the XGBoost summary string (mean ± std of the fold R² scores).
print(r2_score_xgb)


XGB Regressor 10-Fold CV R² Score PCA MICE: 0.717 ± 0.034


##### SVR

In [None]:
# 10-fold CV of the support-vector regressor on the MICE variant; `imp` labels the summary string.
r2_score_svr = svr(scaled_input, y, imp='PCA MICE')

Fold 1:
  Fold R²: 0.776
Fold 2:
  Fold R²: 0.728
Fold 3:
  Fold R²: 0.764
Fold 4:
  Fold R²: 0.746
Fold 5:
  Fold R²: 0.747
Fold 6:
  Fold R²: 0.748
Fold 7:
  Fold R²: 0.718
Fold 8:
  Fold R²: 0.683
Fold 9:
  Fold R²: 0.734
Fold 10:
  Fold R²: 0.655


In [None]:
# Show the SVR summary string (mean ± std of the fold R² scores).
print(r2_score_svr)


SVR 10-Fold CV R² Score PCA MICE: 0.730 ± 0.035


##### Ridge Regression

In [None]:
# 10-fold CV of Ridge regression on the MICE variant; `imp` labels the summary string.
r2_score_ridge = ridge_regression(scaled_input, y, imp='PCA MICE')

Fold 1:




  Fold R²: 0.773
Fold 2:




  Fold R²: 0.728
Fold 3:




  Fold R²: 0.764
Fold 4:




  Fold R²: 0.752
Fold 5:




  Fold R²: 0.745
Fold 6:




  Fold R²: 0.750
Fold 7:




  Fold R²: 0.718
Fold 8:




  Fold R²: 0.681
Fold 9:




  Fold R²: 0.733
Fold 10:




  Fold R²: 0.657




In [None]:
# Show the Ridge summary string (mean ± std of the fold R² scores).
print(r2_score_ridge)


Ridge Regression 10-Fold CV R² Score PCA MICE: 0.730 ± 0.035


##### MLP

In [None]:
# 10-fold CV of the MLP regressor on the MICE variant; `imp` labels the summary string.
# NOTE(review): in the saved output lbfgs stops on its evaluation limit in every fold
# ("TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT"), so the networks are not converged;
# consider raising max_iter / max_fun inside mlp() before trusting small score differences.
r2_score_mlp = mlp(scaled_input, y, imp='PCA MICE')

Fold 1:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.784
Fold 2:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.741
Fold 3:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.756
Fold 4:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.758
Fold 5:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.727
Fold 6:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.740
Fold 7:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.715
Fold 8:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.678
Fold 9:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.745
Fold 10:


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  sel

  Fold R²: 0.649


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
# Show the MLP summary string (mean ± std of the fold R² scores).
print(r2_score_mlp)


MLP 10-Fold CV R² Score PCA MICE: 0.729 ± 0.038


##### GP Regression

In [None]:
# 10-fold CV of the Gaussian-process regressor on the MICE variant; `imp` labels the summary string.
r2_score_gpr = gp_regressor(scaled_input, y, imp='PCA MICE')

Fold 1:




  Fold R²: 0.779
Fold 2:




  Fold R²: 0.726
Fold 3:




  Fold R²: 0.772
Fold 4:




  Fold R²: 0.757
Fold 5:




  Fold R²: 0.743
Fold 6:




  Fold R²: 0.744
Fold 7:




  Fold R²: 0.718
Fold 8:




  Fold R²: 0.674
Fold 9:




  Fold R²: 0.734
Fold 10:




  Fold R²: 0.657


In [None]:
# Show the GPR summary string (mean ± std of the fold R² scores).
print(r2_score_gpr)


GPR 10-Fold CV R² Score PCA MICE: 0.730 ± 0.037


In [None]:
# NOTE(review): this cell repeats the print of the GPR summary for the PCA MICE data, but its
# saved output is labelled "Latent PCA MICE" with a very different score, so the output was
# produced by another run/kernel state. Re-run after "Restart & Run All" or remove the cell.
print(r2_score_gpr)


GPR 10-Fold CV R² Score Latent PCA MICE: 0.188 ± 0.042
