In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [2]:
df = pd.read_excel("Final_CI_CD_data.xlsx", sheet_name="CI")

In [3]:
df = df.drop(['Source','Rock Name', 'Magnesite (%)', 'Dolomite (%)', 'Gypsum (%)', 'Anhydrite (%)'], axis=1)

In [4]:
df = df.sample(frac=1, random_state=13).reset_index(drop=True)
df

Unnamed: 0,Mean Grain Size (mm),Rock Classification,Plagioclase feldspar (%),Alkali feldspar (%),Quartz (%),Calcite (%),Clay (%),Mica (%),Amphibole (%),Density (g/cm3),Porosity (%),E (GPa),v,UCS (MPa),CI (MPa)
0,3.500,Igneous,38.0,27.0,31.0,0,0.0,4.0,0.0,2.620,0.260,66.100,0.31,149.0,71.20
1,3.500,Igneous,38.0,27.0,31.0,0,0.0,4.0,0.0,2.620,0.260,66.100,0.31,139.8,66.00
2,3.500,Igneous,38.0,27.0,31.0,0,0.0,4.0,0.0,2.620,0.260,66.100,0.31,152.3,59.10
3,3.000,Igneous,34.0,23.0,36.0,0,0.0,6.0,0.0,2.650,0.400,72.600,0.26,256.1,126.50
4,0.002,Sedimentary,0.0,4.0,15.0,15,60.0,0.0,0.0,2.150,18.000,2.134,0.16,8.7,2.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,2.000,Metamorphic,17.0,11.0,34.0,0,0.0,26.0,0.0,2.724,0.183,68.070,0.23,123.0,51.00
1340,2.500,Igneous,34.0,25.0,22.0,0,0.0,12.0,7.0,2.640,0.960,51.700,0.19,153.3,58.68
1341,1.500,Igneous,73.0,8.0,19.0,0,0.0,0.0,0.0,2.740,0.400,73.000,0.29,294.0,157.00
1342,1.750,Igneous,44.0,20.0,22.0,0,0.0,9.0,0.0,2.685,0.240,73.000,0.12,181.0,86.00


In [5]:

def one_hot_encode(df, column_name):
    """
    Perform one-hot encoding on a specified column of a Pandas DataFrame.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame to be encoded.
    column_name : str
        The name of the column to be encoded.
    
    Returns:
    --------
    pandas DataFrame
        The encoded DataFrame.
    """
    # Create a new DataFrame with the one-hot encoded columns
    encoded_cols = pd.get_dummies(df[column_name], prefix=column_name)
    
    # Concatenate the original DataFrame with the encoded columns
    df_encoded = pd.concat([df, encoded_cols], axis=1)
    
    # Drop the original categorical column
    df_encoded.drop(column_name, axis=1, inplace=True)
    
    return df_encoded

In [6]:
df = one_hot_encode(df,"Rock Classification")

In [7]:
def drop_all_zero_columns(df):
    """
    Drop all columns from a Pandas DataFrame that contain only zeros.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame to be processed.
    
    Returns:
    --------
    pandas DataFrame
        The processed DataFrame with zero columns removed.
    """
    # Get a boolean mask indicating which columns have all zero values
    all_zeros_mask = (df == 0).all(axis=0)
    
    # Get the names of the columns to drop
    zero_cols = df.columns[all_zeros_mask]
    
    # Drop the zero columns from the DataFrame
    df_dropped = df.drop(zero_cols, axis=1)
    
    return df_dropped

In [8]:
df = drop_all_zero_columns(df)

In [9]:
num_columns = len(df.columns)
last_three_columns = df.iloc[:, num_columns - 3:]
first_columns = df.iloc[:, :num_columns - 3]

# Concatenate the last three columns with the first columns
df = pd.concat([last_three_columns, first_columns], axis=1)
new_column_names = ['IGN', 'MET', 'SED']
df = df.rename(columns=dict(zip(df.columns[:3], new_column_names)))
df

Unnamed: 0,IGN,MET,SED,Mean Grain Size (mm),Plagioclase feldspar (%),Alkali feldspar (%),Quartz (%),Calcite (%),Clay (%),Mica (%),Amphibole (%),Density (g/cm3),Porosity (%),E (GPa),v,UCS (MPa),CI (MPa)
0,1,0,0,3.500,38.0,27.0,31.0,0,0.0,4.0,0.0,2.620,0.260,66.100,0.31,149.0,71.20
1,1,0,0,3.500,38.0,27.0,31.0,0,0.0,4.0,0.0,2.620,0.260,66.100,0.31,139.8,66.00
2,1,0,0,3.500,38.0,27.0,31.0,0,0.0,4.0,0.0,2.620,0.260,66.100,0.31,152.3,59.10
3,1,0,0,3.000,34.0,23.0,36.0,0,0.0,6.0,0.0,2.650,0.400,72.600,0.26,256.1,126.50
4,0,0,1,0.002,0.0,4.0,15.0,15,60.0,0.0,0.0,2.150,18.000,2.134,0.16,8.7,2.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,0,1,0,2.000,17.0,11.0,34.0,0,0.0,26.0,0.0,2.724,0.183,68.070,0.23,123.0,51.00
1340,1,0,0,2.500,34.0,25.0,22.0,0,0.0,12.0,7.0,2.640,0.960,51.700,0.19,153.3,58.68
1341,1,0,0,1.500,73.0,8.0,19.0,0,0.0,0.0,0.0,2.740,0.400,73.000,0.29,294.0,157.00
1342,1,0,0,1.750,44.0,20.0,22.0,0,0.0,9.0,0.0,2.685,0.240,73.000,0.12,181.0,86.00


In [10]:
def normalize_numeric_columns(df, dependent_col):
    """
    Normalize all numerical columns in a Pandas DataFrame, except for a specified dependent column.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame to be normalized.
    dependent_col : str
        The name of the dependent column (i.e., the column to be predicted) that should not be normalized.
    
    Returns:
    --------
    pandas DataFrame
        The normalized DataFrame.
    """
    # Get the names of all numerical columns except for the dependent column
    numeric_cols = [col for col in df.columns if col != dependent_col and pd.api.types.is_numeric_dtype(df[col])]
    
    # Create a scaler object to normalize the data
    scaler = MinMaxScaler()
    
    # Normalize the numerical columns
    df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
    
    return df

In [11]:
df = normalize_numeric_columns(df, "CI_MPa")

In [12]:
df.head()

Unnamed: 0,IGN,MET,SED,Mean Grain Size (mm),Plagioclase feldspar (%),Alkali feldspar (%),Quartz (%),Calcite (%),Clay (%),Mica (%),Amphibole (%),Density (g/cm3),Porosity (%),E (GPa),v,UCS (MPa),CI (MPa)
0,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.0,0.0,0.1,0.0,0.67033,0.007667,0.723293,0.642857,0.393293,0.369673
1,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.0,0.0,0.1,0.0,0.67033,0.007667,0.723293,0.642857,0.368151,0.342313
2,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.0,0.0,0.1,0.0,0.67033,0.007667,0.723293,0.642857,0.402312,0.306009
3,1.0,0.0,0.0,0.149915,0.465753,0.396552,0.378947,0.0,0.0,0.15,0.0,0.703297,0.013171,0.795526,0.52381,0.685988,0.660633
4,0.0,0.0,1.0,0.0,0.0,0.068966,0.157895,0.15,0.84507,0.0,0.0,0.153846,0.705131,0.012457,0.285714,0.009866,0.006314


In [13]:
df

Unnamed: 0,IGN,MET,SED,Mean Grain Size (mm),Plagioclase feldspar (%),Alkali feldspar (%),Quartz (%),Calcite (%),Clay (%),Mica (%),Amphibole (%),Density (g/cm3),Porosity (%),E (GPa),v,UCS (MPa),CI (MPa)
0,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.393293,0.369673
1,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.368151,0.342313
2,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.402312,0.306009
3,1.0,0.0,0.0,0.149915,0.465753,0.396552,0.378947,0.00,0.00000,0.150,0.0000,0.703297,0.013171,0.795526,0.523810,0.685988,0.660633
4,0.0,0.0,1.0,0.000000,0.000000,0.068966,0.157895,0.15,0.84507,0.000,0.0000,0.153846,0.705131,0.012457,0.285714,0.009866,0.006314
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,0.0,1.0,0.0,0.099910,0.232877,0.189655,0.357895,0.00,0.00000,0.650,0.0000,0.784615,0.004639,0.745185,0.452381,0.322238,0.263391
1340,1.0,0.0,0.0,0.124912,0.465753,0.431034,0.231579,0.00,0.00000,0.300,0.0875,0.692308,0.035188,0.563270,0.357143,0.405045,0.303799
1341,1.0,0.0,0.0,0.074907,1.000000,0.137931,0.200000,0.00,0.00000,0.000,0.0000,0.802198,0.013171,0.799971,0.595238,0.789566,0.821109
1342,1.0,0.0,0.0,0.087409,0.602740,0.344828,0.231579,0.00,0.00000,0.225,0.0000,0.741758,0.006880,0.799971,0.190476,0.480747,0.447543


In [14]:
len(df.columns)

17

In [15]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
lr = LinearRegression()
ridge = Ridge()
lasso = Lasso()
en = ElasticNet()
svr = SVR()
dtr = DecisionTreeRegressor()
rfr = RandomForestRegressor()
abr = AdaBoostRegressor()
gbr = GradientBoostingRegressor()
xgbr = XGBRegressor()

In [16]:
def apply_pca(df, dependent_col, num_components):
    """
    Apply PCA on all columns in a Pandas DataFrame, except for a specified dependent column.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame to be transformed.
    dependent_col : str
        The name of the dependent column (i.e., the column to be predicted) that should not be transformed.
    num_components : int
        The number of principal components to retain after transformation.
    
    Returns:
    --------
    pandas DataFrame
        The transformed DataFrame.
    """
    # Get the names of all columns except for the dependent column
    cols_to_transform = [col for col in df.columns if col != dependent_col]
    
    # Create a PCA object
    pca = PCA(n_components=num_components)
    
    # Apply PCA to the selected columns
    transformed = pca.fit_transform(df[cols_to_transform])
    
    # Create a new DataFrame with the transformed columns
    new_cols = [f'PC{i+1}' for i in range(num_components)]
    transformed_df = pd.DataFrame(transformed, columns=new_cols, index=df.index)
    
    # Add the dependent column back to the transformed DataFrame
    transformed_df[dependent_col] = df[dependent_col]
    
    return transformed_df

In [17]:
df

Unnamed: 0,IGN,MET,SED,Mean Grain Size (mm),Plagioclase feldspar (%),Alkali feldspar (%),Quartz (%),Calcite (%),Clay (%),Mica (%),Amphibole (%),Density (g/cm3),Porosity (%),E (GPa),v,UCS (MPa),CI (MPa)
0,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.393293,0.369673
1,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.368151,0.342313
2,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.402312,0.306009
3,1.0,0.0,0.0,0.149915,0.465753,0.396552,0.378947,0.00,0.00000,0.150,0.0000,0.703297,0.013171,0.795526,0.523810,0.685988,0.660633
4,0.0,0.0,1.0,0.000000,0.000000,0.068966,0.157895,0.15,0.84507,0.000,0.0000,0.153846,0.705131,0.012457,0.285714,0.009866,0.006314
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,0.0,1.0,0.0,0.099910,0.232877,0.189655,0.357895,0.00,0.00000,0.650,0.0000,0.784615,0.004639,0.745185,0.452381,0.322238,0.263391
1340,1.0,0.0,0.0,0.124912,0.465753,0.431034,0.231579,0.00,0.00000,0.300,0.0875,0.692308,0.035188,0.563270,0.357143,0.405045,0.303799
1341,1.0,0.0,0.0,0.074907,1.000000,0.137931,0.200000,0.00,0.00000,0.000,0.0000,0.802198,0.013171,0.799971,0.595238,0.789566,0.821109
1342,1.0,0.0,0.0,0.087409,0.602740,0.344828,0.231579,0.00,0.00000,0.225,0.0000,0.741758,0.006880,0.799971,0.190476,0.480747,0.447543


In [18]:
X_train, y_train  = df.drop("CI (MPa)", axis=1), df["CI (MPa)"]
X_train

Unnamed: 0,IGN,MET,SED,Mean Grain Size (mm),Plagioclase feldspar (%),Alkali feldspar (%),Quartz (%),Calcite (%),Clay (%),Mica (%),Amphibole (%),Density (g/cm3),Porosity (%),E (GPa),v,UCS (MPa)
0,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.393293
1,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.368151
2,1.0,0.0,0.0,0.174917,0.520548,0.465517,0.326316,0.00,0.00000,0.100,0.0000,0.670330,0.007667,0.723293,0.642857,0.402312
3,1.0,0.0,0.0,0.149915,0.465753,0.396552,0.378947,0.00,0.00000,0.150,0.0000,0.703297,0.013171,0.795526,0.523810,0.685988
4,0.0,0.0,1.0,0.000000,0.000000,0.068966,0.157895,0.15,0.84507,0.000,0.0000,0.153846,0.705131,0.012457,0.285714,0.009866
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,0.0,1.0,0.0,0.099910,0.232877,0.189655,0.357895,0.00,0.00000,0.650,0.0000,0.784615,0.004639,0.745185,0.452381,0.322238
1340,1.0,0.0,0.0,0.124912,0.465753,0.431034,0.231579,0.00,0.00000,0.300,0.0875,0.692308,0.035188,0.563270,0.357143,0.405045
1341,1.0,0.0,0.0,0.074907,1.000000,0.137931,0.200000,0.00,0.00000,0.000,0.0000,0.802198,0.013171,0.799971,0.595238,0.789566
1342,1.0,0.0,0.0,0.087409,0.602740,0.344828,0.231579,0.00,0.00000,0.225,0.0000,0.741758,0.006880,0.799971,0.190476,0.480747


In [19]:
grid_lr = GridSearchCV(estimator=lr, param_grid={"fit_intercept" : [True, False],
                                                 }, 
                                                 scoring="r2",
                                                 cv=4)

In [20]:
grid_lr.fit(X_train, y_train)
grid_lr.best_params_, grid_lr.best_score_

({'fit_intercept': True}, 0.8794990521901173)

In [21]:
grid_ridge = GridSearchCV(estimator=ridge, param_grid={"fit_intercept" : [True, False],
                                                        'alpha': np.logspace(-4, 4, 9),
                                                        'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']},
                                                        scoring="r2",
                                                        cv=4)

In [22]:
grid_ridge.fit(X_train, y_train)
grid_ridge.best_params_, grid_ridge.best_score_

({'alpha': 0.1, 'fit_intercept': False, 'solver': 'lsqr'}, 0.8796017821080477)

In [23]:
grid_lasso = GridSearchCV(estimator=lasso, param_grid={"fit_intercept" : [True, False],
                                                        'alpha': np.logspace(-3, 3, 7),
                                                        'tol': [1e-5, 1e-4, 1e-3]}, 
                                                        scoring="r2",
                                                        cv=4)

In [24]:
grid_lasso.fit(X_train, y_train)
grid_lasso.best_params_, grid_lasso.best_score_

({'alpha': 0.001, 'fit_intercept': False, 'tol': 0.0001}, 0.8768459510030088)

In [25]:
grid_en = GridSearchCV(estimator=en, param_grid={"fit_intercept" : [True, False],
                                                        'alpha': np.logspace(-4, 0, 50), # range of values for l1_ratio
                                                        'tol': [1e-3, 1e-4, 1e-5]}, # range of values for tol, 
                                                        scoring="r2",
                                                        cv=4)

In [26]:
grid_en.fit(X_train, y_train)
grid_en.best_params_, grid_en.best_score_

({'alpha': 0.00014563484775012445, 'fit_intercept': False, 'tol': 0.001},
 0.8798018874836339)

In [27]:
grid_svr = GridSearchCV(estimator=svr, param_grid={"kernel":['linear', 'poly', 'rbf', 'sigmoid'],
                                                    }, 
                                                        scoring="r2",
                                                        cv=4)

In [28]:
grid_svr.fit(X_train, y_train)
grid_svr.best_params_, grid_svr.best_score_

({'kernel': 'linear'}, 0.8674562636876919)

In [29]:
grid_dtr = GridSearchCV(estimator=dtr, 
                        param_grid={"criterion":['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                                    "splitter": ["best", "random"],
                                    "max_depth": [3, 5, 7, 10, 15, 25]}, 
                        
                        scoring="r2",
                        cv=4)

In [30]:
grid_dtr.fit(X_train, y_train)
grid_dtr.best_params_, grid_dtr.best_score_

Traceback (most recent call last):
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 1252, in fit
    super().fit(
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 351, in fit
    criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
KeyError: 'squared_error'

Traceback (most recent call last):
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 1252, in fit
    super().fit(
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 351, in fit
    criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
KeyEr

({'criterion': 'friedman_mse', 'max_depth': 5, 'splitter': 'best'},
 0.8619399855568366)

In [31]:
grid_rfr = GridSearchCV(estimator=rfr, 
                        param_grid={"criterion":['squared_error', 'absolute_error', 'poisson'],
                                    "max_depth": [3, 5, 7, 10, 15, 25],
                                     },
                        scoring="r2",
                        cv=4)

In [32]:
grid_rfr.fit(X_train, y_train)
grid_rfr.best_params_, grid_rfr.best_score_

Traceback (most recent call last):
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\jo

({'criterion': 'poisson', 'max_depth': 25}, 0.498797772444761)

In [33]:
grid_abr = GridSearchCV(estimator=abr, 
                        param_grid={"loss":['linear', 'square', 'exponential'],
                                     }, 
                        verbose=1, 
                        scoring="r2",
                        cv=4)

In [34]:
grid_abr.fit(X_train, y_train)
grid_abr.best_params_, grid_abr.best_score_

Fitting 4 folds for each of 3 candidates, totalling 12 fits


({'loss': 'exponential'}, 0.8615726728361159)

In [35]:
grid_gbr = GridSearchCV(estimator=gbr, 
                        param_grid={"loss":['squared_error', 'absolute_error', 'huber', 'quantile'],
                                    "criterion":['friedman_mse', 'squared_error', 'mse']}, 
                        scoring="r2",
                        cv=4)

In [36]:
grid_gbr.fit(X_train, y_train)
grid_gbr.best_params_, grid_gbr.best_score_

Traceback (most recent call last):
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 448, in fit
    self._check_params()
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 239, in _check_params
    raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
ValueError: Loss 'squared_error' not supported. 

Traceback (most recent call last):
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 448, in fit
    self._check_params()
  File "c:\Users\Abhinesh\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 239, in _check_params
    r

({'criterion': 'friedman_mse', 'loss': 'huber'}, 0.8948614290937792)

In [37]:
grid_xgbr = GridSearchCV(estimator=xgbr, 
                        param_grid={
                                    "max_depth": [3, 5, 7, 10, 15, 25],
                                     'learning_rate': [0.1, 0.01]}, 
                        verbose=1, 
                        scoring="r2",
                        cv=4)

In [38]:
grid_xgbr.fit(X_train, y_train)
grid_xgbr.best_params_, grid_xgbr.best_score_

Fitting 4 folds for each of 12 candidates, totalling 48 fits


({'learning_rate': 0.1, 'max_depth': 3}, 0.8929925075970766)

| ML algorithm | R2_score |
| -------- | -------- |
| Linear Regression | 82.43 |
| Ridge Regression | 83.58 |
| Lasso Regression | 84.19 |
| ElaasticNet Regression | 83.82 |
| Support Vector Regression | 74.76 |
| Decision Tree Regression | 77.27 |
| Random Forest Regression | 41.05 |
| AdaBoost Regression | 80.05 |
| GradientBoost Regression| 80.63 |
| XgBoost Regression | 80.81 |

