In [7]:
from xgboost import XGBRegressor
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
data = pd.read_csv('concrete_data.csv')

data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [8]:
X = data.iloc[:,:8].values
Y = data.iloc[:,8].values.reshape(-1,1)

In [9]:
print(np.shape(X))
print(np.shape(Y))

(1030, 8)
(1030, 1)


In [10]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=2021)

In [11]:
search_space ={
    "n_estimators" : [100,200,500],
    "max_depth" : [3,6,9],
    "gamma" : [0.01,0.1],
    "learning_rate" : [0.001,0.01,0.1,1]
}

In [13]:
xgb_model = XGBRegressor(random_state=2021)

In [14]:
from sklearn.model_selection import GridSearchCV

GS = GridSearchCV(estimator =xgb_model,
                  param_grid =search_space,
                  scoring =["r2",'neg_root_mean_squared_error'],
                  refit = "r2",
                  cv = 5,
                  verbose = 4)

In [15]:
GS.fit(X_train,Y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100; neg_root_mean_squared_error: (test=-35.192) r2: (test=-3.685) total time=   0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100; neg_root_mean_squared_error: (test=-36.463) r2: (test=-4.022) total time=   0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100; neg_root_mean_squared_error: (test=-35.801) r2: (test=-3.374) total time=   0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100; neg_root_mean_squared_error: (test=-35.460) r2: (test=-3.748) total time=   0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100; neg_root_mean_squared_error: (test=-37.090) r2: (test=-3.535) total time=   0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200; neg_root_mean_squared_error: (test=-32.121) r2: (test=-2.903) total tim

GridSearchCV(cv=5, estimator=XGBRegressor(random_state=2021),
             param_grid={'gamma': [0.01, 0.1],
                         'learning_rate': [0.001, 0.01, 0.1, 1],
                         'max_depth': [3, 6, 9],
                         'n_estimators': [100, 200, 500]},
             refit='r2', scoring=['r2', 'neg_root_mean_squared_error'],
             verbose=4)

In [16]:
print(GS.best_estimator_)

XGBRegressor(gamma=0.1, max_depth=6, n_estimators=500, random_state=2021)


In [17]:
print(GS.best_params_)

{'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 500}


In [18]:
print(GS.best_score_)

0.9228097087200642


In [21]:
df = pd.DataFrame(GS.cv_results_)
df=df.sort_values("rank_test_r2")
df.to_csv("cv_results.csv")

In [22]:
cv = pd.read_csv('cv_results.csv')

cv.head()

Unnamed: 0.1,Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_gamma,param_learning_rate,param_max_depth,param_n_estimators,params,...,std_test_r2,rank_test_r2,split0_test_neg_root_mean_squared_error,split1_test_neg_root_mean_squared_error,split2_test_neg_root_mean_squared_error,split3_test_neg_root_mean_squared_error,split4_test_neg_root_mean_squared_error,mean_test_neg_root_mean_squared_error,std_test_neg_root_mean_squared_error,rank_test_neg_root_mean_squared_error
0,59,0.484662,0.108356,0.005066,0.000534,0.1,0.1,6,500,"{'gamma': 0.1, 'learning_rate': 0.1, 'max_dept...",...,0.016755,1,-4.02197,-4.773389,-4.262009,-4.341917,-5.657795,-4.611416,0.576768,1
1,58,0.246718,0.050301,0.004058,0.000618,0.1,0.1,6,200,"{'gamma': 0.1, 'learning_rate': 0.1, 'max_dept...",...,0.017409,2,-4.00892,-4.839573,-4.259608,-4.350223,-5.682663,-4.628197,0.59215,3
2,23,0.596826,0.040072,0.007754,0.001126,0.01,0.1,6,500,"{'gamma': 0.01, 'learning_rate': 0.1, 'max_dep...",...,0.016501,3,-4.069086,-4.777762,-4.284857,-4.373111,-5.673246,-4.635612,0.567411,4
3,56,0.259902,0.035415,0.005051,0.001507,0.1,0.1,3,500,"{'gamma': 0.1, 'learning_rate': 0.1, 'max_dept...",...,0.02095,4,-3.669399,-4.782585,-4.284095,-4.571116,-5.82871,-4.627181,0.707958,2
4,22,0.262353,0.073951,0.003842,0.000586,0.01,0.1,6,200,"{'gamma': 0.01, 'learning_rate': 0.1, 'max_dep...",...,0.017017,5,-4.05795,-4.865263,-4.28617,-4.385986,-5.68354,-4.655782,0.577403,5
