In [66]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

In [67]:
# Load the graphene dataset into a DataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
grd = pd.read_csv(
    "../data/graphene_data_final.csv",
)

In [68]:
# Feature matrix shared by both models: the four input columns.
X = grd[["Graphene_percentage", "FEED", "RPM", "DOC"]]

# Two regression targets, each modelled separately below.
Y = grd["MRR_gm_per_sec"]
Y2 = grd["Ra"]

In [69]:
# Hold out 30% of the rows for testing. Each target gets its own split with a
# different seed, so the `*2` variables (Ra) do not contain the same rows as
# the un-suffixed ones (MRR).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=39
)
X_train2, X_test2, Y2_train, Y2_test = train_test_split(
    X, Y2, test_size=0.3, random_state=23
)

Let's create, train and test a basic gradient boosting model.

In [70]:
# Baseline MRR model: default hyper-parameters, only the seed is fixed.
# `fit` returns the estimator itself, so construction and training can be chained.
gbr = GradientBoostingRegressor(random_state=0).fit(X_train, Y_train)

# R^2 on the held-out MRR test split (displayed as the cell output).
gbr.score(X_test, Y_test)

0.8640911104913794

In [71]:
# Baseline Ra model: default hyper-parameters, only the seed is fixed.
# `fit` returns the estimator itself, so construction and training can be chained.
gbr2 = GradientBoostingRegressor(random_state=51).fit(X_train2, Y2_train)

# R^2 on the held-out Ra test split (displayed as the cell output).
gbr2.score(X_test2, Y2_test)

0.5422137881583955

In [72]:
# Unfitted estimator that the MRR grid search clones for every candidate.
gbr_cv = GradientBoostingRegressor(
    random_state=7,
)

In [73]:
# Search space shared by both grid searches below.
# 5 * 6 * 4 * 3 = 360 hyper-parameter combinations in total.
param_grid = {
    "n_estimators": [10, 20, 30, 50, 100],    # number of boosting stages
    "max_depth": [1, 2, 3, 4, 6, 8],          # depth limit of each tree
    "min_samples_leaf": [1, 2, 3, 4],         # minimum samples in a leaf
    "min_samples_split": [2, 3, 4],           # minimum samples to split a node
}

In [74]:
# Exhaustive 3-fold cross-validated search over `param_grid` for the MRR target.
CV_gbr = GridSearchCV(
    estimator=gbr_cv,
    param_grid=param_grid,
    cv=3,
)

# Kept as a separate statement so the fitted search object is the cell output.
CV_gbr.fit(X_train, Y_train)

GridSearchCV(cv=3, estimator=GradientBoostingRegressor(random_state=7),
             param_grid={'max_depth': [1, 2, 3, 4, 6, 8],
                         'min_samples_leaf': [1, 2, 3, 4],
                         'min_samples_split': [2, 3, 4],
                         'n_estimators': [10, 20, 30, 50, 100]})

In [75]:
# Hyper-parameter combination that achieved the best mean cross-validation
# score in the MRR grid search.
CV_gbr.best_params_

{'max_depth': 4,
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'n_estimators': 30}

In [76]:
# R^2 of the tuned MRR model on the held-out test split.
CV_gbr.score(X_test, Y_test)

0.9067852421364127

In [77]:
# Unfitted estimator that the Ra grid search clones for every candidate.
gbr_cv2 = GradientBoostingRegressor(
    random_state=19,
)

In [78]:
# Exhaustive 3-fold cross-validated search over the same `param_grid`, this
# time for the Ra target.
CV_gbr2 = GridSearchCV(
    estimator=gbr_cv2,
    param_grid=param_grid,
    cv=3,
)

# Kept as a separate statement so the fitted search object is the cell output.
CV_gbr2.fit(X_train2, Y2_train)


GridSearchCV(cv=3, estimator=GradientBoostingRegressor(random_state=19),
             param_grid={'max_depth': [1, 2, 3, 4, 6, 8],
                         'min_samples_leaf': [1, 2, 3, 4],
                         'min_samples_split': [2, 3, 4],
                         'n_estimators': [10, 20, 30, 50, 100]})

In [79]:
# R^2 of the tuned Ra model on the held-out test split.
print(
    CV_gbr2.score(X_test2, Y2_test)
)

0.3413523637970435


In [80]:
# Hyper-parameter combination that achieved the best mean cross-validation
# score in the Ra grid search.
CV_gbr2.best_params_

{'max_depth': 2,
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'n_estimators': 30}

Now let's save the best models for later use.

In [81]:
# Persist one model per target so it can be reloaded later without retraining.
#
# - MRR: the whole fitted GridSearchCV object is stored (not only its
#   best_estimator_), so the search results travel with the model.
# - Ra: the untuned baseline `gbr2` is stored, not `CV_gbr2`.
#   NOTE(review): in the recorded run the baseline scored higher on the test
#   split than the tuned model (0.542 vs 0.341), which is presumably why it
#   was chosen -- confirm this is intentional.
#
# Pickle files can execute arbitrary code when loaded; only load these
# artefacts from a trusted location.
MODEL_DIR = Path('../trained_models')

# Create the output directory if it is missing (e.g. on a fresh checkout), so
# open(..., 'wb') does not fail with FileNotFoundError.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

with open(MODEL_DIR / 'gradient_boosting_MRR.pkl', 'wb') as f:
    pickle.dump(CV_gbr, f)
with open(MODEL_DIR / 'gradient_boosting_RA.pkl', 'wb') as f:
    pickle.dump(gbr2, f)