# Decision Tree Regressor

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import make_scorer

In [2]:
grd = pd.read_csv("../data/graphene_data_final.csv")

In [3]:
X, Y = grd[['Graphene_percentage', 'FEED', 'RPM', 'DOC']], grd['MRR_gm_per_sec']
Y2 = grd['Ra']

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state=33)
X_train2, X_test2, Y2_train, Y2_test = train_test_split(X, Y2, test_size = 0.3, random_state=1)

In [5]:
print(tree.DecisionTreeRegressor().fit(X_train,Y_train).score(X_test,Y_test))

0.8594779567262018


In [6]:
param_grid = { 
    'min_samples_split': [2,4,8,12,16],
    'max_features': ['auto', 'sqrt', 'log2'],
    'min_samples_leaf': [2,4,6,8],
    'max_depth' : [1,3,5,7,9,11,13,15],
}

In [7]:
dtr_cv = tree.DecisionTreeRegressor(random_state=16)

In [8]:
CV_dtr = GridSearchCV(estimator=dtr_cv, param_grid=param_grid, cv= 5)
CV_dtr.fit(X_train, Y_train)

GridSearchCV(cv=5, estimator=DecisionTreeRegressor(random_state=16),
             param_grid={'max_depth': [1, 3, 5, 7, 9, 11, 13, 15],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [2, 4, 6, 8],
                         'min_samples_split': [2, 4, 8, 12, 16]})

In [9]:
CV_dtr.score(X_test, Y_test)

0.8730574949203294

In [10]:
CV_dtr.best_params_

{'max_depth': 7,
 'max_features': 'auto',
 'min_samples_leaf': 2,
 'min_samples_split': 8}

In [11]:
dtr_cv2 = tree.DecisionTreeRegressor(random_state=6)

In [12]:
CV_dtr2 = GridSearchCV(estimator=dtr_cv2, param_grid=param_grid, cv= 5)
CV_dtr2.fit(X_train2, Y2_train)

GridSearchCV(cv=5, estimator=DecisionTreeRegressor(random_state=6),
             param_grid={'max_depth': [1, 3, 5, 7, 9, 11, 13, 15],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [2, 4, 6, 8],
                         'min_samples_split': [2, 4, 8, 12, 16]})

In [13]:
CV_dtr2.score(X_test2, Y2_test)

0.34724831489740293

Now let's save the best performing decision tree model as a "Pickle" file.

In [14]:
import pickle
with open('../trained_models/decision_tree_MRR.pkl','wb') as f:
    pickle.dump(CV_dtr,f)
with open('../trained_models/decision_tree_RA.pkl','wb') as f:
    pickle.dump(CV_dtr2,f)