In [1]:
import numpy as np
import pandas as pd
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
from sklearn.datasets import make_gaussian_quantiles
from sklearn import ensemble

In [3]:
X,Y = make_regression(n_samples = 10000, n_features = 12, 
                     n_informative = 10)
print(X.shape)
print(Y.shape)

(10000, 12)
(10000,)


In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.3)

In [5]:
score = np.array([])
for j in np.arange(20) + 2:
    mod_reg = tree.DecisionTreeRegressor(max_depth = j)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.27177322577802987
j-> 3 Mean-> 0.3545513125152174
j-> 4 Mean-> 0.43534273196589607
j-> 5 Mean-> 0.4935397764702021
j-> 6 Mean-> 0.5528544508662427
j-> 7 Mean-> 0.5963005771995353
j-> 8 Mean-> 0.6217378405144522
j-> 9 Mean-> 0.6352589144617788
j-> 10 Mean-> 0.6412020395547474
j-> 11 Mean-> 0.6343486016559993
j-> 12 Mean-> 0.6301854201667871
j-> 13 Mean-> 0.6310461156325143
j-> 14 Mean-> 0.6253308268155002
j-> 15 Mean-> 0.6195400223026322
j-> 16 Mean-> 0.6226722550187251
j-> 17 Mean-> 0.6195321597055081
j-> 18 Mean-> 0.6182976480923632
j-> 19 Mean-> 0.619591707257665
j-> 20 Mean-> 0.6228404424341974
j-> 21 Mean-> 0.6214266270130198


10

In [6]:
mod_reg = tree.DecisionTreeRegressor(max_depth = param)
mod_reg_fit = mod_reg.fit(X_train,Y_train)
error = mod_reg_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)

#mod = tree.DecisionTreeRegressor(max_depth = 3)
#mod.fit(X_train, Y_train)
#plt.figure(figsize = (12,12))
#tree.plot_tree(mod)
#plt.show()  

10
Error - >  0.6424933638929469


In [8]:
score = np.array([])
for j in np.arange(8) + 2:
    mod_reg = ensemble.GradientBoostingRegressor(max_depth = j)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.8940904342527383
j-> 3 Mean-> 0.9404597840133295
j-> 4 Mean-> 0.955429789898959
j-> 5 Mean-> 0.9584149428646699
j-> 6 Mean-> 0.9567010887441063
j-> 7 Mean-> 0.9484117841208087
j-> 8 Mean-> 0.936649266885099
j-> 9 Mean-> 0.9189638391211077


5

In [9]:
mod_reg_boost = ensemble.GradientBoostingRegressor(max_depth = param)
mod_reg_boost_fit = mod_reg_boost.fit(X_train,Y_train)
error = mod_reg_boost_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)

5
Error - >  0.961118616819794


In [11]:
score = np.array([])
for j in np.arange(20) + 2:
    mod_reg = ensemble.RandomForestRegressor(max_depth = j,
                                             max_features = 3,
                                             n_estimators = 100)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.3349416688896757
j-> 3 Mean-> 0.45100294282838005
j-> 4 Mean-> 0.5517211056459225
j-> 5 Mean-> 0.6317644906985473
j-> 6 Mean-> 0.6935739511343975
j-> 7 Mean-> 0.7419641638018034
j-> 8 Mean-> 0.7823404927226544
j-> 9 Mean-> 0.8107301261419682
j-> 10 Mean-> 0.8315743094322261
j-> 11 Mean-> 0.8440575162168299
j-> 12 Mean-> 0.8534505046275974
j-> 13 Mean-> 0.859356510863433
j-> 14 Mean-> 0.8616547276507907
j-> 15 Mean-> 0.8649673270933059
j-> 16 Mean-> 0.866306543017974
j-> 17 Mean-> 0.8662852719010286
j-> 18 Mean-> 0.865629398960244
j-> 19 Mean-> 0.8658695875275821
j-> 20 Mean-> 0.8670995260021035
j-> 21 Mean-> 0.8661698071670056


20

In [12]:
mod_reg_rf = ensemble.RandomForestRegressor(max_depth = param)
mod_reg_rf_fit = mod_reg_rf.fit(X_train,Y_train)
error = mod_reg_rf_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)



20
Error - >  0.8528288201366262


In [14]:
score = np.array([])
for j in np.arange(20) + 2:
    mod_reg = ensemble.AdaBoostRegressor(tree.DecisionTreeRegressor(max_depth = j), 
                                         n_estimators = 200)
    mod_cv = cross_val_score(mod_reg, X_train,Y_train, 
                             cv = 10)
    score = np.append(score, np.mean(mod_cv))
    print("j->", j, "Mean->", np.mean(mod_cv))
param = np.argmax(score) + 2
param

j-> 2 Mean-> 0.7675638901816279
j-> 3 Mean-> 0.8413892594606013
j-> 4 Mean-> 0.8594042109924718
j-> 5 Mean-> 0.8679541570268793
j-> 6 Mean-> 0.8719972733033021
j-> 7 Mean-> 0.8761190414996776
j-> 8 Mean-> 0.8768083539350512
j-> 9 Mean-> 0.8773345949442881


KeyboardInterrupt: 

In [None]:
mod_reg_ada = ensemble.AdaBoostRegressor(tree.DecisionTreeRegressor(max_depth = param), 
                                         n_estimators = 200)
mod_reg_ada_fit = mod_reg_ada.fit(X_train,Y_train)
error = mod_reg_ada_fit.score(X_test,Y_test)
print(param)
print("Error - > ", error)