## Train and fine-tune a Decision Tree for the moons dataset

In [1]:
from sklearn.datasets import make_moons

moons = make_moons(n_samples = 10000, noise= 0.4)
moons

(array([[ 0.46654404,  1.22651313],
        [ 0.58662884, -0.18575817],
        [ 0.7280187 ,  1.0420609 ],
        ...,
        [-0.17366043,  0.58585676],
        [ 0.86602821,  0.25804901],
        [-1.82357241, -0.03429702]]),
 array([0, 1, 0, ..., 0, 0, 0]))

In [2]:
moons[0]

array([[ 0.46654404,  1.22651313],
       [ 0.58662884, -0.18575817],
       [ 0.7280187 ,  1.0420609 ],
       ...,
       [-0.17366043,  0.58585676],
       [ 0.86602821,  0.25804901],
       [-1.82357241, -0.03429702]])

In [3]:
#Lets split the dataset in test and train datasets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(moons[0], moons[1])


In [5]:
#Now we'll do grid search cross validation to look for the best hyperparameters for a decision tree classifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

param_grid = {'max_leaf_nodes':[2,5,7,10]}
DTreeClassifier = DecisionTreeClassifier()

grid_search = GridSearchCV(DTreeClassifier, param_grid)

grid_search.fit(X_train, y_train)


GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'max_leaf_nodes': [2, 5, 7, 10]})

In [6]:
from sklearn.metrics import mean_squared_error
import numpy as np

y_predicted = grid_search.predict(X_train)
mse = mean_squared_error(y_train, y_predicted)
np.sqrt(mse)


0.36239941133138354

In [7]:
from sklearn.metrics import accuracy_score

accuracy_score(y_train,y_predicted)

0.8686666666666667

## Now grow a forest

In [12]:
#We'll generate 1000 subsets first:
from sklearn.model_selection import ShuffleSplit

sp = ShuffleSplit(n_splits=1000, test_size= len(X_train)-100)
subsets = []

for train_index,test_index in sp.split(X_train):
    subset_X = X_train[train_index]
    subset_y = y_train[train_index]

    subsets.append((subset_X,subset_y))
