# Practicing Decision Trees 

In [5]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv('../input/iris/Iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
df.columns

Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

In [8]:
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [9]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Id,150.0,75.5,43.445368,1.0,38.25,75.5,112.75,150.0
SepalLengthCm,150.0,5.843333,0.828066,4.3,5.1,5.8,6.4,7.9
SepalWidthCm,150.0,3.054,0.433594,2.0,2.8,3.0,3.3,4.4
PetalLengthCm,150.0,3.758667,1.76442,1.0,1.6,4.35,5.1,6.9
PetalWidthCm,150.0,1.198667,0.763161,0.1,0.3,1.3,1.8,2.5


### Define Predictor and Target Variables

In [46]:
X = df[['SepalLengthCm', 'SepalWidthCm',
        'PetalLengthCm', 'PetalWidthCm']]
y = df['Species']

### Import Decision Tree Classifier and Fit into Dataset

In [66]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

dtree = DecisionTreeClassifier(criterion= 'gini', max_depth= 2,min_samples_leaf= 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state=42)

In [48]:
y_train = np.array(y_train).reshape(-1, 1)
y_train.shape

(100, 1)

In [49]:
y_test = np.array(y_test).reshape(-1, 1)
y_test.shape

(50, 1)

### Fit Decision Tree Model

In [67]:
dtree_train = dtree.fit(X_train, y_train)
dtree_pred = dtree_train.predict(X_test)

### Evaluate Model

In [68]:
from sklearn.metrics import classification_report
print(classification_report(dtree_pred, y_test))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        19
Iris-versicolor       1.00      0.94      0.97        16
 Iris-virginica       0.94      1.00      0.97        15

       accuracy                           0.98        50
      macro avg       0.98      0.98      0.98        50
   weighted avg       0.98      0.98      0.98        50



### Visualize the Tree

In [69]:
fig1 = plt.figure(figsize=(16,10))
tree.plot_tree(dtree_pred, filled=True, 
              feature_names=X.columns, fontsize=8)

TypeError: ['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-setosa' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'
 'Iris-virginica'] is not an estimator instance.

<Figure size 1152x720 with 0 Axes>

# Decision Trees with Hyperparameter Tuning

### Defining X and y Variables

In [42]:
X = df[['SepalLengthCm', 'SepalWidthCm',
        'PetalLengthCm', 'PetalWidthCm']]
y = df['Species']

### Import Grid Search 

In [61]:
from sklearn.model_selection import GridSearchCV
clf = GridSearchCV(DecisionTreeClassifier(), {
    'max_depth': [1, 2],
    'criterion': ['gini', 'entropy'],
    'min_samples_leaf': [1, 2]
}, cv = 5)

clf.fit(X, y)
clf.cv_results_
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_min_samples_leaf,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002113,0.000471,0.001363,0.000291,gini,1,1,"{'criterion': 'gini', 'max_depth': 1, 'min_sam...",0.666667,0.666667,0.666667,0.666667,0.666667,0.666667,0.0,5
1,0.001587,4.4e-05,0.00111,2.1e-05,gini,1,2,"{'criterion': 'gini', 'max_depth': 1, 'min_sam...",0.666667,0.666667,0.666667,0.666667,0.666667,0.666667,0.0,5
2,0.001667,0.000117,0.001124,1.7e-05,gini,2,1,"{'criterion': 'gini', 'max_depth': 2, 'min_sam...",0.933333,0.966667,0.9,0.866667,1.0,0.933333,0.04714,1
3,0.001643,8e-05,0.001133,3e-05,gini,2,2,"{'criterion': 'gini', 'max_depth': 2, 'min_sam...",0.933333,0.966667,0.9,0.866667,1.0,0.933333,0.04714,1
4,0.001722,0.000126,0.001174,5.9e-05,entropy,1,1,"{'criterion': 'entropy', 'max_depth': 1, 'min_...",0.666667,0.666667,0.666667,0.666667,0.666667,0.666667,0.0,5
5,0.001577,5.6e-05,0.001115,1.4e-05,entropy,1,2,"{'criterion': 'entropy', 'max_depth': 1, 'min_...",0.666667,0.666667,0.666667,0.666667,0.666667,0.666667,0.0,5
6,0.001667,3.7e-05,0.001137,1.2e-05,entropy,2,1,"{'criterion': 'entropy', 'max_depth': 2, 'min_...",0.933333,0.966667,0.9,0.866667,1.0,0.933333,0.04714,1
7,0.001639,6.8e-05,0.001138,5.6e-05,entropy,2,2,"{'criterion': 'entropy', 'max_depth': 2, 'min_...",0.933333,0.966667,0.9,0.866667,1.0,0.933333,0.04714,1


### Look at Best Combination and the Score

In [63]:
clf.best_score_

0.9333333333333332

In [64]:
clf.best_params_

{'criterion': 'gini', 'max_depth': 2, 'min_samples_leaf': 1}

In [60]:
DecisionTreeClassifier().get_params().keys()

dict_keys(['ccp_alpha', 'class_weight', 'criterion', 'max_depth', 'max_features', 'max_leaf_nodes', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'random_state', 'splitter'])