## Decision Tree with different hyperparameters

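In this example, we build several decision trees, each with different hyperparameters. Hyperparameters are settings chosen before training that configure how the model is built (for example, the maximum depth of the tree), as opposed to values the model learns from the data.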

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

### Load dataset
Let's use some large financial data from the UCI Machine Learning Repository, stored locally as `Wholesale.csv`.


In [None]:
# Read the wholesale data from the CSV next to the notebook, then preview the first five rows.
df_lg = pd.read_csv('Wholesale.csv')
df_lg.head(5)

In [None]:
# Split into train and test sets with a fixed seed so the split is reproducible.
# Features: every column from position 2 onward. Label: the 'Channel' column.
X2_train, X2_test, y2_train, y2_test = train_test_split(
    df_lg.iloc[:, 2:],
    df_lg['Channel'],
    random_state=42,
)

In [None]:
# Preview the first five rows of the training features to confirm which columns were kept.
X2_train.head(5)

### Build a function to build a decision tree
This function accepts different hyperparameters, fits a decision tree with them, and prints (and returns) the accuracy score on the test set.

In [None]:
def decTreeScore2(crit = 'gini',  maxDepth = None, minSamples = 1, minSplit = 2):
    """Fit a decision tree on the training split and report its test accuracy.

    Uses the notebook-level ``X2_train``, ``y2_train``, ``X2_test`` and
    ``y2_test`` variables; the tree is always built with ``random_state=42``.

    Parameters
    ----------
    crit
        Forwarded to ``DecisionTreeClassifier`` as ``criterion``.
    maxDepth
        Forwarded as ``max_depth``.
    minSamples
        Forwarded as ``min_samples_leaf``.
    minSplit
        Forwarded as ``min_samples_split``.

    Returns
    -------
    The value of ``accuracy_score(y2_test, predictions)``, which is also printed.
    """
    tree_params = dict(
        criterion=crit,
        max_depth=maxDepth,
        min_samples_leaf=minSamples,
        min_samples_split=minSplit,
        random_state=42,
    )
    model = DecisionTreeClassifier(**tree_params).fit(X2_train, y2_train)
    predictions = model.predict(X2_test)
    accuracy = accuracy_score(y2_test, predictions)
    print(accuracy)
    return accuracy

In [None]:
decTreeScore2()
# Baseline: with the default hyperparameters the accuracy score is about 0.88.

In [None]:
decTreeScore2(crit = 'entropy')
# If we use entropy (information gain) instead of the Gini score as the split criterion, the accuracy drops.

In [None]:
# Try different maximum depths of the tree (1 through 14).
# decTreeScore2 prints the accuracy itself, so the label is printed first on the
# same line; each score can then be matched to its depth without counting rows.
for depth in range(1, 15):
    print('max_depth =', depth, end=': ')
    decTreeScore2(maxDepth=depth)

# Observation from the recorded run: accuracy is highest when the maximum depth allowed is 2 or 5.

In [None]:
# Try different minimum numbers of samples per leaf (1 through 14).
# decTreeScore2 prints the accuracy itself, so the label is printed first on the
# same line; each score can then be matched to its setting without counting rows.
for leaf_size in range(1, 15):
    print('min_samples_leaf =', leaf_size, end=': ')
    decTreeScore2(minSamples=leaf_size)

# Observation from the recorded run: accuracy is highest when the minimum samples allowed in a leaf is greater than 8.

In [None]:
# Try different minimum numbers of samples required to split a node (2 through 14).
# decTreeScore2 prints the accuracy itself, so the label is printed first on the
# same line; each score can then be matched to its setting without counting rows.
for split_size in range(2, 15):
    print('min_samples_split =', split_size, end=': ')
    decTreeScore2(minSplit=split_size)

# Observation from the recorded run: the minimum samples required to split doesn't really affect accuracy much.