# Other Classification Exercise - Decision Tree Model



Build and validate a decision tree classification model with the following dataset.

Input dataset: 'iris-data-clean.csv'

Features: 'sepal_length_cm', 'sepal_width_cm', 'petal_length_cm', 'petal_width_cm'

Label: 'class'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the cleaned iris dataset (relative path, resolved against the working directory).
df = pd.read_csv("iris-data-clean.csv")
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Work on a copy so the loaded DataFrame `df` is left unmodified.
df_copy = df.copy()

In [None]:
# Count how many rows carry each distinct value of the 'class' column.
df_copy['class'].value_counts()

### Convert the three class names to integer category codes

In [None]:
# Integer code assigned to each class name; the code is the name's position
# in the tuple below.
categories = {
    name: code
    for code, name in enumerate(('Setosa', 'Virginica', 'Versicolor'))
}

In [None]:
def myfunction(x):
    """Return the integer code mapped to `x` in `categories`, or NaN if `x` is not a key."""
    return categories.get(x, np.nan)
    
# Replace each class name with its integer code; values that are not keys of
# `categories` become NaN (see `myfunction`).
df_copy['class'] = df_copy["class"].apply(myfunction)

### Split data

In [None]:
# Display the 'class' column to inspect the label values before splitting.
df_copy['class']

In [None]:
from sklearn.model_selection import train_test_split

# The label is the 'class' column; the features are every remaining column.
y = df_copy['class']
x = df_copy.drop(columns=['class'])

# Split with the library's default proportions; the seed is fixed at 42.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

### Train model

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fix the seed so the fitted tree (and any accuracy computed from it) is the
# same on every run, matching the random_state=42 used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)

### Evaluate the model

In [None]:
from sklearn.metrics import accuracy_score
# Predict labels for the held-out test features with the fitted tree.
y_pred = dt.predict(x_test)

In [None]:
# Compare the predicted labels against the true test labels.
accuracy_score(y_test, y_pred)

### Visualise the decision tree

In [None]:
from sklearn import tree
import graphviz
from graphviz import Source

# Render the fitted tree as a Graphviz graph (out_file=None returns the DOT
# source as a string). feature_names is passed as a plain list because some
# scikit-learn releases reject a pandas Index for this parameter.
# class_names=True asks for a symbolic representation of the class names.
Source(tree.export_graphviz(dt, out_file=None, class_names=True,
                            feature_names=list(x_train.columns)))

### Build a function to train and score a decision tree

In [None]:
def decTreeScore(crit='gini', maxDepth=None, minSamples=1, minSplit=2):
    """Train a decision tree with the given hyperparameters and report test accuracy.

    The tree is fitted on the notebook-level ``x_train`` / ``y_train`` and
    scored on ``x_test`` / ``y_test``. ``random_state`` is fixed at 42.

    Parameters
    ----------
    crit
        Passed to ``DecisionTreeClassifier`` as ``criterion``.
    maxDepth
        Passed as ``max_depth``.
    minSamples
        Passed as ``min_samples_leaf``.
    minSplit
        Passed as ``min_samples_split``.

    Returns
    -------
    The accuracy score on the test split. The score is also printed, so
    callers that ignore the return value see the same output as before.
    """
    dect = DecisionTreeClassifier(
        criterion=crit,
        max_depth=maxDepth,
        min_samples_leaf=minSamples,
        min_samples_split=minSplit,
        random_state=42,
    )
    dect.fit(x_train, y_train)
    score = accuracy_score(y_test, dect.predict(x_test))
    print(score)
    # Return the score as well so sweeps can collect and compare results.
    return score


In [None]:
# Baseline: every hyperparameter left at the function's defaults.
decTreeScore()

In [None]:
# Same as the baseline but with the 'entropy' split criterion.
decTreeScore(crit='entropy')

#### Try different maximum tree depths

In [None]:
# Score one tree per maximum depth, for depths 1 through 14.
for i in np.arange(1,15):
    decTreeScore(maxDepth=i)

#### Try different values of `min_samples_split`

In [None]:
# Score one tree per minimum split size, for values 2 through 14.
for i in np.arange(2,15):
    decTreeScore(minSplit=i)
    