# Prediction using Decision Tree Algorithm
**Author: Ishani Kathuria**

Given a dataset with Sepal and Petal lengths and width of flowers, I trained a Decision Tree Algorithm that would then be able to classify the data into the different target species.

In [1]:
# importing the necessary packages
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
pd.set_option('display.max_rows', None)

In [2]:
# importing the dataset as a pandas dataframe
complete_data = pd.read_csv("Iris.csv").drop(columns='Id')
complete_data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
species_dict = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
target_names = [i for i in species_dict.keys()]
y = np.array([species_dict[i] for i in complete_data.iloc[:,4].values])

complete_data['SpeciesTrue'] = pd.Series(y)
y = complete_data['SpeciesTrue']

complete_data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,SpeciesTrue
0,5.1,3.5,1.4,0.2,Iris-setosa,0
1,4.9,3.0,1.4,0.2,Iris-setosa,0
2,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5.0,3.6,1.4,0.2,Iris-setosa,0


In [4]:
x = complete_data.iloc[:,:4]
feature_names = [i for i in x.columns]
features.head()

NameError: name 'features' is not defined

In [None]:
target_names, feature_names

## Train Test Split

x contains all the feature values and y contains the target values. These were separated into training and testing datasets.

In [None]:
x.shape, y.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [None]:
print('Shape of training features', x_train.shape)
print('Shape of testing features', x_test.shape)
print('Shape of training targets', y_train.shape)
print('Shape of testing targets', y_test.shape)

## Decision Tree Model

### Train and Visualize the model

In [None]:
from sklearn import tree

In [None]:
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)

In [None]:
text_representation = tree.export_text(dtree, feature_names=feature_names)
print(text_representation)

In [None]:
fig = plt.figure(figsize=(15,10))
_ = tree.plot_tree(dtree, feature_names=feature_names,
                   class_names=target_names,
                   filled=True, rounded=True)

In [None]:
from dtreeviz.trees import dtreeviz

In [None]:
viz = dtreeviz(dtree, x_train, y_train,
               target_name="target",
               feature_names=feature_names,
               class_names=target_names,
               fancy=False,
               scale=1.3)
viz

### Test the model

In [None]:
y_pred = dtree.predict(x_test)

In [None]:
pd.DataFrame(y_pred, y_test)

### Evaluate the model

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
report = classification_report(y_test, y_pred)
print('Classification report : \n', report)

conf_mat = confusion_matrix(y_test, y_pred)
fig = px.imshow(conf_mat, labels=dict(x='Predicted Values', y='True Values'),
                 title='Confusion Matrix',
                 x=target_names,
                 y=target_names,
                 color_continuous_scale='Magma')
fig.update_xaxes(side="top")
fig.show()