# Implementing Decision Trees with scikit-learn's Built-in Classifier

Decision trees split the data on one feature at a time. In this example, the feature whose split gives the lowest Gini index (an impurity metric) is chosen first. The Gini index lies between 0 and 1 and is calculated as $1 - \sum_i p_i^2$, where $p_i$ is the proportion of samples belonging to class $i$. The lower the Gini index, the lower the impurity.

In [1]:
#Necessary imports
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split

In [2]:
# Load the iris dataset and split it into training and test sets.
# random_state=1 fixes the shuffle so the same split is produced on every run.
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=1,
)
# Sanity check: shape of the training feature matrix.
print(x_train.shape)

(112, 4)


In [3]:
# Build a decision tree with the default settings and train it on the
# training split. fit() is kept as the last expression so the notebook
# echoes the fitted estimator.
clf = DecisionTreeClassifier()
clf.fit(X=x_train, y=y_train)

DecisionTreeClassifier()

In [6]:
# Render the fitted iris tree as a PDF.
# export_graphviz produces the tree in Graphviz DOT format (out_file=None
# asks for the DOT source back instead of writing it to a file); pydotplus
# parses that DOT text into a graph object that can be written to disk.
import pydotplus

dot_data = export_graphviz(
    clf,
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf("iris.pdf")

True

In [7]:
#Importing some metrics
from sklearn.metrics import confusion_matrix

In [8]:
# Run the fitted tree over both splits so each set of predictions can be
# compared with the corresponding true labels.
y_train_pred, y_test_pred = clf.predict(x_train), clf.predict(x_test)

In [9]:
# Confusion matrix for the training split: true labels vs. predictions.
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[37,  0,  0],
       [ 0, 34,  0],
       [ 0,  0, 41]])

In [10]:
# Confusion matrix for the held-out test split: true labels vs. predictions.
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[13,  0,  0],
       [ 0, 15,  1],
       [ 0,  0,  9]])

In [11]:
# Fit the same classifier object on the truth table of a two-input OR gate.
# Each row of or_x_train is one (a, b) input pair; its label is a OR b.
or_x_train = [[0, 0], [0, 1], [1, 0], [1, 1]]
or_y_train = [a | b for a, b in or_x_train]  # -> [0, 1, 1, 1]
clf.fit(or_x_train, or_y_train)

DecisionTreeClassifier()

In [12]:
# Export the tree learned from the OR-gate data to a PDF so its splits can
# be inspected. No feature or class names are supplied for this export.
dot_data = export_graphviz(decision_tree=clf, out_file=None)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf("OR.pdf")

True