In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [3]:
def train_using_gini(X_train, X_test, y_train):
    
    clf_gini = DTC(criterion = "gini", random_state = 100, max_depth=3, min_samples_leaf=5)
    clf_gini.fit(X_train, y_train)
    return clf_gini

In [4]:
def train_using_entropy(X_train, X_test, y_train):
    
    clf_entropy = DTC(criterion = "entropy", random_state = 100, max_depth = 3, min_samples_leaf = 5)
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

In [5]:
def prediction(X_test, clf_object):
    
    y_pred = clf_object.predict(X_test)
    print("Predicted Values:")
    print(y_pred)
    return y_pred

In [6]:
def cal_accuracy(y_test, y_pred):
    
    print("Confusion Matrix: ", confusion_matrix(y_test,y_pred))
    print("Accuracy: ", accuracy_score(y_test,y_pred)*100)
    print("Report: ", classification_report(y_test, y_pred))

In [7]:
data = pd.read_csv("Iris.csv")
print("Dataset Length: ", len(data))
print("Dataset Shape: ", data.shape)

Dataset Length:  150
Dataset Shape:  (150, 6)


In [8]:
print(data)

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
0      1            5.1           3.5            1.4           0.2   
1      2            4.9           3.0            1.4           0.2   
2      3            4.7           3.2            1.3           0.2   
3      4            4.6           3.1            1.5           0.2   
4      5            5.0           3.6            1.4           0.2   
..   ...            ...           ...            ...           ...   
145  146            6.7           3.0            5.2           2.3   
146  147            6.3           2.5            5.0           1.9   
147  148            6.5           3.0            5.2           2.0   
148  149            6.2           3.4            5.4           2.3   
149  150            5.9           3.0            5.1           1.8   

            Species  
0       Iris-setosa  
1       Iris-setosa  
2       Iris-setosa  
3       Iris-setosa  
4       Iris-setosa  
..              ...  
145  

In [13]:
X = data.values[:, 0:4]
Y = data.values[:, 1]
Y= Y.astype('int')

In [14]:
Y

array([5, 4, 4, 4, 5, 5, 4, 5, 4, 4, 5, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
       4, 5, 4, 5, 5, 5, 5, 4, 4, 5, 5, 5, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5,
       5, 4, 5, 4, 5, 5, 7, 6, 6, 5, 6, 5, 6, 4, 6, 5, 5, 5, 6, 6, 5, 6,
       5, 5, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 6, 5, 6, 6, 6,
       5, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 6, 5, 7, 6, 6, 7, 4, 7, 6, 7,
       6, 6, 6, 5, 5, 6, 6, 7, 7, 6, 6, 5, 7, 6, 6, 7, 6, 6, 6, 7, 7, 7,
       6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 5])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.3, random_state = 100)
clf_gini = train_using_gini(X_train, X_test, y_train)
clf_entropy = train_using_entropy(X_train, X_test, y_train)

print("Results using gini index:")
y_pred_gini = prediction(X_test, clf_gini)
cal_accuracy(y_test, y_pred_gini)

print("Results using Entropy:")
y_pred_entropy = prediction(X_test, clf_entropy)
cal_accuracy(y_test, y_pred_entropy)

Results using gini index:
Predicted Values:
[6 4 7 5 6 7 5 4 6 4 5 6 5 5 6 6 6 6 7 6 7 5 6 4 5 5 6 5 6 6 5 5 6 4 5 6 5
 5 6 5 4 5 7 7 4]
Confusion Matrix:  [[ 7  0  0  0]
 [ 0 16  0  0]
 [ 0  0 16  0]
 [ 0  0  0  6]]
Accuracy:  100.0
Report:                precision    recall  f1-score   support

           4       1.00      1.00      1.00         7
           5       1.00      1.00      1.00        16
           6       1.00      1.00      1.00        16
           7       1.00      1.00      1.00         6

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Results using Entropy:
Predicted Values:
[6 4 7 5 6 7 5 4 6 4 5 6 5 5 6 6 6 6 7 6 7 5 6 4 5 5 6 5 6 6 5 5 6 4 5 6 5
 5 6 5 4 5 7 7 4]
Confusion Matrix:  [[ 7  0  0  0]
 [ 0 16  0  0]
 [ 0  0 16  0]
 [ 0  0  0  6]]
Accuracy:  100.0
Report:                precision    recall  f1-score   support

           4       1.00    

In [17]:
print(X)
print(data)

[[1 5.1 3.5 1.4]
 [2 4.9 3.0 1.4]
 [3 4.7 3.2 1.3]
 [4 4.6 3.1 1.5]
 [5 5.0 3.6 1.4]
 [6 5.4 3.9 1.7]
 [7 4.6 3.4 1.4]
 [8 5.0 3.4 1.5]
 [9 4.4 2.9 1.4]
 [10 4.9 3.1 1.5]
 [11 5.4 3.7 1.5]
 [12 4.8 3.4 1.6]
 [13 4.8 3.0 1.4]
 [14 4.3 3.0 1.1]
 [15 5.8 4.0 1.2]
 [16 5.7 4.4 1.5]
 [17 5.4 3.9 1.3]
 [18 5.1 3.5 1.4]
 [19 5.7 3.8 1.7]
 [20 5.1 3.8 1.5]
 [21 5.4 3.4 1.7]
 [22 5.1 3.7 1.5]
 [23 4.6 3.6 1.0]
 [24 5.1 3.3 1.7]
 [25 4.8 3.4 1.9]
 [26 5.0 3.0 1.6]
 [27 5.0 3.4 1.6]
 [28 5.2 3.5 1.5]
 [29 5.2 3.4 1.4]
 [30 4.7 3.2 1.6]
 [31 4.8 3.1 1.6]
 [32 5.4 3.4 1.5]
 [33 5.2 4.1 1.5]
 [34 5.5 4.2 1.4]
 [35 4.9 3.1 1.5]
 [36 5.0 3.2 1.2]
 [37 5.5 3.5 1.3]
 [38 4.9 3.1 1.5]
 [39 4.4 3.0 1.3]
 [40 5.1 3.4 1.5]
 [41 5.0 3.5 1.3]
 [42 4.5 2.3 1.3]
 [43 4.4 3.2 1.3]
 [44 5.0 3.5 1.6]
 [45 5.1 3.8 1.9]
 [46 4.8 3.0 1.4]
 [47 5.1 3.8 1.6]
 [48 4.6 3.2 1.4]
 [49 5.3 3.7 1.5]
 [50 5.0 3.3 1.4]
 [51 7.0 3.2 4.7]
 [52 6.4 3.2 4.5]
 [53 6.9 3.1 4.9]
 [54 5.5 2.3 4.0]
 [55 6.5 2.8 4.6]
 [56 5.7 2.8 4.5]
 