<h2> 1. Libraries </h2>

In [1]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 

<h2> 2. Functions </h2>

In [38]:
def Import_Iris():
    """Load the UCI Iris data set from its public URL into a DataFrame.

    The file is read without a header row, so the five column names are
    assigned after loading. The number of rows, the shape and the first
    rows are printed as a quick sanity check.

    Returns:
        pandas.DataFrame: columns 'sepal length', 'sepal width',
        'petal length', 'petal width' and 'class'.
    """
    source_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    header_names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']

    iris_frame = pd.read_csv(source_url, sep=',', header=None)
    iris_frame.columns = header_names

    # Report the size of the data set
    row_count = len(iris_frame)
    print("Dataset Lenght: ", row_count)
    print("Dataset Shape: ", iris_frame.shape)

    # Show the first observations
    preview = iris_frame.head()
    print("Dataset:\n ", preview)
    return iris_frame

In [47]:
def Pre_Process(df):
    """Label-encode the 'class' column and return the frame in a fixed column order.

    The 'class' column is converted to a pandas categorical and its integer
    codes are stored in a new 'class_cat' column. The work is done on a copy,
    so the frame passed in is left untouched and the function can be re-run
    safely on the same input.

    Args:
        df (pandas.DataFrame): must contain 'sepal length', 'sepal width',
            'petal length', 'petal width' and 'class'.

    Returns:
        pandas.DataFrame: the five input columns followed by 'class_cat'.
    """
    cols = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class', 'class_cat']

    # Work on a copy so the caller's frame is not mutated
    encoded = df.copy()

    ## Convert class column to category
    encoded["class"] = encoded["class"].astype('category')

    ## Label Encoding -- derived from the frame being processed, not from a
    ## notebook-level global, so the function works on a fresh kernel
    encoded["class_cat"] = encoded["class"].cat.codes

    ## Reorder Df columns
    encoded = encoded[cols]

    ## Print new order
    print("Dataset with columns reordered: \n", encoded.head())
    return encoded

In [40]:
def Split_Dataset(Df, test_size=0.3, random_state=100):
    """Split a frame into train/test feature and target arrays.

    The first four columns are used as features and the last column as the
    target. The sizes of both partitions are printed.

    Args:
        Df (pandas.DataFrame): features in columns 0-3, target in the last column.
        test_size (float): fraction of rows held out for testing; forwarded
            to ``train_test_split``. Defaults to 0.3.
        random_state (int): seed forwarded to ``train_test_split`` so the
            split is reproducible. Defaults to 100.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``.
    """
    # Materialise the underlying array once and slice it twice
    data = Df.values

    # Separating the target variable
    X = data[:, 0:4]
    y = data[:, -1]

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    print("Train dataset size :", len(X_train))
    print("Test dataset size :", len(X_test))
    return X_train, X_test, y_train, y_test

In [41]:
def Train_Gini(X_train, y_train):
    """Fit a depth-limited decision tree that splits on the Gini index.

    Args:
        X_train: training feature matrix.
        y_train: training labels.

    Returns:
        DecisionTreeClassifier: the fitted classifier.
    """
    # Hyper-parameters of the Gini tree, gathered in one place
    tree_options = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    gini_tree = DecisionTreeClassifier(**tree_options)

    # Performing training
    gini_tree.fit(X_train, y_train)
    return gini_tree

In [42]:
def Train_Entropy(X_train, y_train):
    """Fit a depth-limited decision tree that splits on entropy.

    Args:
        X_train: training feature matrix.
        y_train: training labels.

    Returns:
        DecisionTreeClassifier: the fitted classifier.
    """
    # Hyper-parameters of the entropy tree, gathered in one place
    tree_options = {
        "criterion": "entropy",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    entropy_tree = DecisionTreeClassifier(**tree_options)

    # Performing training
    entropy_tree.fit(X_train, y_train)
    return entropy_tree

In [43]:
def prediction(X_test, clf_object):
    """Predict labels for the test features with a fitted classifier.

    The predictions are printed under a "Predicted values:" heading and
    then returned.

    Args:
        X_test: feature matrix to predict on.
        clf_object: any object exposing ``predict(X)``.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    predicted = clf_object.predict(X_test)
    # Heading and values go on separate lines
    print("Predicted values:", predicted, sep="\n")
    return predicted

In [44]:
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy (in percent) and classification report.

    Args:
        y_test: true labels.
        y_pred: predicted labels.

    Returns:
        None. The three metrics are only written to standard output.
    """
    # Each metric is computed right before it is printed
    matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix: ", matrix)

    accuracy_pct = accuracy_score(y_test, y_pred) * 100
    print("Accuracy : ", accuracy_pct)

    report = classification_report(y_test, y_pred)
    print("Report : ", report)

In [None]:
def main():
    """Run the whole Iris decision-tree pipeline.

    Loads the data, label-encodes the class column, splits it into train
    and test partitions, fits one tree with the Gini criterion and one with
    entropy, then prints predictions and metrics for each.

    Returns:
        None.
    """
    # Building phase
    iris_raw = Import_Iris()
    iris_encoded = Pre_Process(iris_raw)
    X_train, X_test, y_train, y_test = Split_Dataset(iris_encoded)

    # Labels are cast to int before being handed to the trees and metrics
    y_train_int = y_train.astype('int')
    y_test_int = y_test.astype('int')

    # Bind the Gini tree to the same name that is used for prediction below
    clf_gini = Train_Gini(X_train, y_train_int)
    clf_entropy = Train_Entropy(X_train, y_train_int)

    # Operational phase
    print("Results Using Gini Index:")

    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test_int, y_pred_gini.astype('int'))

    print("Results Using Entropy:")
    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test_int, y_pred_entropy.astype('int'))

<h2> 3. Main Function </h2>

In [54]:
# Run the full pipeline when this cell (or the exported script) is executed directly
if __name__ == "__main__":
    main()

Dataset Lenght:  150
Dataset Shape:  (150, 5)
Dataset:
     sepal length  sepal width  petal length  petal width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa
Dataset with columns reordered: 
    sepal length  sepal width  petal length  petal width        class  \
0           5.1          3.5           1.4          0.2  Iris-setosa   
1           4.9          3.0           1.4          0.2  Iris-setosa   
2           4.7          3.2           1.3          0.2  Iris-setosa   
3           4.6          3.1           1.5          0.2  Iris-setosa   
4           5.0          3.6           1.4          0.2  Iris-setosa   

   class_cat  
0          0  
1          0  
2          0  
3 