In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix 
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [2]:
def import_data(url='https://archive.ics.uci.edu/ml/machine-learning-'
                    'databases/balance-scale/balance-scale.data'):
    """Load the balance-scale dataset and print a short summary of it.

    Parameters
    ----------
    url : str, optional
        Path or URL of a comma-separated file with no header row. Defaults to
        the UCI balance-scale data file, so existing ``import_data()`` calls
        behave as before; pass a local path to work offline or from a mirror.

    Returns
    -------
    pandas.DataFrame
        The parsed data. Because the file is read with ``header=None``, the
        columns are labelled with integers starting at 0.
    """
    balance_data = pd.read_csv(url, sep=',', header=None)
    print("DataSet length:", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)
    print("Dataset: ")
    print(balance_data.head())
    return balance_data

In [3]:
def splitDataSet(balance_data):
    """Separate the target from the features and build a train/test split.

    Column 0 of ``balance_data`` is used as the target and columns 1 through 4
    as the features. The split is made with ``test_size=0.3`` and
    ``random_state=100``.

    Returns
    -------
    tuple
        ``(X, Y, X_train, X_test, Y_train, Y_test)`` where ``X``/``Y`` are the
        full feature and target selections.
    """
    target = balance_data.iloc[:, 0]
    features = balance_data.iloc[:, 1:5]

    split_parts = model_selection.train_test_split(
        features, target, test_size=0.3, random_state=100
    )
    X_train, X_test, Y_train, Y_test = split_parts
    return features, target, X_train, X_test, Y_train, Y_test

In [4]:
def train_using_gini_index(X_train, Y_train):
    """Fit a decision tree that splits on the Gini criterion and return it.

    The classifier is built with ``random_state=100``, ``max_depth=3`` and
    ``min_samples_leaf=5`` and is fitted on ``X_train`` / ``Y_train``.
    """
    tree_params = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    model = DecisionTreeClassifier(**tree_params)
    model.fit(X_train, Y_train)
    return model

In [5]:
def prediction(X_test, clf_obj):
    """Predict labels for ``X_test`` with ``clf_obj``, print them, return them.

    ``clf_obj`` only needs a ``predict`` method; whatever it returns is printed
    under a "Predicted Values" heading and handed back unchanged.
    """
    predicted = clf_obj.predict(X_test)
    # One print call, newline-separated, yields the same two output lines.
    print("Predicted Values : ", predicted, sep="\n")
    return predicted

In [6]:
def calculate_accuracy(Y_test, Y_test_pred):
    """Print evaluation metrics comparing true labels with predictions.

    Parameters
    ----------
    Y_test : array-like
        True labels.
    Y_test_pred : array-like
        Labels predicted for the same samples.

    Prints the confusion matrix, the accuracy as a percentage, and the
    classification report. Returns ``None``.
    """
    print("Confusion matrix ", confusion_matrix(Y_test, Y_test_pred))
    # accuracy_score returns a fraction; scale it to a percentage.
    print("Accuracy : ", accuracy_score(Y_test, Y_test_pred) * 100)
    # The report must be built from this function's own arguments.
    print("Report : ", classification_report(Y_test, Y_test_pred))

In [7]:
def main():
    """Run the full pipeline: load, split, train a Gini tree, predict, score."""
    data = import_data()
    # The un-split X and Y returned first are not needed here.
    _, _, X_train, X_test, Y_train, Y_test = splitDataSet(data)

    print("Result using Gini Index :")
    clf_gini = train_using_gini_index(X_train, Y_train)
    Y_pred_gini = prediction(X_test, clf_gini)
    # Score the predictions against the true test labels (not the features).
    calculate_accuracy(Y_test, Y_pred_gini)

In [8]:
# Call main() when this code runs as a script or as a notebook cell; in both
# cases __name__ is "__main__" (with the double underscores).
if __name__ == "__main__":
    main()