In [1]:
# import needed library
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import math
import numpy as np
import random

# train test split 
from sklearn.model_selection import train_test_split

# Kfold
from sklearn.model_selection import KFold

# regex
import re
 

In [2]:
#load iris dataset
data_iris = load_iris()
iris_X, iris_y = load_iris(return_X_y=True)
feature_iris = data_iris['feature_names']

In [3]:
#transform iris into dataframe
iris_X=pd.DataFrame(iris_X)
iris_y=pd.DataFrame(iris_y)

In [4]:
#create index so be merge
iris_X=iris_X.reset_index()
iris_y=iris_y.reset_index()

In [5]:
iris_X.rename(columns = {0:feature_iris[0],1:feature_iris[1],2:feature_iris[2],3:feature_iris[3]}, inplace = True)

In [6]:
iris_y.rename(columns = {0:'target'}, inplace = True) 

In [7]:
#merge dataset iris
iris=iris_X.merge(iris_y)

In [8]:
#drop index
iris.drop("index",axis=1,inplace=True)

In [9]:
def entropy(parsed_data, target_attribute):
    parsed_value_target = {}
    total_value_target = 0
  
    for i in parsed_data[target_attribute]:
        if i is not None:
            if i not in parsed_value_target:
                parsed_value_target[i] = 1
            else:
                parsed_value_target[i] += 1

            total_value_target += 1
  
    log_result = 0

    for i in parsed_value_target:
        log_result += float(parsed_value_target[i])/total_value_target * math.log((float(parsed_value_target[i])/total_value_target), 2)
  
    return -1 * log_result

In [10]:
# hasn't handle after universal entropy
def information_gain(data, gain_attribute, target_attribute):
    gain_result = 0
    attribute_entropy_result = 0
    parsed_attribute_count = {}
    total_attribute_count = 0
    
    for i in data[gain_attribute]:
        if i is not None:
            if i not in parsed_attribute_count:
                parsed_attribute_count[i] = 1
            else:
                parsed_attribute_count[i] += 1
            
            total_attribute_count += 1
    
    for i in parsed_attribute_count:
        parsed_data = data.loc[data[gain_attribute]==i]
        attribute_entropy_result += float(parsed_attribute_count[i])/total_attribute_count * entropy(parsed_data, target_attribute)    

    gain_result += entropy(data,target_attribute) + (-1 * attribute_entropy_result)
    return gain_result

In [11]:
def split_information(data, gain_attribute):
    res = entropy(data, gain_attribute)
    if(res==0):
        res=0.00000001
    return res

In [12]:
def gain_ratio(data, gain_attribute, target_attribute):
    res = information_gain(data, gain_attribute, target_attribute) / split_information(data, gain_attribute)
    return res

In [13]:
def gain_ratio_continous(data, gain_attribute, target_attribute):
    res = information_gain_continous(data, gain_attribute, target_attribute)[0] / split_information(data, gain_attribute)
    return res,information_gain_continous(data, gain_attribute, target_attribute)[1]

In [14]:
def best_attribute(data,target_attribute,is_IG):
    gain_attribute = {
        'value': 0,
        'name': ''
    }
    
    
    for i in data.columns:
        if (i != target_attribute):
            if is_IG:
                if information_gain(data, i, target_attribute) > gain_attribute['value']:
                    gain_attribute['value'] = information_gain(data, i, target_attribute)
                    gain_attribute['name'] = i
            else:
                if gain_ratio(data, i, target_attribute) > gain_attribute['value']:
                    gain_attribute['value'] = gain_ratio(data, i, target_attribute)
                    gain_attribute['name'] = i
                

    return gain_attribute['name']

In [15]:
import math

class Node:
    def __init__(self, attribute=None, label=None, vertex=None):
        self.attribute = attribute
        self.label = label
        self.vertex = vertex
        self.children = {}
        self.most_common_label = None
    
    def set_most_common_label(self, most_common_label):
        self.most_common_label = most_common_label
        
    def get_most_common_label(self):
        return self.most_common_label
        
    def setAttribute(self, attribute):
        self.attribute = attribute

    def setLabel(self, label):
        self.label = label
        
    def setVertex(self, vertex):
        self.vertex = vertex
  
    def addChildren(self, attributeValue, node):
        self.children[attributeValue] = node
    
    def getChildren(self):
        return self.children
    
    def getLabel(self):
        return self.label
    
    def getVertex(self):
        return self.vertex

In [16]:
def get_most_common_label(data, target_attribute):
    parsed_value_target = {}
  
    for i in data[target_attribute]:
        if i is not None:
            if i not in parsed_value_target:
                parsed_value_target[i] = 1
            else:
                parsed_value_target[i] += 1

    most_common = {
        'value': 0,
        'name': ''
    }
    
    for i in parsed_value_target:
        if parsed_value_target[i] > most_common['value']:
            most_common['value'] = parsed_value_target[i]
            most_common['name'] = i
    
    return most_common['name']

In [17]:
def most_common_label_target(data,target_attribute):
    most_comm = None
    occ = 0;
    for i in data[target_attribute].unique():
        if data[data[target_attribute] == i].shape[0] >  occ:
            most_comm = i
            occ = data[data[target_attribute] == i].shape[0]
    return most_comm

In [18]:
def id3(data, target_attribute, is_IG):
    node = Node()
    if data[target_attribute].nunique()==1:
        node.setLabel(data[target_attribute].unique()[0])
        return node
    
    elif len(data.columns)==1:
        node.setLabel(get_most_common_label(data, target_attribute))
        return node
    
    else:
        best_attribute_ = best_attribute(data,target_attribute,is_IG)
        node.setAttribute(best_attribute_)
        for i in data[best_attribute_].unique():
            node.addChildren(i,id3(data.loc[data[best_attribute_]==i],target_attribute,is_IG))
            node.set_most_common_label(most_common_label_target(data,target_attribute))        
            
    return node

In [19]:
def print_tree(node,depth):
    if node.label is not None: 
        print("    "*(depth+1) +str(node.label))
    else:
        print("    "*depth + "["+ node.attribute +"]")
        for i in node.children:
            print("----"*(depth+1) +str(i))
            print_tree(node.children[i],depth+1)        

In [20]:
def copy_tree(node):
    temp_node = Node()
    if node.label is not None: 
        temp_node.setLabel(node.label)
    else:
        temp_node.setAttribute(node.attribute)
        for i in node.children:
            temp= Node()
            temp= node.children[i]
            temp_node.addChildren(i, temp)
    return temp_node

In [21]:
def check_tree(node,data,index,result, target_attribute):
    if node.label is not None: 
        result.append(node.getLabel())
    else:
        if data.loc[index, node.attribute] is None:
            result.append(node.get_most_common_label())
        for i in node.children:
            if i==data.loc[index,node.attribute]:
                check_tree(node.children[i],data,index,result, target_attribute)

In [22]:
def pred(data,model,target_attribute):
    result = []
    for i in range(len(data)):
        check_tree(model,data[i:i+1],i,result, target_attribute)
        data = {target_attribute:result}
    return pd.DataFrame(data)
    

In [23]:
# hasn't handle after universal entropy
def information_gain_continous(data, gain_attribute, target_attribute):
    gain_result = 0
    save_boundary = -1
    min_entropy = 999999 
    data = data.sort_values(gain_attribute)
    data=data.reset_index().drop('index',axis=1)
    length_data = len(data)
    for i in range(length_data):
        if (i!=length_data-1):
            if (data.loc[i,target_attribute] != data.loc[i+1,target_attribute]):
                temp_boundary = (data.loc[i,gain_attribute] + data.loc[i+1,gain_attribute])/2
                parsed_data_upper = data.loc[data[gain_attribute]>=temp_boundary]
                parsed_data_lower = data.loc[data[gain_attribute]<temp_boundary]
                len_parsed = len(parsed_data_upper)               
                temp_entropy = float(len_parsed)/length_data * entropy(parsed_data_upper, target_attribute) + float((length_data-len_parsed))/length_data * entropy(parsed_data_lower, target_attribute)     
                if temp_entropy < min_entropy:
                    #print(gain_attribute, temp_boundary, temp_entropy)
                    min_entropy = temp_entropy 
                    save_boundary = temp_boundary

    gain_result += entropy(data,target_attribute) + (-1 * min_entropy)
    return gain_result,save_boundary

In [24]:
information_gain_continous(iris, 'petal length (cm)', 'target')

(0.9182958340544894, 2.45)

In [25]:
def best_attribute_c45(data,target_attribute,is_IG):
    gain_attribute = {
        'value': 0,
        'name': '',
        'boundary': -99999999
    }
    
    
    for i in data.columns:
        if (i != target_attribute):
            if is_IG:
                if data[i].dtypes in [pd.np.dtype('float64'), pd.np.dtype('float32')]:
                    ig = information_gain_continous(data, i, target_attribute) 
                    if ig[0] > gain_attribute['value']:
                            gain_attribute['value'] = ig[0]
                            gain_attribute['name'] = i
                            gain_attribute['boundary'] = ig[1]
                else:
                    if information_gain(data, i, target_attribute) > gain_attribute['value']:
                            gain_attribute['value'] = information_gain(data, i, target_attribute)
                            gain_attribute['name'] = i
            else:
                if data[i].dtypes in [pd.np.dtype('float64'), pd.np.dtype('float32')]:
                    ig = gain_ratio_continous(data, i, target_attribute) 
                    if ig[0] > gain_attribute['value']:
                            gain_attribute['value'] = ig[0]
                            gain_attribute['name'] = i
                            gain_attribute['boundary'] = ig[1]
                else:
                    if gain_ratio(data, i, target_attribute) > gain_attribute['value']:
                            gain_attribute['value'] = gain_ratio(data, i, target_attribute)
                            gain_attribute['name'] = i

    return gain_attribute['name'],round(gain_attribute['boundary'],2)

In [26]:
best_attribute_c45(iris,'target',False)

('petal width (cm)', 0.8)

In [27]:
def c45(data, target_attribute,is_IG):
    node = Node()
    if data[target_attribute].nunique()==1:
        node.setLabel(data[target_attribute].unique()[0])
        return node
    
    elif len(data.columns)==1:
        node.setLabel(get_most_common_label(data, target_attribute))
        return node
    
    else:
        best_attribute_,bound  = best_attribute_c45(data,target_attribute,is_IG)
        #print(best_attribute_)
        node.setAttribute(best_attribute_)
        node.set_most_common_label(most_common_label_target(data,target_attribute))
        if bound==-99999999:
            for i in data[best_attribute_].unique():
                node.addChildren(i,c45(data.loc[data[best_attribute_]==i],target_attribute,is_IG))
        else:
            node.addChildren('>='+str(bound),c45(data.loc[data[best_attribute_]>=bound],target_attribute,is_IG))
            node.addChildren('<'+str(bound),c45(data.loc[data[best_attribute_]<bound],target_attribute,is_IG))
    return node

In [28]:
def c45_prun(data, target_attribute,is_IG,vertex_=None):
    node = Node(vertex = vertex_)
    if data[target_attribute].nunique()==1:
        node.setLabel(data[target_attribute].unique()[0])
        return node
     
    elif len(data.columns)==1:
        node.setLabel(get_most_common_label(data, target_attribute))
        return node
    
    else:
        best_attribute_,bound  = best_attribute_c45(data,target_attribute,is_IG)
        #print(best_attribute_)
        node.setAttribute(best_attribute_)
        node.set_most_common_label(most_common_label_target(data,target_attribute))
        if bound==-99999999:
            for i in data[best_attribute_].unique():
                node.addChildren(i,c45_prun(data.loc[data[best_attribute_]==i],target_attribute,is_IG,i))
        else:
            node.addChildren('>='+str(bound),c45_prun(data.loc[data[best_attribute_]>=bound],target_attribute,is_IG,'>='+str(bound)))
            node.addChildren('<'+str(bound),c45_prun(data.loc[data[best_attribute_]<bound],target_attribute,is_IG,'<'+str(bound)))
    return node

In [29]:
c45(iris, "target",True)

<__main__.Node at 0x7f2d0bddc198>

In [30]:
print_tree(c45(iris, "target",True),0)

[petal length (cm)]
---->=2.45
    [petal width (cm)]
-------->=1.75
        [sepal width (cm)]
------------>=3.15
            [sepal length (cm)]
---------------->=6.05
                    2
----------------<6.05
                    1
------------<3.15
                2
--------<1.75
        [petal length (cm)]
------------>=4.95
            [petal width (cm)]
---------------->=1.55
                [sepal length (cm)]
-------------------->=6.95
                        2
--------------------<6.95
                        1
----------------<1.55
                    2
------------<4.95
            [petal width (cm)]
---------------->=1.65
                    2
----------------<1.65
                    1
----<2.45
        0


In [31]:
def check_tree_c45(node,data,index,result, target_attribute):
    if node.label is not None: 
        result.append(node.getLabel())
    else:
        if data.loc[index, node.attribute] is None:
            result.append(node.get_most_common_label())
        else:
            for i in node.children:
                #print(i)
                if i[0]=='<':
                    #print(i,2)
                    bound=float(i[1:])
                    if bound>data.loc[index,node.attribute]:
                        #print(data.loc[index,node.attribute])
                        check_tree_c45(node.children[i],data,index,result, target_attribute)
                elif i[0]=='>':
                    bound=float(i[2:])
                    #print(i,1)
                    if bound<=data.loc[index,node.attribute]:
                        #print(data.loc[index,node.attribute])
                        check_tree_c45(node.children[i],data,index,result, target_attribute)
                else:
                    if i==data.loc[index,node.attribute]:
                        check_tree(node.children[i],data,index,result, target_attribute)
                    

In [32]:
def pred_c45(data,model,target_attribute):
    result = []
    for i in range(len(data)):
        check_tree_c45(model,data[i:i+1],i,result, target_attribute)
    data = { target_attribute: data[target_attribute]
            ,'prediction':result}
    return pd.DataFrame(data)
    

In [33]:
p=iris

In [34]:
p=p.drop('target',axis=1)

In [35]:
p=p.reset_index().drop('index',axis=1)

In [36]:
p['sepal width (cm)'] = None

In [37]:
def accuracy(pred,data):
    cnt = 0
    for i in range(len(pred)):
        if pred.loc[i] == data.loc[i]:
            cnt+=1
    return cnt*100/len(pred)

In [38]:
def get_data_validate(data):
    data_column = data.columns
    data_10 = pd.DataFrame(columns=data_column)
    data_90 = pd.DataFrame(columns=data_column)
    #print(data_column)
    count_10 = 0
    count_90 = 0
    
    for i in range(data.shape[0]):
        if(i%10 == 1):
            # Pass the row elements as key value pairs to append() function 
            data_10 = data_10.append(data.loc[[i]] , ignore_index=True)
        else:
            data_90 = data_90.append(data.loc[[i]] , ignore_index=True)
    return data_10, data_90

In [39]:
def prune_tree(node, attribute, vertex):
    if(node.label is not None):
        return node,0
    elif(node.attribute == attribute and node.vertex == vertex):
        node.children = {}
        node.attribute = None
        node.label = node.most_common_label
        
        return node,1
    else:
        for i in node.children:
            a,b = prune_tree(node.children[i], attribute, vertex)
            
        return node,b

In [40]:
def post_pruning(data, node, current_node, children_node, target_attribute):
    if children_node == 0:
        return node
    else:
        for i in current_node.children:
            if current_node.vertex is None: 
                #print(i,1)
                post_pruning(data, node, current_node.children[i], children_node-1, target_attribute)
            else:
                if current_node.attribute is not None:
                    data10, data90 = get_data_validate(data)
                    #print(current_node.attribute,2)
                    save = copy_tree(current_node)
                    temp_node,c = prune_tree(node, current_node.attribute, current_node.vertex)
                    train=data90.drop(target_attribute,axis=1)
                    #print_tree(node,0)
                    temp_node_pred = pred_c45(train, node, target_attribute)
                    #print(temp_node_pred)
                    temp_node_accuracy = accuracy(temp_node_pred[target_attribute], data90[target_attribute])
                    #print(temp_node_accuracy)
                    if temp_node_accuracy == 100:
                        post_pruning(data, temp_node, current_node, children_node, target_attribute)
                    else :
                        
                        node.addChildren(current_node.vertex, save)
                        #print_tree(node,0)


                        post_pruning(data, node, save.children[i], children_node, target_attribute)
        return node

### ANN

In [41]:
def sigmoid(x):
    return 1/(1+math.exp(-x))

In [42]:
def sign(x):
    if (x>0.5):
        return 1
    else:
        return 0

In [43]:
def count_error(target, output):
    return 0.5*(target-output)*(target-output)

In [44]:
class Neuron:
    def __init__(self, out=None, w=None, is_used=None, error=None):
        self.out = out
        self.w = []
        self.is_used = is_used
        self.error = 0
        self.deltaW = []
        
    def set_out(self, out):
        self.out = out
        
    def get_out(self):
        return self.out
    
    def add_deltaW(self, value):
        self.deltaW.append(value)
        
    def get_deltaW(self, index):
        return self.deltaW[index]
    
    def get_arrdW(self):
        return self.deltaW
    
    def set_deltaW(self, index, value):
        self.deltaW[index] = value
        
    def add_w(self, value):
        self.w.append(value)
        
    def set_w(self, index, value):
        self.w[index] = value
    
    def get_w(self, index):
        return self.w[index]
    
    def get_arrW(self):
        return self.w
    
    def set_is_used(self, is_used):
        self.is_used = is_used
        
    def get_is_used(self):
        return self.is_used
    
    def set_error(self, error):
        self.error = error
    
    def get_error(self):
        return self.error

In [45]:
x = [Neuron() for i in range (5)]
y = [x for i in range(5)]

In [46]:
def LenCol(arr):
    return len(arr)
def LenRow(inp,hid,out):
    return max(inp,max(hid)+1,out)
def MakeMatrix(row,col):
    return [[Neuron() for i in range (col)] for j in range(row)]

In [47]:
def printMatrixMLP(matrix):
    for i in range(len(matrix)):
        temp_matrix_out = []
        for j in range(len(matrix[i])):
            temp_matrix_out.append(matrix[i][j].get_arrW())
        print(temp_matrix_out)

In [48]:
def convTarget(output,unique):
    res = []
    for i in range(unique):
        if (i!=output):
            res.append(0)
        else:
            res.append(1)
    return res
    

In [49]:
def resetDeltaWeight(mat,layer):
    for i in range(len(layer)-1):
        for j in range(layer[i]):
            for k in range(layer[i+1]):
                mat[j][i].set_deltaW(k,0)

In [50]:
def initW(mat,layer):
    for i in range(len(layer)-1):
        for j in range(layer[i]):
            for k in range(layer[i+1]):
                mat[j][i].add_w(np.random.uniform(-1,1)) 
                mat[j][i].add_deltaW(0)

In [51]:
def feedforward(mat,layer):
    for i in range(1,len(layer)):
        if (i!=len(layer)-1):
            mat[layer[i]][i].set_out(1)
        for j in range(layer[i]):
            net = 0;
            for k in range(layer[i-1]):
                #print(k,i-1,j)
                net = net + mat[k][i-1].get_out()*mat[k][i-1].get_w(j)
#             if (i!=len(layer)-1):
            mat[j][i].set_out(sigmoid(net))
#             else:
#                 mat[j][i].set_out(sign(sigmoid(net)))

In [52]:
def get_error_total(matrix, n_output, target_arr):
    output_col = len(matrix[0])-1
    total_error = 0
    for i in range(n_output):
        total_error += 0.5*(target_arr[i] - matrix[i][output_col].get_out())**2
    
    return total_error

In [53]:
def set_output_error(matrix, row, col, target_value):
    output_value = matrix[row][col].get_out()
    #print(output_value, target_value)
#     print(output_value * (1 - output_value) * (target_value - output_value))
    matrix[row][col].set_error(output_value * (1 - output_value) * (target_value - output_value))

In [54]:
def set_hidden_error(matrix, row, col):
    output_value = matrix[row][col].get_out()
    delta_weight_error = 0
    for i in range(len(matrix[row][col].w)):
        delta_weight_error += matrix[row][col].get_w(i) * matrix[i][col+1].get_error()
    matrix[row][col].set_error(output_value * (1 - output_value) * delta_weight_error)

In [55]:
def update_delta_weight(matrix, row, col, idx , learning_rate):
    #print(matrix[i][col+1].get_error(),matrix[row][col].get_out(),row,col)
    #print(row,col+1)
    delta_weight = learning_rate * matrix[row][col].get_error() * matrix[idx][col-1].get_out()
    #print(delta_weight)
    
    current_weight = matrix[idx][col-1].get_deltaW(row)
    matrix[idx][col-1].set_deltaW(row, current_weight + delta_weight)
    #print(idx,col-1,row,current_weight + delta_weight)

In [56]:
def update_weight(mat,layer):
    for i in range(len(layer)-1):
        for j in range(layer[i]):
            for k in range(layer[i+1]):
                curr_weight = mat[j][i].get_w(k)
                delta_weight = mat[j][i].get_deltaW(k)
                mat[j][i].set_w(k,curr_weight + delta_weight) 

In [57]:
def backpropagation(matrix, layer, learning_rate, target_arr):
    for i in range(len(layer)-1, 0, -1):
        if i == len(layer)-1:
            for j in range(layer[i]):
                set_output_error(matrix, j, i, target_arr[j])
        else:
            for j in range(layer[i]):
                set_hidden_error(matrix, j, i)
        for j in range(layer[i-1]):
            for k in range(layer[i]):
                update_delta_weight(matrix, k, i, j, learning_rate)

In [58]:
hidden_node = [3,4]
def MLP(data,target, hidden_node, epochs, learning_rate):
    output_node = data[target].nunique()
    input_node = len(data.columns)
    hidden_node.insert(0,input_node)
    hidden_node.append(output_node)
    layer_node = hidden_node
    mlp = MakeMatrix(LenRow(input_node,hidden_node,output_node),LenCol(hidden_node))
    initW(mlp,layer_node)
    train = data.drop(target,axis=1)
    batch = len(data)/10;
    for k in range(epochs):
        error = 0
        for i in range(int(batch)):
            for j in range(10):
                for index,col in enumerate(train.columns):
                    mlp[index][0].set_out(data[col][j+i*10])
                mlp[input_node-1][0].set_out(1)
                feedforward(mlp,layer_node)
                backpropagation(mlp, layer_node, learning_rate, convTarget(data[target][j+i*10],output_node))
                error += get_error_total(mlp, len(convTarget(data[target][j+i*10],output_node)), convTarget(data[target][j+i*10],output_node))
            update_weight(mlp,layer_node)
            resetDeltaWeight(mlp,layer_node)
        print(error)
    print()
    print()
    printMatrixMLP(mlp)

    return mlp,layer_node

In [59]:
def predMyMLP(data, target, model, output_layer):
    out = []
    mlp = model[0]
    for i in range(0,data.shape[0],1):
        for index,col in enumerate(data.columns):
            mlp[index][0].set_out(data[col][i])
        mlp[len(data.columns)-1][0].set_out(1)
        feedforward(mlp,model[1])
        output_col = len(mlp[0])-1
        #print(output_col)
        mx=0
        res=-1
        for j in range(output_layer):
            # print(mlp[j][output_col].get_out())
            if mlp[j][output_col].get_out()>mx:
                mx = mlp[j][output_col].get_out()
                res = j
        # print()
        out.append(res)
    data = { target: data[target],
             'prediction':out}
    return pd.DataFrame(data)


# Testing

In [60]:
class_list = iris['target'].unique()

# Create confusion matrix from testing result of a model
#
# Input:
#    pandas.DataFrame   prediction_data
#    String             actual_column
#    String             prediction_column
#    Array              class_list
# Output:
#    pandas.DataFrame   confusion_matrix
#
def confusion_matrix(prediction_data, actual_column, prediction_column, class_list):
    actual_data = pd.Categorical(prediction_data[actual_column], categories=class_list)
    prediction_data = pd.Categorical(prediction_data[prediction_column], categories=class_list)
    confusion_matrix = pd.crosstab(actual_data, prediction_data, rownames=['Actual'], colnames=['Predicted'], dropna=False)
    return confusion_matrix

In [61]:
# Get accuracy from testing result of a model
#
# Input:
#    pandas.DataFrame   prediction_data
#    String             actual_column
#    String             prediction_column
# Output:
#    float              accuracy
#
def testing_accuracy(prediction_data, actual_column, prediction_column):
    count = 0
    for index, row in prediction_data.iterrows():
        if(row[actual_column] == row[prediction_column]):
            count = count + 1
    return round(count/prediction_data.shape[0],2)

In [62]:
def testing_precision(confusion_matrix):
    conf_matrix = confusion_matrix.to_numpy()
    all_precisions = []
    
    for idx_col in range(len(conf_matrix[0])):
        true_positive = conf_matrix[idx_col][idx_col]
        false_positive = 0
        
        for idx_row in range(len(conf_matrix)):
            if idx_col != idx_row:
                false_positive += conf_matrix[idx_row][idx_col]
                
        temp_precision = true_positive / (true_positive + false_positive)
        
        all_precisions.append(temp_precision)
    
    return sum(all_precisions)/len(all_precisions)

In [63]:
def testing_recall(confusion_matrix):
    conf_matrix = confusion_matrix.to_numpy()
    all_recalls = []
    
    for idx_row in range(len(conf_matrix)):
        true_positive = conf_matrix[idx_row][idx_row]
        false_negative = 0
        
        for idx_col in range(len(conf_matrix[0])):
            if idx_col != idx_row:
                false_negative += conf_matrix[idx_row][idx_col]
                
        temp_recall = true_positive / (true_positive + false_negative)

        all_recalls.append(temp_recall)
    
    return sum(all_recalls)/len(all_recalls)

In [64]:
def testing_f1(precision, recall):
    
    return 2*precision*recall/(precision+recall)

## Train Test Split

In [65]:
# Drop index columns
iris_X.drop('index',axis=1,inplace=True);
iris_y.drop('index',axis=1,inplace=True);

In [66]:
# spliting data
X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.1, random_state=42)

In [67]:
# merge data from train test split
# output is train and testing data
def mergeData(X_train, X_test, y_train, y_test):
    X_train = X_train.reset_index()
    y_train = y_train.reset_index()
    X_test = X_test.reset_index()
    y_test = y_test.reset_index()
    trainData = X_train.merge(y_train)
    testData = X_test.merge(y_test)
    trainData.drop("index",axis=1,inplace=True)
    testData.drop("index",axis=1,inplace=True)
    return trainData,testData

In [68]:
trainData,testData = mergeData(X_train, X_test, y_train, y_test)

## DTL

In [69]:
# training model
model = c45(trainData, "target",True)
# predict test
prediction_dtl = pred_c45(testData, model, "target")
print(prediction_dtl)

    target  prediction
0        1           1
1        0           0
2        2           2
3        1           1
4        1           1
5        0           0
6        1           1
7        2           2
8        1           1
9        1           1
10       2           2
11       0           0
12       0           0
13       0           0
14       0           0


In [70]:
print(confusion_matrix(prediction_dtl,'target','prediction', class_list))

Predicted  0  1  2
Actual            
0          6  0  0
1          0  6  0
2          0  0  3


In [71]:
testing_accuracy(prediction_dtl,'target','prediction')

1.0

In [72]:
conf_matrix_dtl = confusion_matrix(prediction_dtl,'target','prediction', class_list)

In [73]:
print(testing_precision(conf_matrix_dtl))
precision_dtl = testing_precision(conf_matrix_dtl)

1.0


In [74]:
print(testing_recall(conf_matrix_dtl))
recall_dtl = testing_recall(conf_matrix_dtl)

1.0


In [75]:
print(testing_f1(precision_dtl, recall_dtl))

1.0


## ANN

In [76]:
# training model
hidden_node = [4,4,2]
x = MLP(trainData, 'target', hidden_node,1500,0.1)

48.456721355679846
45.32696331992125
44.14392528309512
43.71411870171913
43.559554993978715
43.50207822587371
43.47903490330358
43.46859802006472
43.46302453065801
43.45947070696696
43.45683798822838
43.454671036116
43.45276256631283
43.4510067022132
43.449342066746205
43.44772836911882
43.44613603183114
43.44454117431175
43.442922946243755
43.4412619710839
43.439539355375885
43.4377360070559
43.43583213255181
43.43380684214369
43.43163782293198
43.42930105410534
43.42677054654692
43.42401809107743
43.42101299818068
43.41772180766855
43.41410794003443
43.41013125292245
43.40574745705747
43.40090733704053
43.39555571439331
43.389630083432095
43.38305884453567
43.375759052791935
43.36763359061422
43.35856765777765
43.348424448118564
43.337039845323964
43.32421591687216
43.30971291002995
43.29323934829396
43.27443967495492
43.252878662626145
43.22802144887891
43.19920747497639
43.16561565167906
43.12621655360405
43.079705115047105
43.024403914575394
42.958122388600614
42.877950629957205
4

12.936331588553646
12.929104175033144
12.921896238873906
12.91470828974828
12.907540800596713
12.900394207335122
12.893268908510693
12.886165264912783
12.879083599147883
12.872024195187878
12.864987297903296
12.857973112593072
12.850981804522743
12.844013498483232
12.837068278381654
12.83014618687469
12.823247225054459
12.816371352195155
12.809518485566981
12.802688500323319
12.79588122946407
12.789096463877815
12.782333952462897
12.77559340232647
12.76887447905974
12.762176807085556
12.755499970074915
12.748843511427562
12.742206934812147
12.73558970476123
12.728991247316843
12.722410950723342
12.71584816616464
12.709302208544576
12.702772357310927
12.696257857324772
12.689757919779005
12.683271723172917
12.676798414350714
12.670337109615796
12.663886895934485
12.657446832246437
12.651015950901149
12.64459325924385
12.638177741376154
12.63176836012037
12.625364059218409
12.618963765798497
12.612566393144913
12.606170843806137
12.599776013077216
12.593380792889976
12.586984076142029
12

11.291189317873105
11.28949197984536
11.287797893635576
11.286107044817326
11.284419419551025
11.282735004537326
11.281053786974553
11.279375754519902
11.277700895253805
11.276029197647523
11.27436065053338
11.272695243077548
11.271032964755095
11.269373805327184
11.267717754820051
11.266064803505857
11.264414941885015
11.262768160669962
11.261124450770362
11.259483803279327
11.257846209460881
11.256211660738389
11.254580148683845
11.252951665008124
11.25132620155182
11.24970375027695
11.248084303259258
11.246467852681027
11.24485439082453
11.243243910065914
11.241636402869565
11.240031861782901
11.238430279431494
11.236831648514539
11.235235961800766
11.233643212124477
11.23205339238195
11.230466495528088
11.22888251457327
11.227301442580416
11.225723272662254
11.224147997978756
11.222575611734774
11.221006107177745
11.21943947759562
11.217875716314907
11.216314816698773
11.214756772145323
11.213201576085924
11.211649221983722
11.210099703332112
11.208553013653377
11.207009146497406
1

10.76909360839035
10.76825596967009
10.767419404723784
10.76658391061659
10.765749484415593
10.76491612318981
10.7640838240101
10.763252583949107
10.762422400081247
10.761593269482669
10.76076518923112
10.75993815640608
10.759112168088377
10.758287221360646
10.757463313306804
10.756640441012223
10.755818601563652
10.754997792049357
10.754178009558615
10.75335925118219
10.752541514011883
10.751724795140822
10.750909091663111
10.750094400674065
10.749280719269917
10.748468044548028
10.747656373606562
10.746845703544716
10.746036031462552
10.745227354460905
10.744419669641461
10.74361297410664
10.742807264959596
10.742002539304105
10.741198794244694
10.740396026886394
10.739594234334838
10.738793413696268
10.73799356207721
10.73719467658488
10.736396754326774
10.735599792410873
10.73480378794546
10.734008738039076
10.73321463980065
10.73242149033938
10.731629286764528
10.730838026185666
10.730047705712565
10.729258322455001
10.728469873522887
10.727682356026282
10.726895767075057
10.72611

In [77]:
# predict
prediction_ann = predMyMLP(testData, 'target', x, 3)
print(prediction_ann)

    target  prediction
0        1           1
1        0           0
2        2           2
3        1           1
4        1           1
5        0           0
6        1           1
7        2           2
8        1           2
9        1           1
10       2           2
11       0           0
12       0           0
13       0           0
14       0           0


In [78]:
# confusion matrix
conf_matrix_ann = confusion_matrix(prediction_ann,'target','prediction', class_list)
print(confusion_matrix(prediction_ann,'target','prediction', class_list))

Predicted  0  1  2
Actual            
0          6  0  0
1          0  5  1
2          0  0  3


In [79]:
# accurcy
testing_accuracy(prediction_ann,'target','prediction')

0.93

In [80]:
print(testing_precision(conf_matrix_ann))
precision_ann = testing_precision(conf_matrix_ann)

0.9166666666666666


In [81]:
print(testing_recall(conf_matrix_ann))
recall_ann = testing_recall(conf_matrix_ann)

0.9444444444444445


In [82]:
print(testing_f1(precision_ann, recall_ann))

0.9303482587064678


In [83]:
iris.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


## Cross Validation

## DTL

In [84]:
# make kfold
kf = KFold(n_splits=10,shuffle=True)

In [85]:
accuracy_DTL_KF = []
precisison_DTL_KF = []
recall_DTL_KF = []
f1_DTL_KF = []
for train_index, test_index in kf.split(iris_X):
    X_train, X_test = iris_X.loc[train_index], iris_X.loc[test_index]
    y_train, y_test = iris_y.loc[train_index], iris_y.loc[test_index]
    trainData,testData =  mergeData(X_train, X_test, y_train, y_test)
    # training model
    model = c45(trainData, "target",True)
    # predict
    prediction_dtl = pred_c45(testData, model, "target")
    # measure model from kfold
    conf_matrix_dtl = confusion_matrix(prediction_dtl,'target','prediction', class_list)
    accuracy_DTL_KF.append(testing_accuracy(prediction_dtl,'target','prediction'))
    save_precision = testing_precision(conf_matrix_dtl)
    precisison_DTL_KF.append(save_precision)
    save_recall = testing_recall(conf_matrix_dtl)
    recall_DTL_KF.append(save_recall)
    f1_DTL_KF.append(testing_f1(save_precision, save_recall))

In [86]:
round(np.mean(accuracy_DTL_KF)*100,2)

94.0

In [87]:
round(np.mean(precisison_DTL_KF)*100,2)

94.06

In [88]:
round(np.mean(recall_DTL_KF)*100,2)

93.5

In [89]:
round(np.mean(f1_DTL_KF)*100,2)

93.77

## ANN

In [90]:
accuracy_MLP_KF = []
precisison_MLP_KF = []
recall_MLP_KF = []
f1_MLP_KF = []
for train_index, test_index in kf.split(iris_X):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = iris_X.loc[train_index], iris_X.loc[test_index]
    y_train, y_test = iris_y.loc[train_index], iris_y.loc[test_index]
    trainData,testData =  mergeData(X_train, X_test, y_train, y_test)
    hidden_node = [4,4,2]
    # train
    model = MLP(trainData, 'target', hidden_node,1000,0.1)
    # predict
    prediction_ann = predMyMLP(testData, 'target', x, 3)
    # save accuracy result from kfold
    conf_matrix_ann = confusion_matrix(prediction_ann,'target','prediction', class_list)
    accuracy_MLP_KF.append(testing_accuracy(prediction_ann,'target','prediction'))
    save_precision = testing_precision(conf_matrix_ann)
    precisison_MLP_KF.append(save_precision)
    save_recall = testing_recall(conf_matrix_ann)
    recall_MLP_KF.append(save_recall)
    f1_MLP_KF.append(testing_f1(save_precision, save_recall))

46.25266509891336
45.24960297837079
44.834416917286134
44.644873549951654
44.54413430693954
44.48061814240039
44.43426082608803
44.39675931717577
44.36437861125092
44.335276488427965
44.308462103047574
44.2833627745268
44.25962848359456
44.23703568881701
44.21543611099164
44.194727671012835
44.174837221491885
44.15570996484921
44.13730284238267
44.119580358424095
44.102511929749795
44.08607020335824
44.070229993996605
44.054967619810675
44.04026049373594
44.026086878509815
44.01242574545072
43.99925669804464
43.98655993500489
43.974316236399524
43.96250696231135
43.951114057363675
43.94012005698903
43.92950809299197
43.91926189704472
43.90936580145869
43.89980473701879
43.89056422794083
43.8816303841707
43.87298989132956
43.86462999864376
43.85653850520415
43.848703744885555
43.84111457023275
43.8337603355883
43.826630879706876
43.819716508067444
43.81300797506418
43.806496466228694
43.800173580610654
43.79403131342001
43.788062039013866
43.782258494294055
43.77661376256473
43.77112125

19.59281644838353
19.748811318406073
19.5304951142948
19.458638825968794
19.474153195467604
19.3375243904628
19.42184591437092
19.367951955006685
19.225200400567203
19.09345190006135
19.167858325664014
19.15591003478062
18.998459976878824
18.899255738005106
18.86245857374812
18.816996204637213
18.79197687646172
18.747810837667583
18.791293571023044
18.813581943180186
18.61879343674363
18.536894534795387
18.501620356602498
18.48256914594195
18.434777114131357
18.466014192617177
18.437655182576773
18.297958783729694
18.33842554715121
18.2872867911447
18.203735214753067
18.172791635310265
18.174471917472765
18.132650331165653
18.098268931692985
18.06181386528157
18.03409739067602
17.992392937085405
17.967733226368285
17.921810960429795
17.90213707140888
17.851754759535403
17.834564961268097
17.781238448012328
17.765403530893117
17.710468665691955
17.69439470450703
17.639392895728843
17.6217654342077
17.56807148270646
17.547949632544853
17.496665643531408
17.473516708724798
17.425412594867

14.101308667567656
14.096665800455716
14.091912834937688
14.087031194940836
14.081996532345055
14.076776272525395
14.071325766661449
14.065582034368514
14.059453149905227
14.052799341285562
14.045397373095323
14.03686885668062
14.026524521665488
14.01299580435861
13.99328701538644
13.960259431885673
13.898014703397626
13.806609717270602
13.839026320616126
13.847770042662932
13.830903343348767
13.84172404326638
13.819725828509913
13.838735816681098
13.803055876221618
13.842428734309381
13.775146373263691
13.862278521551033
13.72440587627544
13.912530068371378
13.648425666377747
13.968972206003658
13.601257621310985
13.967122083908556
13.583434319075852
13.956766285760054
13.566350473897879
13.947575674615084
13.54921557591228
13.936669874428947
13.532804777005486
13.924083124237018
13.517415795982958
13.909636520781104
13.503537323726073
13.893475690672336
13.491614821411
13.876125213201494
13.481829265321808
13.85833123318852
13.473957633517735
13.840669509978056
13.467563784048961
13.

14.2071592531535
14.198145767625531
14.189312102867946
14.180655469868613
14.172173109422548
14.163862293078658
14.15572032419276
14.147744539092246
14.139932308357388
14.132281038223875
14.124788172110327
14.117451192274387
14.110267621600222
14.103235025519954
14.09635101407068
14.089613244088659
14.083019421541337
14.076567303997496
14.070254703235367
14.064079487988018
14.058039586824936
14.052132991167785
14.046357758438933
14.040712015339702
14.03519396125573
14.029801871786361
14.02453410239433
14.019389092172007
14.01436536772015
14.009461547134574
14.004676344096401
14.000008572060645
13.995457148538339
13.991021099466474
13.986699563660155
13.982491797340991
13.978397178735005
13.974415212733213
13.970545535607227
13.96678791977166
13.963142278583875
13.959608671170905
13.956187307271916
13.95287855208274
13.949682931087647
13.946601134860606
13.943634023816095
13.940782632886364
13.938048176098018
13.935432051017298
13.932935843027805
13.930561329399731
13.92831048310292
13.

13.165959820534818
13.162110149529285
13.158271050999275
13.15444259130859
13.15062482754631
13.146817808164835
13.143021573581722
13.13923615674545
13.135461583666693
13.131697873915874
13.12794504108858
13.124203093240034
13.12047203328988
13.116751859399288
13.11304256532131
13.109344140726249
13.10565657150339
13.101979840040755
13.098313925483929
13.094658803975728
13.09101444887771
13.087380830974782
13.083757918664524
13.08014567813185
13.076544073510261
13.072953067030994
13.069372619160495
13.065802688727674
13.062243233041315
13.05869420799873
13.055155568186239
13.051627266972261
13.048109256593431
13.044601488234628
13.041103912103168
13.037616477497856
13.034139132873149
13.030671825899082
13.02721450351712
13.023767111992433
13.020329596962764
13.01690190348431
13.013483976074886
13.010075758754398
13.006677195083153
13.003288228197933
12.999908800846155
12.99653885541824
12.99317833397834
12.989827178293506
12.98648532986149
12.983152729937265
12.979829319558267
12.97651

36.46037163896608
35.43330087188135
34.34773427956012
33.25927224276295
32.22318086072384
31.26535676691285
30.349162204072627
29.43056817466468
29.01106873344916
28.55419242440873
27.795080281105854
26.787615570587725
28.037967649129733
27.895738744361626
26.588572367829855
26.62864620708514
26.957369581861393
25.042217936228727
24.60225755722151
24.251689119135705
24.717503279190332
24.772399900641474
24.92478218277522
23.93796226437837
24.73139830924584
25.112385888998446
23.872027649650757
24.596126544097352
23.629501173813765
23.585945791174428
23.541375402394863
24.246119143685533
23.401946120308153
23.26680412172253
23.77421034449856
23.078716832337967
23.141821901851554
23.66024572053335
23.00610908436579
22.99809351101117
23.3583088105937
22.876205137360486
22.90454695121115
22.932215905898268
23.452842562743662
22.922929044367983
22.762206330051114
22.83698469641289
22.80586791117099
23.15151253159588
22.672128472715656
22.718891239651715
22.707655066924957
22.764895584486304

16.604467871288648
16.573643451317466
16.542828467410967
16.512031021139226
16.481262714733628
16.450538539723922
16.419876508542153
16.389297046390933
16.358822188146448
16.32847465085709
16.298276871063752
16.268250102610526
16.23841366193626
16.208784384782103
16.179376325466126
16.150200694663344
16.12126600158009
16.09257834698037
16.06414180661352
16.035958848636792
16.008030739866864
15.980357909867823
15.952940255544592
15.925777380012443
15.898868767337694
15.872213899488207
15.845812324138892
15.819663682586407
15.793767706591844
15.768124191978485
15.742732955594182
15.717593781009178
15.692706357170794
15.668070213229516
15.643684651902081
15.619548683043211
15.595660958543714
15.572019709242495
15.548622684214422
15.525467092561637
15.502549547676324
15.47986601385237
15.457411755091636
15.43518128597872
15.413168324584788
15.391365747515565
15.369765547449289
15.348358793838498
15.327135597897268
15.306085083597115
15.28519536719403
15.26445354885574
15.243845721315264
15

14.148863865907792
14.179067194633651
14.149213331436306
14.158955884724284
14.143700288892235
14.143444861884552
14.134544923255701
14.130288290187934
14.123562078974667
14.117959467403326
14.111748866186218
14.105704365434391
14.099488819288183
14.093229348278648
14.086871822187568
14.080428640236782
14.073889825283182
14.067252610571124
14.06051227239963
14.053664691742368
14.046705656008381
14.039630988238974
14.03243648502374
14.025117890826486
14.017670861609652


[[5.10797365170617, -1.2532390451705917, -0.4875340987413482, -0.4170309485148809], [-2.2552200431510028, -2.9676550123549617, 1.2775064908063796, 2.0184136082239714], [-4.388788715206808, 4.4179066416686785], [3.4028721560780726, -1.804694747876901, -16.479952753608895], []]
[[7.168696333837301, 0.4553404149095309, 2.560608073605749, -0.22230653390051866], [0.21186917636444172, 0.7837303260982877, 0.17116670834065986, 0.49552697221265046], [-4.567370806212864, 4.839438994759593], [-15.10798051868046, -1.424940801939980

16.230020734235122
16.24111325596991
16.251292074724073
16.260526214947316
16.26880164629968
16.27611132459468
16.282448219013666
16.28780117374512
16.292152882735067
16.295479290804273
16.297749937637963
16.298928950678146
16.298976522997144
16.297850784222735
16.295509997862293
16.291915007645294
16.287031818198543
16.28083414185004
16.273305687157013
16.264441923157456
16.254251045216936
16.242753908802722
16.229982790804314
16.215978971877764
16.20078927889213
16.18446184610374
16.16704141352779
16.14856446413998
16.129054413188992
16.10851692642367
16.08693529101569
16.06426562378232
16.04043160280941
16.015318383006434
15.98876546567387
15.960558676679957
15.930422370491128
15.898015121291596
15.862936540591374
15.824760862097534
15.78312490029141
15.737908106424452
15.68952746301253
15.639281695432526
15.589491373266716
15.543046832927676
15.502285050847545
15.467907211089951
15.438910775818691
15.413514377699416
15.390120864859556
15.367707015092588
15.345757376035664
15.324060

11.44896341925225
11.445998607913515
11.4430420745415
11.440093786530348
11.437153707366429
11.434221796769469
11.431298010832842
11.428382302162415
11.425474620013299
11.42257491042383
11.419683116346153
11.41679917777303
11.413923031860444
11.41105461304537
11.408193853158696
11.405340681532612
11.402495025102608
11.399656808503272
11.396825954158261
11.394002382363697
11.391186011365138
11.388376757427702
11.385574534899257
11.382779256266499
11.379990832203497
11.377209171612735
11.37443418165836
11.371665767790985
11.368903833764385
11.366148281643062
11.36339901180081
11.360655922909396
11.357918911917096
11.355187874016364
11.3524627025999
11.349743289204227
11.347029523440039
11.344321292907932
11.341618483098456
11.338920977274793
11.336228656336479
11.33354139866217
11.33085907992875
11.328181572904665
11.325508747213574
11.32284046906503
11.320176600947539
11.317517001278778
11.314861524006862
11.312210018155124
11.309562327301872
11.306918288984546
11.304277734015411
11.301

43.550822345865285
43.550068603348805
43.54932163907809
43.54858134951568
43.54784763331165
43.54712039124914
43.54639952619124
43.54568494302965
43.544976548634594
43.54427425180631
43.54357796322755
43.542887595417845
43.542203062688664
43.54152428109989
43.540851168417404
43.5401836440723
43.53952162912031
43.53886504620299
43.53821381951
43.53756787474153
43.53692713907298
43.536291541119354
43.53566101090151
43.535035479812585
43.53441488058583
43.53379914726293
43.5331882151633
43.53258202085421
43.53198050212127
43.53138359794035
43.530791248449695
43.53020339492288
43.52961997974242
43.52904094637424
43.528466239342414
43.52789580420499
43.52732958753
43.526767536872406
43.5262096007513
43.525655728628045
43.52510587088456
43.52455997880246
43.52401800454256
43.52347990112485
43.52294562240913
43.522415123076044
43.52188835860841
43.5213652852732
43.52084586010416
43.52033004088426
43.51981778612909
43.51930905507061
43.51880380764113
43.51830200445755
43.51780360680653
43.5173

43.43153390640064
43.4314365539374
43.4313394507244
43.43124259568064
43.43114598773157
43.43104962580898
43.430953508851005
43.43085763580196
43.430762005612564
43.430666617239595
43.43057146964596
43.430476561800766
43.43038189267908
43.43028746126203
43.43019326653667
43.430099307496036
43.430005583138914
43.42991209247014
43.42981883450014
43.42972580824523
43.4296330127274
43.42954044697431
43.42944811001923
43.42935600090105
43.42926411866423
43.429172462358764
43.42908103104004
43.42898982376892
43.42889883961171
43.42880807763999
43.428717536930826
43.42862721656631
43.42853711563408
43.42844723322673
43.4283575684422
43.42826812038354
43.42817888815878
43.42808987088117
43.42800106766893
43.427912477645314
43.427824099938405
43.427735933681454
43.4276479780124
43.427560232074114
43.4274726950143
43.427385365985465
43.427298244144865
43.42721132865445
43.427124618680956
43.427038113395696
43.426951811974604
43.42686571359833
43.426779817452
43.42669412272528
43.42660862861229
4

43.7815886229517
43.770670916570836
43.76001952347386
43.74961243850383
43.739431288968305
43.72946066417605
43.71968757687334
43.710101029433766
43.70069166317682
43.69145147369857
43.682373578717026
43.67345202781129
43.66468164570162
43.65605790249807
43.647576805738865
43.63923481012934
43.63102874174331
43.6229557341113
43.615013174141005
43.607198656219616
43.59950994316647
43.59194493295293
43.58450163030341
43.57717812244469
43.56997255839438
43.56288313127493
43.555908063217565
43.54904559248181
43.54229396246665
43.535651412327624
43.52911616894834
43.52268644003983
43.51636040816197
43.51013622547912
43.504012009074366
43.497985836656525
43.49205574250218
43.48621971347789
43.480475684989806
43.4748215367054
43.46925508788797
43.46377409217498
43.45837623161882
43.45305910979146
43.447820243729986
43.44265705447107
43.43756685588352
43.432546841458986
43.42759406866253
43.42270544036634
43.417877682799386
43.41310731932632
43.408390639227775
43.40372366047287
43.399102085257

14.588070665119599
14.53852554809157
14.532856720055353
14.534048148799906
14.605702225027875
14.533321706610243
14.46495237621894
14.458262204084777
14.442303106858859
14.442576825698499
14.42715825809158
14.431838592515536
14.417864955889538
14.42239407659641
14.414226743659968
14.414618317533904
14.412013058615283
14.410662811523203
14.409173893037124
14.40751346032206
14.4054958558588
14.402890391496296
14.3993818221746
14.394636907512272
14.388370379781547
14.380352141251635
14.370412734119881
14.35859829974713
14.344944267133721
14.331817289733358
14.340746695737803
14.33077724601271
14.357748528055893
14.463762691133983
14.660172386862058
14.501579821817417
14.48286287059033
14.352783684993966
14.43620150915046
14.58488305389349
14.496343096110328
14.462657663934102
14.494988687202085
14.492848497443761
14.492871102252758
14.57347209556016
14.409682034216084
14.502131738422706
14.540643011839187
14.581463134073468
14.450844697512311
14.820151281222499
15.017764387409704
14.72406

13.465253288997687
13.462883256615667
13.460521580931921
13.458168225342376
13.4558231529538
13.453486326594241
13.451157708823928
13.448837261946162
13.446524948018718
13.444220728865119
13.44192456608632
13.439636421072336
13.437356255014105
13.435084028915341
13.432819703604453
13.430563239746562
13.428314597855373
13.426073738305252
13.423840621343023
13.421615207099933
13.41939745560335
13.417187326788532
13.414984780510226
13.41278977655408
13.410602274648067
13.408422234473598
13.406249615676602
13.40408437787839
13.401926480686294
13.399775883704214
13.39763254654285
13.395496428829794
13.393367490219436
13.391245690402545
13.389130989115744
13.387023346150633
13.384922721362809
13.382829074680524
13.380742366113191
13.378662555759576
13.376589603815798
13.374523470583066
13.372464116475191
13.37041150202575
13.368365587895171
13.366326334877414
13.364293703906537
13.362267656062867
13.360248152579157
13.358235154846206
13.356228624418549
13.354228523019671
13.35223481254715
13

14.304897614926997
14.304752343526753
14.304206933281412
14.303247889547253
14.30186656361741
14.300059565292427
14.297829009027017
14.295182555220748
14.292133228753025
14.288699019303575
14.284902289290589
14.280769032436629
14.27632803683462
14.27161000994948
14.26664671962328
14.261470196345032
14.256112029954132
14.250602780826059
14.244971513356642
14.239245449512707
14.233449732972185
14.22760728997655
14.221738771108443
14.215862558224527
14.209994822124063
14.204149618672727
14.19833901360849
14.19257322882475
14.186860805368909
14.181208780598116
14.175622878852243
14.170107716614435
14.164667024432799
14.159303888867406
14.154021018400366
14.148821037587178
14.143706813713582
14.1386818198331
14.13375053728099
14.128918899597968
14.124194778288084
14.119588509059337
14.115113455280804
14.11078660350406
14.106629184226994
14.102667309772926
14.098932620233256
14.095462927609217
14.092302846804309
14.08950439835057
14.087127558842857
14.085240716434601
14.083920953751033
14.08

11.796504967867829
11.79407007630449
11.791644261715565
11.78922741623455
11.786819434558012
11.784420213893911
11.782029653906376
11.77964765665808
11.777274126550816
11.774908970264715
11.772552096697002
11.770203416900186
11.767862844020684
11.765530293237614
11.763205681702322
11.760888928478911
11.758579954485509
11.756278682437074
11.753985036789164
11.751698943683248
11.749420330893424
11.747149127774467
11.74488526521157
11.742628675571398
11.740379292654813
11.738137051650904
11.735901889092784
11.73367374281457
11.731452551910023
11.729238256692478
11.727030798656275
11.724830120439362
11.722636165787465
11.720448879519433
11.71826820749355
11.716094096575677
11.713926494607897
11.711765350378741
11.709610613594249
11.707462234850304
11.70532016560567
11.703184358156383
11.70105476561076
11.698931341865494
11.696814041582565
11.694702820167016
11.692597633745383
11.690498439145328
11.688405193875331
11.686317856105815
11.684236384650418
11.682160738948275
11.680090879046585
1

22.337763299385212
22.324813096861025
22.312450000689452
22.300630595393727
22.289315482644017
22.27846884140695
22.268058043550898
22.258053317052234
22.248427450173786
22.239155531005128
22.230214717602227
22.22158403467301
22.213244193348455
22.20517743107865
22.197367369113156
22.189798885380636
22.182458000883916
22.175331777982
22.168408229149012
22.161676234985144
22.155125470414752
22.148746338142065
22.14252990855359
22.136467865356146
22.13055245632766
22.124776448632616
22.11913308822021
22.113616062879675
22.10821946857678
22.102937778738998
22.097765816194194
22.092698727500956
22.08773195943748
22.08286123744169
22.07808254581709
22.073392109539125
22.068786377514023
22.064262007157406
22.059815850173678
22.055444939429314
22.051146476824005
22.046917822072736
22.042756482321057
22.038660102522698
22.03462645651597
22.030653438741084
22.026739056546358
22.022881423035454
22.019078750412955
22.015329343788984
22.011631595407
22.007983979262654
22.004385046083677
22.0008334

21.57328939628165
21.572917277390548
21.57254632205216
21.572176524252182
21.57180787801899
21.57144037742323
21.571074016577505
21.570708789635955
21.570344690793956
21.569981714287657
21.569619854393697
21.56925910542878
21.56889946174946
21.568540917751612
21.56818346787022
21.56782710657894
21.56747182838987
21.5671176278531
21.566764499556488
21.56641243812523
21.56606143822164
21.565711494544743
21.56536260182996
21.56501475484889
21.56466794840885
21.564322177352743
21.563977436558577
21.563633720939315
21.563291025442414
21.56294934504971
21.562608674776957
21.5622690096737
21.561930344822848
21.561592675340464
21.561255996375436
21.56092030310927
21.56058559075576
21.560251854560725
21.559919089801724
21.559587291787857
21.559256455859387
21.558926577387588
21.55859765177442
21.55826967445229
21.557942640883827
21.55761654656156
21.557291387007744
21.556967157774046
21.556643854441365
21.55632147261956
21.556000007947198
21.555679456091347
21.55535981274728
21.555041073638346


21.473243400295253
21.473104560253724
21.47296594297193


[[0.08183923236579389, 0.9125550318244873, 1.0015469241574773, -0.1861218543367889], [-1.3473956518013603, -0.6807226488557829, 3.329897998105373, -1.5045964830004848], [1.6036060789961872, -0.8457696089324225], [6.003650060964902, -4.421815786890649, -4.976507682676376], []]
[[-1.5768030035834697, -0.3205286979795171, -0.02982223554229193, -1.0446602610478188], [0.6116636202700428, 0.40521332890123307, -1.328348159499827, 0.28008605230036043], [1.1802663165970548, -0.9771521253890739], [-8.415102152961342, 0.26324291820429546, 0.03616705404321245], []]
[[2.0755000820693574, 0.9961805079488815, 0.33349785678371946, 2.333368137072023], [-0.03601818078273589, 0.7147778477813567, -1.7163885855989338, 1.1219482003228916], [-5.863489786744429, 0.460531394198974], [], []]
[[1.1703474005616878, -0.18350032915235734, 0.8191377132438337, 0.18554981200462067], [-0.24904380950481536, -0.8171887073961632, 2.706398049884898, -1.7056916296865

43.25485260746561
43.254666955456976
43.25448202949833
43.25429782468703
43.254114336165074
43.25393155911867
43.25374948877783
43.25356812041589
43.25338744934927
43.2532074709369
43.253028180579946
43.25284957372131
43.25267164584534
43.252494392477416
43.25231780918353
43.25214189156995
43.251966635282756
43.25179203600772
43.25161808946956
43.251444791431965
43.25127213769693
43.25110012410456
43.25092874653269
43.250758000896596
43.250587883148384
43.250418389277044
43.25024951530782
43.25008125730199
43.249913611356476
43.249746573603545
43.2495801402105
43.249414307379325
43.24924907134631
43.24908442838191
43.24892037479027
43.248756906908866
43.24859402110839
43.248431713792236
43.24826998139641
43.248108820388985
43.247948227269944
43.247788198570944
43.24762873085482
43.247469820715494
43.24731146477756
43.24715365969603
43.246996402156036
43.24683968887259
43.246683516590224
43.24652788208285
43.246372782153244
43.24621821363304
43.246064173382294
43.24591065828926
43.24575

43.20918587047724
43.209129744766194
43.20907372504747
43.20901781095707
43.20896200213257
43.208906298213115
43.20885069883955
43.208795203654375
43.2087398123016
43.20868452442693
43.20862933967762
43.20857425770261
43.20851927815227
43.20846440067874
43.20840962493567
43.20835495057828
43.208300377263406
43.2082459046494
43.20819153239622
43.208137260165394
43.20808308761992
43.20802901442447
43.20797504024516
43.20792116474978
43.207867387607465
43.20781370848901
43.20776012706675
43.2077066430144
43.207653256007404
43.207599965722466
43.20754677183801
43.20749367403382
43.20744067199125
43.207387765393094
43.20733495392363
43.20728223726866
43.20722961511543
43.20717708715261
43.2071246530704
43.20707231256041
43.20702006531565
43.20696791103072
43.20691584940155
43.206863880125475
43.20681200290131
43.20676021742939
43.206708523411216
43.20665692054993
43.206605408549954
43.206553987117154
43.206502655958786
43.20645141478345
43.206400263301234
43.20634920122346
43.20629822826294

16.16274114870854
16.129186953455267
16.096191766852023
16.063736319348173
16.031797450037285
16.00034912878544
15.969363397163631
15.938811198217792
15.908663081323883
15.87888978086734
15.84946267603739
15.820354144074598
15.791537821612522
15.762988789118925
15.73468369258863
15.706600815125325
15.678720109272092
15.651023199162832
15.623493359915745
15.596115480245551
15.56887601305351
15.541762917755033
15.514765597300398
15.487874832204975
15.461082713401844
15.434382575332666
15.407768930379639
15.381237405490241
15.354784681640568
15.328408436610177
15.302107291386884
15.275880760377923
15.24972920546379
15.223653793788042
15.197656459025135
15.171739865704835
15.145907375997245
15.120163018175518
15.094511455781936
15.068957956334785
15.043508358242017
15.018169034451818
14.992946851291054
14.967849120944948
14.94288354613885
14.918058155816885
14.893381230984433
14.868861220392413
14.844506646374988
14.820326001873127
14.796327640431166
14.772519661674549
14.748909795388473
1

12.677998705387472
12.675029733678867
12.672077185774558
12.669140231381542
12.666218059003233
12.663309875896875
12.660414908010033
12.657532399896153
12.654661614609859
12.651801833582583
12.648952356478928
12.646112501034446
12.643281602875735
12.640459015323136
12.637644109177224
12.634836272489704
12.632034910319506
12.629239444475084
12.626449313243706
12.623663971108671
12.62088288845548
12.61810555126766
12.615331460813573
12.61256013332474
12.609791099666976
12.607023905005123
12.604258108462304
12.601493282774792
12.598729013943158
12.595964900880869
12.593200555061035
12.590435600162165
12.587669671713941
12.584902416743526
12.58213349342345
12.579362570721592
12.576589328054226
12.573813454942425
12.571034650672948
12.568252623963701
12.565467092634766
12.56267778328529
12.559884430976817
12.557086778923418
12.554284578189325
12.551477587394038
12.548665572425639
12.545848306162487
12.543025568203499
12.540197144607513
12.537362827641605
12.534522415539161
12.53167571226703

In [91]:
accuracy_MLP_KF

[1.0, 1.0, 0.93, 0.93, 1.0, 1.0, 0.93, 1.0, 1.0, 1.0]

In [92]:
round(np.mean(f1_MLP_KF)*100,2)

97.69

In [93]:
round(np.mean(recall_MLP_KF)*100,2)

97.75

In [94]:
round(np.mean(precisison_MLP_KF)*100,2)

97.67

In [95]:
round(np.mean(accuracy_MLP_KF)*100,2)

97.9

## Save and Load

In [96]:
# this function will create file to save our DTL model
# our model representation will be likely printed tree

def create_file_DTL(node, depth):
    
    if node.label is not None: 
        file.write(" "*(depth+1) +str(node.label))
        file.write('\n')
    else:
        file.write(" "*depth + "["+ node.attribute +"]")
        file.write('\n')
        for i in node.children:
            file.write("----"*(depth+1) +str(i))
            create_file_DTL(node.children[i],depth+1)        

In [97]:
# load file txt representation of DTL and return an model as root

def load_file_dtl(parent, depth, file):
    line = file.readline().rstrip()

    while line:
        tabs = line.count('----')

        if tabs < depth:
            break
        else :
            node = Node()
            parsed_line = ''
            vertex = ''

            if depth == 0:
                line = line[1:-1]
                parent.setAttribute(line)

            else:
                if (line[-1] == ']'):
                    parsed_line = line.replace('----', '').split('[')
                    vertex = parsed_line[0].rstrip()
                    attribute = parsed_line[1].strip()[:-1]
                    node.setAttribute(attribute)
                    node.setVertex(vertex)
                    
                else:
                    parsed_line = line.replace('----', '').split(' ' * (depth+1))
                    vertex = parsed_line[0].rstrip()
                    label = parsed_line[1].strip()
                    node.setVertex(vertex)
                    node.setLabel(label)

            parent.children[vertex] = node
            
            line = load_file_dtl(node, depth + 1, file)
    
    return line 

In [99]:
# save DTL model into file
model = c45(iris, "target",True)
file = open("text_dtl.txt", "w")
create_file_DTL(model, 0)
file.close()

In [100]:
# load DTL model from file
node = Node() # node is a parent node
file = open("text_dtl.txt", "r")

#call the function to load DTL model from file, 0 (depth) represent a root node
load_file_dtl(node, 0 , file)
node.children[''].setAttribute(node.attribute)
node = node.children['']
print_tree(node, 0)
file.close()

[petal length (cm)]
---->=2.45
    [petal width (cm)]
-------->=1.75
        [sepal width (cm)]
------------>=3.15
            [sepal length (cm)]
---------------->=6.05
                    2
----------------<6.05
                    1
------------<3.15
                2
--------<1.75
        [petal length (cm)]
------------>=4.95
            [petal width (cm)]
---------------->=1.55
                [sepal length (cm)]
-------------------->=6.95
                        2
--------------------<6.95
                        1
----------------<1.55
                    2
------------<4.95
            [petal width (cm)]
---------------->=1.65
                    2
----------------<1.65
                    1
----<2.45
        0


In [101]:
# this function will create file to save our ANN model
# we save for each line represent a node j on layer i
# for every line we print into file in format; i,j = array_W | array_dW | out | error

def create_file_ANN():
    file = open("text_ann.txt", "w")

    for i in range(0,len(x[1])):
        for j in range(0,x[1][i]):
            file.write(str(i));
            file.write(',')
            file.write(str(j));
            file.write('=')
            file.write(str(x[0][i][j].get_arrW()))
            file.write('|')
            file.write(str(x[0][i][j].get_arrdW()))
            file.write('|')
            file.write(str(x[0][i][j].get_out()))
            file.write('|')
            file.write(str(x[0][i][j].get_error()))
            file.write('\n');

    file.close()

In [102]:
# load file txt that contain model representation of ANN

def load_file_ann(file):
    neurons = file.read().split("\n")[:-1]

    rows = []
    columns = []

    curr_row = 0

    for neuron in neurons:
        neuron_pos = neuron.split('=')[0].split(',')
        if int(neuron_pos[0]) != curr_row:
            rows.append(columns)
            columns = []
            curr_row += 1

        new_neuron = Neuron()
        neuron_elements = neuron.split('=')[1].rstrip().split('|')
        weights = neuron_elements[0]
        delta_weights = neuron_elements[1]
        out = neuron_elements[2]
        err = neuron_elements[3]

        if len(weights) > 2:
            parsed_weights = weights[1:-1].split(',')
            for weight in parsed_weights:
                new_neuron.add_w(float(weight.strip()))
        
        if len(delta_weights) > 2:
            parsed_delta_weights = delta_weights[1:-1].split(',')
            for delta_weight in parsed_delta_weights:
                new_neuron.add_deltaW(float(delta_weight.strip()))

        new_neuron.set_out(float(out))
        new_neuron.set_error(float(err))

        columns.append(new_neuron)

    max_col_len = max([len(row) for row in rows])
    for row in rows:
        if len(row) < max_col_len:
            for i in range(max_col_len-len(row)):
                row.append(Neuron())

    return rows

In [103]:
# save ANN model into file
create_file_ANN()

In [104]:
# load ANN model from file
file = open("text_ann.txt", "r")
rows = load_file_ann(file)
printMatrixMLP(rows)
file.close()

[[-5.006562282470774, -1.141373048555413, 0.9892535474134329, 0.3361251295921864], [-2.6951006135534286, -1.7432078114205836, 5.147018592572216, 0.7250842546828201], [1.3999664875375386, -3.6928975488678133], [3.242445154294069, -1.9877068823246162, -21.74739348489987], []]
[[-9.00189769995629, 0.31946033644706734, -0.16781241656238483, 2.2963485037419074], [-0.46129513771118336, 0.6873755241641054, 0.21017950981829767, 0.5036436933867913], [3.8252979189378493, -4.2742527780793464], [-21.489943415461536, -1.8283290681395148, 2.9994582359407365], []]
[[9.59964346143173, -0.6608476968063707, 0.5309998506280069, -3.713939810552608], [0.9284868796382807, -0.5434414036770522, -2.264166667137126, 1.8397732792101458], [-3.069931162757634, 6.028772492880621], [], []]
[[11.185182529465825, 0.3093841942047711, -0.2201949144507605, -0.7994472288924946], [2.2446953944807455, 3.375115187892445, -2.83638021292122, -4.169156219264595], [], [], []]


# Generate New Instance

In [105]:
# Generate n instance like iris dataset
# return new n instance as new dataframe
def generateIris(n):
    a = []
    b = []
    c = []
    d = []
    e = []
    for i in range(n):
        a.append(random.uniform(4.5, 7.5))
        b.append(random.uniform(2, 4.4))
        c.append(random.uniform(1, 6.9))
        d.append(random.uniform(0.1, 2.5))
        e.append(1)
    iris = {'sepal length (cm)': a,
            'sepal width (cm)': b,
            'petal length (cm)':c,
            'petal width (cm)':d,
            'target': e
            }
    return pd.DataFrame(iris, columns = ['sepal length (cm)', 'sepal width (cm)','petal length (cm)','petal width (cm)','target'])

In [106]:
# Make 100 instance
new_ins = generateIris(100)

In [107]:
# save model
model = c45(iris, "target",True)
file = open("text_dtl.txt", "w")
create_file_DTL(model, 0)
file.close()

In [108]:
# load model
node = Node()
file = open("text_dtl.txt", "r")
load_file_dtl(node, 0 , file)
node.children[''].setAttribute(node.attribute)
node = node.children['']
file.close()

In [109]:
# prediction model
prediction_dtl = pred_c45(new_ins, node, "target")

In [110]:
new_ins['target']=prediction_dtl['prediction']

In [111]:
new_ins['target'].value_counts()

2    55
1    27
0    18
Name: target, dtype: int64

# Analisis

### Analisis Data
Dataset iris memiliki target yang yang seimbang yang mana banyaknya 0,1,2 mempunyai porsi yang sama. Range data pada data setiap kolom juga tidak terlalu besar atau bisa dibilang standar deviasi nya kecil. Data ini juga tidak memiliki pencilan

### Train test split DTL
* acc = 100
* precision = 100
* recall = 100
* f1 = 100

Train test split cuma 1 kali, Sehingga kemungkinan akan terjadi overfit pada data tertentu. Saat kita mengambil data sebanyak 90 persen sebagai data train. Bisa jadi model yang dibuat sudah bisa memprediksi dengan baik, karena data test 10 persen tersebut tidak terlalu beda dengan data train 90 persen. Hal ini bisa dilihat pada penjelasan Analisis data diatas

Pada hasil eksperimen, nilai Accuracy, Precision, Recall, dan F1 score mirip. Hal ini dikarenakan data yang digunakan (dataset iris) memiliki target yang seimbang

### Cross validation DTL
* acc = 92.8
* precision = 92.14
* recall = 93.23
* f1 = 92.67

Model DTL ini cukup overfit. Terbukti saat kami menggunakan cross validation nilai nya lebih kecil jika dibandingkan dengan metode train test split. Karena cross validation ini menggunakan skema train test secara bergantian setiap fold. Solusi, kami menyarankan untuk mengetes setiap model dengan cross validation agar pengukuran lebih akurat

Pada hasil eksperimen, nilai Accuracy, Precision, Recall, dan F1 score mirip. Hal ini dikarenakan data yang digunakan (dataset iris) memiliki target yang seimbang

### Train test split ANN 
* acc = 93
* precision = 91.6
* recall = 94.4
* f1 = 93.0

Dari hasil eksperimen ini didapatkan nilai Accuracy, Precision, Recall, dan F1 yang lebih kecil dari train test split pada DTL. Hal ini karena pada model ANN didasarkan oleh perkalian bobot dan input untuk menghasilkan output. Sehingga tidak rawan overfit seperti yang terjadi pada DTL

Semakin banyak epoch cenderung lebih bagus. Karena semakin besar epoch yang digunakan nilai error di akhir cenderung lebih kecil. Sebagai contoh, kami pernah melakukan pembelaran dengan epoch 100 mendapatkan akurasi sekitar 30 persen. Dan saat kita menggunakan epoch => 1000 kita mendapatkan nilai akurasi > 90 persen 

Selain itu learning rate juga mempegaruhi pengurangan error. Kita harus memiliki nilai learning rate yang tepat. Karena ketika learning rate terlalu tinggi, bisa jadi mengakibatkan model gagal mencapai minimum global untuk fungsi error atau juga bisa terjadi gagal mencapai konvergen. Dan apabila kita memilih learning rate terlalu kecil akan lama mencapai nilai optimum 

Dalam pemilihan banyaknya layer dan perceptron setiap layer. Semakin banyak layer dan perceptron setiap layer, semakin sedikit minimum lokal yang terbentuk, sehingga model cenderung dapat mencapai minimum global. Namun ada trade off ketika menggunkan banyak layer dan banyak perceptron, waktu training menjadi lebih lama

Pada hasil eksperimen, nilai Accuracy, Precision, Recall, dan F1 score mirip. Hal ini dikarenakan data yang digunakan (dataset iris) memiliki target yang seimbang

### Cross validation ANN
* acc = 97.2
* precision = 97.5
* recall = 96.47
* f1 = 96.92

Dari hasil eksperiman diatas, secara umum model ANN lebih bagus jika dibandingkan dengan DTL untuk data yang cukup

Pada hasil eksperimen, nilai Accuracy, Precision, Recall, dan F1 score mirip. Hal ini dikarenakan data yang digunakan (dataset iris) memiliki target yang seimbang

Catatan : Nilai Accuracy, Precision, Recall, dan F1 score pada setiap eksperimen mungkin berbeda beda setiap kali run program