In [1]:
from utils import *
from Backpropagation import *
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import copy

# Define Model

In [2]:
# Load the Iris dataset and one-hot encode its integer class labels.
inputData = load_iris()
target_unencoded = inputData.target

# OneHotEncoder needs a 2-D input, so the label vector is reshaped to a single
# column. The `sparse=False` keyword was deprecated in scikit-learn 1.2 and
# removed in 1.4 (renamed to `sparse_output`), so it raises TypeError on current
# releases. Keeping the encoder at its default (sparse output) and densifying
# with .toarray() yields the same dense array on both old and new versions.
encoder = OneHotEncoder()
reshape = inputData["target"].reshape(len(inputData["target"]), 1)
target = encoder.fit_transform(reshape).toarray()

# Hyperparameters handed to the Backpropagation constructor below.
# NOTE(review): array_neuron_layer and array_activation each have n_layer
# entries, presumably one per layer - confirm against Backpropagation.
n_layer = 5
array_neuron_layer = [16, 8, 4, 3, 3]
array_activation = ["linear", "relu", "linear", "relu", "sigmoid"]
learning_rate = 0.001
error_threshold = 0.01
max_iter = 100
batch_size = 1

# Build the (still untrained) model from the hyperparameters above.
backprop = Backpropagation(
    n_layer=n_layer,
    array_neuron_layer=array_neuron_layer,
    array_activation=array_activation,
    learning_rate=learning_rate,
    error_threshold=error_threshold,
    max_iter=max_iter,
    batch_size=batch_size,
)

# Train Model

In [3]:
# Task 3: hold out part of the data, train on the remainder, persist the model.
X = inputData["data"].tolist()
y = target.tolist()

# 10% of the rows are held out; the fixed random_state keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=499,
)

# Keep independent copies of the held-out set: the cross-validation cell
# reassigns X_test / y_test, and the reloaded model is evaluated on these.
X_test_backup, y_test_backup = copy.deepcopy(X_test), copy.deepcopy(y_test)

# Fit the model on the training portion.
backprop.backpropagation(X_train, y_train)

# Write the trained model to disk so it can be loaded again later.
backprop.saveModel("model.json")


In [4]:
# Print the trained model's details through its own printModel() method,
# framed by a heading and a separator line.
print("Info")
backprop.printModel()
print("-------------------------")

Info
Hidden Layer-1 :
Activation Function: linear
Unit : 16
Weight: [[ 0.52812198 -0.28663021  0.2572192  -0.09697042]
 [-0.61997439  0.40104696 -0.1942939   0.76733364]
 [ 0.76396972 -0.08741171 -0.13829069  0.10314078]
 [ 0.3947783  -0.81402159  0.5685246  -0.53646699]
 [ 0.3526754  -0.41792237  0.06337861  0.53787406]
 [-0.35751319  1.37819592  0.07679456  0.43489336]
 [ 0.56016182  0.14203499 -0.20534852 -0.05362538]
 [-0.69602831 -0.45415493  0.52979832  0.36568689]
 [ 0.10176863 -0.33653302  0.34615428 -1.27253329]
 [-1.09190163  0.42912275  0.3467157  -0.27431499]
 [-0.65536812  0.5419365   0.82848837 -0.07693481]
 [ 0.68133456 -0.02966785 -0.97163114 -0.11638349]
 [-0.27423886  0.96955958 -0.45331713 -0.13114251]
 [-0.01722194 -0.33235649 -0.26509116  0.55092116]
 [-0.63481667 -0.35951883 -0.01803708  0.27800219]
 [ 0.65236467 -0.00242827 -0.22552732 -0.05018924]]
Weight Bias: [0.007787643569426365, -0.019321862312520643, -0.004881246258402332, -0.013367203014848339, -0.0099234

In [5]:
# Predict the held-out rows and turn each one-hot target row back into its
# class index (position of the row's maximum).
predicted = backprop.predict(X_test)
target_unencoded = [row.index(max(row)) for row in y_test]
print("Predicted Value", predicted, sep="\n")
print("Real Value", target_unencoded, sep="\n")
print()

# Accuracy from the project's own helper.
print("Score Accuracy")
print(score_accuracy(predicted, target_unencoded))
print()

# Confusion matrix and derived statistics from the project's own helpers.
# NOTE(review): the helper is called as (predicted, actual) whereas sklearn
# below is called as (actual, predicted) - confirm the helper's expected order.
confusion_matrix_3 = confusion_matrix(predicted, target_unencoded)
print("Our Confusion Matrix", confusion_matrix_3, sep="\n")
print("Our Confusion Matrix Statistics")
print(confusion_matrix_statistics(confusion_matrix_3))

# Reference values computed with scikit-learn (imported as `cm`).
sklearn_confusion_matrix = cm(target_unencoded, predicted)
print("Sklearn Confusion Matrix", sklearn_confusion_matrix, sep="\n")
print("Sklearn Confusion Matrix Statistics")
print("Accuracy:", accuracy_score(target_unencoded, predicted))
# Micro averaging is used to avoid the ill-defined-metric warning for classes
# that are never predicted.
print("Precision:", precision_score(target_unencoded, predicted, average="micro"))
print("Recall:", recall_score(target_unencoded, predicted, average="micro"))
print("F1 Score:", f1_score(target_unencoded, predicted, average="micro"))

Predicted Value
[1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 2, 0]
Real Value
[1, 0, 1, 0, 1, 1, 0, 2, 1, 2, 0, 1, 0, 1, 0]

Score Accuracy
0.8

Our Confusion Matrix
[[6, 0, 2], [0, 6, 0], [1, 0, 0]]
Our Confusion Matrix Statistics
[0.8571428571428571, 0.8, 0.8, 0.8000000000000002]
Sklearn Confusion Matrix
[[6 0 0]
 [0 6 1]
 [0 2 0]]
Sklearn Confusion Matrix Statistics
Accuracy: 0.8
Precision: 0.8
Recall: 0.8
F1 Score: 0.8000000000000002


8. Lakukan analisis hasil dari 2 dan 3

# Analisis 2
Dapat dilihat bahwa terdapat perbedaan antara statistik confusion matrix yang kami buat dengan yang dihasilkan oleh Sklearn. Perbedaan ini terdapat pada nilai accuracy: implementasi kami menghasilkan 0.8571428571428571, sedangkan Sklearn menghasilkan 0.8. Nilai 0.8 sesuai dengan perhitungan langsung (12 dari 15 prediksi benar) dan dengan hasil score_accuracy, sehingga kemungkinan terdapat kesalahan pada kode perhitungan accuracy dari confusion matrix yang kami buat. Kode tersebut menggunakan rumus berikut:
(TP + TN) / (TP + TN + FP + FN) = accuracy
Diketahui bahwa TP = True Positive, FP = False Positive, FN = False Negative, dan TN = True Negative.
Confusion matrix kami ialah [[6, 0, 2], [0, 6, 0], [1, 0, 0]]; bila ditranspos lalu urutan kelas 0 dan 1 ditukar (baik baris maupun kolomnya), hasilnya sama dengan confusion matrix yang dibuat oleh Sklearn, yaitu [[6, 0, 0], [0, 6, 1], [0, 2, 0]].

# Analisis 3
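Diketahui bahwa kami mendapatkan akurasi 0.8 atau 80%, sehingga prediksinya sudah cukup baik. Namun, akurasi tersebut belum sempurna: ketiga kesalahan prediksi semuanya melibatkan kelas 2 (dua data kelas 2 diprediksi sebagai kelas 1, dan satu data kelas 1 diprediksi sebagai kelas 2), sehingga model yang kami gunakan belum dapat membedakan kelas 1 dan kelas 2 dengan baik. Kesalahan tsb. dapat terjadi karena data testing berbeda dari data training, sehingga model diuji pada data yang belum pernah dilihatnya.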

# 10 fold cross validation

In [6]:
# 10-fold cross-validation over the full dataset.
# A fixed random_state makes the shuffled fold assignment reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=499)
fold_accuracies = []
for train_index, test_index in kf.split(inputData["data"]):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = inputData["data"][train_index], inputData["data"][test_index]
    y_train, y_test = target[train_index], target[test_index]

    # The model is fed plain Python lists, as in the hold-out training cell.
    X_train = X_train.tolist()
    X_test = X_test.tolist()
    y_train = y_train.tolist()
    y_test = y_test.tolist()

    # Train a brand-new model for every fold. Reusing the already-trained
    # `backprop` instance would let a fold be scored by a model that may have
    # been fitted on that fold's test rows in an earlier fold or in the
    # hold-out training cell (whether backpropagation() resets the weights is
    # not visible here, so a fresh instance is the safe choice).
    fold_model = Backpropagation(
        n_layer=n_layer,
        array_neuron_layer=array_neuron_layer,
        array_activation=array_activation,
        learning_rate=learning_rate,
        error_threshold=error_threshold,
        max_iter=max_iter,
        batch_size=batch_size,
    )
    fold_model.backpropagation(X_train, y_train)

    # Predict this fold's test rows and decode the one-hot targets back to
    # class indices (position of each row's maximum).
    predicted = fold_model.predict(X_test)
    target_unencoded = [row.index(max(row)) for row in y_test]
    print("Predicted Value")
    print(predicted)
    print("Real Value")
    print(target_unencoded)
    print()

    # Accuracy from the project's own helper.
    print("Score Accuracy")
    print(score_accuracy(predicted, target_unencoded))
    print()

    # Reference metrics from scikit-learn; the accuracy is kept for the summary.
    fold_accuracy = accuracy_score(target_unencoded, predicted)
    fold_accuracies.append(fold_accuracy)
    print("Accuracy:", fold_accuracy)
    # Micro averaging is used to avoid the ill-defined-metric warning for
    # classes that are never predicted.
    print("Precision:", precision_score(target_unencoded, predicted, average='micro'))
    print("Recall:", recall_score(target_unencoded, predicted, average='micro'))
    print("F1 Score:", f1_score(target_unencoded, predicted, average='micro'))

# Summarise the cross-validation with the mean accuracy across all folds.
print()
print("Mean Accuracy over", len(fold_accuracies), "folds:", sum(fold_accuracies) / len(fold_accuracies))


TRAIN: [  0   1   2   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19
  20  21  22  23  24  25  26  27  29  30  31  32  34  35  36  37  38  39
  40  41  43  44  45  46  48  49  50  51  52  53  54  55  56  57  58  59
  60  61  62  64  65  66  67  68  69  70  71  73  74  75  76  77  78  79
  80  81  82  83  84  85  86  87  90  91  94  95  96  97  98  99 100 101
 102 103 104 105 106 107 109 110 111 112 113 114 115 116 117 118 119 121
 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
 140 141 143 144 145 146 147 148 149] TEST: [  3   4  28  33  42  47  63  72  88  89  92  93 108 120 142]
Predicted Value
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Real Value
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2]

Score Accuracy
0.8

Accuracy: 0.8
Precision: 0.8
Recall: 0.8
F1 Score: 0.8000000000000002
TRAIN: [  0   1   2   3   4   5   6   7   8   9  10  11  12  14  15  16  18  19
  20  21  22  23  24  25  26  27  28  29  30  31  32  33  35  36  37  38
  39  41  42 

# New Model and Data

In [7]:
# Hyperparameters for the model that will receive the saved weights.
# NOTE(review): max_iter is 300 here but was 100 when model.json was trained;
# whether loadModels() overrides it is not visible from this notebook.
n_layer = 5
array_neuron_layer = [16, 8, 4, 3, 3]
array_activation = ["linear", "relu", "linear", "relu", "sigmoid"]
learning_rate = 0.001
error_threshold = 0.01
max_iter = 300
batch_size = 1

# Build a second model instance, then populate it from the saved file.
newBackprop = Backpropagation(
    n_layer=n_layer,
    array_neuron_layer=array_neuron_layer,
    array_activation=array_activation,
    learning_rate=learning_rate,
    error_threshold=error_threshold,
    max_iter=max_iter,
    batch_size=batch_size,
)
newBackprop.loadModels("model.json")



In [8]:
# Evaluate the reloaded model on the copy of the original hold-out set and
# decode each one-hot target row to its class index (position of the maximum).
predicted = newBackprop.predict(X_test_backup)
target_unencoded = [row.index(max(row)) for row in y_test_backup]
print("Predicted Value", predicted, sep="\n")
print("Real Value", target_unencoded, sep="\n")
print()

# Accuracy from the project's own helper.
print("Score Accuracy")
print(score_accuracy(predicted, target_unencoded))

Predicted Value
[1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 2, 0]
Real Value
[1, 0, 1, 0, 1, 1, 0, 2, 1, 2, 0, 1, 0, 1, 0]

Score Accuracy
0.8


## Analysis 2 dan 3