In [1]:
from model import Model
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import load_iris
iris_X, iris_y = load_iris(return_X_y = True)

1. Implementasi pembentukan confusion matrix saat prediksi batch, dan implementasi juga perhitungan akurasi, precision, recall, dan F1.

In [2]:
model = Model("model.txt")
    
mini_batch_X = []
mini_batch_y = []
batches_X = []
batches_y = []

for i in range (len(iris_X)):
    mini_batch_X.append(iris_X[i])
    mini_batch_y.append(iris_y[i])
    if (i % model.batch_size == model.batch_size - 1): # isi mini_batch sebanyak batch_size
        batches_X.append(mini_batch_X)
        batches_y.append(mini_batch_y)
        mini_batch_X = []
        mini_batch_y = []
outputs = model.fit(batches_X, batches_y, 0.1, 1, 20)
pred = np.concatenate(outputs, axis=0) 
pred = np.round(pred)
pred = pred.astype(int)

In [3]:
# fungsi fungsi
def confusion_matrix_scratch(true, pred):

  N = len(set(true))
  indexnum = [i for i in range(N)]
  key =["{}{}".format(x,y) for x in indexnum for y in indexnum]
  temp = dict((el,0) for el in key)  
  for i in range(len(true)):
    key = "{}{}".format(true[i],pred[i])
    if key in temp:
      temp[key] +=1
    else :
      temp[key] = 1
  reslist = list(temp.values())
  return np.array([reslist[x:x+N] for x in range(0, len(reslist), N)])


def accuracy(true,pred):
  count =0
  for i in range (len(true)):
    count += 1 if(true[i]==pred[i]) else 0 
  return count/len(true)

def zero_division(n, d, div):
    if (d == 0):
        return div
    else:
        return n / d

def recall(confMatrix, zero_div):
    rec_arr = []
    for i in range(len(confMatrix)):
        sum = 0
        for j in range(len(confMatrix)):
            sum = sum + confMatrix[i][j]
        rec_arr.append(zero_division(confMatrix[i][i], sum, zero_div))
        
    return rec_arr


def weighted_recall(confMatrix, samples, zero_div):
    rec_arr = recall(confMatrix, zero_div)
    
    w_rec = 0
    sum = 0
    for i in range(len(rec_arr)):
        w_rec = w_rec + rec_arr[i] * samples[i]
        sum = sum + samples[i]
    
    return zero_division(w_rec, sum, zero_div)

def precision(confMatrix, zero_div):
    prec_arr = []
    for i in range(len(confMatrix)):
        sum = 0
        for j in range(len(confMatrix)):
            sum = sum + confMatrix[j][i]
        prec_arr.append(zero_division(confMatrix[i][i], sum, zero_div))
        
    return prec_arr

def weighted_precision(confMatrix, samples, zero_div):
    prec_arr = precision(confMatrix, zero_div)
    w_prec = 0
    sum = 0
    for i in range(len(prec_arr)):
        w_prec = w_prec + prec_arr[i] * samples[i]
        sum = sum + samples[i]
    
    return zero_division(w_prec, sum, zero_div)


def f1_scratch(prec, rec,zero_div):
    f1_arr = []
    for i in range(len(prec)):
        f1 = zero_division(2 * prec[i] * rec[i], (prec[i] + rec[i]),zero_div)
        f1_arr.append(f1)
        
    return f1_arr

def weighted_f1(confMatrix, samples, zero_div):
    prec_arr = precision(confMatrix,zero_div)
    rec_arr = recall(confMatrix,zero_div)
    f1_arr = f1_scratch(prec_arr, rec_arr,zero_div)

    w_f1 = 0
    sum = 0
    for i in range(len(f1_arr)):
        w_f1 = w_f1 + f1_arr[i] * samples[i]
        sum = sum + samples[i]
    
    return zero_division(w_f1, sum, zero_div)

def weight_f1score(arr):
    N = len(set(arr))
    temp = [0]*N
    for i in range(len(arr)):
        temp[arr[i]] +=1
    return temp


2. Lakukan pengujian dengan membandingkan confusion matrix dan perhitungan kinerja dari sklearn.


In [4]:
# Implementasi 
print("Pengujian menggunakan fungsi implementasi:")
print("confusion_matrix")
conf = confusion_matrix_scratch(iris_y,pred)
print(conf)
print("accuracy = ",accuracy(iris_y,pred))
weight = weight_f1score(iris_y)
print("precision = ", weighted_precision(conf,weight ,0))
print("recall = ", weighted_recall(conf,weight,0))
print("f1 score = ",weighted_f1(conf,weight,0))


Pengujian menggunakan fungsi implementasi:
confusion_matrix
[[35 10  5]
 [10 40  0]
 [ 0 10 40]]
accuracy =  0.7666666666666667
precision =  0.7777777777777778
recall =  0.7666666666666667
f1 score =  0.7687400318979266


In [5]:
#sklearn
print("Pengujian menggunakan sklearn:")
conf_sklearn = confusion_matrix(iris_y,pred)
print("confusion_matrix")
print(conf_sklearn)
print("accuracy = ",accuracy_score(iris_y,pred))
weight = weight_f1score(iris_y)
print("precision = ", precision_score(iris_y,pred,average="weighted",zero_division=0))
print("recall = ", weighted_recall(conf,weight,0))
print("f1 score = ",weighted_f1(conf,weight,0))



Pengujian menggunakan sklearn:
confusion_matrix
[[35 10  5]
 [10 40  0]
 [ 0 10 40]]
accuracy =  0.7666666666666667
precision =  0.7777777777777778
recall =  0.7666666666666667
f1 score =  0.7687400318979266


3. Lakukan pembelajaran FFNN untuk dataset iris dengan skema split train 90% dan test 10%, dan menampilkan kinerja serta confusion matrixnya.

In [6]:
def kinerja(true,output):
    pred = np.concatenate(output, axis=0) 
    pred = np.round(pred)
    pred = pred.astype(int)
    conf_sklearn = confusion_matrix(true,pred)
    print("confusion_matrix")
    print(conf_sklearn)
    print("accuracy = ",accuracy_score(true,pred))
    weight = weight_f1score(true)
    print("precision = ", precision_score(true,pred,average="weighted",zero_division=0))
    print("recall = ", weighted_recall(conf,weight,0))
    print("f1 score = ",weighted_f1(conf,weight,0))

In [10]:
import numpy as np
from model import Model

# split
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

iris_feature_names = load_iris().feature_names
iris_target_names = load_iris().target_names
iris_X, iris_y = load_iris(return_X_y = True)

def train_test(model, X_train, y_train, X_test, y_test):

    # Membagi dataset train menjadi batch
    mini_batch_X_train = []
    mini_batch_y_train = []
    batches_X_train = []
    batches_y_train = []

    for i in range (len(X_train)):
        mini_batch_X_train.append(X_train[i])
        mini_batch_y_train.append(y_train[i])
        if (i % model.batch_size == model.batch_size - 1): # isi mini_batch sebanyak batch_size
            batches_X_train.append(mini_batch_X_train)
            batches_y_train.append(mini_batch_y_train)
            mini_batch_X_train = []
            mini_batch_y_train = []
    
    # Melakukan pembelajaran FFNN
    outputs_train = model.fit(batches_X_train, batches_y_train, 0.1, 1, 20)
    # print(model)
    
    # Melakukan FFNN pada test data
    outputs_test = model.doffnn(X_test)
    return outputs_train,outputs_test
    # print(outputs[0])

print("Method Splitting")

iris_X_train_stt, iris_X_test_stt, iris_y_train_stt, iris_y_test_stt = train_test_split(iris_X, iris_y, test_size = 0.1)

# proses
model = Model("model.txt")
pred_train, pred_test = train_test(model, iris_X_train_stt, iris_y_train_stt, iris_X_test_stt, iris_y_test_stt)


Method Splitting


In [11]:
print("kinerja Dataset test")
kinerja(iris_y_test_stt,pred_test)

kinerja Dataset test
confusion_matrix
[[0 4 0]
 [0 5 0]
 [0 6 0]]
accuracy =  0.3333333333333333
precision =  0.1111111111111111
recall =  0.7733333333333334
f1 score =  0.7757575757575758


4. Lakukan pembelajaran FFNN untuk dataset iris (tersedia di Internet) dengan skema 10-fold cross validation, dan menampilkan kinerjanya.

In [13]:
print("Method 10-fold")

# cross val
from sklearn.model_selection import KFold

model = Model("model.txt")

X = np.array(iris_X)
y = np.array(iris_y)
kf = KFold(n_splits=10)
# kf.get_n_splits(X)
for train_index, test_index in kf.split(X):
    # print("TRAIN:", train_index, "TEST:", test_index)
    iris_X_train_10f, iris_X_test_10f = X[train_index], X[test_index]
    iris_y_train_10f, iris_y_test_10f = y[train_index], y[test_index]
    # proses
    train_test(model, iris_X_train_10f, iris_y_train_10f, iris_X_test_10f, iris_y_test_10f)

Method 10-fold


In [14]:
print("kinerja Dataset test")
kinerja(iris_y_test_stt,pred_test)

kinerja Dataset test
confusion_matrix
[[0 4 0]
 [0 5 0]
 [0 6 0]]
accuracy =  0.3333333333333333
precision =  0.1111111111111111
recall =  0.7733333333333334
f1 score =  0.7757575757575758


5. Simpan (save) model/hipotesis hasil pembelajaran skema full training ke file eksternal.

In [15]:
model.saveModel("trained_model.txt")

6. Baca (load) model/hipotesis dari file eksternal

In [16]:
model2 = Model("trained_model.txt")

7. Buat instance baru dengan memberi nilai untuk setiap atribut, lalu prediksi dengan memanfaatkan model/hipotesis dari hasil 6.

In [23]:
new_instance = np.array([[4.3,3.2,2.1,1.4]])

In [27]:
np.round(model.doffnn(new_instance)[0])

array([2.])