# Backpropagation

In [58]:
import numpy as np
import pandas as pd
import time

Pada post-test kali ini akan membandingkan dua jenis fungsi aktivasi yang biasa digunakan dalam backpropogation

In [59]:
#Fungsi Aktivasi Sigmoid dengan turunannya
def sig(X):
  return [1 / (1 + np.exp(-x)) for x in X]

def sigd(X):
  output = []
  for x in X:
      s = sig([x])[0]
      output.append(s * (1 - s))
  return output

#Fungsi Aktivasi Hyperbolic Tangent dengan turunannya
def tanh(X):
  return [np.tanh(x) for x in X]

def tanhd(X):
  output = []
  for x in X:
      t = tanh([x])[0]
      output.append(1 - t**2)
  return output

In [60]:
def onehot_enc(lbl, min_val=0):
  mi = min(lbl)
  enc = np.full((len(lbl), max(lbl) - mi + 1), min_val, np.int8)

  for i, x in enumerate(lbl):
    enc[i, x - mi] = 1

  return enc

def onehot_dec(enc, mi=0):
  return [np.argmax(e) + mi for e in enc]

### a) Fungsi *Training* Backpropagation

Tulis kode ke dalam *cell* di bawah ini:

In [61]:
def bp_fit_sig(X, target, layer_conf, max_epoch, max_error=.1, learn_rate=.1, print_per_epoch=100):
  start_time = time.time()
  np.random.seed(1)
  # Lengkapi kode Dibawah ini
  # These lines were incorrectly indented
  nin = [np.zeros(layer_conf[i]) for i in range(len(layer_conf))]  # net inputs
  n = [np.zeros(layer_conf[i] + 1) for i in range(len(layer_conf))]  # activations (+1 for bias)
  w = np.array([np.random.rand(layer_conf[i] + 1, layer_conf[i + 1]) for i in range(len(layer_conf) - 1)]) # This line was causing the error

  dw = [np.empty((layer_conf[i] + 1, layer_conf[i + 1])) for i in range(len(layer_conf) - 1)]
  d = [np.empty(s) for s in layer_conf[1:]]
  din = [np.empty(s) for s in layer_conf[1:-1]]
  epoch = 0
  mse = 1
  for i in range(0, len(n)-1):
    n[i][-1] = 1
  while (max_epoch == -1 or epoch < max_epoch) and mse > max_error:
    epoch += 1
    mse = 0
    for r in range(len(X)):
      n[0][:-1] = X[r]
      for L in range(1, len(layer_conf)):
        nin[L] = np.dot(n[L-1], w[L-1])
        n[L][:len(nin[L])] = sig(nin[L])
      e = target[r] - n[-1]
      mse += sum(e ** 2)
      d[-1] = e * sigd(nin[-1])
      dw[-1] = learn_rate * d[-1] * n[-2].reshape((-1, 1))
      for L in range(len(layer_conf) - 1, 1, -1):

        # Lengkapi kode Dibawah ini
        din[L-2] = np.dot(d[L-1], w[L-1][:-1].T)  # Calculate the gradient of the error for the previous layer
        d[L-2] = din[L-2] * sigd(nin[L-2])       # Calculate the delta for the previous layer
        dw[L-2] = learn_rate * np.outer(n[L-2], d[L-2])  # Weight update for the previous layer


      w += dw
    mse /= len(X)
    if print_per_epoch > -1 and epoch % print_per_epoch == 0:
      print(f'Epoch {epoch}, MSE: {mse}')
  execution = time.time() - start_time
  print("Waktu eksekusi: %s detik" % execution)
  return w, epoch, mse

In [64]:
def bp_fit_tanh(X, target, layer_conf, max_epoch, max_error=.1, learn_rate=.1, print_per_epoch=100):
  start_time = time.time()
  np.random.seed(1)
  # Initialize weights, activations, and other variables similar to bp_fit_sig
  nin = [np.zeros(layer_conf[i]) for i in range(len(layer_conf))]
  n = [np.zeros(layer_conf[i] + 1) for i in range(len(layer_conf))]
  w = np.array([np.random.rand(layer_conf[i] + 1, layer_conf[i + 1]) for i in range(len(layer_conf) - 1)])
  dw = [np.empty((layer_conf[i] + 1, layer_conf[i + 1])) for i in range(len(layer_conf) - 1)]
  d = [np.empty(s) for s in layer_conf[1:]]
  din = [np.empty(s) for s in layer_conf[1:-1]]
  epoch = 0
  mse = 1

  # Set bias nodes to 1
  for i in range(0, len(n)-1):
    n[i][-1] = 1

  # Training loop
  while (max_epoch == -1 or epoch < max_epoch) and mse > max_error:
    epoch += 1
    mse = 0
    for r in range(len(X)):
      n[0][:-1] = X[r]

      # Forward propagation
      for L in range(1, len(layer_conf)):
        nin[L] = np.dot(n[L-1], w[L-1])
        n[L][:len(nin[L])] = tanh(nin[L])  # Use tanh activation

      # Calculate error and deltas
      e = target[r] - n[-1]
      mse += sum(e ** 2)
      d[-1] = e * tanhd(nin[-1])  # Use tanhd for derivative
      dw[-1] = learn_rate * d[-1] * n[-2].reshape((-1, 1))

      # Backpropagation
      for L in range(len(layer_conf) - 1, 1, -1):
        din[L-2] = np.dot(d[L-1], w[L-1][:-1].T)
        d[L-2] = din[L-2] * tanhd(nin[L-2])  # Use tanhd for derivative
        dw[L-2] = learn_rate * np.outer(n[L-2], d[L-2])

      # Update weights
      w += dw

    mse /= len(X)
    if print_per_epoch > -1 and epoch % print_per_epoch == 0:
      print(f'Epoch {epoch}, MSE: {mse}')
  execution = time.time() - start_time
  print("Waktu eksekusi: %s detik" % execution)
  return w, epoch, mse

### b) Fungsi *Testing* Backpropagation

Tulis kode ke dalam *cell* di bawah ini:

In [65]:
def bp_predict_sig(X, w):
  n = [np.empty(len(i)) for i in w]
  nin = [np.empty(len(i[0])) for i in w]
  predict = []
  n.append(np.empty(len(w[-1][0])))
  for x in X:
    n[0][:-1] = x
    for L in range(0, len(w)):
      nin[L] = np.dot(n[L], w[L])
      n[L + 1][:len(nin[L])] = sig(nin[L])
    predict.append(n[-1].copy())
  return predict

In [66]:
def bp_predict_tanh(X, w):
  n = [np.empty(len(i)) for i in w]
  nin = [np.empty(len(i[0])) for i in w]
  predict = []
  n.append(np.empty(len(w[-1][0])))
  for x in X:
    n[0][:-1] = x
    for L in range(0, len(w)):
      nin[L] = np.dot(n[L], w[L])
      n[L + 1][:len(nin[L])] = tanh(nin[L])  # Use tanh activation
    predict.append(n[-1].copy())
  return predict

### c) Klasifikasi dataset wine


Lakukan pelatihan pada dataset wine dengan menggunakan 2 fungsi pelatihan yang telah dibuat!

Konfigurasi kedua pelatihan harus sama (epoch, hidden layer, learning rate, dll).
Akurasi yang diharapkan di setiap pelatihan adalah > 0.98

In [68]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.metrics import accuracy_score

wine = datasets.load_wine()
X = minmax_scale(wine.data)
Y = onehot_enc(wine.target)

X_train, X_test, y_train, y_test = train_test_split(X, Y,
test_size=.3,random_state=1)
#Isi jumlah layer yang digunakan dengan jumlah hidden layer #
w, ep, mse = bp_fit_sig(X_train, y_train, layer_conf=(13, 4, 3),
                        learn_rate=0.1, max_epoch=1000, max_error=0.1, print_per_epoch=25)

print(f'Epochs: {ep}, MSE: {mse}')

predict = bp_predict_sig(X_test, w)
predict = onehot_dec(predict)
y_test = onehot_dec(y_test)
accuracy = accuracy_score(predict, y_test)

print('Output:', predict)
print('True :', y_test)
print('Accuracy:', accuracy)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.metrics import accuracy_score

wine = datasets.load_wine()
X = minmax_scale(wine.data)
Y = onehot_enc(wine.target)

X_train, X_test, y_train, y_test = train_test_split(X, Y,
test_size=.3,random_state=1)
#Isi jumlah layer yang digunakan dengan jumlah hidden layer #
w, ep, mse = bp_fit_tanh(X_train, y_train, layer_conf=(13, 8, 3),
learn_rate=0.1, max_epoch=1000, max_error=0.1, print_per_epoch=25)


print(f'Epochs: {ep}, MSE: {mse}')

predict = bp_predict_tanh(X_test, w)
predict = onehot_dec(predict)
y_test = onehot_dec(y_test)
accuracy = accuracy_score(predict, y_test)

print('Output:', predict)
print('True :', y_test)
print('Accuracy:', accuracy)

TypeError: cannot unpack non-iterable NoneType object

# Pertanyaan

1.  Apa perbedaan dari penggunaan fungsi aktivasi sigmoid dengan fungsi aktivasi hyperbolic tangent?
2. Coba jelaskan alasan dari perbedaan tersebut sebisa kalian

# Jawaban

# **1**.  Oke, berikut ringkasan perbedaan fungsi aktivasi sigmoid dan hyperbolic tangent (tanh):

Perbedaan:

Rentang Output: Sigmoid menghasilkan output antara 0 dan 1, sedangkan tanh menghasilkan output antara -1 dan 1.
Sentrisitas: Tanh bersifat centered di 0, sedangkan sigmoid tidak. Artinya, output tanh berpusat di sekitar 0, sedangkan output sigmoid berpusat di sekitar 0,5.
Bias Shift: Tanh mengurangi bias shift dibandingkan sigmoid karena sentrisitasnya. Ini dapat mempercepat konvergensi selama training.
Gradient Vanishing: Sigmoid lebih rentan terhadap vanishing gradient, di mana gradien menjadi sangat kecil dan menghambat training. Tanh sedikit mengurangi masalah ini karena rentang output yang lebih luas.

2.  Formula tanh menghasilkan output yang berpusat di sekitar 0 dan memiliki rentang output yang lebih luas. Hal ini memberikan keuntungan dalam hal mengurangi bias shift dan vanishing gradient.