<a href="https://colab.research.google.com/github/borcsdori/deeplearning/blob/main/dlseminar_homework_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Első házi feladat

Módosítsátok az alábbi kódot úgy, hogy az utolsó rétegben az aktivációs függvény ne relu legyen, hanem **sigmoid**! A beadott kódnak futnia kell. 

Szorgalmi:

Készítsetek egy olyan, legalább 3 rétegű hálót, amelynek az átlagos pontossága (accuracy) nagyobb, mint az eredetié. 

Beadáshoz ezt a formot töltsétek ki: https://docs.google.com/forms/d/1gx3HwcszRE-BzWAWt07hND-XtS4cG0uOmW8CMg6btaI/viewform?edit_requested=true

Beadási határidő: március 1.

https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795

Kérdés esetén a melindafkiss@gmail.com címre írjatok.

In [None]:
import numpy as np

# Small positive constant added to log arguments and denominators in the loss
# code to guard against log(0) and division by zero.
EPS = 1e-8

In [None]:
# Layer-by-layer description of the network: every entry gives the layer's
# input width, its output width and the name of its activation function.
nn_architecture = [
    dict(input_dim=2, output_dim=10, activation="relu"),
    dict(input_dim=10, output_dim=1, activation="relu"),
]

In [None]:
def init_network(nn_architecture):
    """Create randomly initialised weights and biases for every layer.

    For the layer at 1-based position ``k`` the returned dict holds
    ``"W<k>"`` with shape ``(output_dim, input_dim)`` and ``"b<k>"`` with
    shape ``(output_dim, 1)``.  Values come from ``np.random.rand`` and are
    rescaled from [0, 1) to [-1, 1).  The global NumPy random state is used,
    so seed it beforehand for reproducible results.
    """
    params = {}

    for position, spec in enumerate(nn_architecture, start=1):
        n_in = spec["input_dim"]
        n_out = spec["output_dim"]
        suffix = str(position)

        # Weights are drawn before the bias; this order determines which
        # random numbers each parameter receives.
        params["W" + suffix] = np.random.rand(n_out, n_in) * 2 - 1
        params["b" + suffix] = np.random.rand(n_out, 1) * 2 - 1

    return params

In [None]:
def relu(x):
    """Apply the rectified linear unit element-wise, i.e. ``max(0, x)``."""
    rectified = np.maximum(0, x)
    return rectified
    
def relu_backward(dA, x):
    """Propagate a gradient back through ReLU.

    Returns a copy of ``dA`` in which every entry whose matching
    pre-activation value in ``x`` is non-positive has been set to zero.
    ``dA`` itself is left untouched.
    """
    grad = np.array(dA, copy=True)
    inactive = x <= 0
    grad[inactive] = 0
    return grad

In [None]:
def _sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise."""
    # exp() is only ever taken of a non-positive number, so inputs with a
    # large magnitude cannot overflow.
    decay = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def forward_propagation(input, network_params, nn_architecture):
    """Run the input through every layer of the network.

    Each layer computes ``W @ previous_output + b`` and applies the
    activation named by the layer's ``"activation"`` entry.  Supported
    names are ``"relu"`` and ``"sigmoid"``; a layer without the entry is
    treated as ``"relu"``.

    Args:
        input: Array fed to the first layer; it is multiplied from the left
            by ``W1``, so its first dimension must match that layer's
            ``input_dim``.
        network_params: Dict holding ``"W<k>"`` and ``"b<k>"`` for every
            1-based layer index ``k``.
        nn_architecture: Sequence of layer description dicts.

    Returns:
        A tuple ``(output, forward)``.  ``output`` is the activation of the
        last layer.  ``forward`` caches, for the layer at 0-based position
        ``i``, the layer's input under ``"input<i>"`` and its
        pre-activation value under ``"lin_output<i+1>"``.

    Raises:
        ValueError: If a layer names an activation that is not supported.
    """
    forward = {}
    curr_input = input

    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        prev_input = curr_input

        # Honour the configured activation instead of always using ReLU.
        activation = layer.get("activation", "relu")
        if activation == "relu":
            activ_function_curr = relu
        elif activation == "sigmoid":
            activ_function_curr = _sigmoid
        else:
            raise ValueError(
                "Unsupported activation function: {!r}".format(activation))

        W_curr = network_params["W" + str(layer_idx)]
        b_curr = network_params["b" + str(layer_idx)]
        lin_output = np.dot(W_curr, prev_input) + b_curr
        curr_input = activ_function_curr(lin_output)

        # Cached values are needed again during back-propagation.
        forward["input" + str(idx)] = prev_input
        forward["lin_output" + str(layer_idx)] = lin_output

    return curr_input, forward

In [None]:
def binary_crossentropy(Y_hat, Y, eps=None):
    """Return the mean binary cross-entropy between predictions and labels.

    Args:
        Y_hat: 2-D array of predictions; the number of columns is used as
            the sample count.
        Y: Labels, either a scalar or an array compatible with ``Y_hat``
            under ``np.dot``.
        eps: Small constant added inside both logarithms.  Defaults to the
            module-level ``EPS``.

    Returns:
        The cost with all length-one dimensions squeezed away.
    """
    if eps is None:
        eps = EPS

    n = Y_hat.shape[1]
    # A prediction outside [0, 1] (e.g. from an unbounded output activation)
    # would make one of the log arguments negative and the cost NaN.
    # Clipping leaves in-range predictions untouched.
    Y_hat = np.clip(Y_hat, 0.0, 1.0)
    cost = -1 / n * (np.dot(Y, np.log(Y_hat + eps).T) + np.dot(1 - Y, np.log(1 - Y_hat + eps).T))
    return np.squeeze(cost)

In [None]:
def _sigmoid_backward(dA, x):
    """Propagate the gradient ``dA`` back through a sigmoid applied to ``x``."""
    # Same overflow-safe evaluation of the logistic function: exp() only
    # ever sees a non-positive argument.
    decay = np.exp(-np.abs(x))
    sig = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return dA * sig * (1.0 - sig)


def single_layer_backward_propagation(d_curr, W_curr, b_curr, lin_output, prev_input, activation):
    """Back-propagate the gradient through a single layer.

    Args:
        d_curr: Gradient of the cost with respect to this layer's output.
        W_curr: Weight matrix of the layer.
        b_curr: Bias of the layer (not needed for the computation; kept so
            the call signature stays unchanged).
        lin_output: Pre-activation value of the layer from the forward pass.
        prev_input: Input the layer received in the forward pass; its number
            of columns is used as the sample count.
        activation: Name of the layer's activation, ``"relu"`` or
            ``"sigmoid"``.

    Returns:
        A tuple ``(d_prev, dW_curr, db_curr)``: the gradient with respect to
        the layer's input, to its weights and to its bias.

    Raises:
        ValueError: If ``activation`` is not a supported name.
    """
    # Use the derivative that matches the activation applied in the forward
    # pass instead of always assuming ReLU.
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = _sigmoid_backward
    else:
        raise ValueError(
            "Unsupported activation function: {!r}".format(activation))

    d_lin_output = backward_activation_func(d_curr, lin_output)

    n = prev_input.shape[1]
    dW_curr = np.dot(d_lin_output, prev_input.T) / n
    db_curr = np.sum(d_lin_output, axis=1, keepdims=True) / n
    d_prev = np.dot(W_curr.T, d_lin_output)

    return d_prev, dW_curr, db_curr

In [None]:
def full_backward_propagation(Y_hat, Y, forward, network_params, nn_architecture):
    """Compute weight and bias gradients for every layer.

    Starts from the derivative of the binary cross-entropy with respect to
    the prediction and walks the layers from last to first, handing each
    layer's cached forward values to ``single_layer_backward_propagation``.

    Returns:
        Dict with ``"dW<k>"`` and ``"db<k>"`` for every 1-based layer
        index ``k``.
    """
    # EPS is added to both denominators of the loss derivative.
    positive_term = np.divide(Y, Y_hat + EPS)
    negative_term = np.divide(1 - Y, 1 - Y_hat + EPS)
    upstream = -(positive_term - negative_term)

    gradients = {}
    indexed_layers = list(enumerate(nn_architecture, start=1))

    for layer_no, spec in reversed(indexed_layers):
        current = str(layer_no)
        # The cache stores a layer's input under its 0-based position.
        previous = str(layer_no - 1)

        upstream, dW, db = single_layer_backward_propagation(
            upstream,
            network_params["W" + current],
            network_params["b" + current],
            forward["lin_output" + current],
            forward["input" + previous],
            spec["activation"],
        )

        gradients["dW" + current] = dW
        gradients["db" + current] = db

    return gradients

In [None]:
def update(network_params, grads, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    For each 1-based layer index ``k`` the values stored under ``"W<k>"``
    and ``"b<k>"`` are reduced by ``learning_rate`` times ``grads["dW<k>"]``
    and ``grads["db<k>"]`` respectively.  The update is done in place and
    the same ``network_params`` dict is returned.
    """
    for layer_no, _ in enumerate(nn_architecture, start=1):
        for prefix in ("W", "b"):
            key = prefix + str(layer_no)
            # Augmented assignment keeps the in-place update semantics.
            network_params[key] -= learning_rate * grads["d" + key]

    return network_params

In [None]:
def train(X, Y, network_params, learning_rate):
    """Perform a single gradient-descent step on ``(X, Y)``.

    The network layout is taken from the module-level ``nn_architecture``.

    Returns:
        A tuple ``(network_params, cost_history)`` where ``cost_history`` is
        a one-element list holding the cost measured before the update.
    """
    prediction, cache = forward_propagation(X, network_params, nn_architecture)

    # Record the loss of this step before the parameters change.
    cost_history = [binary_crossentropy(prediction, Y)]

    gradients = full_backward_propagation(
        prediction, Y, cache, network_params, nn_architecture)
    updated_params = update(
        network_params, gradients, nn_architecture, learning_rate)

    return updated_params, cost_history

In [None]:
def predict(X, network_params, nn_architecture):
    """Return 1 if the network output for ``X`` exceeds 0.5, otherwise 0.

    The comparison result is used as a single truth value, so the network
    output is expected to contain exactly one element.
    """
    Y_hat, _ = forward_propagation(X, network_params, nn_architecture)
    return 1 if Y_hat > 0.5 else 0

In [None]:
# Class 0: 200 two-dimensional points drawn from [2, 5), with a row of zero
# labels stacked underneath the coordinates.
data_1 = np.random.rand(2, 200) * 3 + 2
labels_1 = np.zeros((1, 200))
data_1 = np.concatenate((data_1, labels_1), axis=0)

# Class 1: 200 two-dimensional points drawn from [0, 3), labelled with ones.
data_2 = np.random.rand(2, 200) * 3
labels_2 = np.ones((1, 200))
data_2 = np.concatenate((data_2, labels_2), axis=0)

# Put both classes side by side (one sample per column) and shuffle the
# columns.  The transpose is a view, so shuffling its rows reorders the
# columns of ``data`` in place.
data = np.concatenate((data_1, data_2), axis=1)
np.random.shuffle(data.T)

# 60 % of the samples are used for training, the rest for testing.
train_size = int(data.shape[1] * 0.6)
test_size = data.shape[1] - train_size
train_data = data[:, :train_size]
test_data = data[:, train_size:]

# The first two rows are the coordinates, the third row is the label.
train_points, train_labels = train_data[:2], train_data[2]
test_points, test_labels = test_data[:2], test_data[2]

epochs = 10

accuracies = []
for seed in range(30):
    # Re-seed so that the weight initialisation and the order in which the
    # samples are visited are reproducible for each seed.
    np.random.seed(seed)
    network_params = init_network(nn_architecture)

    # Stochastic gradient descent: one parameter update per training sample,
    # visiting the samples in a fresh random order every epoch.
    for _ in range(epochs):
        indices = np.random.permutation(train_size)
        for i in indices:
            point = train_points[:, i][:, np.newaxis]
            label = train_labels[i]
            network_params, cost_history = train(point, label, network_params, 0.1)

    # Count how many test samples are classified correctly.
    correct = 0
    for i in range(test_size):
        point = test_points[:, i][:, np.newaxis]
        label = test_labels[i]
        pred = predict(point, network_params, nn_architecture)
        if pred == label:
            correct += 1

    accuracies.append(correct / test_size)
    print("seed:{} jól eltalált címkék aránya:{}".format(seed, correct / test_size))

# Mean test accuracy over all seeds.
average_acc = np.mean(accuracies)
print("Átlagos pontosság: ", average_acc)