In [1]:
import numpy as np

In [2]:
X = np.array([[0, 0, 1, 1],[0, 1, 0, 1]])
Y = np.array([[0,1,1,0]])
# 1 hidden layer with 4 hidden units

In [3]:
def sigmoid(z):
    s = 1/(1 + np.exp(-z))
    return s  

In [4]:
def layers_size(X, Y):
    n_x = X.shape[0]
    n_h = 4
    n_y = Y.shape[0]
    
    return n_x, n_h, n_y

In [5]:
n_x, n_h, n_y = layers_size(X, Y)

In [6]:
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h,n_x)
    b1 = np.zeros((n_h,1))
    W2 = np.random.rand(n_y,n_h)
    b2 = np.zeros((n_y,1))
    
    parameters = {"W1":W1,
                  "b1":b1,
                  "W2":W2,
                  "b2":b2}
    return parameters

In [7]:
parameters = initialize_parameters(n_x, n_h, n_y)

In [8]:
print(str(parameters))

{'W1': array([[-1.54670982,  1.87891854],
       [ 1.03532024,  0.98681585],
       [-2.5428616 , -0.7064708 ],
       [ 0.16070176, -1.20250224]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[0.31998153, 0.73010562, 0.55639206, 0.05663578]]), 'b2': array([[0.]])}


In [9]:
def forward_prop(X, Y, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    Z1 = np.dot(W1,X) + b1
    A1 = np.tanh(Z1) #tanh activation fuction
    Z2 = np.dot(W2,A1) + b2
    A2 = sigmoid(Z2) # sigmoid activation fuction
    
    cashe = {"Z1":Z1,
             "A1":A1,
             "Z2":Z2,
             "A2":A2,
             }
    return A2, cashe

In [10]:
A2, cashe = forward_prop(X, Y, parameters)

In [11]:
print(str(cashe["A2"]))    #ο στόχος είναι να προσεγγίσει το [0, 1, 1, 0]

[[0.5        0.61575062 0.43384944 0.55198626]]


In [12]:
def compute_cost(A2, Y):
    m = X.shape[1]
    
    A2 = cashe["A2"]
    
    cost = (-1/m)*np.sum(np.dot(Y,np.log(A2).T)+np.dot((1-Y),np.log(1-A2).T))
    
    cost = np.squeeze(cost)
    
    return cost
    
    

In [13]:
cost = compute_cost(A2, Y)

In [14]:
print(cost)

0.7040123770069366


In [15]:
def backward_prop(parameters, cashe, X, Y):
    m = X.shape[1]
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    Z1 = cashe["Z1"]
    A1 = cashe["A1"]
    Z2 = cashe["Z2"]
    A2 = cashe["A2"]
    
    dZ2 = A2 - Y
    dW2 = (1/m) * np.dot(dZ2,A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1,keepdims = True)
    dZ1 = np.dot(W2.T,dZ2) * (1 - np.power(A1,2))
    dW1 = (1/m) * np.dot(dZ1, X.T)
    db1 = (1/m) * np.sum(dZ1, axis=1,keepdims = True)
    
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    
    return grads
    
    
    

In [16]:
grads = backward_prop(parameters, cashe, X, Y)

In [17]:
def update_parameters(parameters, grads, learning_rate):
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    
    W1 = W1 - (learning_rate) * dW1
    b1 = b1 - (learning_rate) * db1
    W2 = W2 - (learning_rate) * dW2
    b2 = b2 - (learning_rate) * db2
    
    parameters = {"W1":W1,
                  "b1":b1,
                  "W2":W2,
                  "b2":b2}
    
    return parameters

In [18]:
parameters = update_parameters(parameters, grads, 0.01)

In [19]:
def nn_model(X, Y, n_h, learning_rate, num_iterations):    
    n_x = layers_size(X, Y)[0]
    n_y = layers_size(X, Y)[2]
    
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    m = X.shape[1]
    
    
    # loop, gradient descent
    
    for i in range(num_iterations):                       
        A2, cashe = forward_prop(X, Y, parameters)
          
        cost = compute_cost(A2, Y)
          
        grads = backward_prop(parameters, cashe, X, Y)
        
        parameters = update_parameters(parameters, grads, learning_rate)
        
        if i % 1000 == 0:
            print("cost = " + str(cost),"A2 = " + str(A2))
        
            
    return parameters       
        
        
        
        
    

In [20]:
parameters = nn_model(X, Y, 4, 0.01, 10000) # το cost θα έπρεπε να πέφτει. Το Α2(activation του last layer)
# τουλάχιστον προσεγγιζει το output που θελω

cost = 0.7040123770069366 A2 = [[0.5        0.45546396 0.82041858 0.74057981]]
cost = 0.7040123770069366 A2 = [[0.25804307 0.61665484 0.7576959  0.3493509 ]]
cost = 0.7040123770069366 A2 = [[0.12445735 0.76144942 0.83829421 0.2449657 ]]
cost = 0.7040123770069366 A2 = [[0.06305363 0.84651542 0.88689872 0.17421535]]
cost = 0.7040123770069366 A2 = [[0.03742793 0.89248984 0.915094   0.12981571]]
cost = 0.7040123770069366 A2 = [[0.02503889 0.91900929 0.93286493 0.1016062 ]]
cost = 0.7040123770069366 A2 = [[0.01816417 0.93569693 0.94487179 0.08273473]]
cost = 0.7040123770069366 A2 = [[0.0139382  0.94698022 0.95343387 0.06943878]]
cost = 0.7040123770069366 A2 = [[0.01113775 0.95504566 0.95980363 0.05965195]]
cost = 0.7040123770069366 A2 = [[0.00917421 0.96106501 0.96470519 0.05218652]]


In [21]:
A2, cashe = forward_prop(X, Y, parameters)

In [22]:
print(A2)


[[0.00773619 0.96571264 0.96858157 0.04632411]]


In [23]:
print(str(Y))

[[0 1 1 0]]


In [24]:
cost = compute_cost(A2, Y)

In [25]:
print(cost)

0.030502303692236003
