# 1) MLP (multi layer perceptron)

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import numpy as np

In [2]:
iris = datasets.load_iris()
x = iris.data
y = iris.target
x_train , x_test , y_train , y_test = train_test_split(x , y)

In [3]:
clf = MLPClassifier(hidden_layer_sizes = (20,) , max_iter = 2000 )
## We can't use different activation function for different hidden layers in MLPClassifier
## y_train is not one hot encoded but still algorithm will take care of it
clf.fit(x_train , y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(20,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=2000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [4]:
clf.score(x_test , y_test)

1.0

In [5]:
print(len(clf.coefs_))
clf.coefs_
## this not include the biases

2


[array([[ 4.21437183e-01, -2.43333585e-01, -1.65301206e-01,
          1.05311109e-01,  3.91025514e-01, -1.03884267e-01,
         -2.99333080e-02, -8.51611747e-03,  7.20377915e-02,
          6.01646091e-01, -4.83492645e-04,  3.47012095e-01,
         -5.20502043e-02, -6.06912084e-02,  5.55551833e-01,
         -3.81439453e-03,  4.95750337e-01,  2.76454649e-01,
         -2.67923644e-01, -5.91872528e-22],
        [-5.78311661e-01,  2.82053208e-03,  5.20548540e-01,
          7.23018191e-01, -3.99872448e-01, -1.81662417e-02,
         -1.94602241e-02, -4.78672496e-06,  4.93081804e-02,
          6.79055461e-01, -2.50302556e-03, -3.65661326e-01,
          7.00150289e-01, -7.51968638e-01,  6.18523015e-01,
         -6.68393052e-08, -3.69926364e-01, -4.93167715e-01,
          1.20227090e-01, -1.51472542e-18],
        [ 8.30725790e-01,  1.48418748e-01, -2.06968781e-02,
          3.12101290e-01,  4.68794215e-01, -5.59762908e-02,
         -7.40720009e-02,  2.73289273e-04,  3.13379868e-02,
         -8.

In [6]:
clf.coefs_[0].shape , clf.coefs_[1].shape

((4, 20), (20, 3))

In [7]:
## intercepts are here represent as biases
clf.intercepts_[0].shape , clf.intercepts_[1].shape

((20,), (3,))

# 2) Forward Propogation && Backward Propagation

### a) No - Hidden Layer

In [8]:
x = np.array([[0 , 0] , [0 , 1] , [1 , 0] , [1 , 1]])
y = np.array([[0 , 0 , 0 , 1]]).T
x.shape , y.shape

((4, 2), (4, 1))

In [9]:
def sig(z):
    return 1 / (1 + np.exp(-z))

In [10]:
def derivativeSig(z):
    return sig(z) * (1 - sig(z))

In [11]:
# no hidden layer weights 
weights = 2 * np.random.random((2 , 1)) - 1
bias = 2 * np.random.random(1) - 1
lr = 0.1
## (2 , 1) && 1 is to select the shape of random no. which we want
## multiply by 2 and then subtract it from 1 will basically change the range from -1 to 1
weights , bias

(array([[-0.33432302],
        [-0.04496778]]), array([0.97089544]))

In [12]:
for iter in range(3000):
    ### forward propogation without any hidden layer
    output0 = x
    output = sig(np.dot(output0 , weights) + bias)
    # print(output)

    ### Backward propagation without any hidden layer
    first_term = output - y
    input_for_last_layer = np.dot(output0 , weights) + bias
    second_term = derivativeSig(input_for_last_layer)
    first_two = first_term + second_term
    # print(first_term.shape , second_term.shape , first_two.shape)

    ### update weights and bias
    changes = np.array([[0.0] , [0.0]])
    for i in range(2):
        for j in range(4):
            changes[i][0] += first_two[j][0] * output0[j][i]
    weights = weights - lr * changes

    bias_change = 0.0
    for j in range(4):
        bias_change += first_two[j][0] * 1
    bias = bias - lr * bias_change
    
output = sig(np.dot(x , weights) + bias)
weights , bias , output

(array([[6.75176914],
        [6.75176928]]), array([-11.68835591]), array([[8.39088720e-06],
        [7.12788984e-03],
        [7.12788887e-03],
        [8.59987062e-01]]))

In [13]:
## a little optimisation in the above code using vector multiplication
for iter in range(3000):
    ### forward propogation without any hidden layer
    output0 = x
    output = sig(np.dot(output0 , weights) + bias)
    ### Backward propagation without any hidden layer
    first_term = output - y
    input_for_last_layer = np.dot(output0 , weights) + bias
    second_term = derivativeSig(input_for_last_layer)
    first_two = first_term + second_term
    ### update weights and bias
    changes = np.dot(output0.T , first_two)
    weights = weights - lr * changes
    bias_change = np.sum(first_two)
    bias = bias - lr * bias_change
    
output = sig(np.dot(x , weights) + bias)
weights , bias , output

(array([[7.86464194],
        [7.86464195]]), array([-13.51296413]), array([[1.35329867e-06],
        [3.51105255e-03],
        [3.51105253e-03],
        [9.01705488e-01]]))

### b) Hidden Layer is there

In [24]:
x = np.array([[0 , 0] , [0 , 1] , [1 , 0] , [1 , 1]])
y = np.array([[0 , 0 , 0 , 1]]).T
print(x.shape , y.shape)

wh = 2 * np.random.random((2 , 2)) - 1
bh = 2 * np.random.random((1 , 2)) - 1
wo = 2 * np.random.random((2 , 1)) - 1
bo = 2 * np.random.random((1 , 1)) - 1
lr = 0.1

(4, 2) (4, 1)


In [25]:
for iter in range(1000):
    # forward propagation with one hidden layer
    output0 = x
    inputHidden = np.dot(output0 , wh) + bh
    outputHidden = sig(inputHidden)
    # print(np.dot((output0 , wh)).shape)
    # print(temp.shape)
    # print(bh.shape)
    inputForOutputLayer = np.dot(outputHidden , wo) + bo
    output = sig(inputForOutputLayer)
    # output



    # backward propagation with one hidden layer
    first_term_output_layer = output - y
    second_term_output_layer = derivativeSig(inputForOutputLayer)
    first_two_output_layer = first_term_output_layer * second_term_output_layer


    first_term_hidden_layer = np.dot(first_two_output_layer , wo.T)
    second_term_hidden_layer = derivativeSig(inputHidden)
    first_two_hidden_layer = first_term_hidden_layer * second_term_hidden_layer


    changes_output = np.dot(outputHidden.T , first_two_output_layer)
    changes_output_bias = np.sum(first_two_output_layer , keepdims = True , axis = 0)

    changes_hidden = np.dot(output0.T , first_two_hidden_layer)
    changes_hidden_bias = np.sum(first_two_hidden_layer , keepdims = True , axis = 0)



    wo = wo - lr * changes_output
    bo = bo - lr * changes_output_bias

    wh = wh - lr * changes_hidden
    bh = bh - lr * changes_hidden_bias
    
    
output0 = x
inputHidden = np.dot(output0 , wh) + bh
outputHidden = sig(inputHidden)
inputForOutputLayer = np.dot(outputHidden , wo) + bo
output = sig(inputForOutputLayer)
output , wh , bh , wo , bo
## see the below output and figure out the function which it learn ( like and , or , nor).

(array([[0.13401114],
        [0.29650527],
        [0.23948181],
        [0.42544316]]), array([[-1.45755448, -0.10165064],
        [-0.83949521, -1.93504426]]), array([[0.56890557, 0.12851258]]), array([[-1.9337132 ],
        [-1.54489806]]), array([[0.19076282]]))

In [22]:
## some calculation
x = np.array([[0 , 0] , [0 , 1] , [1 , 0] , [1 , 1]])
y = np.array([[1 , 1]])
x , y
print(x , '\n')
print(y , '\n')
print(x + y)

[[0 0]
 [0 1]
 [1 0]
 [1 1]] 

[[1 1]] 

[[1 1]
 [1 2]
 [2 1]
 [2 2]]
