In [1]:
from collections import OrderedDict 
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

%matplotlib inline
plt.style.use("ggplot")

In [2]:
"""
perturbation learning -> inspired by perturbation theory, the
study of small changes in a system that can result from
interacting with the system.
The idea in neural networks is to add some Gaussian noise to the network so that the
model becomes more robust during training; it can also help avoid overfitting.

gamma : parameter => default = 0.55
eta : learning_rate => default = 0.01
"""

def decay_var(eta : float =0.01, gamma : float =0.55, t : int = 0):
    """Return the annealed noise variance ``eta / (1 + t) ** gamma``.

    Parameters
    ----------
    eta : float
        Base scale of the noise (the value returned at ``t == 0``).
    gamma : float
        Decay exponent; larger values shrink the variance faster.
    t : int
        Non-negative step counter. The original body read an undefined
        name here, so the step is now an explicit argument.

    Raises
    ------
    ValueError
        If ``t`` is negative (the base ``1 + t`` could reach zero or
        produce a complex result for a fractional ``gamma``).
    """
    if t < 0:
        raise ValueError("t must be non-negative")
    return eta / ((1 + t) ** gamma)

In [3]:
"""
hebbian learning -> also called local learning; unlike backpropagation, its weight update does not depend on
other weights or on layer N + 1; instead, each weight learns independently.
It is popular in unsupervised learning for finding patterns in its environment (mostly data).
update w => w_new = w_old + ∆w
∆w -> ß * x * y
"""
# Placeholder output: apart from this line the cell holds only the notes string.
print(".")

.


In [4]:
def one_hot(y):
    """One-hot encode a 1-D collection of class labels.

    Labels are mapped to columns by their rank among the sorted unique
    values present in ``y``, so they do not have to be the contiguous
    integers ``0..k-1`` (the previous version indexed columns with the raw
    label values and failed with IndexError otherwise).

    Parameters
    ----------
    y : array-like
        Class labels; flattened to 1-D before encoding.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, n_unique_labels)`` with a single
        1 per row.
    """
    labels = np.asarray(y).ravel()
    # class_idx[i] is the position of labels[i] within the sorted unique labels.
    classes, class_idx = np.unique(labels, return_inverse=True)
    y_ohe = np.zeros((labels.size, classes.size))
    y_ohe[np.arange(labels.size), class_idx.ravel()] = 1
    return y_ohe

In [5]:
# Load the breast-cancer dataset and one-hot encode its target labels.
data = load_breast_cancer()
X = data.data
y = data.target
y_ohe = one_hot(y)

# Hold out 10% of the samples for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y_ohe, test_size=0.10, random_state=42
)

# Fit the min-max scaler on the training split only, then reuse it for validation.
sc = MinMaxScaler()
sc_train = sc.fit_transform(X_train)
sc_val = sc.transform(X_val)

In [6]:
def relu(z, derivative=False):
    """Rectified linear unit and its derivative.

    Parameters
    ----------
    z : array-like
        Pre-activation values.
    derivative : bool
        When True, return the derivative (1 where ``z > 0``, else 0)
        instead of the activation.

    Returns
    -------
    numpy.ndarray
        ``max(0, z)`` element-wise, or the 0/1 derivative mask in the same
        dtype as ``z``.

    Notes
    -----
    The derivative is built as a new array. The previous version overwrote
    ``z`` in place, which silently corrupted the caller's pre-activations.
    """
    z = np.asarray(z)
    if derivative:
        return (z > 0).astype(z.dtype)
    return np.maximum(0, z)

def sigmoid(z, derivative=False):
    """Logistic sigmoid and its derivative.

    Parameters
    ----------
    z : array-like
        Pre-activation values.
    derivative : bool
        When True, return ``sigmoid(z) * (1 - sigmoid(z))`` instead of the
        activation.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Element-wise result with the same shape as ``z``.
    """
    s = 1 / (1 + np.exp(-np.asarray(z)))
    if derivative:
        # d/dz sigmoid(z) = s * (1 - s); the earlier code computed s - (1 - s).
        return s * (1 - s)
    return s

In [7]:
"""
1 layer_in, 1 h_layer, 1 out_layer
"""

'\n1 layer_in, 1 h_layer, 1 out_layer\n'

In [20]:
# initialize weights
# Layer widths: n_features -> 32 -> 16 -> n_classes.
# Every parameter is drawn from N(0, 0.05**2) and then multiplied by
# sqrt(fan_in / fan_out) of its layer.
# NOTE(review): this scale factor is not a standard Xavier/He rule — confirm it is intended.
W1 = np.random.normal(loc=0., scale=0.05, size=(X.shape[1], 32)) * np.sqrt(X.shape[1] / 32)
b1 = np.random.normal(loc=0., scale=0.05, size=(1, 32)) * np.sqrt(X.shape[1] / 32)

W2 = np.random.normal(loc=0., scale=0.05, size=(32, 16)) * np.sqrt(32 / 16)
b2 = np.random.normal(loc=0., scale=0.05, size=(1, 16)) * np.sqrt(32 / 16)

W3 = np.random.normal(loc=0., scale=0.05, size=(16, y_ohe.shape[1])) * np.sqrt(16 / y_ohe.shape[1])
b3 = np.random.normal(loc=0., scale=0.05, size=(1, y_ohe.shape[1])) * np.sqrt(16 / y_ohe.shape[1])

# Parameter registry keyed by name. 'W3' previously pointed at W1 by mistake.
nn = {
    'W1': W1,
    'b1': b1,
    'W2': W2,
    'b2': b2,
    'W3': W3,
    'b3': b3,
}

In [22]:
# ffn: two ReLU hidden layers followed by a sigmoid output layer
# hidden layer 1
Z1 = sc_train @ W1 + b1
A1 = relu(Z1)

# hidden layer 2
Z2 = A1 @ W2 + b2
A2 = relu(Z2)

# output layer
Z3 = A2 @ W3 + b3
y_hat = sigmoid(Z3)

In [24]:
# Shape check on the forward-pass output: rows follow sc_train, columns follow W3's output width.
y_hat.shape

(512, 2)

In [None]:
# backprop
# Gradients of the mean loss w.r.t. every parameter of the 3-layer network.
# NOTE(review): using (y_hat - y_train) as the output delta assumes a sigmoid
# output trained with binary cross-entropy — confirm against the loss in use.
n_samples = len(y_train)

# Output layer
dZ3 = (y_hat - y_train) / n_samples
dW3 = np.dot(A2.T, dZ3)
db3 = np.sum(dZ3, axis=0, keepdims=True)

# Second hidden layer: push the error back through W3, then gate it by ReLU'(Z2).
# The mask is computed inline so Z2 is never modified.
dA2 = np.dot(dZ3, W3.T)
dZ2 = dA2 * (Z2 > 0)
dW2 = np.dot(A1.T, dZ2)
db2 = np.sum(dZ2, axis=0, keepdims=True)

# First hidden layer: same pattern through W2 and ReLU'(Z1).
dA1 = np.dot(dZ2, W2.T)
dZ1 = dA1 * (Z1 > 0)
dW1 = np.dot(sc_train.T, dZ1)
db1 = np.sum(dZ1, axis=0, keepdims=True)

In [10]:
class Linear :
    """Fully connected layer computing ``inputs @ w`` (plus ``b`` when enabled).

    Parameters
    ----------
    inNodes : int
        Number of input features (rows of ``w``).
    outNodes : int
        Number of output units (columns of ``w``, length of ``b``).
    use_bias : bool
        When True a bias vector of shape ``(outNodes,)`` is created;
        otherwise ``b`` is ``None``.
    w_init : str
        Initializer name, case-insensitive: ``"normal"``, ``"uniform"``,
        ``"he_normal"`` or ``"he_uniform"``.

    Raises
    ------
    ValueError
        If ``w_init`` is not one of the supported initializers.
    """

    _INITIALIZERS = ("normal", "uniform", "he_normal", "he_uniform")

    def __init__(self, 
                 inNodes : int, 
                 outNodes : int, 
                 use_bias : bool =True,
                 w_init : str = "uniform"):
        self.inNodes = inNodes
        self.outNodes = outNodes
        self.use_bias = use_bias
        self.w_init = w_init.lower()
        # Validate against the lower-cased name so every initializer is
        # case-insensitive (previously only "normal" was).
        if self.w_init not in self._INITIALIZERS:
            raise ValueError("Weights initializer is not valid")

        # Weights are sampled before the bias, matching the original draw order.
        self.w = self._sample((self.inNodes, self.outNodes))
        # The bias always has shape (outNodes,); "he_normal" used to draw a
        # full (inNodes, outNodes) matrix here, which broke forward().
        self.b = self._sample((self.outNodes,)) if self.use_bias else None

    def _sample(self, shape):
        """Draw an array of ``shape`` using the configured initializer."""
        if self.w_init == "normal":
            return np.random.normal(loc=0., scale=0.05, size=shape)
        if self.w_init == "uniform":
            return np.random.uniform(low=-0.05, high=0.05, size=shape)
        if self.w_init == "he_normal":
            # NOTE(review): the 0.05 base scale is kept from the original, so the
            # effective std is 0.05 * sqrt(2 / inNodes), not sqrt(2 / inNodes).
            return np.random.normal(loc=0., scale=0.05, size=shape) * np.sqrt(2 / self.inNodes)
        # "he_uniform"
        limits = np.sqrt(6 / self.inNodes)
        return np.random.uniform(low=-limits, high=limits, size=shape)

    def forward(self, inputs):
        """Apply the layer to ``inputs``, cache the result in ``self.out`` and return it."""
        self.out = np.matmul(inputs, self.w)
        if self.use_bias:
            self.out = self.out + self.b
        return self.out


In [11]:
# Stack of three dense layers: n_features -> 16 -> 8 -> n_classes (default initializer, bias enabled).
x1 = Linear(inNodes=sc_train.shape[1], outNodes=16)
x2 = Linear(inNodes=16, outNodes=8)
out = Linear(inNodes=8, outNodes=y_ohe.shape[1])

In [19]:
# Chain the first two layers' weight matrices by hand (no bias, no activation).
z1 = sc_train @ x1.w
z2 = z1 @ x2.w