In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [3]:
X, y = make_blobs(n_samples=1000, n_features=2, centers=2)

In [8]:
ohe = OneHotEncoder(categories="auto")
y_hot = ohe.fit_transform(y.reshape(-1, 1)).todense()

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y_hot, test_size=0.33, random_state=42)

In [15]:
def generate_model(layers_list):
    model = {}
    model["input_size"] = layers_list[0]
    model["output_size"] = layers_list[-1]
    model["layers"] = []
    
    for i in range(1, len(layers_list)):
        layer = {}
        layer["w"] = np.zeros([layers_list[i-1], layers_list[i]])
        layer["b"] = np.zeros([1, layers_list[i]])
        model["layers"].append(layer)
    
    return model

In [16]:
m1 = generate_model([2, 3, 2])

In [24]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def softmax(z):
    expo = np.exp(z)
    total = np.sum(expo, axis=1, keepdims=True)
    return expo / total

In [22]:
val = sigmoid(np.array([-4, 0, 4]))

In [25]:
val = softmax(np.array([[1, 6, 10], [2, 5, 9]]))

In [39]:
def forword(model, x_data):
    
    layers = model["layers"]
    
    a = x_data
    
    activations = []
    
    for l, layer in enumerate(layers):
        z = np.dot(a, layer["w"]) - layer["b"]
        
        if l == (len(layers)-1):
            a = softmax(z)
        else:
            a = sigmoid(z)
        
        activations.append(a)
    
    return activations
        

In [95]:
def backward(x_data, y_data, model, activations, lr = .01):
    layers = model["layers"]
    
    for l in range(len(layers) - 1, -1, -1):
        if l == len(layers) - 1:
            delta = y_data - activations[l]
        else:
            print(np.dot(delta, layers[l+1]["w"].T).shape)
            delta = np.dot(delta, layers[l+1]["w"].T) * (activations[l] * (1 - activations[l]))
        
        if l != 0:
            deltaw = np.dot(activations[l-1].T, delta)
        else:
            deltaw = np.dot(x_data.T, delta)
            
        deltab = np.sum(delta, axis=0)/delta.shape[0]
        
        layers[l]["w"] -=  deltaw * lr
        layers[l]["b"] -= deltab * lr
        
        

In [96]:
m2 = generate_model([2, 3, 2])

In [102]:
# act = forword(m2, X_train)
# backward(X_train, y_train, m2, act)


In [103]:
def predictions(model, x_data):
    y_ = forword(model, x_data)[-1]
    return y_

def accuracy(model, x_data, y_data):
    pred = predictions(model, x_data)
    
    return (np.argmax(pred, axis=1) == np.argmax(y_data, axis=1)).mean()

In [107]:
print(predictions(m2, X_train))

[[9.99999549e-01 4.51469228e-07]
 [9.99999549e-01 4.51469228e-07]
 [9.99999549e-01 4.51469228e-07]
 ...
 [9.99999549e-01 4.51469228e-07]
 [9.99999549e-01 4.51469228e-07]
 [9.99999549e-01 4.51469228e-07]]


In [108]:
accuracy(m2, X_train, y_train)

0.47313432835820896

In [None]:
for 