In [21]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import  OneHotEncoder
from sklearn.model_selection import train_test_split
np.random.seed(2)

In [22]:
class Module:
    def __init__(self,module_sequence):
        self.module_sequence = module_sequence
        self.retain_forward_graph = []
    
    def clear_grad(self):
        self.retain_forward_graph = []
    
    def forward(self):
        pass

class Linear:
    def __init__(self,input_dim,output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = np.random.randn(output_dim,input_dim)
        self.bias = np.random.randn(output_dim)
    
    def forward(self,x):
        weight_output = np.dot(self.weight,x)
        output = weight_output+self.bias
        return output
    
    def backward(self,retain_forward,learning_rate,loss,):
        bias_delta = loss
        weight_delta = np.dot(np.expand_dims(loss, 1),np.expand_dims(retain_forward, 0))
#         print(weight_delta.shape)
        self.weight += learning_rate*weight_delta
        self.bias += learning_rate*bias_delta
        new_loss = np.sum(np.transpose(np.transpose(self.weight)*loss),axis=0)
        return new_loss

In [23]:
class BPNN(Module):
    def __init__(self,module_sequence):
        super().__init__(module_sequence)
        
    def forward(self,x):
        for module in module_sequence:
            self.retain_forward_graph.append(x)
            x = module.forward(x)
        return x
    
    def __call__(self,x):
        return self.forward(x)

In [24]:
class SGD:
    def __init__(self,model,learning_rate,loss_fn):
        self.learning_rate = learning_rate
        self.loss_fn = loss_fn
        self.model = model
        
    def step(self):
        loss = self.loss_fn.gradient
        for idx,module in enumerate(reversed(self.model.module_sequence),start=1):
            retain_output = self.model.retain_forward_graph[len(model.retain_forward_graph)-idx]
            loss = module.backward(retain_output,self.learning_rate,loss) 
    
    def clear_grad(self):
        self.model.clear_grad()

In [25]:
class MSELoss:
    def __init__(self,reduction="mean"):
        self.reduction = reduction
        self.gradient = None
    
    def __call__(self,x,target):
        self.gradient = -x + target
        if self.reduction=="mean":
            return 0.5 * np.mean((target - x) ** 2)
        elif self.reduction=="sum":
            return 0.5 * np.sum((target - x) ** 2)

class BCELoss:
    def __init__(self,reduction=None):
        self.reduction = reduction
        self.gradient = None
    
    def __call__(self,x,target):
        output = self.softmax(x)
        self.gradient = -output + target
        if self.reduction==None:
            return self.cross_entropy(output,target)
    
    def softmax(self,x):
        return np.exp(x) / np.sum(np.exp(x))
    
    def cross_entropy(self,y,target):
        return -np.sum(target * np.log(y))

In [26]:
data = datasets.load_iris()
X = data.data
onehot = OneHotEncoder()
y = data.target
y = np.expand_dims(y,1)
onehot.fit([[0],[1],[2]])
y = onehot.transform(y).toarray()

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)

In [27]:
module_sequence = [
    Linear(4,32),
    Linear(32,3),
]
lr = 0.003
epochs = 20

In [28]:
model = BPNN(module_sequence)
loss_fn = BCELoss()
optim = SGD(model,lr,loss_fn)

In [29]:
for epoch in range(epochs):
    acc = 0
    for i in range(len(X_train)):
        data = X_train[i]
        target = y_train[i]
        output = model(data)
        if np.argmax(loss_fn.softmax(output))==np.argmax(target): acc+=1
        loss = loss_fn(output,target)
#         print(loss)
        optim.step()
        optim.clear_grad()
    print(acc/len(X_train))

0.5714285714285714
0.6875
0.7232142857142857
0.8482142857142857
0.7857142857142857
0.7857142857142857
0.8660714285714286
0.7946428571428571
0.8482142857142857
0.8482142857142857
0.8392857142857143
0.8839285714285714
0.875
0.8928571428571429
0.9017857142857143
0.9375
0.9375
0.9285714285714286
0.9196428571428571
0.9196428571428571


In [30]:
acc = 0
for i in range(len(X_test)):
    data = X_test[i]
    target = y_test[i]
    output = model(data)
    if np.argmax(loss_fn.softmax(output))==np.argmax(target): acc+=1
    optim.clear_grad()
print(acc/len(X_test))

0.9736842105263158
