In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.autograd import Function
from torch.nn.modules.module import Module
from torch.nn.functional import fold, unfold
from torchvision.utils import make_grid
import math
import matplotlib.pyplot as plt


transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def softmax(x):  
    base = np.sum(np.exp(x), axis=1)
    base.resize(np.exp(x).shape[0], 1)
    return np.exp(x) / base

def initWeights(image_dim, d1, d2):
    # w1=np.random.uniform(low=-1, high=1, size=(image_dim * image_dim, d1))
    # w2=np.random.uniform(low=-1, high=1, size=(d1, d2))
    w1 = np.zeros((image_dim * image_dim, d1))
    w2 = np.zeros((d1,d2))
    return [w1,w2]

def forward(x, w1, w2):
    x = x.numpy()
    z1 = x @ w1 
    a1 = sigmoid(z1)
    z2 = a1 @ w2
    a2 = softmax(z2)
    return [z1, a1, z2, a2, w1, w2]

def backward(x, y, output, forwardParams):
    batch_size = y.shape[0]
    z1 = forwardParams[0]
    a1 = forwardParams[1]
    z2 = forwardParams[2]
    a2 = forwardParams[3]
    w1 = forwardParams[4]
    w2 = forwardParams[5]
    crossentropy_derivative = output - y

    dw2 = (1./batch_size) * np.matmul(a1.T, crossentropy_derivative)
    da1 = np.matmul(crossentropy_derivative, w2.T)

    dz1 = da1 * (sigmoid(z1) * (1 - sigmoid(z1)))
    dw1 = (1./batch_size) * np.matmul(x.T, dz1)
    return [dw1,dw2]

def update_weights(w1,w2, dw1, dw2, lr):
    w1 -= lr * dw1
    w2 -= lr * dw2
    return [w1,w2]

def compute_loss(y, y_hat):

    batch_size = y.shape[0]
    #y = y.numpy()
    loss = np.sum(np.multiply(y, np.log(y_hat)))
    loss = -(1./batch_size) * loss
    return loss

def predict(y, y_hat):
    predictions = 0
    for y1, y_hat1 in zip(y, y_hat):
        pred = np.argmax(y_hat1)
        if(pred == np.argmax(y1)):
            predictions = predictions + 1
        
    return predictions

def train(input_size, hiddenlayer_size, output_size, epochs, train_loader, test_loader, lr):
    weights = initWeights(input_size,hiddenlayer_size,output_size)
    lossArr = [];
    epochArr = []
    bsize = 0;
    loss = 0
    for epoch in range(epochs):
        for i, (data, target) in enumerate(train_loader):
            x = data
            y = target
            y = torch.nn.functional.one_hot(y,10)
            x = x.view(-1, input_size*input_size)
            forwardParams = forward(x, weights[0], weights[1])
            gradients = backward(x.numpy(), y.numpy(), forwardParams[3], forwardParams)
            weights = update_weights(weights[0], weights[1], gradients[0], gradients[1], lr)
            loss += compute_loss(y.numpy(), forwardParams[3])
            bsize = bsize + 1;
        if(epoch % 5 == 4):
            lossArr.append(loss/bsize)
            epochArr.append(epoch)
            print(lossArr, epochArr)
        
        correct = 0
        total = 0
        
        plt.plot(lossArr,epochArr)
        plt.xlabel('loss')
        plt.ylabel('epoch')
        
    for i, (data,target) in enumerate(data_loader_test):
        data = data.view(-1, input_size*input_size)
        yhat = forward(data, weights[0], weights[1])[3]
        predicted = np.argmax(yhat,axis  = 1)
        correct = correct + np.sum(np.equal(predicted , target.numpy()))
        total += yhat.shape[0];
    print("accuracy",correct/total)
                
    
        
    

   

In [None]:

transform = transforms.Compose([
            transforms.ToTensor(),
            ])
mnist = datasets.MNIST('./data', download = True, transform = transform)
data_loader = torch.utils.data.DataLoader(mnist,
                                          batch_size=32,
                                          shuffle=True,
                                          num_workers=2)
mnistTest = datasets.MNIST('./data', download = True, train=False, transform = transform)
data_loader_test = torch.utils.data.DataLoader(mnistTest,
                                          batch_size=32,
                                          shuffle=True,
                                          num_workers=2)
train(28,300,10,101,data_loader, data_loader_test,0.05)

[2.2851268148671364] [4]
[2.2851268148671364, 2.213472307795993] [4, 9]
