In [2]:
# importing required packages
import numpy as np
import torch
import torch.nn as nn
import sys

In [3]:
# function to compute weighted sum
def wSum(X, W):
    h = torch.from_numpy(X)
    z = torch.matmul(W,h)
    return z

In [8]:
# function to compute weighted sum of neurons of a layer
def forwardStep(X, W_list):
    h = torch.from_numpy(X)
    for W in W_list:
        h = torch.matmul(W,h)
    return h

In [4]:
inputDim = 10
n = 1000
X = np.random.rand(n, inputDim)
y = np.random.randint(0, 2, n)

In [5]:
X.shape

(1000, 10)

In [6]:
y.shape

(1000,)

In [7]:
np.unique(y)

array([0, 1])

In [9]:
W = torch.tensor(np.random.uniform(0, 1, inputDim), requires_grad=True)

In [10]:
z = wSum(X[0,:], W)
print(z)

tensor(2.2759, dtype=torch.float64, grad_fn=<DotBackward0>)

#### Deep Neural Network Implementation without Activation Function

**Intialize Weights**

In [11]:
# W1 is all the weigths vector for all the neurons in computation layer 1 
W1 = torch.tensor(np.random.uniform(0, 1, (2, inputDim)), requires_grad=True)

# W2 is all the weigths vector for all the neurons in computation layer 2 
W2 = torch.tensor(np.random.uniform(0, 1, (3, 2)), requires_grad=True)

# W3 is all the weigths vector for all the neurons in computation layer 3 (output layer) 
W3 = torch.tensor(np.random.uniform(0, 1, (1, 3)), requires_grad=True)

In [12]:
# define list of weights
W_list = [W1, W2, W3]
W_list

[tensor([[0.5021, 0.0525, 0.1393, 0.6093, 0.9263, 0.7623, 0.8105, 0.7745, 0.9285,
          0.7656],
         [0.2872, 0.0415, 0.0759, 0.0043, 0.2576, 0.4515, 0.2372, 0.8354, 0.7372,
          0.5003]], dtype=torch.float64, requires_grad=True),
 tensor([[0.7671, 0.0330],
         [0.7451, 0.0602],
         [0.1711, 0.8942]], dtype=torch.float64, requires_grad=True),
 tensor([[0.5432, 0.6121, 0.4518]], dtype=torch.float64, requires_grad=True)]

In [14]:
# compute forward step
forwardStep(X[0,:], W_list)

tensor([3.6741], dtype=torch.float64, grad_fn=<MvBackward0>)

As we have single neuron in output layer. Let's say if we have 5 neuron in output layer then

In [15]:
# W3 is all the weigths vector for all the neurons in computation layer 3 (output layer) 
W3 = torch.tensor(np.random.uniform(0, 1, (5, 3)), requires_grad=True)

# define list of weights
W_list = [W1, W2, W3]

# compute forward step
forwardStep(X[0,:], W_list)

tensor([4.0403, 2.6145, 4.7996, 0.8383, 3.5957], dtype=torch.float64,
       grad_fn=<MvBackward0>)

As we can see 5 output for each neuron

### Activation Functions

In [16]:
activationFunc = nn.Sigmoid()
x = torch.randn(1)
print(x, activationFunc(x))

tensor([-0.2100]) tensor([0.4477])


In [17]:
x = 100 * torch.randn(1)
print(x, activationFunc(x))

tensor([98.7952]) tensor([1.])


In [18]:
activationFunc = nn.ReLU()
x = torch.randn(1)
print(x, activationFunc(x))

tensor([0.5339]) tensor([0.5339])


In [19]:
x = -1*torch.randn(1)
print(x, activationFunc(x))

tensor([-0.0939]) tensor([0.])


### Loss Function

In [22]:
activationFunc = nn.Sigmoid()
x = 100*torch.randn(1)
y = torch.randint(0, 2, (1,), dtype=torch.float) # Binary Output
y_hat = activationFunc(x)
lossFunc = nn.BCELoss() # Binary Cross Entropy Loss
loss_value = lossFunc(y_hat, y)
print(loss_value.item())

23.79510498046875


### Learning Rate

In [26]:
activationFunc = nn.Sigmoid()
lossFunc = nn.BCELoss() # Binary Cross Entropy Loss
learningRate = 0.0001
x = torch.randn(1)
y = torch.randint(0, 2, (1,), dtype=torch.float) # Binary Output
w = torch.randn(1, requires_grad=True)

In [28]:
nIter = 1000
for index in range(nIter):
    y_hat = activationFunc(w*x)
    loss = lossFunc(y_hat, y)
    loss.backward()
    dw = w.grad.data
    with torch.no_grad():
        w -= learningRate*dw
    w.grad.data.zero_()
    print(loss.item())

0.8248857855796814
0.8248825073242188
0.8248792290687561
0.8248760104179382
0.8248727321624756
0.8248694539070129
0.8248661756515503
0.8248629570007324
0.8248596787452698
0.8248564004898071
0.8248531222343445
0.8248498439788818
0.824846625328064
0.8248433470726013
0.8248400688171387
0.824836790561676
0.8248335719108582
0.8248302936553955
0.8248270153999329
0.8248237371444702
0.8248205184936523
0.8248172402381897
0.824813961982727
0.8248106837272644
0.8248074650764465
0.8248041868209839
0.8248009085655212
0.8247976303100586
0.8247944116592407
0.8247911334037781
0.8247878551483154
0.8247845768928528
0.8247813582420349
0.8247780799865723
0.8247748017311096
0.824771523475647
0.8247683048248291
0.8247650265693665
0.8247617483139038
0.8247584700584412
0.8247552514076233
0.8247518539428711
0.8247485756874084
0.8247452974319458
0.8247420191764832
0.8247388005256653
0.8247355222702026
0.82473224401474
0.8247289657592773
0.824725866317749
0.8247225880622864
0.8247193098068237
0.8247160911560059


In [29]:
def activate(x):
    return 1/(1+torch.exp(-x))

In [30]:
def updateParams(W_list, dW_list, lr):
    with torch.no_grad():
        for index in range(len(W_list)):
            W_list[index] -= lr*dW_list[index]
    return W_list

In [32]:
# function to compute weighted sum of neurons of a layer
def forwardStepNN(X, W_list):
    h = torch.from_numpy(X)
    for W in W_list:
        z = torch.matmul(W,h)
        h = activate(z)
    return h

In [44]:
def trainNN_sgd(X, y, W_list, loss_fn, lr=0.0001, epochs=100):
    for epoch in range(epochs):
        avgLoss = []
        for i in range(len(y)):
            X_in = X[i,:]
            yTrue = y[i]
            print(yTrue)
            y_hat = forwardStepNN(X_in, W_list)
            print(y_hat)
            loss = loss_fn(y_hat, torch.tensor(yTrue, dtype=torch.double))
            loss.backword()
            avgLoss.append(loss.item())
            sys.stdout.flush()
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data)
            W_list = updateParams(W_list, dW_list, lr)
            for k in range(len(W_list)):
                W_list[k].grad.data.zero_()
        print('Loss after epoch=%d: %f' %(epoch, np.mean(np.array(avgLoss))))
    return W_list

In [45]:
# Stocastic Gradient Decent
inputDim = 10
n = 1000
X = np.random.rand(n, inputDim)
y = np.random.randint(0, 2, n)

# W1 is all the weigths vector for all the neurons in computation layer 1 
W1 = torch.tensor(np.random.uniform(0, 1, (2, inputDim)), requires_grad=True)

# W2 is all the weigths vector for all the neurons in computation layer 2 
W2 = torch.tensor(np.random.uniform(0, 1, (3, 2)), requires_grad=True)

# W3 is all the weigths vector for all the neurons in computation layer 3 (output layer) 
W3 = torch.tensor(np.random.uniform(0, 1, (1, 3)), requires_grad=True)

# define list of weights
W_list = [W1, W2, W3]

loss_fn = nn.BCELoss() # Binary Cross Entropy Loss
trainNN_sgd(X, y, W_list, loss_fn, lr=0.0001, epochs=100)

0
tensor([0.5584], dtype=torch.float64, grad_fn=<MulBackward0>)


ValueError: Using a target size (torch.Size([])) that is different to the input size (torch.Size([1])) is deprecated. Please ensure they have the same size.