In [175]:
import numpy as np
import torch
import torch.nn as nn
import sys

In [176]:
def wSum(X,W):
    h = torch.from_numpy(X)
    z = torch.matmul(W,h)
    return z

In [177]:
def activate(x):
    return 1/(1+torch.exp(-x))

In [178]:
def forwardStep(X,W_list):
    h = torch.from_numpy(X)
    for W in W_list:
        z = torch.matmul(W,h)
        h = activate(z)
    return h

In [179]:
def updateParams(W_list, dW_list, lr):
    with torch.no_grad():
        for i in range(len(W_list)):
            W_list[i] -= lr*dW_list[i]
    return W_list

In [180]:
def trainNN_sgd(X,y,W_list,loss_fn,lr=0.0001,nepochs=100):
    for epoch in range(nepochs):
        avgLoss = []
        for i in range(len(y)):
            Xin = X[i,:]
            yTrue = y[i]
            y_hat = forwardStep(Xin,W_list)
            loss = loss_fn(y_hat,torch.tensor(yTrue,dtype=torch.double))
            loss.backward()
            avgLoss.append(loss.item())
            sys.stdout.flush()
            dW_list = []
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data)
            W_list = updateParams(W_list,dW_list,lr)
            for j in range(len(W_list)):
                W_list[j].grad.data.zero_()
        
        print("loss after epoch=%d: %f" %(epoch, np.mean(np.array(avgLoss))))
    return W_list           


In [181]:
def trainNN_batch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100):
    n = len(y)
    for epoch in range(nepochs):
        loss = 0
        for i in range(len(y)):
            Xin = X[i,:]
            yTrue = y[i]
            y_hat = forwardStep(Xin,W_list)
            loss += loss_fn(y_hat,torch.tensor(yTrue,dtype=torch.double))
        loss = loss/n
        loss.backward()
        sys.stdout.flush()
        dW_list = []
        for j in range(len(W_list)):
            dW_list.append(W_list[j].grad.data)
        W_list = updateParams(W_list,dW_list,lr)
        for j in range(len(W_list)):
            W_list[j].grad.data.zero_()
        
        print("loss after epoch=%d: %f" %(epoch, loss))
    return W_list

In [182]:
def trainNN_minibatch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100,batchSize=16):
    n = len(y)
    numBatches = n//batchSize
    
    for epoch in range(nepochs):
        for batch in range(numBatches):
            X_batch = X[batch*batchSize:(batch+1)*batchSize,:]
            y_batch = y[batch*batchSize:(batch+1)*batchSize]
            loss = 0
            for i in range(batchSize):
                Xin = X_batch[i,:]
                yTrue = y_batch[i]
                y_hat = forwardStep(Xin,W_list)
                loss += loss_fn(y_hat,torch.tensor(yTrue,dtype=torch.double))
            loss = loss/batchSize
            loss.backward()
            sys.stdout.flush()
            dW_list = []
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data)
            W_list = updateParams(W_list,dW_list,lr)
            for j in range(len(W_list)):
                W_list[j].grad.data.zero_()
            
            print("loss after epoch=%d: %f" %(epoch, loss/numBatches))
    return W_list

In [183]:
inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

W1 = torch.tensor(np.random.uniform(0,1,(2,inputDim)),requires_grad=True)
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True)
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True)

W_list = []
W_list.append(W1)
W_list.append(W2)
W_list.append(W3)

loss_fn = nn.BCELoss()
#W_list = trainNN_sgd(X,y,W_list,loss_fn,lr=0.0001,nepochs=100)
#W_list = trainNN_batch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100)
W_list = trainNN_minibatch(X,y,W_list,loss_fn,lr=0.0001,nepochs=100)


loss after epoch=0: 0.014858
loss after epoch=0: 0.022018
loss after epoch=0: 0.016711
loss after epoch=0: 0.016718
loss after epoch=0: 0.009614
loss after epoch=0: 0.018516
loss after epoch=0: 0.011418
loss after epoch=0: 0.014911
loss after epoch=0: 0.016657
loss after epoch=0: 0.018527
loss after epoch=0: 0.025518
loss after epoch=0: 0.011442
loss after epoch=0: 0.021950
loss after epoch=0: 0.020259
loss after epoch=0: 0.018448
loss after epoch=0: 0.009662
loss after epoch=0: 0.014934
loss after epoch=0: 0.018531
loss after epoch=0: 0.011374
loss after epoch=0: 0.016597
loss after epoch=0: 0.023847
loss after epoch=0: 0.018540
loss after epoch=0: 0.020143
loss after epoch=0: 0.014943
loss after epoch=0: 0.016734
loss after epoch=0: 0.020216
loss after epoch=0: 0.016666
loss after epoch=0: 0.018473
loss after epoch=0: 0.016719
loss after epoch=0: 0.016676
loss after epoch=0: 0.018483
loss after epoch=0: 0.018421
loss after epoch=0: 0.016725
loss after epoch=0: 0.020228
loss after epo

In [184]:
inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

In [185]:
X.shape

(1000, 10)

In [186]:
y.shape

(1000,)

In [187]:
np.unique(y)

array([0, 1])

In [188]:
W = torch.tensor(np.random.uniform(0,1,inputDim),requires_grad=True)

In [189]:
z = wSum(X[0,:],W)

In [190]:
print(z)

tensor(2.6729, dtype=torch.float64, grad_fn=<DotBackward0>)


In [191]:
inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

W1 = torch.tensor(np.random.uniform(0,1,(2,inputDim)),requires_grad=True)
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True)
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True)

W_list = []
W_list.append(W1)
W_list.append(W2)
W_list.append(W3)

z = forwardStep(X[0,:],W_list)
print(z)

tensor(0.7439, dtype=torch.float64, grad_fn=<MulBackward0>)


In [192]:
#activation_fun = nn.Sigmoid()
activation_fun = nn.ReLU()
x = 100*torch.randn(1)
y = torch.randint(0,2,(1,),dtype=torch.float)
y_hat = activation_fun(x)
loss_fun = nn.BCELoss()
loss_value = loss_fun(y_hat,y)
print(loss_value.item())
#print(x,activation_fun(x))


100.0


In [193]:
print(y_hat,y)

tensor([0.]) tensor([1.])


In [194]:
m = nn.Sigmoid()
loss_fun = nn.BCELoss()
lr = 0.0001
x = torch.randn(1)
y = torch.randint(0,2,(1,),dtype=torch.float)
w = torch.randn(1,requires_grad=True)

In [195]:
nIter = 100
for i in range(nIter):
    y_hat = m(w*x)
    loss = loss_fun(y_hat,y)
    loss.backward()
    dw = w.grad.data
    with torch.no_grad():
        w -= lr*dw
    w.grad.data.zero_()
    print(loss.item())

1.3076229095458984
1.307604193687439
1.30758535861969
1.3075666427612305
1.307547926902771
1.3075292110443115
1.3075100183486938
1.3074913024902344
1.307472586631775
1.3074536323547363
1.3074349164962769
1.3074162006378174
1.3073972463607788
1.3073785305023193
1.3073595762252808
1.3073408603668213
1.3073221445083618
1.3073031902313232
1.3072844743728638
1.3072655200958252
1.3072468042373657
1.3072280883789062
1.3072091341018677
1.3071904182434082
1.3071715831756592
1.307152509689331
1.3071337938308716
1.307115077972412
1.3070963621139526
1.3070775270462036
1.3070586919784546
1.3070399761199951
1.307021141052246
1.307002067565918
1.3069833517074585
1.306964635848999
1.3069459199905396
1.306926965713501
1.3069082498550415
1.306889533996582
1.3068703413009644
1.3068516254425049
1.3068329095840454
1.306814193725586
1.3067952394485474
1.306776523590088
1.3067578077316284
1.3067388534545898
1.3067201375961304
1.3067011833190918
1.3066824674606323
1.3066636323928833
1.3066449165344238
1.30662

In [213]:
from torch.utils.data import TensorDataset, DataLoader

In [214]:
inputDim = 10
n = 1000
X = np.random.rand(n,inputDim)
y = np.random.randint(0,2,n)

tensor_x = torch.Tensor(X)
tensor_y = torch.Tensor(y)

Xy = TensorDataset(tensor_x,tensor_y)
Xy_loader = DataLoader(Xy,batch_size=16,shuffle=True,drop_last=True)

In [215]:
model = nn.Sequential(
    nn.Linear(inputDim,200),
    nn.ReLU(),
    nn.Linear(200,100),
    nn.Tanh(),
    nn.Linear(100,1),
    nn.Sigmoid()
)

In [216]:
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)

In [217]:
loss_fn = nn.BCELoss()

In [219]:
nepochs = 100
for epoch in range(nepochs):
    for X,y in Xy_loader:
        batch_size = X.shape[0]
        y_hat = model(X.view(batch_size,-1))
        loss = loss_fn(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(float(loss))

ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 1])) is deprecated. Please ensure they have the same size.

In [None]:
with torch.no_grad():
    xt = torch.tensor(np.random.rand(1,inputDim))
    y2 = model(xt.float())
    print(y2.detach().numpy()[0][0])