In [107]:
import pandas as pd
import numpy as np
import torch
import random
import math

## Build the model

In [108]:
class Model(torch.nn.Module):
    """Small fully connected network mapping 2 inputs to 1 output.

    Architecture:
    Linear(2, 10) -> Sigmoid -> Linear(10, 10) -> Sigmoid -> Linear(10, 1).
    """

    def __init__(self):
        super().__init__()
        # All layers live in a single Sequential container named ``dense``.
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(2, 10),
            torch.nn.Sigmoid(),
            torch.nn.Linear(10, 10),
            torch.nn.Sigmoid(),
            torch.nn.Linear(10, 1),
        )

    def forward(self, x):
        """Run ``x`` through the dense stack and return its output."""
        return self.dense(x)

In [109]:
# Instantiate the network and print its layer structure.
model=Model()
print(model)

Model(
  (dense): Sequential(
    (0): Linear(in_features=2, out_features=10, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=10, out_features=1, bias=True)
  )
)


In [110]:
# Draw one random 2-D input point with both coordinates in [0, 1).
# NOTE: no seed is set in the visible cells, so the point differs on every run.
X = np.array([random.random(), random.random()])
# Regression target for that point: the mean of the squared coordinates.
Y = np.array([(math.pow(X[0], 2) + math.pow(X[1], 2)) / 2])

In [111]:
# Optimisation settings.
LEARNING_RATE = 0.01
TRAIN_TIMES = 5

# The single training sample, converted to float32 tensors.
trainX = torch.tensor(X, dtype=torch.float32)
trainY = torch.tensor(Y, dtype=torch.float32)

# Mean-squared-error loss and an Adam optimiser over all model parameters.
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

## Loss with batch size 1

In [141]:
# Inspect the sampled input point.
X

array([0.97436534, 0.31171513])

In [112]:

# Clear any gradients left over from a previous backward pass.
optimizer.zero_grad()

# Forward pass on the single training sample, then the MSE loss.
prediction = model(trainX)
loss = loss_func(prediction, trainY)

# Back-propagate the loss to fill ``param.grad`` for every parameter.
loss.backward()
# Update the parameters to minimise the loss. Left disabled here, so the
# weights are not changed by this cell.
#optimizer.step()

# ``detach()`` is the supported way to read a value out of the autograd graph
# (``.data`` bypasses autograd's safety checks).
print(loss.detach().numpy())

0.6471283


In [113]:
# Square of the scalar loss value (scratch check; the MSE loss is already a squared error).
loss.data.numpy()**2

0.41877502

In [114]:
# Flatten every parameter's gradient to 1-D, in model.parameters() order.
grads = [param.grad.view(-1) for param in model.parameters()]

## Get gradients

In [115]:
# Display the flattened gradients collected from autograd.
grads

[tensor([-0.0016, -0.0005, -0.0013, -0.0004,  0.0073,  0.0023, -0.0128, -0.0041,
          0.0217,  0.0069, -0.0148, -0.0047,  0.0075,  0.0024, -0.0180, -0.0058,
          0.0033,  0.0011,  0.0012,  0.0004]),
 tensor([-0.0017, -0.0014,  0.0074, -0.0131,  0.0223, -0.0152,  0.0077, -0.0185,
          0.0034,  0.0013]),
 tensor([ 0.0025,  0.0053,  0.0053,  0.0044,  0.0034,  0.0037,  0.0050,  0.0029,
          0.0046,  0.0028,  0.0028,  0.0057,  0.0058,  0.0048,  0.0037,  0.0040,
          0.0054,  0.0031,  0.0050,  0.0031,  0.0065,  0.0135,  0.0136,  0.0113,
          0.0087,  0.0095,  0.0128,  0.0073,  0.0119,  0.0073, -0.0174, -0.0363,
         -0.0365, -0.0303, -0.0234, -0.0254, -0.0342, -0.0197, -0.0318, -0.0195,
          0.0389,  0.0812,  0.0817,  0.0678,  0.0523,  0.0568,  0.0767,  0.0441,
          0.0712,  0.0436, -0.0284, -0.0592, -0.0597, -0.0495, -0.0382, -0.0415,
         -0.0560, -0.0322, -0.0520, -0.0319,  0.0077,  0.0160,  0.0161,  0.0133,
          0.0103,  0.0112,  0.015

In [116]:
# Materialise the parameter iterator as a list so the tensors can be indexed.
parameters = list(model.parameters())

In [117]:
# NumPy arrays for the weights and biases, taken pairwise in parameter order
# (layer 1, layer 2, layer 3).
W1, b1, W2, b2, W3, b3 = (parameters[i].data.numpy() for i in range(6))

In [118]:
# Shape of the flattened gradient of the third parameter tensor.
np.array(grads[2]).shape

(100,)

## Forward propagation from scratch

In [119]:
def my_sigmoid(X):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-X))``."""
    denominator = 1 + np.exp(-X)
    return 1 / denominator

def my_forward_prop(X,W,b):
    """Return the affine pre-activation ``dot(W, X) + b`` of one layer."""
    weighted_input = np.dot(W, X)
    return weighted_input + b

In [120]:
# Inspect the first layer's weight matrix.
W1

array([[-0.09234011, -0.42046437],
       [ 0.39928764, -0.3519029 ],
       [ 0.4203933 ,  0.493105  ],
       [ 0.55517215, -0.48097882],
       [-0.34603843, -0.15631658],
       [-0.37699986,  0.19824064],
       [ 0.07747424,  0.6450015 ],
       [-0.15236527, -0.4518948 ],
       [ 0.4987306 ,  0.6075081 ],
       [ 0.32309574, -0.5875269 ]], dtype=float32)

In [121]:
# Re-compute the network's forward pass with NumPy, layer by layer, reusing
# the ``my_forward_prop`` helper (its argument order is X, W, b).
z1 = my_forward_prop(X, W1, b1)
z1_act = my_sigmoid(z1)
z2 = my_forward_prop(z1_act, W2, b2)
z2_act = my_sigmoid(z2)
# No activation is applied after the last layer, so z3 is the final output.
z3 = my_forward_prop(z2_act, W3, b3)
z3

array([-0.28116576])

In [122]:
# PyTorch's prediction as a NumPy array, shown next to the hand-computed z3.
prediction.data.numpy()

array([-0.28116578], dtype=float32)

## Backpropagation from scratch

In [123]:
def my_loss(y_true,y_pred):
    """Return the element-wise squared error ``(y_true - y_pred) ** 2``."""
    residual = y_true - y_pred
    return np.power(residual, 2)

In [124]:
def loss_gradient(y_true,y_pred):
    """Return the derivative of ``(y_true - y_pred) ** 2`` with respect to ``y_pred``."""
    doubled_pred = 2 * y_pred
    doubled_true = 2 * y_true
    return doubled_pred - doubled_true

In [125]:
def sigmoid_gradient(Z):
    """Return the derivative of the logistic sigmoid evaluated at ``Z``.

    Uses the identity ``s'(Z) = s(Z) * (1 - s(Z))``. The sigmoid is evaluated
    once and reused rather than being computed twice.
    """
    s = my_sigmoid(Z)
    return s * (1 - s)

In [126]:
# Output layer: delta3 is the derivative of the squared error with respect to
# the prediction. ``detach()`` reads the value without tracking gradients.
delta3 = loss_gradient(Y, prediction.detach().numpy())
b3_gradient = delta3
# Weight gradient = delta3 times the layer-2 activations. z2_act is 1-D, so a
# transpose is a no-op; ``reshape(1, -1)`` takes the row width from the data
# instead of hard-coding it.
w3_gradient = np.dot(delta3, z2_act.reshape(1, -1))
w3_gradient

array([-0.77028648, -0.68547447, -0.72318788, -0.96456754, -0.86306459,
       -0.74180506, -1.00498997, -0.79745549, -0.89748667, -0.95148059])

In [127]:
# Hidden layer 2: push delta3 back through W3, then through the sigmoid.
delta2 = np.dot(W3.transpose(), delta3) * sigmoid_gradient(z2)
b2_gradient = delta2
# Weight gradient is the outer product of delta2 (rows) and the layer-1
# activations (columns); np.outer avoids hard-coded reshape sizes.
w2_gradient = np.outer(delta2, z1_act)

In [133]:
# Hidden layer 1: push delta2 back through W2, then through the sigmoid.
delta1 = np.dot(W2.transpose(), delta2) * sigmoid_gradient(z1)
b1_gradient = delta1
# Weight gradient is the outer product of delta1 (rows) and the input X
# (columns); np.outer avoids hard-coded reshape sizes.
w1_gradient = np.outer(delta1, X)

In [135]:
# Copy each flattened autograd gradient into a NumPy array.
torch_autograd = [np.array(g) for g in grads]
torch_autograd

[array([-0.00164108, -0.00052501, -0.00132329, -0.00042334,  0.00725804,
         0.00232196, -0.01277838, -0.00408801,  0.02172195,  0.0069492 ,
        -0.01480642, -0.00473681,  0.00754285,  0.00241308, -0.01800024,
        -0.00575857,  0.00330978,  0.00105885,  0.00122226,  0.00039102],
       dtype=float32),
 array([-0.00168426, -0.00135811,  0.00744899, -0.01311456,  0.02229343,
        -0.01519596,  0.0077413 , -0.01847381,  0.00339685,  0.00125441],
       dtype=float32),
 array([ 0.00254151,  0.0052963 ,  0.0053351 ,  0.00442638,  0.00341447,
         0.00370777,  0.00500269,  0.00287559,  0.00464409,  0.0028483 ,
         0.00275756,  0.00574654,  0.00578863,  0.00480267,  0.00370474,
         0.00402297,  0.00542797,  0.00312005,  0.00503888,  0.00309043,
         0.00649167,  0.01352815,  0.01362724,  0.01130615,  0.00872146,
         0.00947062,  0.01277818,  0.00734502,  0.01186222,  0.0072753 ,
        -0.01739636, -0.03625269, -0.03651822, -0.03029817, -0.02337174,
   

In [137]:
# Hand-derived gradients, listed weight-then-bias for layers 1 to 3.
my_autograd = [
    w1_gradient,
    b1_gradient,
    w2_gradient,
    b2_gradient,
    w3_gradient,
    b3_gradient,
]
my_autograd

[array([[-0.00164108, -0.00052501],
        [-0.00132329, -0.00042334],
        [ 0.00725804,  0.00232196],
        [-0.01277838, -0.00408801],
        [ 0.02172195,  0.0069492 ],
        [-0.01480642, -0.00473681],
        [ 0.00754285,  0.00241308],
        [-0.01800024, -0.00575857],
        [ 0.00330978,  0.00105885],
        [ 0.00122226,  0.00039102]]),
 array([-0.00168426, -0.00135811,  0.00744899, -0.01311456,  0.02229343,
        -0.01519596,  0.0077413 , -0.01847381,  0.00339685,  0.00125441]),
 array([[ 0.00254151,  0.0052963 ,  0.0053351 ,  0.00442638,  0.00341447,
          0.00370777,  0.00500269,  0.00287559,  0.00464409,  0.0028483 ],
        [ 0.00275756,  0.00574654,  0.00578863,  0.00480267,  0.00370474,
          0.00402297,  0.00542797,  0.00312005,  0.00503888,  0.00309043],
        [ 0.00649167,  0.01352815,  0.01362724,  0.01130615,  0.00872146,
          0.00947062,  0.01277818,  0.00734502,  0.01186222,  0.0072753 ],
        [-0.01739636, -0.03625269, -0.03651

In [140]:
# Save the textual repr of the hand-derived gradients. The ``with`` block
# closes (and therefore flushes) the file even if the write raises.
with open('my_autograd.dat', "w") as file:
    file.write(str(my_autograd))

2283