In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

def criterion(out, label):
    """Return the squared error between a prediction and its target.

    Args:
        out: The predicted value.
        label: The target value the prediction is compared against.

    Returns:
        ``(label - out) ** 2``.
    """
    residual = label - out
    return residual ** 2

# Training pairs as (input, label); each label is 3 * input.
# Both entries are floats on purpose (1.0, not 1).
data = [(float(n), 3.0 * n) for n in range(1, 7)]

# The single weight, created with gradient tracking switched on.
W = torch.ones(1, requires_grad=True)

# Work with the second sample only: X = 2.0, label = 6.0.
X, label = data[1]

# Forward pass, loss, then backward pass to populate W.grad.
Y = W * X
loss = criterion(out=Y, label=label)
loss.backward()

## Check by hand:
##   loss       = (label - X*W)**2
##   d(loss)/dW = 2*(label - X*W)*(-X) = 2*4*(-2) = -16

W.grad

In [9]:
### Training with manually updating W with "Backward" ###

import torch
#from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

def criterion(out, label):
    """Squared-error loss for one prediction.

    Args:
        out: The model output.
        label: The expected value for that output.

    Returns:
        The square of ``label - out``.
    """
    error = label - out
    squared_error = error ** 2
    return squared_error


# Training pairs as (input, label); each label is 3 * input, so training
# should drive W towards 3.
data = [(1.0,3.0), (2.0,6.0), (3.0,9.0), (4.0,12.0), (5.0,15.0), (6.0,18.0)]

# The single trainable weight, tracked by autograd.
W = torch.tensor([1.0], requires_grad=True)

# Step size of the manual gradient-descent update.
lr = 0.01

# Earlier attempt that carried the value between iterations through `temp`.
# It is a bare string literal, so none of it is executed.
'''
temp = torch.tensor([0.0])
for epoch in range(20):
    for i, current_data in enumerate(data):
        W = torch.randn(1) # This line is for clean start
        W = temp
        W.requires_grad = True
        X, Y = current_data
        outputs = X*W
        loss = criterion(outputs, Y)
        loss.backward()
        W = W - lr* W.grad
        temp = W.detach()    # Not to deliver the grad, but just value
        print("Epoch {} - loss: {}".format(epoch, loss))

'''

### Remove the temp variable altogether
for epoch in range(20):
    for X, Y in data:
        outputs = X*W
        loss = criterion(outputs, Y)
        loss.backward()
        with torch.no_grad():
            # The update itself must not be recorded by autograd.
            # It has to be in-place (`-=`) so that W stays the same leaf tensor.
            # `W = W - lr*W.grad` would instead rebind the name W to a brand-new
            # tensor whose .grad is None, so the `W.grad.zero_()` below fails.
            W -= lr* W.grad
        # backward() accumulates into .grad, so reset it before the next iteration.
        W.grad.zero_()
        print(f'Epoch {epoch} - loss: {loss.item():.4f}')

### Test the trained network ###
# Evaluation needs no gradients; under no_grad the printed tensors carry no grad_fn.
with torch.no_grad():
    for X, Y in data:
        outputs = X*W
        print("when x = {}, y = {}".format(X, outputs))

Epoch 0 - loss: 4.0000
Epoch 0 - loss: 15.3664
Epoch 0 - loss: 29.2638
Epoch 0 - loss: 34.9813
Epoch 0 - loss: 25.2740
Epoch 0 - loss: 9.0986
Epoch 1 - loss: 0.0198
Epoch 1 - loss: 0.0761
Epoch 1 - loss: 0.1450
Epoch 1 - loss: 0.1733
Epoch 1 - loss: 0.1252
Epoch 1 - loss: 0.0451
Epoch 2 - loss: 0.0001
Epoch 2 - loss: 0.0004
Epoch 2 - loss: 0.0007
Epoch 2 - loss: 0.0009
Epoch 2 - loss: 0.0006
Epoch 2 - loss: 0.0002
Epoch 3 - loss: 0.0000
Epoch 3 - loss: 0.0000
Epoch 3 - loss: 0.0000
Epoch 3 - loss: 0.0000
Epoch 3 - loss: 0.0000
Epoch 3 - loss: 0.0000
Epoch 4 - loss: 0.0000
Epoch 4 - loss: 0.0000
Epoch 4 - loss: 0.0000
Epoch 4 - loss: 0.0000
Epoch 4 - loss: 0.0000
Epoch 4 - loss: 0.0000
Epoch 5 - loss: 0.0000
Epoch 5 - loss: 0.0000
Epoch 5 - loss: 0.0000
Epoch 5 - loss: 0.0000
Epoch 5 - loss: 0.0000
Epoch 5 - loss: 0.0000
Epoch 6 - loss: 0.0000
Epoch 6 - loss: 0.0000
Epoch 6 - loss: 0.0000
Epoch 6 - loss: 0.0000
Epoch 6 - loss: 0.0000
Epoch 6 - loss: 0.0000
Epoch 7 - loss: 0.0000
Epoch 7

In [10]:
### Training with fancier version ###

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

# Use a class to define the net structure (when using a complex net, and not a stock version available from PyTorch)
class Net(nn.Module):
    """Minimal model: one bias-free linear layer with 1 input and 1 output feature.

    The only learnable parameter is the single weight of ``fc1``.
    """

    def __init__(self):
        # Initialise the nn.Module machinery before registering any layers.
        super().__init__()
        # A single linear layer is enough for this task; bias=False leaves
        # just one weight to learn.
        self.fc1 = nn.Linear(in_features=1, out_features=1, bias=False)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc1(x)

net = Net()

print(net)
print(list(net.parameters())) # parameters are randomized

# Mean squared error; for a single-element output this equals (label - out)**2.
criterion = nn.MSELoss()

# Choose optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)
#optimizer = optim.Adam(net.parameters(), lr=0.005)

# Dataset for training the model: (input, label) pairs with label = 3 * input
data = [(1.0,3.0), (2.0,6.0), (3.0,9.0), (4.0,12.0), (5.0,15.0), (6.0,18.0)]

# Train the model
for epoch in range(20): # 0 - 19
    for X, Y in data:
        # Show the raw pair first. The previous f'X,Y' had no placeholders and
        # printed the literal text "X,Y" instead of the values.
        print(X, Y)
        X, Y = torch.FloatTensor([X]), torch.FloatTensor([Y])
        print(X,Y)
        optimizer.zero_grad()   # clear gradients accumulated by the previous step
        outputs = net(X)
        loss = criterion(outputs, Y)
        loss.backward()
        # Applies the SGD update. With momentum=0.5 this is not a plain
        # "W = W - lr * W.grad": the step also includes a running velocity
        # built from earlier gradients.
        optimizer.step()
        print("Epoch {} - loss: {}".format(epoch, loss.item()))

### Test the trained network ###
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    for X, Y in data:
        X = torch.FloatTensor([X])
        out = net(X)  # X is already a tensor; no second FloatTensor(...) wrap
        print("when x = {}, y = {}".format(X, out))
    

Net(
  (fc1): Linear(in_features=1, out_features=1, bias=False)
)
[Parameter containing:
tensor([[0.1413]], requires_grad=True)]
1.0 3.0
tensor([1.]) tensor([3.])
Epoch 0 - loss: 8.17236328125
2.0 6.0
tensor([2.]) tensor([6.])
Epoch 0 - loss: 31.39495277404785
3.0 9.0
tensor([3.]) tensor([9.])
Epoch 0 - loss: 58.469627380371094
4.0 12.0
tensor([4.]) tensor([12.])
Epoch 0 - loss: 61.69783401489258
5.0 15.0
tensor([5.]) tensor([15.])
Epoch 0 - loss: 27.182729721069336
6.0 18.0
tensor([6.]) tensor([18.])
Epoch 0 - loss: 0.1334797590970993
1.0 3.0
tensor([1.]) tensor([3.])
Epoch 1 - loss: 0.224557563662529
2.0 6.0
tensor([2.]) tensor([6.])
Epoch 1 - loss: 2.142012357711792
3.0 9.0
tensor([3.]) tensor([9.])
Epoch 1 - loss: 5.791599750518799
4.0 12.0
tensor([4.]) tensor([12.])
Epoch 1 - loss: 7.684028148651123
5.0 15.0
tensor([5.]) tensor([15.])
Epoch 1 - loss: 4.3398590087890625
6.0 18.0
tensor([6.]) tensor([18.])
Epoch 1 - loss: 0.17713622748851776
1.0 3.0
tensor([1.]) tensor([3.])
Epoch 2

In [None]:
# Print the values produced by a 20-step range: 0 through 19.
for epoch in range(0, 20):
    print(epoch)

In [None]:
# W is the leaf tensor that autograd stores gradients for.
W = torch.tensor([1.0], requires_grad=True)

# Give the derived tensor its own name. Writing `W = W*2` would rebind W to a
# non-leaf tensor, whose .grad is not populated by backward(), so the final
# `W.grad` would be None (with a warning).
W_scaled = W*2
label = 1.0
loss = W_scaled*5 - label
loss.backward()

# d(loss)/dW = 5 * 2 = 10
W.grad