In [23]:
#Mechanics of Learning

In [24]:
import torch

# Paired temperature readings, one entry per measurement:
# t_u is the model input, t_c is the ground truth it should be mapped to.
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)

In [25]:
#linear model
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``.

    Returns the prediction ``w * t_u + b``.
    """
    scaled = w * t_u
    return scaled + b

# mean squared error loss
def loss(t_p, t_c):
    """Mean of the squared differences between predictions and targets.

    ``t_p`` are the predictions and ``t_c`` the targets; the result is the
    average of ``(t_p - t_c) ** 2`` over all elements.
    """
    residual = t_p - t_c
    return (residual ** 2).mean()

In [26]:
# Start from w = 1 and b = 0 (both 0-dim tensors) and look at the
# predictions the untrained model produces.
w, b = torch.ones(()), torch.zeros(())

t_p = model(t_u, w, b)
t_p

tensor([35.7000, 55.9000, 58.2000, 81.9000, 56.3000, 48.9000, 33.9000, 21.8000,
        48.4000, 60.4000, 68.4000])

In [27]:
# Loss of the initial predictions t_p against the targets t_c.
loss_in = loss(t_p, t_c)
loss_in

tensor(1763.8848)

In [28]:
# Decrease the loss by estimating its gradient and stepping against it.
# Manual gradient calculation (won't scale): central finite difference of
# the loss with respect to w, evaluated at w + delta and w - delta.
delta = 0.1
loss_w_plus = loss(model(t_u, w + delta, b), t_c)
loss_w_minus = loss(model(t_u, w - delta, b), t_c)
loss_rate_of_change_w = (loss_w_plus - loss_w_minus) / (2.0 * delta)
loss_rate_of_change_w

tensor(4517.2974)

In [29]:
# Same central finite difference, this time perturbing the bias b.
delta = 0.1
loss_b_plus = loss(model(t_u, w, b + delta), t_c)
loss_b_minus = loss(model(t_u, w, b - delta), t_c)
loss_rate_of_change_b = (loss_b_plus - loss_b_minus) / (2.0 * delta)
loss_rate_of_change_b

tensor(82.5995)

In [30]:
# One gradient-descent step using the finite-difference estimates.
# NOTE: w and b are rebound from their own previous values, so re-running
# this cell applies a further step with the same estimates.
learning_rate = 1e-2
w, b = (
    w - learning_rate * loss_rate_of_change_w,
    b - learning_rate * loss_rate_of_change_b,
)
w, b

(tensor(-44.1730), tensor(-0.8260))

In [113]:
# compute derivatives to update the weights
# by the chain rule, d_loss/d_w = d_loss/d_tp * d_tp/d_w; the same applies for b
# d_loss/d_tp = 2 * (t_p - t_c) / N for a mean squared loss over N samples
def d_loss_tp(t_p, t_c):
    """Derivative of the mean squared loss with respect to each prediction.

    For ``loss = mean((t_p - t_c) ** 2)`` the derivative with respect to one
    element of ``t_p`` is ``2 * (t_p - t_c) / N``, where ``N`` is the number
    of elements the mean is taken over.

    Args:
        t_p: predictions.
        t_c: targets.

    Returns:
        A tensor shaped like ``t_p - t_c`` holding the per-element derivative.
    """
    diff = t_p - t_c
    # Divide by the total element count, matching what `.mean()` averages
    # over. Unlike `t_p.shape[0]` this also works for 0-dim tensors (which
    # have no first dimension) and for tensors with more than one dimension.
    return 2 * diff / diff.numel()

def d_tp_d_w(t_u):
    """Derivative of the prediction ``w * t_u + b`` with respect to ``w``.

    The model is linear in ``w``, so the derivative is the input itself.
    """
    return t_u

def d_tp_d_b():
    """Derivative of the prediction ``w * t_u + b`` with respect to ``b``.

    The bias enters additively, so the derivative is the constant 1,
    returned as a 0-dim tensor.
    """
    return torch.ones(())


# make grads cleaner
def grad_fn(t_u, t_p, t_c):
    """Gradient of the loss with respect to ``w`` and ``b``.

    Applies the chain rule: the derivative of the loss with respect to the
    predictions is multiplied by the derivative of the predictions with
    respect to each parameter.

    Returns a tensor ``[d_loss/d_w, d_loss/d_b]``.
    """
    upstream = d_loss_tp(t_p, t_c)
    # The products are elementwise: every entry is the contribution of one
    # data sample (not of a different feature), so no matmul is involved.
    per_sample = (upstream * d_tp_d_w(t_u), upstream * d_tp_d_b())
    # w and b are each a single learnt value, so the per-sample
    # contributions are summed into one number per parameter.
    return torch.stack([contribution.sum() for contribution in per_sample])

In [114]:
# now that the gradient computation is defined, we need to estimate the parameters
# a training loop applies the gradient update repeatedly

def training_loop(params, learning_rate, t_u, t_c, epochs, print_every=1):
    """Fit ``(w, b)`` of the linear model with plain gradient descent.

    Args:
        params: tensor holding ``[w, b]``. It is not modified; every update
            builds a new tensor.
        learning_rate: step size applied to the gradient at each epoch.
        t_u: model inputs.
        t_c: targets the predictions are compared against.
        epochs: number of gradient-descent updates to perform.
        print_every: log the loss every this many epochs. The default of 1
            logs every epoch (the original behaviour); ``0`` or ``None``
            disables all logging, which keeps long runs from flooding the
            cell output.

    Returns:
        The parameters after ``epochs`` updates.
    """
    for i in range(epochs):
        w, b = params
        pred = model(t_u, w, b)
        # The loss is evaluated before the update, so the value logged for
        # epoch i belongs to the parameters that entered epoch i.
        current_loss = loss(pred, t_c)
        grads = grad_fn(t_u, pred, t_c)
        if print_every and i == 0:
            print("grads shape", grads.shape)
        # update the params to reduce loss
        params = params - learning_rate * grads
        if print_every and i % print_every == 0:
            print('Epoch %s, loss %f' %(i, float(current_loss)))
    return params


In [115]:
# First run: start from w = 1, b = 0 and take 100 steps at a 1e-2 step size.
learning_rate = 1e-2
epochs = 100
params = training_loop(torch.tensor([1.0, 0.0]), learning_rate, t_u, t_c, epochs)
print(params)

grads shape torch.Size([2])
Epoch 0, loss 1763.884766
Epoch 1, loss 5802484.500000
Epoch 2, loss 19408029696.000000
Epoch 3, loss 64915905708032.000000
Epoch 4, loss 217130525461053440.000000
Epoch 5, loss 726257583152928129024.000000
Epoch 6, loss 2429183416467662896627712.000000
Epoch 7, loss 8125122549611731432050262016.000000
Epoch 8, loss 27176882120842590626938030653440.000000
Epoch 9, loss 90901105189019073810297959556841472.000000
Epoch 10, loss inf
Epoch 11, loss inf
Epoch 12, loss inf
Epoch 13, loss inf
Epoch 14, loss inf
Epoch 15, loss inf
Epoch 16, loss inf
Epoch 17, loss inf
Epoch 18, loss inf
Epoch 19, loss inf
Epoch 20, loss inf
Epoch 21, loss inf
Epoch 22, loss nan
Epoch 23, loss nan
Epoch 24, loss nan
Epoch 25, loss nan
Epoch 26, loss nan
Epoch 27, loss nan
Epoch 28, loss nan
Epoch 29, loss nan
Epoch 30, loss nan
Epoch 31, loss nan
Epoch 32, loss nan
Epoch 33, loss nan
Epoch 34, loss nan
Epoch 35, loss nan
Epoch 36, loss nan
Epoch 37, loss nan
Epoch 38, loss nan
Epoch 

In [116]:
# The previous run diverged: the loss grew every epoch until it overflowed to inf/nan.
# This typically means the updates overshoot along the loss curve, so take
# smaller steps by reducing the learning rate.

learning_rate = 1e-4
initial_params = torch.tensor([1.0, 0.0])
params = training_loop(initial_params, learning_rate, t_u, t_c, epochs)
print(params)

grads shape torch.Size([2])
Epoch 0, loss 1763.884766
Epoch 1, loss 323.090515
Epoch 2, loss 78.929634
Epoch 3, loss 37.552845
Epoch 4, loss 30.540283
Epoch 5, loss 29.351154
Epoch 6, loss 29.148884
Epoch 7, loss 29.113848
Epoch 8, loss 29.107145
Epoch 9, loss 29.105247
Epoch 10, loss 29.104168
Epoch 11, loss 29.103222
Epoch 12, loss 29.102295
Epoch 13, loss 29.101379
Epoch 14, loss 29.100466
Epoch 15, loss 29.099548
Epoch 16, loss 29.098631
Epoch 17, loss 29.097717
Epoch 18, loss 29.096796
Epoch 19, loss 29.095881
Epoch 20, loss 29.094959
Epoch 21, loss 29.094049
Epoch 22, loss 29.093134
Epoch 23, loss 29.092216
Epoch 24, loss 29.091301
Epoch 25, loss 29.090385
Epoch 26, loss 29.089464
Epoch 27, loss 29.088551
Epoch 28, loss 29.087635
Epoch 29, loss 29.086714
Epoch 30, loss 29.085804
Epoch 31, loss 29.084888
Epoch 32, loss 29.083967
Epoch 33, loss 29.083057
Epoch 34, loss 29.082142
Epoch 35, loss 29.081221
Epoch 36, loss 29.080309
Epoch 37, loss 29.079390
Epoch 38, loss 29.078474
Epoc

In [124]:
# Rescale the inputs by 0.1; with the scaled inputs the 1e-2 learning rate
# that diverged on the raw t_u can be used again.
t_un = 0.1 * t_u

# let it run for a large number of epochs
params = training_loop(
    params=torch.tensor([1.0, 0.0]),
    learning_rate=1e-2,
    t_u=t_un,
    t_c=t_c,
    epochs=5000,
)

grads shape torch.Size([2])
Epoch 0, loss 80.364342
Epoch 1, loss 37.574913
Epoch 2, loss 30.871077
Epoch 3, loss 29.756193
Epoch 4, loss 29.507153
Epoch 5, loss 29.392456
Epoch 6, loss 29.298828
Epoch 7, loss 29.208717
Epoch 8, loss 29.119415
Epoch 9, loss 29.030489
Epoch 10, loss 28.941877
Epoch 11, loss 28.853565
Epoch 12, loss 28.765553
Epoch 13, loss 28.677851
Epoch 14, loss 28.590431
Epoch 15, loss 28.503319
Epoch 16, loss 28.416498
Epoch 17, loss 28.329973
Epoch 18, loss 28.243742
Epoch 19, loss 28.157804
Epoch 20, loss 28.072151
Epoch 21, loss 27.986797
Epoch 22, loss 27.901728
Epoch 23, loss 27.816950
Epoch 24, loss 27.732464
Epoch 25, loss 27.648256
Epoch 26, loss 27.564344
Epoch 27, loss 27.480707
Epoch 28, loss 27.397362
Epoch 29, loss 27.314295
Epoch 30, loss 27.231512
Epoch 31, loss 27.149010
Epoch 32, loss 27.066790
Epoch 33, loss 26.984844
Epoch 34, loss 26.903175
Epoch 35, loss 26.821791
Epoch 36, loss 26.740679
Epoch 37, loss 26.659838
Epoch 38, loss 26.579279
Epoch 3

In [125]:
print(params)
# params are pretty close to the Fahrenheit-to-Celsius conversion.
# They were fitted on the scaled input t_un = 0.1 * t_u, so w is ~10x the
# per-degree factor: C = (F - 32) * 5/9 = 5.556 * (0.1 * F) - 17.78.

tensor([  5.3671, -17.3012])
