In [104]:
import numpy as np
import torch
import pandas as pd
import math

In [83]:
df = pd.read_csv('./data/winequality-red.csv', delimiter=';',skiprows=1, header=None)
df_input  = df.iloc[ :, :11]
df_output = df.iloc[:, -1:]

In [84]:
input_np_array = df_input.to_numpy()
output_np_array = df_output.to_numpy()

In [85]:
input_np_array

array([[ 7.4  ,  0.7  ,  0.   , ...,  3.51 ,  0.56 ,  9.4  ],
       [ 7.8  ,  0.88 ,  0.   , ...,  3.2  ,  0.68 ,  9.8  ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  3.26 ,  0.65 ,  9.8  ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  3.42 ,  0.75 , 11.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  3.57 ,  0.71 , 10.2  ],
       [ 6.   ,  0.31 ,  0.47 , ...,  3.39 ,  0.66 , 11.   ]])

In [86]:
output_np_array

array([[5],
       [5],
       [5],
       ...,
       [6],
       [5],
       [6]])

In [87]:
input = torch.from_numpy(input_np_array)
target = torch.from_numpy(output_np_array)

In [88]:
input

tensor([[ 7.4000,  0.7000,  0.0000,  ...,  3.5100,  0.5600,  9.4000],
        [ 7.8000,  0.8800,  0.0000,  ...,  3.2000,  0.6800,  9.8000],
        [ 7.8000,  0.7600,  0.0400,  ...,  3.2600,  0.6500,  9.8000],
        ...,
        [ 6.3000,  0.5100,  0.1300,  ...,  3.4200,  0.7500, 11.0000],
        [ 5.9000,  0.6450,  0.1200,  ...,  3.5700,  0.7100, 10.2000],
        [ 6.0000,  0.3100,  0.4700,  ...,  3.3900,  0.6600, 11.0000]],
       dtype=torch.float64)

In [89]:
target

tensor([[5],
        [5],
        [5],
        ...,
        [6],
        [5],
        [6]])

In [90]:
# weights and biases
# Y = X * W.T + b
w = torch.randn(1, 11, requires_grad=True)
b = torch.randn(1599, 1, requires_grad=True)
print(w)
print(b)

tensor([[-0.9462, -1.0668, -0.2466, -0.3592,  1.4917, -0.7217, -1.8868, -0.1667,
         -0.1316,  1.0876,  0.5102]], requires_grad=True)
tensor([[-0.3371],
        [-0.9961],
        [ 1.4825],
        ...,
        [-0.4272],
        [-0.3894],
        [ 0.1405]], requires_grad=True)


In [91]:
def lin_model(x):
    return x @ w.t() + b

In [92]:
# Generate predictions
preds = lin_model(input.float())
print(preds)
print(preds.shape)

tensor([[ -75.9696],
        [-149.4121],
        [-115.0108],
        ...,
        [ -98.2690],
        [-108.0720],
        [ -93.6967]], grad_fn=<AddBackward0>)
torch.Size([1599, 1])


Predictions are way out of range

In [93]:
diff = target - preds
diff


tensor([[ 80.9696],
        [154.4121],
        [120.0108],
        ...,
        [104.2690],
        [113.0720],
        [ 99.6967]], grad_fn=<SubBackward0>)

In [95]:
diff_sq = diff * diff
diff_sq
torch.sum(diff_sq) / diff.numel()


tensor(16327.6074, grad_fn=<DivBackward0>)

In [96]:
# MSE loss
def mse(t1, t2):
    d = t1 - t2
    return torch.sum(d * d) / t1.numel()

In [98]:
loss = mse(target, preds)
print(loss)

tensor(16327.6074, grad_fn=<DivBackward0>)


In [105]:
print('sq root of loss: ', math.sqrt(loss))

sq root of loss:  127.77952661469286


each prediction is way off by nearly 127



In [106]:
# Compute gradients
loss.backward()

In [107]:
# Gradients for weights
print('w :', w)
print('w.grad :', w.grad)

w : tensor([[-0.9462, -1.0668, -0.2466, -0.3592,  1.4917, -0.7217, -1.8868, -0.1667,
         -0.1316,  1.0876,  0.5102]], requires_grad=True)
w.grad : tensor([[ -1785.9651,   -116.2308,    -60.0336,   -593.2161,    -19.2996,
          -4470.6685, -14503.1465,   -216.6498,   -718.1283,   -144.1647,
          -2236.5405]])


In [108]:
print('b: ', b)
print('b.grad: ', b.grad)

b:  tensor([[-0.3371],
        [-0.9961],
        [ 1.4825],
        ...,
        [-0.4272],
        [-0.3894],
        [ 0.1405]], requires_grad=True)
b.grad:  tensor([[-0.1013],
        [-0.1931],
        [-0.1501],
        ...,
        [-0.1304],
        [-0.1414],
        [-0.1247]])


If a gradient element is -ve:
1. increase element's value => loss decreases
2. decrease element's value => loss increases



In [109]:
# reset w and b gradients back to zero
w.grad.zero_()
b.grad.zero_()
print('w grad : ', w.grad)
print('b grad: ', b.grad)

w grad :  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
b grad:  tensor([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]])


Adjust weights and biases using Gradient Descent optimisation algo
1. Generate predictions
2. Calculate loss
3. Compute gradient w.r.t. the weights and biases
4. Adjust weights by subtracting a small quantity proportional to the gradient
5. Reset the gradients to zero



In [112]:
#1. Generate predictions

preds = lin_model(input.float())
print(preds)
# same as above

tensor([[ -75.9696],
        [-149.4121],
        [-115.0108],
        ...,
        [ -98.2690],
        [-108.0720],
        [ -93.6967]], grad_fn=<AddBackward0>)


In [113]:
#2. Calculate loss
loss = mse(preds, target)
print(loss)


tensor(16327.6074, grad_fn=<DivBackward0>)


In [114]:
# 3. Compute gradient w.r.t. the weights and biases
loss.backward()
print(w.grad)
print(b.grad)

tensor([[ -1785.9651,   -116.2308,    -60.0336,   -593.2161,    -19.2996,
          -4470.6685, -14503.1465,   -216.6498,   -718.1283,   -144.1647,
          -2236.5405]])
tensor([[-0.1013],
        [-0.1931],
        [-0.1501],
        ...,
        [-0.1304],
        [-0.1414],
        [-0.1247]])


In [115]:
# 4. Adjust weights by subtracting a small quantity proportional to the gradient
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [116]:
print(w)
print(b)

tensor([[-0.9284, -1.0656, -0.2460, -0.3533,  1.4919, -0.6770, -1.7418, -0.1645,
         -0.1244,  1.0891,  0.5326]], requires_grad=True)
tensor([[-0.3371],
        [-0.9961],
        [ 1.4825],
        ...,
        [-0.4272],
        [-0.3894],
        [ 0.1405]], requires_grad=True)


In [119]:
# Calculate loss
preds = lin_model(input.float())
loss = mse(preds, target)
print(loss)


tensor(14020.7471, grad_fn=<DivBackward0>)


Train for multiple epochs


In [121]:
# Train for 1000 epochs
for i in range(1000):
    preds = lin_model(input.float())
    loss = mse(preds, target)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [122]:
preds = lin_model(input.float())
loss = mse(preds, target)
print(loss)
# quite low loss  tensor(3.4323, grad_fn=<DivBackward0>)
# initital tensor(16327.6074, grad_fn=<DivBackward0>)

tensor(3.4323, grad_fn=<DivBackward0>)
