<a href="https://colab.research.google.com/github/ankushKun/learning-ML/blob/master/day2/Linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression

In [1]:
import torch
import numpy as np

### Training data
*to predict crop yield of apples and oranges (target variables) using avg temp, rainfall and humidity (input variables, aka features)*

|Region|Temp(F)|Rainfall(mm)|Humidity(%)|Apples|Oranges|
|:-----|:-----:|:----------:|:---------:|:----:|:-:|
|Kanto |73     |67          |43         |56    |70 |
|Johoto|91     |88          |64         |81    |101|
|Hoenn |87     |134         |58         |119   |133|
|Sinnoh|102    |43          |37         |22    |37 |
|Unova |69     |96          |70         |103   |119|

yield_apple = w11 * temp + w12 * rainfall + w13 * humidity + b1\
yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2\
w - weight\
b - bias


In [2]:
# Feature matrix: one row per region, columns are (temp, rainfall, humidity).
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

In [3]:
# Target matrix: one row per region, columns are (apples, oranges).
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

convert arrays to tensors

In [4]:
# Convert the NumPy arrays to PyTorch tensors.
# torch.as_tensor wraps an ndarray without copying (like torch.from_numpy) but
# also accepts a tensor, so re-running this cell no longer raises a TypeError
# once `inputs` / `targets` have already been converted.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


creating weights and biases

In [5]:
# Randomly initialised parameters; requires_grad=True makes autograd track them.
# w has one row per target and one column per feature:
#   w11  w12  w13
#   w21  w22  w23
w = torch.randn((2, 3), requires_grad=True)
# b holds one bias per target: b1, b2
b = torch.randn((2,), requires_grad=True)
print(w)
print(b)

tensor([[-3.0202, -0.6593, -0.0676],
        [ 0.9038,  1.8182,  0.3440]], requires_grad=True)
tensor([ 0.1019, -0.8091], requires_grad=True)


### X * W^T + b
5x3 matrix * weights matrix(transposed) + bias\
**.t()** transposes a matrix\
**@** is matrix multiplication in pytorch

In [6]:
def model(x):
  """Linear model: multiply x by the transposed weights and add the bias.

  Reads the notebook-level tensors `w` (weights) and `b` (bias).
  """
  weighted = torch.matmul(x, w.t())
  return weighted + b

In [7]:
# Forward pass with the current (randomly initialised) weights and biases.
predictions = model(inputs)
print(predictions) # one row per region: predicted (apples, oranges)

tensor([[-267.4519,  201.7789],
        [-337.0806,  263.4536],
        [-354.9227,  341.4121],
        [-338.8072,  182.2870],
        [-276.3174,  260.1804]], grad_fn=<AddBackward0>)


### Loss function
- calc difference between predictions and actual target
- square all elements of the resulting matrix to remove -ve values
- take average of the resulting elements

This is known as MSE (Mean Squared Error)


In [8]:
# Mean squared error, spelled out one step at a time.
difference = targets - predictions  # element-wise error
difference_square = difference * difference  # squaring removes the sign
# average = sum of all squared errors / number of elements (numel)
mse = difference_square.sum() / difference_square.numel()
mse

tensor(90631.1484, grad_fn=<DivBackward0>)

In [9]:
def mse(t1,t2):
  """Return the mean squared error between two tensors.

  Args:
    t1: first tensor (e.g. the targets).
    t2: second tensor (e.g. the predictions); it is subtracted element-wise
      from t1.

  Returns:
    A scalar tensor: the sum of the squared element-wise differences divided
    by the number of elements.
  """
  # Use the arguments rather than the notebook-level `targets`/`predictions`,
  # so the function works for whatever pair of tensors it is given.
  difference = t1 - t2
  difference_square = difference * difference
  return torch.sum(difference_square) / difference_square.numel() # numel - number of elements

In [10]:
# lower loss = better model
loss = mse(targets,predictions)
print(loss)
print(torch.sqrt(loss)) # each element varies by this much

tensor(90631.1484, grad_fn=<DivBackward0>)
tensor(301.0501, grad_fn=<SqrtBackward>)


### Compute derivative of loss

In [11]:
# Backpropagate: fills in .grad on w and b (both created with requires_grad=True).
loss.backward()

In [12]:
print(w)
print(w.grad) # each element is the derivative of loss w.r.t that element in w
# i.e. w.grad[i][j] = d(loss)/d(w[i][j]), so w.grad has the same shape as w

tensor([[-3.0202, -0.6593, -0.0676],
        [ 0.9038,  1.8182,  0.3440]], requires_grad=True)
tensor([[-33172.7695, -34779.4414, -21611.0391],
        [ 13419.1406,  14170.5957,   8681.9336]])


In [13]:
print(b)
print(b.grad) # each element is the derivative of loss w.r.t that element in b

tensor([ 0.1019, -0.8091], requires_grad=True)
tensor([-391.1160,  157.8224])


#### for -ve gradient element
- decreasing w will increase loss
- increasing w will decrease loss

#### for +ve gradient element
- increasing w will increase loss
- decreasing w will decrease loss

Therefore, if gradient is +ve decrease w, if -ve increase w

In [14]:
# Reset the gradients before continuing: .backward() adds to the existing
# .grad values instead of replacing them.
w.grad.zero_()
b.grad.zero_()
print(w.grad,b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]]) tensor([0., 0.])


1. calc predictions
2. calc loss
3. calc derivatives
4. subtract a small value proportional to the gradient
5. recalculate predictions with updated w and b
6. calc loss (hopefully lower than before :) )

In [15]:
# Forward pass, loss, then gradients.
predictions = model(inputs)
loss = mse(targets,predictions)
loss.backward()
# w and b print unchanged here: backward() only fills in their .grad.
print(w)
print(b)

tensor([[-3.0202, -0.6593, -0.0676],
        [ 0.9038,  1.8182,  0.3440]], requires_grad=True)
tensor([ 0.1019, -0.8091], requires_grad=True)


In [16]:
def subt(w,b,lr=1e-5):
  """Take one gradient-descent step on w and b in place, then clear their gradients.

  Args:
    w: weight tensor whose .grad has been populated by backward().
    b: bias tensor whose .grad has been populated by backward().
    lr: learning rate, the factor the gradient is scaled by before being
      subtracted. Defaults to 1e-5, the value that used to be hard-coded.
  """
  with torch.no_grad(): # the update itself must not be tracked by autograd
    w -= w.grad * lr
    b -= b.grad * lr
    # zero the gradients so the next backward() does not add to stale values
    w.grad.zero_()
    b.grad.zero_()
subt(w,b) # one update step using the gradients from the loss.backward() in the previous cell
print(w)
print(b)

tensor([[-2.6884, -0.3115,  0.1485],
        [ 0.7696,  1.6765,  0.2572]], requires_grad=True)
tensor([ 0.1058, -0.8107], requires_grad=True)


In [17]:
# Recompute predictions and loss with the updated w and b.
predictions = model(inputs)
loss = mse(predictions,targets)
print(loss)
print(torch.sqrt(loss)) # decrease in loss :D

tensor(61191.1992, grad_fn=<DivBackward0>)
tensor(247.3685, grad_fn=<SqrtBackward>)


loop and keep reducing loss\
each loop is called an **epoch**

In [20]:
# Train: repeat the predict -> loss -> backward -> update cycle for 100,000 epochs.
for epoch in range(100_000):
  predictions = model(inputs)
  loss = mse(predictions,targets)
  loss.backward()
  subt(w,b)
# `predictions` is the output of the last forward pass, i.e. it was computed
# just before the final update. Print `loss` / `torch.sqrt(loss)` here to inspect the error.
print(predictions)

tensor([[ 57.1791,  70.1666],
        [ 82.1982, 100.7595],
        [118.6907, 132.9817],
        [ 21.0796,  37.0331],
        [101.9228, 119.1151]], grad_fn=<AddBackward0>)
