# SIMPLE REGRESSION PROBLEM

### Imports

In [81]:
import numpy as np
from sklearn.linear_model import LinearRegression

import torch
from torch import optim, nn


### Data Generation

In [82]:
# Reproducible synthetic data: y = true_b + true_w * x + noise
np.random.seed(42)
true_b, true_w = 1, 2
N = 100

x = np.random.rand(N, 1)                # features drawn uniformly from [0, 1)
epsilon = 0.1 * np.random.randn(N, 1)   # Gaussian noise scaled by 0.1
y = true_b + true_w * x + epsilon

# Shuffle the row indices in place, then cut them 80% / 20%
idx = np.arange(N)
np.random.shuffle(idx)
split_at = int(N * 0.80)
train_idx, val_idx = idx[:split_at], idx[split_at:]

# Select the rows belonging to each subset
x_train, x_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

# Report the shape of each feature split
for label, features in (('Training data: ', x_train), ('Validation data: ', x_val)):
    print(label, features.shape)

Training data:  (80, 1)
Validation data:  (20, 1)


### Using Numpy

In [83]:
# Seed so the starting point is reproducible; b is drawn first, then w
np.random.seed(42)
b = np.random.rand(1)
w = np.random.rand(1)

print("Initial model parameters b w: ", b, w)

lr = 0.1        # learning rate (step size)
epochs = 100    # number of full-batch gradient-descent updates

for epoch in range(epochs):
    # Forward pass: predictions of the current line
    y_hat = w * x_train + b

    # Residuals and their mean square; the loss is computed for reference
    # only, the update below uses the analytic gradients
    error = y_hat - y_train
    loss = np.mean(error ** 2)

    # Gradients of the MSE with respect to b and w
    db = 2 * np.mean(error)
    dw = 2 * np.mean(x_train * error)

    # Gradient-descent step on both parameters
    b, w = b - lr * db, w - lr * dw

print("Final model parameters b w: ", b, w)

Initial model parameters b w:  [0.37454012] [0.95071431]
Final model parameters b w:  [1.08425392] [1.85016647]


### Using Sklearn
Compare with the coefficients estimated by scikit-learn.

In [84]:
# Closed-form least-squares fit on the training split; fit() returns the
# fitted estimator, so construction and fitting can be chained
lr_model = LinearRegression().fit(x_train, y_train)

# Intercept and slope, to compare against the gradient-descent estimates
intercept, slope = lr_model.intercept_, lr_model.coef_[0]
print(intercept, slope)

[1.02354075] [1.96896447]


### Using Torch

### Define Device
Check whether you have GPU or CPU. GPU is normally used to speed up operations. 

In [85]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


We now convert the NumPy code used earlier to PyTorch, starting with the training data.

In [86]:
# Wrap each NumPy array as a tensor, cast it to float32 and move it to `device`
x_train_tensor, y_train_tensor = (
    torch.as_tensor(array).float().to(device) for array in (x_train, y_train)
)

# Show the container type before and after, plus the tensor's type string
print(type(x_train), type(x_train_tensor), x_train_tensor.type())

<class 'numpy.ndarray'> <class 'torch.Tensor'> torch.FloatTensor


### Define Device

In [87]:
# Same device selection as earlier in the notebook, evaluated again here
has_cuda = torch.cuda.is_available()
device = 'cuda' if has_cuda else 'cpu'
print(device)

cpu


### Creating Parameters
Tensors used as trainable weights or parameters need their gradients computed so that their values can be updated. The `requires_grad=True` argument tells PyTorch to compute those gradients.

In [88]:
# Seed PyTorch's RNG so the initial parameters are reproducible
torch.manual_seed(42)

# Options shared by both parameters: tracked by autograd, float32, on `device`
tensor_opts = dict(requires_grad=True, dtype=torch.float, device=device)

# Draw b first, then w, so the values match the seeded sequence
b = torch.randn(1, **tensor_opts)
w = torch.randn(1, **tensor_opts)

print(b, w, sep='\n')

tensor([0.3367], requires_grad=True)
tensor([0.1288], requires_grad=True)


### Forward Pass
PyTorch's autograd computes the gradients of the `w` and `b` parameters. To avoid gradient accumulation, we clear the gradients after every update using `optimizer.zero_grad()` (the optimizer's equivalent of calling `grad.zero_()` on each parameter).  
`nn.MSELoss()` is not the loss function itself, so we cannot pass predictions and targets to it directly; it returns a function, `loss_fn`, to which we pass the predictions and targets.

In [89]:
# Training hyperparameters, defined here so this cell does not depend on a
# value of `lr` left behind by the NumPy section
lr = 0.1
epochs = 100

# SGD optimizer that updates the two raw tensors b and w
optimizer = optim.SGD([b, w], lr=lr)

# nn.MSELoss() builds the loss function; loss_fn(predictions, targets) evaluates it
loss_fn = nn.MSELoss()

for epoch in range(epochs):
    # Forward pass: model prediction
    y_hat = w * x_train_tensor + b

    # Mean squared error between predictions and labels
    # (the same quantity as ((y_hat - y_train_tensor) ** 2).mean())
    loss = loss_fn(y_hat, y_train_tensor)

    # Backward pass: autograd fills b.grad and w.grad
    loss.backward()

    # Update b and w; replaces the manual `b -= lr * b.grad` / `w -= lr * w.grad`
    # performed under torch.no_grad()
    optimizer.step()

    # Reset the gradients so they do not accumulate into the next epoch;
    # replaces calling b.grad.zero_() and w.grad.zero_() by hand
    optimizer.zero_grad()

print(b,w)

tensor([1.1552], requires_grad=True) tensor([1.7113], requires_grad=True)


### Building Manual Model creating parameter by parameter
To associate `b` and `w` with the model, we wrap them in the `nn.Parameter` class. As a result, `b` and `w` are registered as parameters of any model created from the `ManualLinearRegression` class.

In [90]:
class ManualLinearRegression(nn.Module):
    """Single-feature linear model ``b + w * x`` with hand-declared parameters."""

    def __init__(self):
        super().__init__()
        # nn.Parameter registers each tensor with the module (so it appears in
        # parameters() and state_dict()) and enables gradient tracking by default.
        # b is drawn before w to keep the seeded initial values in the same order.
        bias_init = torch.randn(1, dtype=torch.float)
        self.b = nn.Parameter(bias_init)
        weight_init = torch.randn(1, dtype=torch.float)
        self.w = nn.Parameter(weight_init)

    def forward(self, x):
        """Return the model prediction ``b + w * x`` for the input ``x``."""
        prediction = self.b + self.w * x
        return prediction
        




#### Using the newly created model for making predictions
The model and the data must reside on the same device, so after creating the model we send it to the device.  
Because `w` and `b` were defined as model parameters with `nn.Parameter`, we can hand them to the optimizer with `model.parameters()`.  
`model.train()` does not actually train the model; it only puts the model in training mode.


In [91]:
# Hyperparameters and loss for this run
lr = 0.1
epochs = 100
loss_fn = nn.MSELoss()

# Seed right before building the model so its randomly drawn b and w are
# reproducible, then move the model to the device that holds the data
torch.manual_seed(42)
model = ManualLinearRegression().to(device)

# The optimizer updates every parameter registered on the model
optimizer = optim.SGD(model.parameters(), lr=lr)

for epoch in range(epochs):
    # Put the model in training mode
    model.train()

    # Forward pass, then the loss of the predictions against the labels
    y_hat = model(x_train_tensor)
    loss = loss_fn(y_hat, y_train_tensor)

    # Backward pass: gradients for w and b
    loss.backward()

    # Apply the update, then clear the gradients for the next epoch
    optimizer.step()
    optimizer.zero_grad()

# state_dict() lists the model's parameters by name
print(model.state_dict())

OrderedDict([('b', tensor([1.1552])), ('w', tensor([1.7113]))])


### Using Pytorch's Linear Model
The regression problem we are dealing with has one input and one output. Instead of manually defining the parameters `b` and `w`, we can use PyTorch's `nn.Linear` layer.

In [97]:
class MyLinearRegression(nn.Module):
    """Linear regression built on a single ``nn.Linear`` layer (1 input, 1 output)."""

    def __init__(self):
        super().__init__()
        # The layer owns its own weight and bias, exposed as
        # `linear.weight` and `linear.bias` in the state dict
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.linear(x)
        return prediction

In [99]:
# Hyperparameters and loss for this run
lr = 0.1
epochs = 100
loss_fn = nn.MSELoss()

# Seed before constructing the model so the layer's initial weights are
# reproducible, then move the model to the device that holds the data
torch.manual_seed(42)
model = MyLinearRegression().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr)

print("Initial Model Parameters  ")
print(model.state_dict())

for epoch in range(epochs):
    # Put the model in training mode
    model.train()

    # Forward pass and loss against the training labels
    y_hat = model(x_train_tensor)
    loss = loss_fn(y_hat, y_train_tensor)

    # Backward pass, parameter update, then gradient reset
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

print('Final Model Paramters')
print(model.state_dict())

Initial:  
OrderedDict([('linear.weight', tensor([[0.7645]])), ('linear.bias', tensor([0.8300]))])
Finally
OrderedDict([('linear.weight', tensor([[1.7777]])), ('linear.bias', tensor([1.1213]))])
