In [1]:
import torch
import torch.nn as nn
import numpy as np
# Bare expression: evaluated, but not echoed, because only the last
# expression of a cell is displayed.
torch.__version__
# Seed PyTorch's global random number generator so the random
# initialisations in the cells below are repeatable between runs.
torch.manual_seed(0)

<torch._C.Generator at 0x7f1f0968ad68>

## Linear Regression

useful links

https://www.youtube.com/watch?v=zPG4NjIkCjc

https://www.kaggle.com/aakashns/pytorch-basics-linear-regression-from-scratch

Linear regression models a linear relationship between two variables: an independent variable $x$
and a dependent variable $y$. The model has the form $y=ax+b$, and fitting it means finding the values of
$a$ and $b$ that best describe the relationship between the variables. Geometrically, this equation describes a straight
line with slope $a$ and intercept $b$ (the value of $y$ when $x = 0$).



Let's create and randomly initialize our model's parameters $a$ and $b$.

In [2]:

# Parameters of the line y = a*x + b. requires_grad=True asks autograd to
# record operations on them so gradients can be computed later.
a = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)
print(a, b, sep='\n')

# Evaluate the line at a single random point, then backpropagate from the
# one-element result to populate a.grad and b.grad.
x = torch.randn(1)
y = torch.add(torch.mul(a, x), b)
y.backward()
print(y, x)

tensor([1.5410], requires_grad=True)
tensor([-0.2934], requires_grad=True)
tensor([-3.6509], grad_fn=<AddBackward0>) tensor([-2.1788])


## Fit simple line

Let's create dummy data and try to fit our linear regression model. We'll randomly initialize $a$ and $b$ and run
some iterations to find the optimal weights that fit the following line.

$y=2x+0.5$

Now let's create our data and  fit our model.




In [3]:

# Weight stored as a (1, 1) matrix and bias as a 1-element vector; both are
# tracked by autograd.
a = torch.randn(1, 1, requires_grad=True)
b = torch.randn(1, requires_grad=True)


def model(x):
    """Apply the linear map ``x @ a.T + b`` using the notebook-level ``a`` and ``b``.

    ``x`` is multiplied on the right by the transposed weight matrix, and
    the bias is added to the result.
    """
    weights_t = a.t()
    return torch.matmul(x, weights_t) + b


# MSE LOSS
Mean Squared Error (MSE) or mean squared deviation (MSD) of an estimator
(of a procedure for estimating an unobserved quantity) measures the average of the squares of the errors.

$MSE(y,\hat {y}) = \frac{1}{n}\sum_{i=1}^{n} (y_{i}-\hat{y}_{i})^{2} $


In other words MSE is the mean ${ \left({\frac {1}{n}}\sum _{i=1}^{n}\right)}$
of the squares of the errors ${ (y_{i}-{\hat {y_{i}}})^{2}}$

In [4]:
def mse(y, y_hat):
    """Return the mean of the element-wise squared differences of ``y`` and ``y_hat``."""
    squared_error = (y - y_hat) ** 2
    return squared_error.mean()




# Create dataset
Now we'll create data that follow the equation $y=2x+0.5$.
We will create only 10 samples but you can do more if you like.


In [5]:
# x values 1..10 as a (10, 1) float column. torch.arange replaces the
# deprecated torch.range; its end is exclusive, hence the upper bound of 11.
inputs = torch.arange(1, 11, dtype=torch.float32).unsqueeze(-1)
print(inputs.shape)
print(inputs)
# Targets on the line y = 2x + 0.5. The scalar offset broadcasts over every
# row, so the number of samples is not repeated here.
targets = 2. * inputs + 0.5
print(targets.shape)
print(targets)

torch.Size([10, 1])
tensor([[ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.],
        [10.]])
torch.Size([10, 1])
tensor([[ 2.5000],
        [ 4.5000],
        [ 6.5000],
        [ 8.5000],
        [10.5000],
        [12.5000],
        [14.5000],
        [16.5000],
        [18.5000],
        [20.5000]])


  """Entry point for launching an IPython kernel.


Let's compute the predictions $\hat{y}$ with the untrained model and look at the output and the loss value.


In [6]:
# Forward pass with the randomly initialised parameters.
preds = model(inputs)
print(preds)

# Mean squared error between the untrained predictions and the targets.
loss = mse(y=preds, y_hat=targets)
print(loss)

# Backpropagate from the loss; the next cell reads the resulting gradients
# from a.grad and b.grad.
loss.backward()

tensor([[-0.5161],
        [ 0.0523],
        [ 0.6208],
        [ 1.1892],
        [ 1.7576],
        [ 2.3261],
        [ 2.8945],
        [ 3.4629],
        [ 4.0314],
        [ 4.5998]], grad_fn=<AddBackward0>)
tensor(106.3641, grad_fn=<MeanBackward0>)


Now, if we do one backpropagation step, the gradients of the loss with respect to the two parameters $a$ and $b$ will be calculated.

In [7]:
# Show the weight, then the bias, each followed by the gradient that
# loss.backward() stored for it.
for param in (a, b):
    print(param)
    print(param.grad)

# Zero both gradients in place, then print them to confirm the reset.
for param in (a, b):
    param.grad.zero_()
for param in (a, b):
    print(param.grad)

tensor([[0.5684]], requires_grad=True)
tensor([[-127.6605]])
tensor([-1.0845], requires_grad=True)
tensor([-18.9163])
tensor([[0.]])
tensor([0.])


Let's train the model for 100 iterations


In [8]:
# Plain gradient descent: 100 full-batch updates with a fixed step size.
lr = 1e-3
for _ in range(100):
    loss = mse(model(inputs), targets)
    loss.backward()
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in (a, b):
            param.sub_(param.grad * lr)
            # Clear the gradient so the next backward() starts from zero.
            param.grad.zero_()

print('Optimization Done')
print(f'a = {a} b = {b}')

# Predictions of the fitted parameters on the training inputs.
preds = model(inputs)
print(f'Predictions {preds}')

# Loss after training.
loss = mse(preds, targets)
print(f'Loss = {loss.item()}')

Optimization Done
a = tensor([[2.1856]], requires_grad=True) b = tensor([-0.7955], requires_grad=True)
Predictions tensor([[ 1.3901],
        [ 3.5757],
        [ 5.7614],
        [ 7.9470],
        [10.1326],
        [12.3182],
        [14.5039],
        [16.6895],
        [18.8751],
        [21.0607]], grad_fn=<AddBackward0>)
Loss = 0.3596566319465637


## Run linear regression with multidimensional data

# Wine Quality dataset

We will now try to solve a real problem instead of using dummy data. We will use the linear regression model to
predict the wine quality based on different metrics (pH, acidity, etc.).
You can download the dataset from the following link:
[Dataset link](https://archive.ics.uci.edu/ml/datasets/Wine+Quality).
Let's read our dataset now and explore what type of data it contains.


In [20]:
import csv

def read_wine_data(path='./data/winequality-red.csv'):
    """Load a semicolon-delimited CSV file into a tensor.

    The first row is taken as the header; every following non-empty row is
    parsed as a list of floats.

    Args:
        path: Location of the CSV file. Defaults to the red-wine file that
            was previously hard-coded, so existing no-argument calls behave
            as before.

    Returns:
        A ``(data_tensor, categories)`` tuple. ``data_tensor`` is built with
        ``torch.tensor`` from the parsed rows (one row per data line), and
        ``categories`` is the list of header strings (empty if the file has
        no rows at all).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a cell cannot be converted to ``float``.
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation recommends for file objects.
    with open(path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        categories = next(csv_reader, [])
        # csv.reader yields an empty list for a blank line; skipping those
        # keeps the rows rectangular so they can be stacked into one tensor.
        wine_data = [[float(cell) for cell in row] for row in csv_reader if row]
    data_tensor = torch.tensor(wine_data)
    return data_tensor, categories


# Load the data first so the weight shape can follow the number of feature
# columns instead of a hard-coded 11.
data_tensor, categories = read_wine_data()
inputs = data_tensor[:, :-1]                 # every column except the last
targets = data_tensor[:, -1].unsqueeze(-1)   # last column, as an (N, 1) column

# One weight per input feature plus a single bias, both tracked by autograd.
# The weight is drawn before the bias, as before.
a = torch.randn(1, inputs.shape[1], requires_grad=True)
b = torch.randn(1, requires_grad=True)

print(inputs.shape)
print(targets.shape)



# Linear model over the notebook-level parameters `a` and `b`
# (duplicates the definition from the earlier cell).
def model(x):
    """Return ``x @ a.T + b`` for the current values of ``a`` and ``b``."""
    weights_t = a.t()
    return torch.matmul(x, weights_t) + b

# Mean squared error (duplicates the definition from the earlier cell).
def mse(y, y_hat):
    """Return the mean of the element-wise squared differences of ``y`` and ``y_hat``."""
    squared_error = (y - y_hat) ** 2
    return squared_error.mean()

# Forward pass on the wine features with the untrained parameters.
preds = model(inputs)
print(preds)

# Mean squared error against the targets.
loss = mse(y=preds, y_hat=targets)
print(loss)

# Backpropagate so that a.grad and b.grad are populated.
loss.backward()

# Show the weights, then the bias, each followed by its gradient.
for param in (a, b):
    print(param)
    print(param.grad)

# Zero both gradients in place, then print them to confirm the reset.
for param in (a, b):
    param.grad.zero_()
for param in (a, b):
    print(param.grad)

<_csv.reader object at 0x7f1e72aac4a8>
torch.Size([1599, 11])
torch.Size([1599, 1])
tensor([[22.0140],
        [14.2246],
        [18.7663],
        ...,
        [12.8736],
        [10.5421],
        [16.6574]], grad_fn=<AddBackward0>)
tensor(321.4694, grad_fn=<MeanBackward0>)
tensor([[ 2.7565, -1.5055, -0.6610,  1.3232,  0.0371, -0.2849, -0.1334,  1.8929,
          3.1110, -0.4584, -0.3360]], requires_grad=True)
tensor([[ 281.7687,   16.2224,    9.7309,   82.5946,    2.7896,  371.4078,
         1046.6787,   31.4647,  103.5873,   20.8240,  329.4751]])
tensor([-1.5700], requires_grad=True)
tensor([31.5533])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([0.])


Let's do  100 optimization iterations

In [26]:
# Plain gradient descent on the wine data: 100 full-batch updates.
lr = 1e-5
for _ in range(100):
    loss = mse(model(inputs), targets)
    loss.backward()
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in (a, b):
            param.sub_(param.grad * lr)
            # Clear the gradient so the next backward() starts from zero.
            param.grad.zero_()

# Loss computed in the final iteration (before that iteration's update).
print(f'Loss {loss.item():.2f}')

Loss 62.95


## Define Linear Regression model using PyTorch built in Functions

Now, we are going to reimplement the same model using PyTorch built-in libraries.
To create a linear model we will use the `Linear()` class from the `torch.nn` package.
To calculate the MSE loss we will use `nn.MSELoss()`, and we will use `torch.optim.SGD` to
create a stochastic gradient descent optimizer for our model.
These are the main steps to do
- Read Data
- Create Dataloader
- Create Model, Optimizer and Loss Functions
- Train the model
- Test the model

In [18]:
# Toy data on the line y = 2x + 0.5 for x = 0..9, as (10, 1) float columns.
inputs = torch.arange(0, 10).float().unsqueeze(-1)
# Reuse `inputs` rather than rebuilding the same range; the scalar offset
# broadcasts over every row.
targets = 2. * inputs + 0.5

# One input feature mapped to one output: a weight and a bias.
lr_model = nn.Linear(in_features=1, out_features=1)
optimizer = torch.optim.SGD(lr_model.parameters(), lr=0.001)
# reduction='mean' replaces the deprecated size_average=True argument.
criterion = nn.MSELoss(reduction='mean')

for i in range(100):
    preds = lr_model(inputs)
    loss = criterion(preds, targets)
    loss.backward()
    optimizer.step()
    # Clear the gradients so the next backward() does not add to them.
    optimizer.zero_grad()
print(f'Loss {loss.item()}')
print(lr_model.weight.data)
print(lr_model.bias.data)

torch.Size([10, 1])
0.057257700711488724
tensor([[1.9241]])
tensor([0.9393])


Let's fit our wine dataset again, this time with the PyTorch built-in functions.

In [None]:

data_tensor, categories = read_wine_data()
print(data_tensor.shape)
print(categories)
inputs = data_tensor[:, :-1]                 # every column except the last
targets = data_tensor[:, -1].unsqueeze(-1)   # last column, as an (N, 1) column
print(targets)
print(inputs.shape)
print(targets.shape)

# Size the layer from the data instead of hard-coding the feature count.
lr_model = nn.Linear(in_features=inputs.shape[1], out_features=1)
optimizer = torch.optim.SGD(lr_model.parameters(), lr=0.0001)
# reduction='mean' replaces the deprecated size_average=True argument.
criterion = nn.MSELoss(reduction='mean')


Now, we'll create an iterable dataset in order to train our model. We'll use `TensorDataset` and `DataLoader`
from PyTorch. `TensorDataset` takes the input and target tensors as arguments and wraps them together. Then `DataLoader`
combines a dataset and a sampler, provides an iterable over the given dataset and generates batches.


In [None]:
from torch.utils.data import TensorDataset, DataLoader
# Pair each input row with its target so the two are always indexed together.
train_ds = TensorDataset(inputs, targets)
# Iterate over the pairs in shuffled mini-batches of `batch_size` samples.
batch_size = 100
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

Now we are ready to train our model.


In [None]:
# Mini-batch training: one optimizer step per batch, and one summary line
# per pass over the data loader.
epochs = 10
for i in range(epochs):
    # Running sum of the batch losses; it is divided by the number of
    # batches only when the epoch summary is printed.
    average_loss = 0.0
    for x, y in train_dl:
        loss = criterion(lr_model(x), y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        average_loss += loss.item()
    print(f'Epoch {i} Average_loss {average_loss/len(train_dl):.2f}')