## Linear Regression

In this tutorial, we'll try the classic Linear Regression Algorithm

![Linear Regression](img/lr.jpg)

Linear Regression involves creating a best fit linear line such the distance between the y' points on line and real values is minimum (the summation of the distance is known as our loss)


Import Libraries

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
# from torch.optim import Neumann

In [2]:
import math
import torch
from torch.optim.optimizer import Optimizer
from torch.optim.sgd import SGD

class Neumann(Optimizer):
    """
    Documentation about the algorithm
    """

    def __init__(self, params , lr=1e-3,eps = 1e-8, alpha = 1e-7, beta = 1e-5, gamma = 0.9, momentum = 0.5, sgd_steps = 5, K = 10 ):
        
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.9 >= momentum:
            raise ValueError("Invalid momentum value: {}".format(eps))
        

        self.iter = 0
        self.sgd = SGD(params, lr=lr, momentum=0.9)

        num_variables = 2#calculate here
        defaults = dict(lr=lr, eps=eps, alpha=alpha,
                    beta=beta*num_variables, gamma=gamma,
                    sgd_steps=sgd_steps, momentum=momentum, K=K
                    )

        super(Neumann, self).__init__(params, defaults)


    def step(self, closure=None):
        """
        Performs a single optimization step.
        
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        import ipdb; ipdb.set_trace()
        self.iter += 1


        loss = None
        if closure is not None: #checkout what's the deal with this. present in multiple pytorch optimizers
            loss = closure()

        for group in self.param_groups:

            sgd_steps = group['sgd_steps']

            if self.iter <= sgd_steps:
                self.sgd.step()
                return

            momentum = group['momentum']
            
            
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['m'] = torch.zeros_like(p.data).float()
                    state['d'] = torch.zeros_like(p.data).float()
                    state['moving_avg'] = p.data

                state['step'] += 1

                alpha = group['alpha']
                beta = group['beta']
                gamma = group['gamma']
                K = group['K']
                momentum = group['momentum']
                mu = momentum*(1 - (1/(1+self.iter)))
                eta = group['lr']/self.iter ## update with time

                ## Reset neumann iterate 
                if self.iter%K == 1:
                    state['m'] = grad.mul(-eta)

                ## Compute update d_t
                diff = p.data.sub(state['moving_avg'])
                diff_norm = (p.data.sub(state['moving_avg'])).norm()
                state['d'] = grad.add( (( (diff_norm.pow(2)).mul(alpha) ).sub( (diff_norm.pow(-2)).mul(beta) )).mul( diff.div(diff_norm)) )

                ## Update Neumann iterate
                state['m'] = (state['m'].mul_(mu)).sub_( state['d'].mul(eta))

                ## Update Weights
                p.data.add_((state['m'].mul(mu)).sub( state['d'].mul(eta)))

                ## Update Moving Average
                state['moving_avg'] = p.data.add( (state['moving_avg'].sub(p.data)).mul(gamma) )



        
        return loss

Initializing Seed for consistent results everytime

In [3]:
np.random.seed(42)
pts = 50

Creating a Dataset of 50 points

In [4]:
x_vals = np.random.rand(50)
x_train = np.asarray(x_vals,dtype=np.float32).reshape(-1,1)
m = 1
alpha = np.random.rand(1)
beta = np.random.rand(1)
y_correct = np.asarray([2*i+m for i in x_vals], dtype=np.float32).reshape(-1,1)

### PyTorch Models

1. Create a Class
2. Declare your Forward Pass
3. Tune the HyperParameters

In [5]:
class LinearRegressionModel(nn.Module):

    def __init__(self, input_dim, output_dim):

        super(LinearRegressionModel, self).__init__() 
        # Calling Super Class's constructor
        self.linear = nn.Linear(input_dim, output_dim)
        # nn.linear is defined in nn.Module

    def forward(self, x):
        # Here the forward pass is simply a linear function

        out = self.linear(x)
        return out

input_dim = 1
output_dim = 1

### Steps
1. Create instance of model
2. Select Loss Criterion
3. Choose Hyper Parameters

In [6]:
model = LinearRegressionModel(input_dim,output_dim)# create our model just as we do in Scikit-Learn / C / C++//

criterion = nn.MSELoss()# Mean Squared Loss
l_rate = 0.01
optimiser = Neumann(list(model.parameters()), lr = l_rate) #Stochastic Gradient Descent

epochs = 10

### Train the Model

In [None]:
import ipdb; ipdb.set_trace()
for epoch in range(epochs):

    epoch +=1
    inputs = Variable(torch.from_numpy(x_train))
    labels = Variable(torch.from_numpy(y_correct))

    #clear grads
    optimiser.zero_grad()
    #forward to get predicted values
    outputs = model.forward(inputs)
    loss = criterion(outputs, labels)
    loss.backward()# back props
    optimiser.step()# update the parameters
    print('epoch {}, loss {}'.format(epoch,loss.data[0]))

--Return--
None
> [0;32m<ipython-input-7-5f3e0237ecb0>[0m(1)[0;36m<module>[0;34m()[0m
[0;32m----> 1 [0;31m[0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m      2 [0;31m[0;32mfor[0m [0mepoch[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mepochs[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m[0;34m[0m[0m
[0m
ipdb> n
> [0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py[0m(2913)[0;36mrun_code[0;34m()[0m
[0;32m   2912 [0;31m                [0;31m# Reset our crash handler in place[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 2913 [0;31m                [0msys[0m[0;34m.[0m[0mexcepthook[0m [0;34m=[0m [0mold_excepthook[0m[0;34m[0m[0m
[0m[0;32m   2914 [0;31m        [0;32mexcept[0m [0mSystemExit[0m [0;32mas[0m [0me[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> 
> [0;32m/home/faizaan09/anaconda3/lib/python3.

### Printing the Predictions

In [None]:
predicted = model.forward(Variable(torch.from_numpy(x_train))).data.numpy()

plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5)
plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5)
plt.legend()
plt.show()
print(model.state_dict())

### Example to Use GPU

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np

x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)

y_values = [2*i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)

'''
CREATE MODEL CLASS
'''
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)  
    
    def forward(self, x):
        out = self.linear(x)
        return out

'''
INSTANTIATE MODEL CLASS
'''
input_dim = 1
output_dim = 1

model = LinearRegressionModel(input_dim, output_dim)



model.cuda()

'''
INSTANTIATE LOSS CLASS
'''

criterion = nn.MSELoss()


learning_rate = 0.01

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


epochs = 100
for epoch in range(epochs):
    epoch += 1

    
    if torch.cuda.is_available():
        inputs = Variable(torch.from_numpy(x_train).cuda())

        
    if torch.cuda.is_available():
        labels = Variable(torch.from_numpy(y_train).cuda())
        

    optimizer.zero_grad() 
    

    outputs = model(inputs)

    loss = criterion(outputs, labels)
    

    loss.backward()
    

    optimizer.step()
    

    print('epoch {}, loss {}'.format(epoch, loss.data[0]))


Sources:
http://github.com/pytorch/examples

http://github.com/ritchieng/the-incredible-pytorch