# Project 2
Martin Esguerra, Leo Bouraux, Franck Dessimoz

In [1]:
import torch, time
import sys
import math
torch.set_grad_enabled( False );


## Objectives
Your framework must provide the necessary tools to:
- build networks combining fully connected layers, Tanh, and ReLU,
- run the forward and backward passes,
- optimize parameters with SGD for MSE.


### Generate data


In [173]:
def generate_disc_set(nb):
    input = torch.Tensor(nb, 2).uniform_(-1, 1)
    target = input.pow(2).sum(1).sub(2 / math.pi).sign().add(1).div(2).float()
    return input, target

train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

mini_batch_size = 100

### Module Structure
Implement multiple modules, starting with linear

Look pytorch https://pytorch.org/docs/stable/_modules/torch/nn/modules/module.html#Module


#### Input has batch dim in entry

In [50]:
class Module ( object ) :
    def __init__(self):
        self.params = []
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def param ( self ) :
        return self.params
    
    def __call__(self, input):
        return self.forward(input)
    
    def zero_grad(self):
        return

In [164]:
class Linear ( Module ) :
    def __init__(self, in_features, out_features, epsilon=1e-6, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        stdv = 1. / math.sqrt(in_features)

        self.weight = torch.empty(in_features, out_features).uniform_(-stdv, stdv)
        self.grad_w = torch.empty(self.weight.size())
        self.params.append((self.weight, self.grad_w))
        if bias:
            self.bias = torch.empty(out_features).uniform_(-stdv, stdv)
            self.grad_b = torch.empty(out_features)
            self.params.append((self.bias, self.grad_b))
        else:
            self.bias = None
        self.curr_x = torch.empty(in_features)
        
    def forward ( self , input ) :
        self.curr_x = input
        output = input.matmul(self.weight)
        if self.bias is not None:
            output += self.bias
        return output
        
    def backward ( self , gradwrtoutput ) :
    # TODO NEEDs to be checked
    # need to put to zero after each iteration of samples
        self.grad_w.add_(self.curr_x.t().matmul(gradwrtoutput))
        if self.bias is not None:
            self.grad_b.add_(gradwrtoutput.sum(dim=0))
        return gradwrtoutput.matmul(self.weight.t())
    
    def zero_grad(self):
        self.grad_w.zero_()
        self.grad_b.zero_()
    
   

In [165]:
class Tanh ( Module ) :
    def __init__(self):
        super(Tanh, self).__init__()
        self.x = None
    def forward ( self , input ) :
        self.x = input.clone()
        return input.tanh()
    
    def backward ( self , gradwrtoutput ) :
        return 4 * (self.x.exp() + self.x.mul(-1).exp()).pow(-2) * gradwrtoutput
    
    def param ( self ) :
        return []

In [166]:
class ReLU ( Module ) :
    def __init__(self):
        super(ReLU, self).__init__()
        self.x = None
    def forward ( self ,  input ) :
        
        if not isinstance(input, torch.Tensor):
            raise Exception("Wrong input type")
        out = input.clone()
        out[out < 0] = 0
        self.x = out.clone()
        return out
    
    def backward ( self , gradwrtoutput ) :
        if len(self.x) == 0:
            return None
        grad_in = gradwrtoutput.clone()
        grad_in[self.x < 0] = 0
        return grad_in
        

In [167]:
class LossMSE ( Module ) :
    def __init__(self):
        super(LossMSE, self).__init__()
        self.diff = None
        
    def forward ( self , input, target ) :
        if target.dim() ==1:
            target = target.view(-1,1)
        self.diff = input - target
        
        return (input - target).pow(2).sum()/ 2
    
    def backward ( self) :
        return self.diff
    
    def __call__(self, inp, t):
        return self.forward(inp, t)
    

In [168]:
class Sequential ( Module ) :
    def __init__(self, modules):
        super(Sequential, self).__init__()
        self.modules = modules
        for mod in modules:
            if mod.param():
                self.params += mod.param()
    def forward ( self , input ) :
        y = input
        for mod in self.modules:
            y = mod.forward(y)
        return y
    
    def backward ( self ,  gradwrtoutput ) :
        grad_in = gradwrtoutput.clone()
        for mod in self.modules[::-1]:
            grad_in = mod.backward(grad_in)
        return grad_in
    
    def zero_grad(self):
        for mod in self.modules:
            mod.zero_grad()

In [169]:
class SGD(object):
    def __init__(self, params, lr=0.001):
        self.params = params
        self.lr=lr
        
    def step(self):
        for p, grad in self.params:
            p -= self.lr * grad

### Train

In [187]:
a,b,c,d = Linear(2,10), ReLU(), Linear(10,1), Tanh()
model = Sequential([a,b,c,d])
#model = Linear(2,1)
train_model_2(model, train_input, train_target, it=25)

0 158.5514349937439
1 115.5336332321167
2 112.75693559646606
3 111.08500576019287
4 109.54303216934204
5 108.00308275222778
6 106.43621969223022
7 104.80847501754761
8 103.15799283981323
9 101.45624923706055
10 99.70861482620239
11 97.95834302902222
12 96.19540643692017
13 94.41789484024048
14 92.64863061904907
15 90.8865475654602
16 89.12988567352295
17 87.38075280189514
18 85.660227060318
19 83.98171401023865
20 82.33645701408386
21 80.72172236442566
22 79.15902304649353
23 77.6590473651886
24 76.22169995307922


In [177]:
def train_model(model, train_input, train_target, eta=1e-1, it=10):
    mse = LossMSE()
    optim = SGD(model.param())
    for e in range(it):
        model.zero_grad()
        sum_loss = 0
        for b in range(train_input.size(0)):            
            output = model(train_input[b].unsqueeze(0))
            loss = mse(output, train_target[b])
            grad_l = mse.backward()
            model.backward(grad_l)
            sum_loss = sum_loss + loss.item()
        
        optim.step()
        print(e, sum_loss)

In [186]:
# Use if model can handle minibatch
def train_model_2(model, train_input, train_target, eta=1e-3, mini_batch_size=50, it=20):
    mse = LossMSE()
    optim = SGD(model.param())
    for e in range(it):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = mse(output, train_target.narrow(0, b, mini_batch_size))
            
            model.zero_grad()
            grad_l = mse.backward()
            model.backward(grad_l)
            sum_loss = sum_loss + loss.item()
            optim.step()
        print(e, sum_loss)