” using only pytorch’s
tensor operations and the standard math library

Your framework should import only torch.empty



In [3]:
from torch import empty
import math
import torch

# Disable autograd globally: the modules in this file implement their own
# backward passes instead of relying on torch's automatic differentiation.
torch.set_grad_enabled(False)  # turns off autograd


<torch.autograd.grad_mode.set_grad_enabled at 0x7fecca2871d0>

Your framework must provide the necessary tools to:
- build networks combining fully connected layers, Tanh, and ReLU,
- run the forward and backward passes,
- optimize parameters with SGD for MSE.

You must implement a test executable named test.py that imports your framework and
- generates a training and a test set of 1,000 points sampled uniformly in $[0, 1]^2$, each with a label 0 if outside the disk centered at $(0.5, 0.5)$ of radius $1/\sqrt{2\pi}$, and 1 inside,
- builds a network with two input units, two output units, and three hidden layers of 25 units,
- trains it with MSE, logging the loss,
- computes and prints the final train and test errors.

In [40]:
#Structure is better if original

class Module(object):
    """Base class for every building block (layers, activations, losses).

    Subclasses override ``forward`` and ``backward``. Modules that carry
    state store it in a ``params`` attribute, which ``param`` exposes.
    """

    def forward(self, *input):
        """Compute the module output; must be overridden by subclasses."""
        raise NotImplementedError

    def backward(self, *gradwrtoutput):
        """Propagate the gradient through the module; must be overridden."""
        raise NotImplementedError

    def param(self):
        """Return ``self.params``, or ``None`` if the module never set it.

        Some subclasses (e.g. a loss) never assign ``params``; returning
        ``None`` instead of raising ``AttributeError`` lets callers filter
        such modules out with a plain ``is not None`` check.
        """
        return getattr(self, "params", None)

In [41]:
class Parameter(object):
    """Container bundling a module's trainable data with its bookkeeping.

    Attributes:
        data: the parameter value (``None`` for parameter-free modules).
        grad: gradient stored by the owning module's ``backward``.
        input: the last input seen by the owning module's ``forward``.
        update: whether an optimizer is allowed to modify ``data``.
    """

    def __init__(self, data, grad=None, x=None, update=True):
        super(Parameter, self).__init__()
        # Honour the caller's choice: the flag used to be hard-coded to True,
        # so ``update=False`` was silently ignored.
        self.update = update
        self.data = data
        self.grad = grad
        self.input = x

In [42]:
class lossMSE(Module):
    """Mean squared error loss."""

    def __init__(self):
        super(lossMSE, self).__init__()
        self.name = 'MSE_loss'

    def forward(self, input, target):
        """Return the mean of the squared differences between input and target."""
        return input.sub(target).pow(2).mean()

    def backward(self, input, target):
        """Return the gradient of the loss with respect to ``input``.

        ``target`` is reshaped to ``(-1, input.shape[1])`` before the
        subtraction, so ``input`` must be two-dimensional here.
        """
        diff = input.sub(target.view(-1, input.shape[1]))
        # forward() averages over every element, so the derivative of the
        # mean carries a 1/numel factor in addition to the factor 2.
        return diff.mul(2).div(input.numel())

In [43]:
class ReLU(Module):
    """Rectified linear unit activation: negative entries are clamped to zero."""

    def __init__(self):
        super().__init__()
        self.name = 'ReLU'
        # No trainable data; the Parameter only caches the input and gradient.
        self.params = Parameter(None, update=False)

    def forward(self, input):
        """Cache ``input`` for the backward pass and return it clamped at 0."""
        state = self.params
        state.input = input
        return input.clamp(min=0)

    def backward(self, gradwrtoutput):
        """Scale the incoming gradient by the local slope and return it."""
        cached = self.params.input
        # sign() is -1/0/+1, so (sign + 1) / 2 is 0 for negative inputs,
        # 1 for positive inputs and 0.5 exactly at zero.
        slope = cached.sign().add(1).div(2)
        grad = slope * gradwrtoutput
        self.params.grad = grad
        return grad

In [44]:
class Tanh(Module):
    """Hyperbolic tangent activation."""

    def __init__(self, x=None):
        # ``x`` is accepted for signature compatibility but is not used.
        super(Tanh, self).__init__()
        self.name = 'Tanh'
        # No trainable data; the Parameter only caches the input and gradient.
        self.params = Parameter(None, update=False)

    def forward(self, input):
        """Cache ``input`` for the backward pass and return ``tanh(input)``."""
        self.params.input = input
        return input.tanh()

    def backward(self, gradwrtoutput):
        """Return ``(1 - tanh(input)^2) * gradwrtoutput`` and store it as grad."""
        # Read the cached input from ``self.params`` (the Parameter object),
        # not from ``self.param`` (the accessor method, which has no ``input``).
        local_slope = 1 - self.params.input.tanh().pow(2)
        self.params.grad = local_slope.mul(gradwrtoutput)
        return self.params.grad

In [68]:
class Linear(Module):
    """Fully connected layer computing ``x @ W`` (no bias term).

    Args:
        in_dim: number of input features (rows of ``W``).
        out_dim: number of output features (columns of ``W``).
        init: accepted for interface compatibility; currently unused, the
            weights always start at zero.
    """

    def __init__(self, in_dim, out_dim, init='zero'):
        super(Linear, self).__init__()
        self.params = Parameter(torch.zeros((in_dim, out_dim)))

    def forward(self, x):
        """Cache ``x`` for the backward pass and return ``x @ W``."""
        self.params.input = x
        return x @ self.params.data

    def backward(self, gradwrtoutput):
        """Store dL/dW in ``self.params.grad`` and return dL/dx.

        For ``y = x @ W`` the chain rule gives ``dL/dW = x^T @ dL/dy`` and
        ``dL/dx = dL/dy @ W^T``. The returned value is the gradient with
        respect to the layer input, i.e. what the previous module needs.
        """
        weight = self.params.data
        # Flatten to (batch, features) so that a single un-batched sample
        # (1-D tensors) is handled the same way as a mini-batch.
        x = self.params.input.reshape(-1, weight.shape[0])
        grad_out = gradwrtoutput.reshape(-1, weight.shape[1])
        self.params.grad = x.t() @ grad_out
        return gradwrtoutput @ weight.t()
    

In [69]:
class Sequential(Module):
    """Chain of modules applied one after the other.

    Args:
        modules: ordered list of modules; ``forward`` runs them first to
            last, ``backward`` runs them last to first.
    """

    def __init__(self, modules):
        super(Sequential, self).__init__()
        self.modules = modules
        # Collect whatever each module exposes through param(), skipping
        # modules that report None.
        collected = (mod.param() for mod in self.modules)
        self.params = [p for p in collected if p is not None]

    def forward(self, input):
        """Feed ``input`` through every module in order and return the result."""
        for mod in self.modules:
            input = mod.forward(input)
        return input

    def backward(self, gradwrtoutput):
        """Back-propagate through the modules in reverse order.

        Returns the value produced by the first module's ``backward`` so
        the container can be chained like any other module (it previously
        returned ``None`` implicitly).
        """
        for mod in reversed(self.modules):
            gradwrtoutput = mod.backward(gradwrtoutput)
        return gradwrtoutput
                


In [70]:
class SGD_opti(object):
    """Plain stochastic gradient descent: ``data <- data - lr * grad``.

    Args:
        model_parameters: iterable whose entries are either parameter
            objects (exposing ``data``, ``grad``, ``input`` and ``update``)
            or lists/tuples of such objects.
        learn_rate: step size applied to every gradient.
    """

    def __init__(self, model_parameters, learn_rate=1e-3):
        self.lr = learn_rate
        self.param_to_update = model_parameters

    def _parameters(self):
        """Yield every parameter object, flattening one level of grouping."""
        for entry in self.param_to_update:
            if isinstance(entry, (list, tuple)):
                # Grouped per module: walk the group (empty groups yield nothing).
                for p in entry:
                    yield p
            else:
                # A bare parameter object; calling len() on it would fail.
                yield entry

    def step(self):
        """Apply one descent step to every updatable parameter."""
        for p in self._parameters():
            # Skip frozen parameters and placeholders without data or a
            # gradient (e.g. activation modules, or before any backward pass).
            if not p.update or p.data is None or p.grad is None:
                continue
            p.data -= self.lr * p.grad

    def zero_grad(self):
        """Clear the cached input and gradient of every parameter."""
        for p in self._parameters():
            p.input = None
            p.grad = None


In [71]:
def generate_disc_set(nb):
    """Sample ``nb`` points uniformly in [0, 1]^2 and label them by a disk.

    A point is labelled 1 when it lies inside the disk centered at
    (0.5, 0.5) of radius 1/sqrt(2*pi), and 0 otherwise.

    Returns:
        data: tensor of shape ``(nb, 2)`` with the sampled points.
        target: tensor of shape ``(nb,)`` holding 0/1 labels, with the same
            dtype as ``data``.
    """
    data = empty(nb, 2).uniform_(0, 1)
    # Compare squared quantities to avoid a square root:
    # radius^2 = 1 / (2*pi).
    radius_sq = 1 / (2 * math.pi)
    dist_sq = (data - 0.5).pow(2).sum(1)
    target = (dist_sq < radius_sq).to(data.dtype)

    return data, target

In [72]:
def convert_to_one_hot_labels(target):
    """Expand a vector of 0/1 labels into a two-column one-hot matrix.

    Column 0 holds ``1 - target`` and column 1 holds ``target``; the
    result has one row per entry of ``target``.
    """
    n_samples = target.size(0)
    encoded = empty(n_samples, 2)
    encoded[:, 0] = 1 - target
    encoded[:, 1] = target
    return encoded

In [73]:
# Small two-module network: a 2 -> 10 linear layer followed by a ReLU.
model = Sequential([Linear(2,10), ReLU()])

In [74]:
# SGD optimizer over whatever the model exposes via param(); default learning rate.
optim = SGD_opti(model.param())

In [75]:
# Draw 1000 sample points together with their labels.
data,target = generate_disc_set(1000)

In [76]:
# Inspect the shape of the sampled points (notebook cell output).
data.size()

torch.Size([1000, 2])

In [78]:
# Run a forward pass of the model on the whole sample set at once.
output = model.forward(data)

torch.Size([1000])

- generates a training and a test set of 1,000 points sampled uniformly in $[0, 1]^2$, each with a label 0 if outside the disk centered at $(0.5, 0.5)$ of radius $1/\sqrt{2\pi}$, and 1 inside,


def generate_disk_set(nb):
    """Sample ``nb`` points uniformly in [0, 1]^2 with integer disk labels.

    A point is labelled 1 when it lies inside the disk centered at
    (0.5, 0.5) of radius 1/sqrt(2*pi), and 0 otherwise.

    Returns:
        points: float tensor of shape ``(nb, 2)``.
        target: ``long`` tensor of shape ``(nb,)`` holding 0/1 labels.
    """
    # Two coordinates per point; ``empty`` is the tensor factory this file
    # imports (the bare name ``Tensor`` is not defined here).
    points = empty(nb, 2).uniform_(0, 1)
    # Squared distance to the center against the squared radius 1 / (2*pi).
    inside = (points - 0.5).pow(2).sum(1) < 1 / (2 * math.pi)
    target = inside.long()
    return points, target

# Independent training and test sets of 1000 points each.
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

# Statistics come from the training inputs only and are computed before
# either set is modified in place.
mean = train_input.mean()
std = train_input.std()

# Standardise both sets in place with the training statistics.
train_input.sub_(mean)
train_input.div_(std)
test_input.sub_(mean)
test_input.div_(std)

mini_batch_size = 100

- builds a network with two input units, two output units, and three hidden layers of 25 units

`create_deep_model` returns an MLP with 2 input units, hidden layers of sizes 4, 8, 16, 32, 64, and 128 respectively, and 2 output units.





def create_shallow_model():
    """Return a torch MLP with 2 inputs, one hidden layer of 128 units and 2 outputs."""
    # Imported locally so the function does not depend on a module-level
    # ``nn`` name being defined.
    from torch import nn

    return nn.Sequential(
        nn.Linear(2, 128),
        nn.ReLU(),
        nn.Linear(128, 2),
    )

def create_deep_model():
    """Return a torch MLP with 2 inputs, six hidden layers and 2 outputs.

    The hidden layers have 4, 8, 16, 32, 64 and 128 units; each is followed
    by a ReLU. The final linear layer has no activation.
    """
    # Imported locally so the function does not depend on a module-level
    # ``nn`` name being defined.
    from torch import nn

    widths = [2, 4, 8, 16, 32, 64, 128]
    layers = []
    for n_in, n_out in zip(widths, widths[1:]):
        layers.append(nn.Linear(n_in, n_out))
        layers.append(nn.ReLU())
    layers.append(nn.Linear(widths[-1], 2))
    return nn.Sequential(*layers)

forward and backward ?

def forward_pass(w1, b1, w2, b2, x):
    """Run a two-layer forward pass on a single sample ``x``.

    Each layer applies ``w.mv(.) + b`` followed by ``sigma``.

    Returns:
        The tuple ``(x0, s1, x1, s2, x2)``: the input, then the
        pre-activation and activation of each of the two layers.
    """
    pre1 = w1.mv(x) + b1
    act1 = sigma(pre1)
    pre2 = w2.mv(act1) + b2
    act2 = sigma(pre2)
    return x, pre1, act1, pre2, act2

def backward_pass(w1, b1, w2, b2,
                  t,
                  x, s1, x1, s2, x2,
                  dl_dw1, dl_db1, dl_dw2, dl_db2):
    """Accumulate the gradients of a two-layer network for one sample.

    The gradients are added in place into ``dl_dw1``, ``dl_db1``,
    ``dl_dw2`` and ``dl_db2``; nothing is returned. ``t`` is the target,
    and ``x, s1, x1, s2, x2`` are the values returned by the forward pass.
    """
    # Output layer: loss derivative, then through the activation.
    grad_out = dloss(x2, t)
    delta2 = dsigma(s2) * grad_out
    # Hidden layer: back through w2, then through the activation.
    grad_hidden = w2.t().mv(delta2)
    delta1 = dsigma(s1) * grad_hidden

    # Weight gradient is the outer product delta x layer_input; the bias
    # gradient is delta itself. The second layer is accumulated first.
    for acc_w, acc_b, delta, layer_in in (
        (dl_dw2, dl_db2, delta2, x1),
        (dl_dw1, dl_db1, delta1, x),
    ):
        acc_w.add_(delta.view(-1, 1).mm(layer_in.view(1, -1)))
        acc_b.add_(delta)

    # then used in part 4 of practical 3