In [1]:
import torch
import math
import matplotlib.pyplot as plt

# Switch autograd off globally: the Module subclasses defined in this notebook
# implement their own backward() passes instead of relying on torch.autograd.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7ff04071ed90>

In [2]:
class Module:
    """Common interface shared by every building block of the framework.

    Subclasses are expected to override ``forward`` and ``backward``; the
    versions defined here only signal that no implementation was provided.
    """

    def __init__(self):
        super().__init__()

    def forward(self, *input):
        """Compute the block's output. Not implemented on the base class.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def backward(self, *gradwrtoutput):
        """Propagate gradients through the block. Not implemented on the base class.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def param(self):
        """Return the block's parameters (described upstream as the layers of
        the network); the base class owns none, so a fresh empty list is returned.
        """
        return list()

In [None]:
class Linear(Module):
    """Fully connected layer computing ``x @ weight.T + bias``.

    Inputs are batches stored row-wise, i.e. ``x`` has shape
    ``(batch, in_features)`` and the output has shape ``(batch, out_features)``.
    The layer performs its own gradient-descent step inside ``backward``
    using the step size stored in ``self.lr``.

    Attributes:
        weight: tensor of shape ``(out_features, in_features)``.
        bias: tensor of shape ``(out_features,)``.
        lr: step size used by ``backward``; change it with ``set_Lr``.
        x: input of the most recent ``forward`` call (``None`` before that).
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Uniform initialisation in [-1/sqrt(in_features), 1/sqrt(in_features)].
        # The earlier expression 1 / sqrt(1 / in_features) evaluated to
        # sqrt(in_features), a bound that grows with the layer width.
        bound = 1 / math.sqrt(in_features)
        self.x = None
        # Default step size so that backward() works even when set_Lr() was
        # never called (1e-3 is the h_step value used by train_model).
        self.lr = 1e-3
        self.weight = torch.empty(out_features, in_features).uniform_(-bound, bound)
        self.bias = torch.empty(out_features).uniform_(-bound, bound)

    def forward(self, x):
        """Apply the affine map to a ``(batch, in_features)`` tensor.

        The input is cached because ``backward`` needs it to compute the
        weight gradient.
        """
        self.x = x
        return x.mm(self.weight.t()) + self.bias

    def set_Lr(self, lr):
        """Set the step size applied to the parameters in ``backward``."""
        self.lr = lr
        return

    def backward(self, grad):
        """Back-propagate ``grad`` and take one gradient-descent step.

        Args:
            grad: gradient of the loss w.r.t. this layer's output, shape
                ``(batch, out_features)``.

        Returns:
            Gradient of the loss w.r.t. the layer input, shape
            ``(batch, in_features)``.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if self.x is None:
            raise RuntimeError("Linear.backward() called before forward()")
        # Must be computed with the weights used in the forward pass, i.e.
        # before the update below.
        grad_input = grad.mm(self.weight)
        # dL/dW has the same (out_features, in_features) layout as the weight.
        self.weight = self.weight - self.lr * grad.t().mm(self.x)
        # The bias is shared by every sample, so its gradient is summed over
        # the batch dimension (keeps the bias shape (out_features,)).
        self.bias = self.bias - self.lr * grad.sum(0)
        return grad_input

    def param(self):
        """Return the trainable tensors of the layer as ``[weight, bias]``."""
        # Replaces the former weight()/bias() methods, which took no `self`
        # and were shadowed by the instance attributes of the same name.
        return [self.weight, self.bias]

In [3]:
class Sequential(Module):
    """Chain of modules applied one after the other, trained with ``LossMSE``.

    Args:
        param: iterable of modules, in the order in which they are applied.
    """

    def __init__(self, param):
        super().__init__()
        # Materialise the layers so they can be traversed several times and
        # in reverse order during the backward pass.
        self.model = list(param)
        self.loss = LossMSE()

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.model:
            x = layer.forward(x)
        return x

    def backward(self, x, y):
        """Back-propagate the loss gradient through the layers.

        Args:
            x: output produced by ``forward`` for the current batch.
            y: target tensor with the same shape as ``x``.

        Returns:
            Gradient of the loss with respect to the network input.
        """
        # Start from dLoss/dOutput and walk the layers from last to first,
        # each layer turning the gradient w.r.t. its output into the gradient
        # w.r.t. its input.
        grad = self.loss.backward(x, y)
        for layer in reversed(self.model):
            grad = layer.backward(grad)
        return grad

    def param(self):
        """Return the layers of the network, in application order."""
        return list(self.model)

In [12]:
class Tanh(Module):
    """Element-wise hyperbolic tangent activation."""

    def __init__(self):
        super().__init__()
        # Output of the most recent forward() call, reused by backward().
        self.output = None

    def forward(self, x):
        """Return ``tanh(x)`` element-wise and cache it for ``backward``.

        ``Tensor.tanh`` is used instead of the explicit
        ``(e^x - e^-x) / (e^x + e^-x)`` quotient, which evaluates to
        ``inf / inf = nan`` once ``exp`` overflows for large ``|x|``.
        """
        self.output = x.tanh()
        return self.output

    def backward(self, x):
        """Back-propagate through the activation.

        Args:
            x: gradient of the loss w.r.t. this module's output (the value
                handed over by the following layer, as in ``Module.backward``).

        Returns:
            ``x * (1 - tanh(input)^2)``, the gradient w.r.t. the module input.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if self.output is None:
            raise RuntimeError("Tanh.backward() called before forward()")
        # Chain rule: d tanh(u)/du = 1 - tanh(u)^2, evaluated at the forward input.
        return x * (1 - self.output.pow(2))

In [5]:
class ReLU(Module):
    """Element-wise rectified linear unit, ``max(0, x)``."""

    def __init__(self):
        super().__init__()
        # Input of the most recent forward() call, reused by backward().
        self.x = None

    def forward(self, x):
        """Return ``max(0, x)`` element-wise and cache the input.

        The builtin ``max(0, x)`` only works for single-element tensors (it
        needs the truth value of ``x > 0``); ``clamp`` handles any shape.
        Plain Python numbers are still accepted and converted to a tensor.
        """
        x = torch.as_tensor(x)
        self.x = x
        return x.clamp(min=0)

    def backward(self, x):
        """Back-propagate through the activation.

        Args:
            x: gradient of the loss w.r.t. this module's output.

        Returns:
            The gradient w.r.t. the module input: ``x`` where the forward
            input was strictly positive, zero elsewhere.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if self.x is None:
            raise RuntimeError("ReLU.backward() called before forward()")
        grad = torch.as_tensor(x)
        # Derivative of ReLU: 1 for positive inputs, 0 for inputs <= 0.
        return grad * (self.x > 0).to(grad.dtype)

In [11]:
class LossMSE(Module):
    """Squared-error loss summed over every element (no averaging is applied)."""

    def __init__(self):
        super().__init__()

    def forward(self, data_input, data_target):
        """Return the sum of the squared differences between input and target."""
        residual = data_input - data_target
        squared = residual.pow(2)
        return squared.sum()

    def backward(self, data_input, data_target):
        """Return the derivative of the summed squared error w.r.t. ``data_input``."""
        residual = data_input - data_target
        return 2 * residual

In [6]:
def create_random_batch(input_size, mini_batch_size):
    """Split a random permutation of the sample indices into mini-batches.

    Args:
        input_size: number of samples in the data set.
        mini_batch_size: number of samples per mini-batch.

    Returns:
        An integer (``torch.long``) tensor of shape
        ``(input_size // mini_batch_size, mini_batch_size)``; row ``k`` holds
        the sample indices of the k-th mini-batch. When ``mini_batch_size``
        does not divide ``input_size`` the leftover indices are dropped.
    """
    nb_batches = input_size // mini_batch_size
    shuffled = torch.randperm(input_size)
    # Keep the integer dtype returned by randperm: the rows are used to index
    # tensors, which a float tensor (e.g. one built with torch.ones) cannot do.
    return shuffled[: nb_batches * mini_batch_size].reshape(nb_batches, mini_batch_size)

In [7]:
def train_model(model, train_input, train_classes, nb_epochs, mini_batch_size, h_step=1e-3):
    """Train ``model`` with mini-batch gradient descent on a squared-error loss.

    Args:
        model: network exposing ``forward(x)`` and ``backward(output, target)``.
            If it has a ``model`` attribute (its list of layers), every layer
            providing ``set_Lr`` receives ``h_step`` before training starts.
        train_input: tensor holding one training sample per row.
        train_classes: targets; either a tensor with the same shape as the
            network output, or a 1-D tensor of integer class labels, which is
            converted to a one-hot encoding.
        nb_epochs: number of passes over the training set.
        mini_batch_size: number of samples per mini-batch.
        h_step: gradient-descent step size.

    Returns:
        A list with the accumulated loss of each epoch.
    """
    criterion = LossMSE()

    # The parameter update happens inside the layers' backward(); they only
    # need to know which step size to use.
    for layer in getattr(model, "model", []):
        if hasattr(layer, "set_Lr"):
            layer.set_Lr(h_step)

    epoch_losses = []
    for epoch in range(nb_epochs):
        # One row of sample indices per mini-batch; cast to long so the rows
        # can be used for indexing.
        random_batch = create_random_batch(train_input.size(0), mini_batch_size).long()
        acc_loss = 0.0
        for batch_indices in random_batch:
            output = model.forward(train_input[batch_indices])
            target = train_classes[batch_indices]

            if target.dim() == 1 and output.dim() == 2 and not target.is_floating_point():
                # Integer class labels -> one-hot rows matching the output shape.
                one_hot = torch.zeros_like(output)
                one_hot.scatter_(1, target.long().view(-1, 1), 1.0)
                target = one_hot
            else:
                target = target.to(output.dtype)

            acc_loss = acc_loss + criterion.forward(output, target).item()
            model.backward(output, target)
        epoch_losses.append(acc_loss)

    return epoch_losses

In [8]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
    """Count the samples whose predicted class differs from the target class.

    Args:
        model: object whose ``forward`` returns a ``(batch, nb_classes)``
            score tensor, or a tuple/list whose last element is that tensor.
        data_input: tensor holding one sample per row.
        data_target: 1-D tensor with the expected class index of each sample.
        mini_batch_size: number of samples evaluated per forward pass; the
            last batch may be smaller.

    Returns:
        The number of misclassified samples, as a Python ``int``.
    """
    nb_data_errors = 0
    nb_samples = data_input.size(0)

    for b in range(0, nb_samples, mini_batch_size):
        # Shrink the final batch when mini_batch_size does not divide nb_samples.
        batch_len = min(mini_batch_size, nb_samples - b)
        result = model.forward(data_input.narrow(0, b, batch_len))
        if isinstance(result, (tuple, list)):
            # Models with auxiliary outputs: the class scores come last.
            result = result[-1]
        # The predicted class is the index of the highest score in each row.
        _, predicted_classes = torch.max(result, 1)
        expected_classes = data_target.narrow(0, b, batch_len)
        nb_data_errors += int((predicted_classes != expected_classes).sum().item())

    return nb_data_errors

In [9]:
def create_problem(nb_samples):
    """Draw a train set and a test set of 2-D points with disc-membership labels.

    Both sets contain ``nb_samples`` points sampled with ``uniform_(0, 1)``.
    A point is labelled 1 when it lies strictly inside the disc of radius
    ``1 / sqrt(2 * pi)`` centred at ``(0.5, 0.5)`` and 0 otherwise.

    Returns:
        ``(train_input, train_classes, test_input, test_classes)`` where the
        inputs have shape ``(nb_samples, 2)`` and the classes are ``long``
        tensors of shape ``(nb_samples,)``.
    """
    # Radius of the disc.
    radius = 1 / math.sqrt(2 * math.pi)
    radius_sq = radius ** 2

    def label(points):
        # Squared distance to the centre of the unit square.
        dist_sq = points.sub(0.5).pow(2).sum(1)
        # sign() is -1 inside the disc, +1 outside and 0 exactly on the border;
        # (sign - 1) / -2 maps these to 1, 0 and 0.5, and long() truncates 0.5 to 0.
        side = dist_sq.sub(radius_sq).sign()
        return side.sub(1).div(-2).long()

    # Note: uniform_ samples from [0, 1) rather than [0, 1]; this is harmless
    # here because the disc does not touch the border of the unit square.
    train_input = torch.empty(nb_samples, 2).uniform_(0, 1)
    test_input = torch.empty(nb_samples, 2).uniform_(0, 1)

    return train_input, label(train_input), test_input, label(test_input)

In [10]:
def get_tests(n, nb_samples=1000):
    """Generate ``n`` independent test sets.

    Args:
        n: number of test sets to create.
        nb_samples: number of points per test set.

    Returns:
        A list of ``n`` entries, each of the form
        ``[test_input, test_target, test_classes]`` where ``test_classes``
        holds the class index of each point and ``test_target`` is the
        one-hot encoding of those indices, shape ``(nb_samples, 2)``.
    """
    tests = []
    for _ in range(n):
        # create_problem returns (train_input, train_classes, test_input,
        # test_classes); only the test half is needed here.
        _, _, test_input, test_classes = create_problem(nb_samples)
        # create_problem provides no separate target tensor, so build the
        # two-class one-hot target from the class indices.
        test_target = torch.zeros(test_classes.size(0), 2)
        test_target.scatter_(1, test_classes.view(-1, 1), 1.0)
        tests.append([test_input, test_target, test_classes])
    return tests

In [None]:
# Two-layer perceptron: 2 inputs -> 128 hidden units (ReLU) -> 2 outputs (Tanh).
model = Sequential([Linear(2, 128), ReLU(), Linear(128, 2), Tanh()])
nb_epochs = 25
mini_batch_size = 100

# train_model needs the training tensors in addition to the model, so the
# data set has to be generated before the call.
train_input, train_classes, test_input, test_classes = create_problem(1000)

train_model(model, train_input, train_classes, nb_epochs, mini_batch_size)