# Find Me

Solve NN generalization problem using PyTorch and grid search.

There are random function of 8 inputs and X random inputs added.
We split data in two parts for training and testing

# Setup

Copy auxiliary files from GitHub 

In [1]:
!rm gridsearch.py solutionmanager.py speedtest.py
!wget https://raw.githubusercontent.com/VVKot/mlinseconds-find-me/master/mlis/utils/gridsearch.py -q
!wget https://raw.githubusercontent.com/VVKot/mlinseconds-find-me/master/mlis/utils/solutionmanager.py -q
!wget https://raw.githubusercontent.com/VVKot/mlinseconds-find-me/master/mlis/utils/speedtest.py -q

rm: cannot remove 'gridsearch.py': No such file or directory
rm: cannot remove 'solutionmanager.py': No such file or directory
rm: cannot remove 'speedtest.py': No such file or directory


Import libraries and utils

In [2]:
!pip3 install tensorboard tensorboardX

Collecting tensorboardX
[?25l  Downloading https://files.pythonhosted.org/packages/5c/76/89dd44458eb976347e5a6e75eb79fecf8facd46c1ce259bad54e0044ea35/tensorboardX-1.6-py2.py3-none-any.whl (129kB)
[K    100% |████████████████████████████████| 133kB 8.1MB/s 
Installing collected packages: tensorboardX
Successfully installed tensorboardX-1.6


In [0]:
import time
import random
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import solutionmanager as sm
from gridsearch import GridSearch

Check whether CUDA is available

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

Create neural network

In [0]:
class SolutionModel(nn.Module):
    def __init__(self, input_size, output_size, solution):
        super(SolutionModel, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.solution = solution
        self.learning_rate = solution.learning_rate
        self.momentum = solution.momentum
        self.hidden_size = solution.hidden_size
        self.activation_hidden = solution.activation_hidden
        self.activation_output = solution.activation_output
        self.do_batch_norm = solution.do_batch_norm
        self.layers_number = solution.layers_number
        if self.solution.grid_search.enabled:
            torch.manual_seed(solution.random)
        self.hidden_size = self.solution.hidden_size
        self.linears = nn.ModuleList([nn.Linear(self.input_size if i == 0 else self.hidden_size, self.hidden_size if i != self.solution.layers_number -1 else self.output_size) for i in range(self.solution.layers_number)]).to(device)
        self.batch_norms = nn.ModuleList([nn.BatchNorm1d(self.hidden_size if i != self.solution.layers_number-1 else self.output_size, track_running_stats=False) for i in range(self.solution.layers_number)]).to(device)

    def forward(self, x):
        for i in range(len(self.linears)):
            x = self.linears[i](x)
            if self.solution.do_batch_norm:
                x = self.batch_norms[i](x)
            act_function = self.solution.activation_output if i == len(self.linears)-1 else self.solution.activation_hidden
            x = self.solution.activations[act_function](x)
        return x

    def calc_loss(self, output, target):
        bce_loss = nn.BCELoss()
        loss = bce_loss(output, target)
        return loss

    def calc_predict(self, output):
        predict = output.round()
        return predict

Create class to store hyper parameters. Implement grid search

In [0]:
class Solution():
    def __init__(self):
        self.best_step = 1000
        self.activations = {
            'sigmoid': nn.Sigmoid(),
            'relu': nn.ReLU(),
            'rrelu0103': nn.RReLU(0.1, 0.3),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'leakyrelu01': nn.LeakyReLU(0.1)
        }
        self.learning_rate = 0.8
        self.momentum = 0.9
        self.hidden_size = 45
        self.layers_number = 5
        self.activation_hidden = 'relu'
        self.activation_output = 'sigmoid'
        self.do_batch_norm = True
        self.sols = {}
        self.solsSum = {}
        self.random = 0
        self.random_grid = [_ for _ in range(10)]
        self.layers_number_grid = [5, 6, 7, 8]
        self.hidden_size_grid = [20, 25, 28, 30, 32, 35, 38, 40, 45]
#         self.momentum_grid = [0.0, 0.3, 0.5, 0.8, 0.9]
        self.learning_rate_grid = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.5]
        self.activation_hidden_grid = list(self.activations.keys())
#         self.activation_output_grid = list(self.activations.keys())
        self.grid_search = GridSearch(self)
        self.grid_search.set_enabled(False)

    def create_model(self, input_size, output_size):
        return SolutionModel(input_size, output_size, self)

    def get_key(self):
        return "{}_{}_{}_{}_{}_{}_{}".format(self.learning_rate, self.momentum, self.hidden_size, self.activation_hidden, self.activation_output, self.do_batch_norm, "{0:03d}".format(self.layers_number));

    # Return number of steps used
    def train_model(self, model, train_data, train_target, context):
        key = self.get_key()
        if key in self.sols and self.sols[key] == -1:
            return
        step = 0
        model.to(device)
        # Put model in train mode
        model.train()
        optimizer = optim.SGD(model.parameters(), lr=self.learning_rate, momentum=self.momentum)
        while True:
            time_left = context.get_timer().get_time_left()
            # No more time left, stop training
            if time_left < 0.1:
                break
            data = train_data
            target = train_target
            # model.parameters()...gradient set to zero
            optimizer.zero_grad()
            # evaluate model => model.forward(data)
            output = model(data)
            # if x < 0.5 predict 0 else predict 1
            predict = model.calc_predict(output)
            # Number of correct predictions
            correct = predict.eq(target.view_as(predict)).long().sum().item()
            # Total number of needed predictions
            total = predict.view(-1).size(0)
#             if correct == total or (self.grid_search.enabled and step > 1000):
#                 if not key in self.sols:
#                     loss = model.calc_loss(output, target)
#                     self.sols[key] = 0
#                     self.solsSum[key] = 0
#                     self.sols[key] += 1
#                     self.solsSum[key] += step
#                 if correct == total:
#                     self.print_stats(step, loss, correct, total, model)
#                     print('{:.4f}'.format(float(self.solsSum[key])/self.sols[key]))
#                 break
            # calculate loss
            loss = model.calc_loss(output, target)
            # calculate deriviative of model.forward() and put it in model.parameters()...gradient
            loss.backward()
            # print progress of the learning
            # update model: model.parameters() -= lr * gradient
            optimizer.step()
            step += 1
        return step
    
    def print_stats(self, step, loss, correct, total, model):
        print("LR={}, Momentum={}, HS={}, Number of layers={}, ActivOut={}, Step = {} Prediction = {}/{} Error = {}".format(model.solution.learning_rate, model.solution.momentum,
                                                                                                              model.hidden_size, model.layers_number, model.activation_hidden, step, correct, total, loss.item()))


Evaluate the model

In [43]:
class Limits:
    def __init__(self):
        self.time_limit = 2.0
        self.size_limit = 1000000
        self.test_limit = 1.0

class DataProvider:
    def __init__(self):
        self.number_of_cases = 10

    def create_data(self, data_size, input_size, random_input_size, seed):
        torch.manual_seed(seed)
        function_size = 1 << input_size
        function_input = torch.ByteTensor(function_size, input_size)
        for i in range(function_input.size(0)):
            fun_ind = i
            for j in range(function_input.size(1)):
                input_bit = fun_ind&1
                fun_ind = fun_ind >> 1
                function_input[i][j] = input_bit
        function_output = torch.ByteTensor(function_size).random_(0, 2)

        if data_size % function_size != 0:
            raise "Data gen error"

        data_input = torch.ByteTensor(data_size, input_size).view(-1, function_size, input_size).to(device)
        target = torch.ByteTensor(data_size).view(-1, function_size).to(device)
        for i in range(data_input.size(0)):
            data_input[i] = function_input
            target[i] = function_output
        data_input = data_input.view(data_size, input_size)
        target = target.view(data_size)
        if random_input_size > 0:
            data_random = torch.ByteTensor(data_size, random_input_size).random_(0, 2).to(device)
            
            data = torch.cat([data_input, data_random], dim=1)
        else:
            data = data_input
        perm = torch.randperm(data.size(1))
        data = data[:,perm]
        perm = torch.randperm(data.size(0))
        data = data[perm]
        target = target[perm]
        return (data.float(), target.view(-1, 1).float())

    def create_case_data(self, case):
        data_size = 256*32
        input_size = 8
        random_input_size = min(32, (case-1)*4)

        data, target = self.create_data(2*data_size, input_size, random_input_size, case)
        return sm.CaseData(case, Limits(), (data[:data_size], target[:data_size]), (data[data_size:], target[data_size:])).set_description("{} inputs and {} random inputs".format(input_size, random_input_size))

class Config:
    def __init__(self):
        self.max_samples = 10000

    def get_data_provider(self):
        return DataProvider()

    def get_solution(self):
        return Solution()

# If you want to run specific case, put number here
sm.SolutionManager(Config()).run(case_number=-1)


Local CPU time mult = 0.65
Case #1[8 inputs and 0 random inputs] Step=76 Size=7023/1000000 Time=1.9/2.0
Train correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.0007047344697639346
Test  correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.0007638151291757822
[92m[OK][0m
Case #2[8 inputs and 4 random inputs] Step=82 Size=7203/1000000 Time=1.9/2.0
Train correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.000707161845639348
Test  correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.000793427461758256
[92m[OK][0m
Case #3[8 inputs and 8 random inputs] Step=80 Size=7383/1000000 Time=1.9/2.0
Train correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.0006573689170181751
Test  correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.0006920306477695704
[92m[OK][0m
Case #4[8 inputs and 12 random inputs] Step=74 Size=7563/1000000 Time=1.9/2.0
Train correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.0008905095746740699
Test  correct/total=8192/8192 Ratio/limit=1.00/1.00 Loss=0.000941755308

Best hyper parameters:

        self.learning_rate = 0.8
        self.momentum = 0.9
        self.hidden_size = 45
        self.layers_number = 5
        self.activation_hidden = 'relu'
        self.activation_output = 'sigmoid'