# Поиск структуры методом случайного поиска

Модель - MLP с не более чем 4 скрытыми слоями (всего слоев не более 5, если считать softmax).

Для каждого скрытого слоя существуют следующие варианты подмоделей:
 * тождественное отображение 
 * 16 нейронов
 * 32 нейрона
 * 64 нейрона
 * 256 нейронов
 * 512 нейронов
 * 1024 нейрона
 
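Итого пространство поиска составляет 6^4 + 6^3 + 6^2 + 6 + 1 = 1555 различных моделей (последнее слагаемое — модель без скрытых слоёв, когда для всех четырёх слоёв выбрано тождественное отображение).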

## Импорт библиотек, объявление функций и констант

In [None]:
import torch as t
import torchvision
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pylab as plt
from torch.nn.utils import clip_grad_value_
%matplotlib inline
import pickle


In [None]:
input_dim = 32*32*1 # flattened size of one 32x32 single-channel (grayscale) CIFAR image
class_num = 10 # number of output classes
n_epochs =  30 # training epochs per candidate network
batch_size = 256
random_seed = 42
valid_size = 0.1 # fraction of the training set held out for validation
trials = 10 # number of repetitions of the experiment
series_num = 5 # number of attempts to pick an architecture within one random search
search_space = [1, 16, 32, 64, 256, 512, 1024]  # '1' encodes the identity mapping (the layer is skipped)

In [None]:
# Preprocessing shared by the train and test sets: convert to a tensor,
# normalise every channel with mean 0.5 / std 0.5, then average the colour
# channels (grayscale) and flatten the image into a vector.
_preprocess = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    torchvision.transforms.Lambda(lambda x: x.mean(0).view(-1)),
])

train_data = torchvision.datasets.CIFAR10(
    './files/', train=True, download=True, transform=_preprocess)
test_data = torchvision.datasets.CIFAR10(
    './files/', train=False, download=True, transform=_preprocess)

# The first `valid_size` fraction of the training indices is held out for
# validation; the remaining indices are used for training.
num_train = len(train_data)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = t.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=train_sampler,
    num_workers=0,
    pin_memory=True,
)
test_loader = t.utils.data.DataLoader(test_data, batch_size=batch_size)
valid_loader = t.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=valid_sampler,
)


In [None]:
def build_model(layers):
    """Assemble an MLP from a list of hidden-layer widths.

    Every entry of ``layers`` other than 1 adds a ``Linear`` layer of that
    width followed by ``Tanh``; an entry equal to 1 is skipped (identity).
    A final ``Linear`` layer maps to ``class_num`` outputs.

    Args:
        layers: iterable of hidden-layer widths.

    Returns:
        ``nn.Sequential`` taking inputs of size ``input_dim``.
    """
    modules = []
    width = input_dim
    for size in layers:
        if size != 1:
            modules += [nn.Linear(width, size), nn.Tanh()]
            width = size
    modules.append(nn.Linear(width, class_num))
    return nn.Sequential(*modules)

In [None]:
def test_acc(model, loader, func = lambda x:x):
    tp = 0
    cases = 0
    for x,y in loader: 
            x = func(x)
            x = x.cuda()
            y = y.cuda()
            out = model(x).argmax(1)
            tp+=(out==y).sum()
            cases+=len(y)
    return  tp.cpu().numpy()*1.0/cases

## Случайный поиск

In [None]:
# models[trial][series] holds the best network found in that trial after
# `series + 1` sampled candidates (best = highest validation accuracy).
models = []
t.manual_seed(random_seed)
rs = np.random.RandomState(random_seed)

# The search space is defined over at most 4 hidden layers; build_model adds
# the output layer itself, so only 4 layer slots are sampled per candidate.
max_hidden_layers = 4

for trial in range(trials):
    best = 0
    best_net = None
    models.append([])
    for series in range(series_num):
        # Each slot is a width from the search space; 1 means "skip the layer".
        layers = [int(rs.choice(search_space)) for _ in range(max_hidden_layers)]
        print ('Network:', layers)
        net = build_model(layers)
        net = net.cuda()

        optimizer = optim.Adam(net.parameters())
        loss_fn = nn.CrossEntropyLoss()
        for epoch in range(n_epochs):
            for x,y in train_loader:
                x = x.cuda()
                y = y.cuda()
                optimizer.zero_grad()
                out = net(x)
                loss = loss_fn(out, y)
                loss.backward()
                optimizer.step()
            print ('Trial: {0}. Series: {1}. Epoch: {2}. '.format(trial, series, epoch))

        acc = test_acc(net, valid_loader)
        print ('Accuracy', acc)
        # The first candidate of a trial is always kept, so there is a model
        # to carry forward even if its accuracy is 0.
        if best_net is None or acc>best:
            print ('New best model!')
            best_net = net
            best = acc
        models[-1].append(best_net)



In [None]:
import pickle

# Persist the per-trial model lists; the analysis section reads this file back.
with open('./models_rs.pckl', 'wb') as out:
    pickle.dump(models, out)


## Анализ моделей

### Общая статистика

In [None]:
import pickle

# Reload the model lists written to './models_rs.pckl' by the search step.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load files produced by this notebook. The networks were moved to CUDA
# before being pickled, so loading presumably needs a CUDA device — confirm.
with open('./models_rs.pckl', 'rb') as inp:
    models = pickle.load(inp)


In [None]:
# Dictionary collecting the analysis results, keyed by statistic name.
stats = {}

In [None]:
stats = {}
# Number of parameter scalars in the last model stored for every trial.
# numel() counts the elements of a tensor of any rank, so the total stays
# correct for parameters with more than two dimensions as well.
pn = [sum(p.numel() for p in subnet[-1].parameters()) for subnet in models]

stats['param number'] = pn
stats['param number']

In [None]:
def get_superposition_number(trial_models=None):
    """Count the parameterised submodules in the last model of every trial.

    Each inspected submodule is also printed.

    Args:
        trial_models: sequence of per-trial model lists. The last entry of
            each list is inspected and must be iterable over its submodules
            (e.g. ``nn.Sequential``). Defaults to the module-level ``models``.

    Returns:
        List with one integer per trial: the number of submodules that own
        at least one parameter.
    """
    if trial_models is None:
        trial_models = models

    sn = []
    for subnet in trial_models:
        cnt = 0
        for submodel in subnet[-1]:
            print (submodel)
            # any() stops at the first parameter instead of building a list.
            if any(True for _ in submodel.parameters()):
                cnt += 1
        sn.append(cnt)

    return sn
# Store the per-trial counts and echo them as the cell output.
stats['superposition number'] = get_superposition_number()
stats['superposition number']

### Качество при добавлении шума в выборку: гауссовский шум

In [None]:
# Preview one training image with Gaussian noise scaled by four factors
# spread evenly between 0 and 1.
x = train_data[1]
clean = x[0].reshape(32,32).cpu().numpy()
for noise in np.linspace(0, 1.0, 4):
    noisy = clean + np.random.randn(32, 32)*noise
    plt.title(noise)
    plt.imshow(noisy)
    plt.show()

In [None]:
t.manual_seed(random_seed)
# X: noise scales, Y / Y_std: mean and std of accuracy over trials,
# accs: the raw per-trial accuracies for every noise scale.
X, Y, Y_std, accs = [], [], [], []
for noise in np.linspace(0, 1.0, 10):
    # Test accuracy of the last model of every trial, with Gaussian noise of
    # the current scale added to each input batch.
    acc = [
        test_acc(subnet[-1], test_loader, func = lambda x: x+t.randn(x.size())*noise)
        for subnet in models
    ]
    print (acc)
    X.append(noise)
    Y.append(np.mean(acc))
    Y_std.append(np.std(acc))
    accs.append(acc)
stats['noise'] = [X,Y,Y_std, accs]

### Качество при добавлении шума в параметры

In [None]:
t.manual_seed(random_seed)
# X: noise scales, Y / Y_std: mean and std of accuracy over trials,
# accs: the raw per-trial accuracies for every noise scale.
X = []
Y = []
accs = []
Y_std = []
for noise in np.linspace(0, 0.1, 10):
    X.append(noise)
    acc = []
    for subnet in models:
        m = subnet[-1]
        m.eval()
        # Snapshot of the clean weights; every perturbation starts from it.
        old_params = [p.data.clone() for p in m.parameters()]

        tp = 0
        try:
            # Evaluation only: no autograd graph is needed.
            with t.no_grad():
                for x,y in test_loader:
                    # Draw a fresh perturbation of the clean weights per batch.
                    for p, o in zip(m.parameters(), old_params):
                        n = t.randn(p.data.shape)*noise
                        n = n.cuda()
                        p.data = o + n
                    x = x.cuda()
                    y = y.cuda()
                    out = m(x).argmax(1)
                    tp += int((out==y).sum())
        finally:
            # Always put the clean weights back, even if evaluation failed.
            for p, o in zip(m.parameters(), old_params):
                p.data = o
        acc.append(tp/len(test_data))
    print (acc)
    accs.append(acc)
    Y.append(np.mean(acc))
    Y_std.append(np.std(acc))
stats['params'] = [X,Y,Y_std, accs]

In [None]:
# Write the collected statistics dictionary to disk.
with open('./stats_rs.pckl', 'wb') as out:
    pickle.dump(stats, out)