In [2]:
import functools
from functools import partial
import math
import os
import copy
import numpy as np

import torch
import torch.nn.functional as F
from torch import nn

import torchvision
from torchvision import transforms

# Shared keyword arguments for 3x3 "same"-padded, bias-free convolutions.
lt_conv_kwargs = dict(kernel_size=3, padding='same', bias=False)

# Batch size and the multiplier applied to bias learning rates below.
batchsize = 1024
bias_scaler = 56

# Hyperparameter tree: optimizer settings under 'opt', everything else
# (EMA schedule, device, cache path) under 'misc'.
hyp = dict(
    opt=dict(
        bias_lr=1.64 * bias_scaler/512,
        non_bias_lr=1.64 / 512,
        bias_decay=1.08 * 6.45e-4 * batchsize/bias_scaler,
        non_bias_decay=1.08 * 6.45e-4 * batchsize,
        scaling_factor=1./9,
        percent_start=.23,
        loss_scale_scaler=1./128,
    ),
    misc=dict(
        ema=dict(
            epochs=10,
            decay_base=.95,
            decay_pow=3.,
            every_n_steps=5,
        ),
        device='cpu',
        data_location='mnist.pt',
    ),
)


# Build (once) or load a cached copy of MNIST as a dict of tensors:
#   data['train'|'eval']['images']  -> float16 images as produced by ToTensor()
#   data['train'|'eval']['targets'] -> float16 one-hot labels (10 classes)
if not os.path.exists(hyp['misc']['data_location']):

    transform = transforms.Compose([
        transforms.ToTensor()])

    # download=True on the training split fetches the dataset files; the eval
    # split then reads from the same 'mnist/' directory without re-downloading.
    mnist      = torchvision.datasets.MNIST('mnist/', download=True,  train=True,  transform=transform)
    mnist_eval = torchvision.datasets.MNIST('mnist/', download=False, train=False, transform=transform)

    # batch_size == len(dataset): each loader yields the whole split as a
    # single batch, which is then kept resident on the target device.
    train_dataset_gpu_loader = torch.utils.data.DataLoader(mnist, batch_size=len(mnist), drop_last=True,
                                                           shuffle=True, num_workers=2, persistent_workers=False)
    eval_dataset_gpu_loader = torch.utils.data.DataLoader(mnist_eval, batch_size=len(mnist_eval), drop_last=True,
                                                          shuffle=False, num_workers=1, persistent_workers=False)

    train_dataset_gpu = {}
    eval_dataset_gpu = {}

    train_dataset_gpu['images'], train_dataset_gpu['targets'] = [item.to(device=hyp['misc']['device'], non_blocking=True) for item in next(iter(train_dataset_gpu_loader))]
    eval_dataset_gpu['images'],  eval_dataset_gpu['targets']  = [item.to(device=hyp['misc']['device'], non_blocking=True) for item in next(iter(eval_dataset_gpu_loader)) ]

    data = {
        'train': train_dataset_gpu,
        'eval': eval_dataset_gpu
    }

    data['train']['images'] = data['train']['images'].half().requires_grad_(False)
    data['eval']['images']  = data['eval']['images'].half().requires_grad_(False)

    # Pin num_classes so both splits always get 10 columns, independent of
    # which labels happen to be present in the tensor.
    data['train']['targets'] = F.one_hot(data['train']['targets'], num_classes=10).half()
    data['eval']['targets'] = F.one_hot(data['eval']['targets'], num_classes=10).half()

    torch.save(data, hyp['misc']['data_location'])

else:

    # NOTE: torch.load unpickles the file; only load cache files you created.
    # map_location keeps the tensors on the configured device even if the
    # cache was written from a different one.
    data = torch.load(hyp['misc']['data_location'], map_location=hyp['misc']['device'])



In [3]:
class SqueezeExciteBlock(nn.Module):
    """Squeeze-and-excitation gate over the channel dimension.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck (filters -> filters // 32 -> filters) with
    ReLU and sigmoid activations, and the resulting per-channel gate in (0, 1)
    rescales the input.

    Args:
        filters: number of channels of the input feature map. The bottleneck
            width is ``filters // 32``.
    """

    def __init__(self, filters):
        super().__init__()
        self.filters = filters
        hidden = self.filters // 32
        self.weight1 = nn.Parameter(torch.empty(self.filters, hidden))
        self.bias1 = nn.Parameter(torch.empty(1, hidden))
        self.weight2 = nn.Parameter(torch.empty(hidden, self.filters))
        self.bias2 = nn.Parameter(torch.empty(1, self.filters))

        nn.init.xavier_uniform_(self.weight1)
        nn.init.zeros_(self.bias1)
        nn.init.xavier_uniform_(self.weight2)
        nn.init.zeros_(self.bias2)

    def forward(self, x):
        """Return ``x`` rescaled per channel; output has the same shape as ``x``.

        Args:
            x: tensor of shape (batch, filters, height, width).
        """
        # Squeeze: global average pool -> (batch, filters).
        se = x.mean(dim=(2, 3)).reshape(-1, self.filters)
        # Excite: the dense layers are matrix products, not elementwise ones.
        se = F.relu(se @ self.weight1 + self.bias1)
        se = torch.sigmoid(se @ self.weight2 + self.bias2)
        # Reshape to (batch, filters, 1, 1) so the gate broadcasts over H and W,
        # then scale the input elementwise.
        se = se.reshape(-1, self.filters, 1, 1)
        return x * se
    


In [4]:
class ConvBlock(nn.Module):
    """Three stacked conv + ReLU layers followed by batch norm and an SE gate.

    Args:
        h: input height.
        w: input width.
        x: number of input channels.
        filters: number of output channels of every convolution.
        conv: square kernel size of the convolutions.
    """

    def __init__(self, h, w, x ,filters = 128, conv=3):
        super().__init__()
        self.h, self.w, self.x = h, w, x
        # The first convolution maps x -> filters channels, the rest keep filters.
        self.cweights = nn.ParameterList([
            nn.Parameter(torch.empty(filters, x if i == 0 else filters, conv, conv))
            for i in range(3)
        ])
        self.cbiases = nn.ParameterList([
            nn.Parameter(torch.empty(filters))
            for i in range(3)
        ])

        # Distinct loop names so the `w` (width) parameter is not shadowed.
        for weight in self.cweights:
            nn.init.xavier_uniform_(weight)
        for bias in self.cbiases:
            nn.init.zeros_(bias)

        # Normalize over the actual channel count rather than a fixed 128,
        # so non-default `filters` values work.
        self._bn = nn.BatchNorm2d(filters)
        self._seb = SqueezeExciteBlock(filters)

    def forward(self, input):
        """Apply the block; spatial size is preserved.

        Args:
            input: tensor reshapeable to (batch, x, h, w).

        Returns:
            Tensor of shape (batch, filters, h, w).
        """
        # NCHW layout: height comes before width (identical for square inputs).
        x = input.reshape(-1, self.x, self.h, self.w)
        for cw, cb in zip(self.cweights, self.cbiases):
            # 'same' zero padding equals the explicit 1-pixel pad for the
            # default 3x3 kernel and keeps H, W fixed for other kernel sizes.
            x = F.relu(F.conv2d(x, cw, bias=cb, padding='same'))
        x = self._bn(x)
        x = self._seb(x)
        return x
        

In [8]:
class ConvNet(nn.Module):
    """MNIST classifier: three ConvBlocks and a dual-pooling linear head.

    Two 28x28 blocks are followed by 2x2 average pooling and one 14x14 block.
    The final feature map is reduced by global average and global max pooling;
    each pooled vector is projected to 10 logits and the two are summed.
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.ModuleList([ConvBlock(28,28,1), ConvBlock(28,28,128), ConvBlock(14,14,128)])
        self.weight1 = nn.Parameter(torch.empty(128,10))
        self.weight2 = nn.Parameter(torch.empty(128,10))

        nn.init.xavier_uniform_(self.weight1)
        nn.init.xavier_uniform_(self.weight2)

    def forward(self, x):
        """Return logits of shape (batch, 10) for input reshapeable to (batch, 1, 28, 28)."""
        x = self.conv[0](x)
        x = self.conv[1](x)
        x = F.avg_pool2d(x, kernel_size=(2, 2))
        x = self.conv[2](x)
        # Global pooling over the remaining 14x14 grid -> (batch, 128) each.
        x1 = x.mean(dim=(2, 3)).reshape(-1, 128)
        x2 = x.amax(dim=(2, 3)).reshape(-1, 128)
        # Linear heads are matrix products: (batch, 128) @ (128, 10).
        xo = x1 @ self.weight1 + x2 @ self.weight2
        return xo
    
    

%pylab inline

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [7]:
import sys

if __name__ == "__main__":
    # Training schedule: lrs[i] is used for epochs[i] epochs.
    lrs = [1e-4, 1e-5]
    epochs = [2,1]
    BS = 32

    # L1 penalty coefficient applied to the classifier head weights.
    lmbd = 0.00025

    def lossfn(out, y):
        """Cross-entropy plus an L1 penalty on the model's two head matrices.

        Reads `model` and `lmbd` from the enclosing scope at call time.
        """
        criterion = nn.CrossEntropyLoss()
        regularization_term = lmbd * (model.weight1.abs().sum() + model.weight2.abs().sum())
        return criterion(out, y) + regularization_term

    X_train, X_test, Y_train, Y_test = data['train']['images'], data['eval']['images'], data['train']['targets'], data['eval']['targets']
    X_train = X_train.reshape(-1, 28, 28)
    X_test = X_test.reshape(-1, 28, 28)
    steps = len(X_train)//BS

    # Seed both RNGs: NumPy for any array-side randomness, torch for the
    # parameter initialization performed in ConvNet().
    np.random.seed(1337)
    torch.manual_seed(1337)

    model = ConvNet()

    for lr,epoch in zip(lrs,epochs):
        # Fresh optimizer per stage, using that stage's learning rate.
        opt = torch.optim.Adam(model.parameters(), lr=lr)
        for epch in range(1,epoch+1):
            # TODO: no augmentation or optimisation step is implemented yet;
            # this loop currently only selects the data for the epoch.
            # NOTE(review): the cached images are float16 while the model's
            # parameters use the default dtype - cast before the forward pass.
            X_aug = X_train
    

    

ModuleNotFoundError: No module named 'extra'