## CIFAR 10

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

You can get the data via:

    wget http://pjreddie.com/media/files/cifar.tgz

In [2]:
from fastai.conv_learner import *
# Root folder of the dataset. Note the trailing slash: later f-strings
# append sub-folder names directly to it.
PATH = "data/cifar10/"
# Create the folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(PATH,exist_ok=True)

In [3]:
# The ten class names. NOTE: `classes` is reassigned from labels.txt if the
# "Format data" cells further down are run.
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Pair of 3-element arrays handed to tfms_from_stats() inside get_data().
# NOTE(review): presumably the per-channel mean and standard deviation of the
# training images - confirm.
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

### Format data

In [4]:
# Read the class names from labels.txt.
# PATH already ends with '/', so the resulting path contains a double slash.
with open(f'{PATH}/labels.txt') as f:
    # readlines() keeps the trailing newline on every entry; a later cell strips it.
    classes = f.readlines()

In [None]:
'sdfs'.rstrip()

In [None]:
# Remove trailing whitespace (including the newline kept by readlines()) from every class name.
classes = [x.rstrip() for x in classes]

In [None]:
classes

In [None]:
# Create one sub-folder per class under train/; exist_ok=True makes this safe to re-run.
for c in classes:
    os.makedirs(f'{PATH}train/{c}', exist_ok=True)

In [None]:
# Collect every PNG in the flat train_orig/ folder.
trn_files = glob(f'{PATH}train_orig/*.png')

In [None]:
f1 = trn_files[0]

In [None]:
# Try the filename pattern on a single path: group 1 captures the digits after
# the last '/', group 2 the text between '_' and the extension.
# NOTE: the '.' in front of 'png' is unescaped, so it matches any character.
group = re.findall(r'.*/(\d+)_(.*).png', f1)

In [None]:
print(group)

In [None]:
# Move every training image from the flat train_orig/ folder into its class
# sub-folder, e.g. train_orig/123_cat.png -> train/cat/123_cat.png.
# The class name is parsed from the filename ("<id>_<class>.png").
pattern = re.compile(r'.*/(\d+)_(.*)\.png')  # '.' escaped so only a literal ".png" matches
for f in trn_files:
    matches = pattern.findall(f)
    # findall() returns [] when the path does not match. The check must be
    # "not matches or ..." - with "matches and ..." an empty result fell
    # through to matches[0] below and raised IndexError.
    if not matches or len(matches[0]) != 2:
        print('Could not find class:', f)
        continue
    img_id, img_class = matches[0]
    # Only the directory part changes; the file keeps its original name.
    dst = f.replace(f'train_orig/{img_id}', f'train/{img_class}/{img_id}')
    os.rename(f, dst)

Repeat the same steps for the test set:

In [None]:
# Create one sub-folder per class under test/; exist_ok=True makes this safe to re-run.
for c in classes:
    os.makedirs(f'{PATH}test/{c}', exist_ok=True)

In [None]:
# Collect every PNG in the flat test_orig/ folder.
# NOTE: this reuses the name `trn_files`, which from here on holds the *test* files.
trn_files = glob(f'{PATH}test_orig/*.png')

In [None]:
# Move every test image from the flat test_orig/ folder into its class
# sub-folder, e.g. test_orig/123_cat.png -> test/cat/123_cat.png.
# The class name is parsed from the filename ("<id>_<class>.png").
pattern = re.compile(r'.*/(\d+)_(.*)\.png')  # '.' escaped so only a literal ".png" matches
for f in trn_files:
    matches = pattern.findall(f)
    # findall() returns [] when the path does not match. The check must be
    # "not matches or ..." - with "matches and ..." an empty result fell
    # through to matches[0] below and raised IndexError.
    if not matches or len(matches[0]) != 2:
        print('Could not find class:', f)
        continue
    img_id, img_class = matches[0]
    # Only the directory part changes; the file keeps its original name.
    dst = f.replace(f'test_orig/{img_id}', f'test/{img_class}/{img_id}')
    os.rename(f, dst)

### Get Data

In [5]:
def get_data(sz,bs):
    """Build the ImageClassifierData object for the folders under PATH.

    sz -- image size handed to tfms_from_stats (also sets the padding, sz//8)
    bs -- batch size

    The `test` folder is used as the validation set (val_name='test').
    """
    # Alternative left disabled in the original: aug_tfms=[RandomFlipXY()]
    augmentations = [RandomFlip()]
    padding = sz//8
    tfms = tfms_from_stats(stats, sz, aug_tfms=augmentations, pad=padding)
    return ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)

In [6]:
bs=256

In [7]:
data = get_data(32,bs)

### Look at data

In [None]:
# Rebuild the data with a batch size of 4 for the inspection cells in this
# section; `data` is rebuilt with `bs` again at the start of the next section.
data = get_data(32,4)

In [None]:
# Pull a single batch from the training data loader and unpack it into x and y.
x,y=next(iter(data.trn_dl))

In [None]:
plt.imshow(data.trn_ds.denorm(x)[0]);

In [None]:
plt.imshow(data.trn_ds.denorm(x)[1]);

## Fully connected model

In [8]:
data = get_data(32,bs)

In [None]:
lr=1e-2

In [None]:
test = nn.ModuleList()

In [None]:
# Display the (empty) ModuleList. The original cell was the unfinished
# expression "test." (a tab-completion leftover), which is a SyntaxError
# and aborts "Restart & Run All".
test

From [this notebook](https://github.com/KeremTurgutlu/deeplearning/blob/master/Exploring%20Optimizers.ipynb) by our student Kerem Turgutlu:

In [None]:
class SimpleNet(nn.Module):
    """Fully connected classifier described by a list of layer widths.

    `layers` gives the width of every layer, input first and output last;
    one nn.Linear is created for each consecutive pair of widths.
    """
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList()
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            self.layers.append(nn.Linear(n_in, n_out))

    def forward(self, x):
        # Flatten everything except the leading (batch) dimension.
        out = x.view(x.shape[0], -1)
        last_idx = len(self.layers) - 1
        for idx, linear in enumerate(self.layers):
            out = linear(out)
            # ReLU after every layer except the last, which gets log-softmax.
            out = F.log_softmax(out, dim=-1) if idx == last_idx else F.relu(out)
        return out

In [None]:
# Layer widths: 32*32*3 inputs (SimpleNet flattens each image), 40 hidden units, 10 outputs.
learn = ConvLearner.from_model_data(SimpleNet([32*32*3,40,10]), data)

In [None]:
learn, [o.numel() for o in learn.model.parameters()]

In [None]:
learn.summary()

In [None]:
learn.lr_find()

In [None]:
learn.sched.plot()

In [None]:
%time learn.fit(lr, 2)

In [None]:
%time learn.fit(lr, 2, cycle_len=1)

## CNN

In [None]:
# Scratch check: output shape of adaptive average pooling down to a 1x1 map,
# tried on a random tensor of shape (1, 64, 10, 9).
# NOTE: the name `input` shadows the Python builtin for the rest of the session.
input = torch.autograd.Variable(torch.randn(1, 64, 10, 9))
# m = nn.AdaptiveMaxPool2d((1, 1))
# output = m(input); output.shape
F.adaptive_avg_pool2d(input, 1).shape

In [None]:
# Scratch check: same experiment with the module form of adaptive max pooling
# on a random tensor of shape (1, 80, 3, 3).
# NOTE: the name `input` shadows the Python builtin for the rest of the session.
input = torch.autograd.Variable(torch.randn([1, 80, 3, 3]))
# input = input.view(input.shape[0], -1)
m = nn.AdaptiveMaxPool2d(1)
m(input).shape

In [None]:
??nn.AdaptiveMaxPool2d

In [None]:
class ConvNet(nn.Module):
    """Stack of 3x3 stride-2 convolutions, global max pooling and a linear head.

    layers -- channel counts, input first; one nn.Conv2d per consecutive pair
    c      -- number of outputs of the final linear layer
    """
    def __init__(self, layers, c):
        super().__init__()
        self.conv_layers = nn.ModuleList()
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            self.conv_layers.append(nn.Conv2d(n_in, n_out, kernel_size=3, stride=2))
        self.pool = nn.AdaptiveMaxPool2d((1, 1))
        self.final = nn.Linear(layers[-1], c)

    def forward(self, x):
        # Every convolution is followed by a ReLU.
        for conv in self.conv_layers:
            x = F.relu(conv(x))
        # Pool each channel down to a single value, then drop the 1x1 spatial dims.
        pooled = self.pool(x)
        flat = pooled.view(pooled.shape[0], -1)
        logits = self.final(flat)
        return F.log_softmax(logits, dim=-1)
        

In [None]:
learn = ConvLearner.from_model_data(ConvNet([3, 20, 40, 80], 10), data)

In [None]:
learn.summary()

In [None]:
learn.lr_find(end_lr=100)

In [None]:
learn.sched.plot()

In [None]:
%time learn.fit(1e-1, 2)

In [None]:
%time learn.fit(1e-1, 4, cycle_len=1)

## Refactored

In [9]:
class ConvLayer(nn.Module):
    """3x3 convolution with stride 2 and padding 1, followed by a ReLU."""
    def __init__(self, n_i, n_o):
        super().__init__()
        self.layer = nn.Conv2d(n_i, n_o, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        conv_out = self.layer(x)
        return F.relu(conv_out)

In [8]:
class ConvNet2(nn.Module):
    """ConvNet rebuilt from ConvLayer blocks, using functional adaptive max pooling.

    layers -- channel counts, input first; one ConvLayer per consecutive pair
    c      -- number of outputs of the final linear layer
    """
    def __init__(self, layers, c):
        super().__init__()
        channel_pairs = zip(layers[:-1], layers[1:])
        self.conv_layers = nn.ModuleList([ConvLayer(n_in, n_out) for n_in, n_out in channel_pairs])
        self.final = nn.Linear(layers[-1], c)

    def forward(self, x):
        for conv in self.conv_layers:
            x = conv(x)
        # Pool each channel to a single value and flatten to (batch, channels).
        pooled = F.adaptive_max_pool2d(x, 1)
        logits = self.final(pooled.view(pooled.shape[0], -1))
        return F.log_softmax(logits, dim=-1)

In [9]:
learn = ConvLearner.from_model_data(ConvNet2([3, 20, 40, 80], 10), data)

In [None]:
learn.summary()

In [10]:
%time learn.fit(1e-1, 2)

epoch      trn_loss   val_loss   accuracy                   
    0      1.699481   1.577857   0.42002   
    1      1.479431   1.394545   0.503223                   

CPU times: user 41.6 s, sys: 26.4 s, total: 1min 8s
Wall time: 41.7 s


[1.3945453, 0.50322265625]

In [None]:
%time learn.fit(1e-1, 2, cycle_len=1)

## BatchNorm

Testing the reshaping and broadcasting needed to compute per-channel statistics:

In [49]:
# Random stand-in for a conv output: shape [256, 40, 8, 8].
x = torch.randn([256, 40, 8, 8])
# Swap the first two axes so the size-40 axis comes first ...
xt = x.transpose(0, 1)
# ... then flatten everything else: one row per entry of that axis
# (the printed shape is [40, 16384]).
xv = xt.contiguous().view(xt.shape[0], -1)

In [50]:
xv.shape

torch.Size([40, 16384])

In [77]:
# The same transpose-and-flatten as above written as a single expression;
# this is the form used inside BnLayer.forward.
x_chan = x.transpose(0, 1).contiguous().view(x.shape[1], -1)
x_chan.shape

torch.Size([40, 16384])

In [52]:
xm = torch.mean(xv, 1)

In [63]:
torch.std(xv, 1).shape

torch.Size([40])

In [99]:
xm[:, None, None].shape

torch.Size([40, 1, 1])

In [101]:
# xm[:, None, None] has shape [40, 1, 1], so it broadcasts against the last
# three axes of x ([40, 8, 8]) and the subtraction is applied per entry of axis 1.
x_diff = x - xm[:, None, None]

In [10]:
class BnLayer(nn.Module):
    """Convolution, optional ReLU, then a hand-written batch normalisation.

    The conv output is normalised per channel (axis 1) with the mean and
    standard deviation taken over all other axes, then multiplied by the
    learnable `m` and shifted by the learnable `a` (both shaped (n_out, 1, 1)).

    Parameters
    ----------
    n_in, n_out : input / output channel counts of the convolution.
    kernel_size, stride, padding, bias : forwarded to nn.Conv2d.
    activation : apply ReLU to the conv output before normalising.
    eps : small constant added to the standard deviation so that a constant
        channel (std == 0, e.g. a dead ReLU) does not produce NaN/inf.

    Statistics
    ----------
    In training mode the statistics of the current batch are used and a
    detached copy is remembered in `self.mean` / `self.std`. In eval mode
    the remembered values are used. If eval is called before any training
    forward pass there is nothing remembered yet, so the current batch's
    statistics are used (and not stored) instead of raising AttributeError.
    """
    def __init__(self, n_in, n_out, kernel_size=3, stride=2, padding=1, bias=False, activation=True, eps=1e-5):
        super().__init__()
        self.conv = nn.Conv2d(n_in, n_out, kernel_size, stride, padding, bias=bias)
        self.m = nn.Parameter(torch.ones(n_out,1,1))
        self.a = nn.Parameter(torch.zeros(n_out,1,1))
        self.activation = activation
        self.eps = eps
        # Filled in by the first training-mode forward pass.
        self.mean = None
        self.std = None

    def forward(self, x):
        x = self.conv(x)
        if self.activation:
            x = F.relu(x)
        have_stats = self.mean is not None and self.std is not None
        if self.training or not have_stats:
            # One row per channel: (channels, batch * height * width).
            x_chan = x.transpose(0, 1).contiguous().view(x.shape[1], -1)
            std = torch.std(x_chan, 1)[:, None, None]
            mean = torch.mean(x_chan, 1)[:, None, None]
            if self.training:
                # Keep detached copies so the remembered statistics do not
                # hold on to the autograd graph of this batch.
                self.std = std.detach()
                self.mean = mean.detach()
        else:
            mean, std = self.mean, self.std
        # Gradients still flow through the batch statistics during training.
        x = (x - mean) / (std + self.eps)
        return x * self.m + self.a

In [14]:
class ConvBnNet(nn.Module):
    """5x5 stem BnLayer, a stack of default BnLayers, global max pooling and a linear head.

    layers -- channel counts for the BnLayer stack; the first entry must be 10
              to match the 10 channels produced by `conv1`
    c      -- number of outputs of the final linear layer
    """
    def __init__(self, layers, c):
        super().__init__()
        # Stem: 5x5, stride 1, padding 2, with bias and without ReLU.
        # Author's note: having batch norm on this first layer doesn't seem to
        # help or hurt when training the model.
        self.conv1 = BnLayer(3, 10, kernel_size=5, stride=1, padding=2, bias=True, activation=False)
        channel_pairs = zip(layers[:-1], layers[1:])
        self.layers = nn.ModuleList([BnLayer(n_in, n_out) for n_in, n_out in channel_pairs])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        x = self.conv1(x)
        for layer in self.layers:
            x = layer(x)
        # Pool each channel to a single value and flatten to (batch, channels).
        pooled = F.adaptive_max_pool2d(x, 1)
        flat = pooled.view(pooled.size(0), -1)
        return F.log_softmax(self.out(flat), dim=-1)

In [15]:
# The first width (10) has to match the 10 channels produced by ConvBnNet.conv1;
# the second argument is the number of outputs of the final linear layer.
learn = ConvLearner.from_model_data(ConvBnNet([10, 20, 40, 80, 160], 10), data)

In [16]:
learn.summary()

OrderedDict([('Conv2d-1',
              OrderedDict([('input_shape', [-1, 3, 32, 32]),
                           ('output_shape', [-1, 10, 32, 32]),
                           ('trainable', True),
                           ('nb_params', 760)])),
             ('BnLayer-2',
              OrderedDict([('input_shape', [-1, 3, 32, 32]),
                           ('output_shape', [-1, 10, 32, 32]),
                           ('nb_params', 0)])),
             ('Conv2d-3',
              OrderedDict([('input_shape', [-1, 10, 32, 32]),
                           ('output_shape', [-1, 20, 16, 16]),
                           ('trainable', True),
                           ('nb_params', 1800)])),
             ('BnLayer-4',
              OrderedDict([('input_shape', [-1, 10, 32, 32]),
                           ('output_shape', [-1, 20, 16, 16]),
                           ('nb_params', 0)])),
             ('Conv2d-5',
              OrderedDict([('input_shape', [-1, 20, 16, 16]),
               

In [17]:
%time learn.fit(3e-2, 2)

epoch      trn_loss   val_loss   accuracy                   
    0      1.488293   1.300101   0.538281  
    1      1.25174    1.134271   0.590625                   

CPU times: user 40.2 s, sys: 23.8 s, total: 1min 4s
Wall time: 43.1 s


[1.1342713, 0.590625]

In [18]:
%time learn.fit(1e-1, 4, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                   
    0      1.124571   1.017611   0.644434  
    1      1.029502   0.92705    0.67002                    
    2      0.935734   0.87668    0.692676                    
    3      0.884671   0.819221   0.713867                    

CPU times: user 1min 18s, sys: 49.3 s, total: 2min 7s
Wall time: 1min 17s


[0.81922054, 0.7138671875]

## Deep BatchNorm

In [11]:
class ConvBnNet2(nn.Module):
    """Deeper ConvBnNet: each stride-2 BnLayer is followed by a stride-1 BnLayer
    of the same width; the head is global average pooling plus a linear layer.

    layers -- channel counts for the stack; the first entry must be 10 to
              match the 10 channels produced by `conv1`
    c      -- number of outputs of the final linear layer
    """
    def __init__(self, layers, c):
        super().__init__()
        self.conv1 = BnLayer(3, 10, kernel_size=5, stride=1, padding=2, bias=True, activation=False)
        widths = list(zip(layers[:-1], layers[1:]))
        # Stride-2 layers that change the channel count.
        self.conv1_layers = nn.ModuleList([
            BnLayer(n_in, n_out, kernel_size=3, stride=2, padding=1, bias=False)
            for n_in, n_out in widths
        ])
        # Stride-1 layers that keep the channel count.
        self.conv2_layers = nn.ModuleList([
            BnLayer(n_out, n_out, kernel_size=3, stride=1, padding=1, bias=False)
            for _, n_out in widths
        ])
        self.final = nn.Linear(layers[-1], c)

    def forward(self, x):
        x = self.conv1(x)
        # Alternate: stride-2 layer, then its same-width stride-1 partner.
        for strided, same_width in zip(self.conv1_layers, self.conv2_layers):
            x = same_width(strided(x))
        pooled = F.adaptive_avg_pool2d(x, (1))
        logits = self.final(pooled.view(pooled.shape[0], -1))
        return F.log_softmax(logits, dim=-1)

In [12]:
learn = ConvLearner.from_model_data(ConvBnNet2([10, 20, 40, 80, 160], 10), data)

In [13]:
%time learn.fit(1e-2, 2)

epoch      trn_loss   val_loss   accuracy                   
    0      1.454808   1.304778   0.524023  
    1      1.226852   1.129529   0.591504                   

CPU times: user 54.4 s, sys: 29 s, total: 1min 23s
Wall time: 51.3 s


[1.1295286, 0.59150390625]

In [14]:
%time learn.fit(1e-2, 2, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                   
    0      1.040477   0.98441    0.648242  
    1      0.977695   0.905616   0.677734                    

CPU times: user 47.1 s, sys: 26.8 s, total: 1min 13s
Wall time: 45.9 s


[0.905616, 0.677734375]

## Resnet

In [60]:
class ResLayer(BnLayer):
    """BnLayer with an identity skip connection: returns x + BnLayer.forward(x).

    The addition requires the BnLayer output to have a shape compatible with
    its input, so the layer must be built with matching channel counts and a
    configuration that preserves the spatial size.
    """
    def forward(self, x):
        return x + super().forward(x)

In [None]:
class Resnet(nn.Module):
    """Plain 5x5 conv stem, then groups of (stride-2 BnLayer, ResLayer, ResLayer),
    global max pooling and a linear head.

    layers -- channel counts for the groups; the first entry must be 10 to
              match the 10 channels produced by `conv1`
    c      -- number of outputs of the final linear layer

    Fixes relative to the original cell:
    * it referenced `ResnetLayer`, which is not defined anywhere in this
      notebook (NameError); the residual block defined here is `ResLayer`.
    * it passed `1` as the third positional argument, which BnLayer's
      signature binds to `kernel_size`, leaving stride at its default of 2.
      The residual sum x + f(x) needs f to keep the spatial size, so the
      intended argument is `stride=1` (as Resnet2 spells it).
    """
    def __init__(self, layers, c):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2)
        # Stride-2 layers that change the channel count.
        self.layers = nn.ModuleList([BnLayer(layers[i], layers[i+1])
            for i in range(len(layers) - 1)])
        # Two same-width, stride-1 residual layers per group.
        self.layers2 = nn.ModuleList([ResLayer(layers[i+1], layers[i + 1], stride=1)
            for i in range(len(layers) - 1)])
        self.layers3 = nn.ModuleList([ResLayer(layers[i+1], layers[i + 1], stride=1)
            for i in range(len(layers) - 1)])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        x = self.conv1(x)
        for l,l2,l3 in zip(self.layers, self.layers2, self.layers3):
            x = l3(l2(l(x)))
        # Pool each channel to a single value and flatten to (batch, channels).
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)

In [None]:
learn = ConvLearner.from_model_data(Resnet([10, 20, 40, 80, 160], 10), data)

In [None]:
wd=1e-5

In [None]:
%time learn.fit(1e-2, 2, wds=wd)

In [None]:
%time learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)

In [None]:
%time learn.fit(1e-2, 8, cycle_len=4, wds=wd)

## Resnet 2

In [73]:
class Resnet2(nn.Module):
    """Wider residual net: 7x7 BnLayer stem, groups of (stride-2 BnLayer,
    ResLayer, ResLayer), global max pooling, dropout and a linear head.

    layers -- channel counts for the groups; the first entry must be 16 to
              match the 16 channels produced by `conv1`
    c      -- number of outputs of the final linear layer
    p      -- dropout probability applied to the pooled features
    """
    def __init__(self, layers, c, p=0.5):
        super().__init__()
        # NOTE: BnLayer's default padding of 1 is used with the 7x7 kernel,
        # so this stem shrinks each spatial dimension by 4.
        self.conv1 = BnLayer(3, 16, kernel_size=7, stride=1)
        widths = list(zip(layers[:-1], layers[1:]))
        # Stride-2 layers that change the channel count.
        self.layer1 = nn.ModuleList([BnLayer(n_in, n_out) for n_in, n_out in widths])
        # Two same-width, stride-1 residual layers per group.
        self.layer2 = nn.ModuleList([ResLayer(n_out, n_out, stride=1) for _, n_out in widths])
        self.layer3 = nn.ModuleList([ResLayer(n_out, n_out, stride=1) for _, n_out in widths])
        self.final = nn.Linear(layers[-1], c)
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        x = self.conv1(x)
        for down, res_a, res_b in zip(self.layer1, self.layer2, self.layer3):
            x = res_b(res_a(down(x)))
        # Pool each channel to a single value and flatten to (batch, channels).
        pooled = F.adaptive_max_pool2d(x, 1)
        features = self.dropout(pooled.view(pooled.shape[0], -1))
        logits = self.final(features)
        return F.log_softmax(logits, dim=-1)
        
            

In [74]:
learn = ConvLearner.from_model_data(Resnet2([16, 32, 64, 128, 256], 10, 0.2), data)

In [75]:
wd=1e-6

In [76]:
%time learn.fit(1e-2, 2, wds=wd)

epoch      trn_loss   val_loss   accuracy                   
    0      1.750364   3.615538   0.447949  
    1      1.497726   1.303117   0.526953                   

CPU times: user 45.2 s, sys: 24.9 s, total: 1min 10s
Wall time: 45.7 s


[1.3031166, 0.526953125]

In [None]:
%time learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)

epoch      trn_loss   val_loss   accuracy                   
    0      1.259316   1.153073   0.593262  
    1      1.209491   1.077703   0.619922                   
    2      1.054812   0.977194   0.654199                   
    3      1.108446   1.330272   0.59541                    
    4      0.986665   0.930528   0.675684                    
    5      0.875311   0.850411   0.697754                    
    6      0.822313   0.832844   0.708203                    

CPU times: user 2min 41s, sys: 1min 29s, total: 4min 10s
Wall time: 2min 43s


[0.83284426, 0.708203125]

In [None]:
%time learn.fit(1e-2, 8, cycle_len=4, wds=wd)

epoch      trn_loss   val_loss   accuracy                    
    0      0.947625   0.877477   0.690723  
    1      0.846274   0.828483   0.710449                    
    2      0.746697   0.717678   0.744531                    
    3      0.697601   0.736819   0.742969                    
    4      0.823708   0.779726   0.731934                    
    5      0.737899   0.709785   0.753613                    
    6      0.651845   0.65618    0.77334                     
    7      0.606265   0.666784   0.772559                    
    8      0.724141   0.737465   0.745996                    
    9      0.662189   0.660784   0.769336                    
    10     0.584376   0.60046    0.789746                    
    11     0.54263    0.594487   0.79043                     
    12     0.662779   0.778527   0.749219                    
    13     0.603221   0.651121   0.784277                    
    14     0.515879   0.581568   0.800781                    
    15     0.486071   0.56

In [None]:
learn.save('tmp3')

In [None]:
# Predict with test-time augmentation.
# NOTE(review): log_preds presumably holds one set of log-probabilities per
# augmented pass along axis 0 - confirm against the fastai version in use.
log_preds,y = learn.TTA()
# Exponentiate back to probabilities, then average over axis 0.
preds = np.mean(np.exp(log_preds),0)

In [None]:
metrics.log_loss(y,preds), accuracy(preds,y)

### End