In [1]:
using Flux, Flux.Optimise #deep learning framework
using MLDatasets # to load dataset 
using Functors

In [2]:
# Read the CIFAR-10 training split once and reuse it for both subsets,
# instead of constructing (and loading) the dataset twice.
trainset = CIFAR10(split=:train)

# samples 1:45000 of the training split are used for training
train_x, train_y = trainset[1:45000]

# samples 45001:50000 of the training split are held out for validation
validate_x, validate_y = trainset[45001:50000]

(features = [0.29411766 0.27058825 … 0.8117647 0.88235295; 0.26666668 0.23921569 … 0.8392157 0.8784314; … ; 0.4117647 0.36862746 … 0.8509804 0.8745098; 0.54901963 0.4627451 … 0.8901961 0.90588236;;; 0.19607843 0.19215687 … 0.6745098 0.7411765; 0.1764706 0.15686275 … 0.7058824 0.7411765; … ; 0.33333334 0.29411766 … 0.7607843 0.78039217; 0.4627451 0.3764706 … 0.8 0.8117647;;; 0.12156863 0.1254902 … 0.5529412 0.6156863; 0.12941177 0.11372549 … 0.5803922 0.6156863; … ; 0.28627452 0.23529412 … 0.61960787 0.627451; 0.45882353 0.3647059 … 0.65882355 0.65882355;;;; 0.7647059 0.8627451 … 0.29803923 0.2509804; 0.7411765 0.7411765 … 0.2627451 0.2; … ; 0.6901961 0.827451 … 0.4392157 0.4509804; 0.6392157 0.70980394 … 0.42352942 0.43137255;;; 0.7490196 0.85882354 … 0.27450982 0.23137255; 0.7294118 0.73333335 … 0.23529412 0.17254902; … ; 0.69411767 0.83137256 … 0.47843137 0.47058824; 0.6431373 0.7176471 … 0.4509804 0.45882353;;; 0.73333335 0.85490197 … 0.2627451 0.22745098; 0.70980394 0.7176471 … 0.2

## 2D Convolution in Flux


**Flux.Conv — Type** 

Conv(filter, in => out, σ = identity;
     stride = 1, pad = 0, dilation = 1, groups = 1, [bias, init])


Standard convolutional layer. _filter_ is a tuple of integers specifying the size of the convolutional kernel; _in_ and _out_ specify the number of input and output channels.

Image data should be stored in WHCN order (width, height, channels, batch). In other words, a 100×100 RGB image would be a 100×100×3×1 array, and a batch of 50 would be a 100×100×3×50 array. This has N = 2 spatial dimensions, and needs a kernel size like (5,5), a 2-tuple of integers.

To take convolutions along N feature dimensions, this layer expects as input an array with ndims(x) == N+2, where size(x, N+1) == in is the number of input channels, and size(x, ndims(x)) is (as always) the number of observations in a batch. Then:

- filter should be a tuple of N integers.
- Keywords stride and dilation should each be either a single integer, or a tuple with N integers.
- Keyword pad specifies the number of elements added to the borders of the data array. It can be
    - a single integer for equal padding all around,
    - a tuple of N integers, to apply the same padding at begin/end of each spatial dimension,
    - a tuple of 2*N integers, for asymmetric padding, or
    - the singleton _SamePad()_, to calculate padding such that size(output,d) == size(x,d) / stride (possibly rounded) for each spatial dimension.
- Keyword groups is expected to be an Int. It specifies the number of groups to divide a convolution into.

Keywords to control initialization of the layer:

- init - Function used to generate initial weights. Defaults to glorot_uniform.
- bias - The initial bias vector is all zero by default. Trainable bias can be disabled entirely by setting this to false, or another vector can be provided such as bias = randn(Float32, out).
            
            
**Flux.Conv — Method**
_Conv(weight::AbstractArray, [bias, activation; stride, pad, dilation])_

Constructs a convolutional layer with the given weight and bias. Accepts the same keywords and has the same defaults as Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ; ...).

## ResNet Layer

In [3]:
"""
    ResNetLayer

Residual block holding two convolutions, two batch-normalisation layers and the
hyper-parameters they were built with.

The positional constructor takes the fields in declaration order. Calling an instance
on an array runs the block's forward pass.
"""
mutable struct ResNetLayer
    conv1::Flux.Conv        # first convolution of the residual branch
    conv2::Flux.Conv        # second convolution of the residual branch
    bn1::Flux.BatchNorm     # batch norm applied before `conv1`
    bn2::Flux.BatchNorm     # batch norm applied before `conv2`
    f::Function             # activation applied after each batch norm
    in_channels::Int        # channel count of the block's input
    channels::Int           # channel count of the block's output
    stride1::Int            # stride of `conv1`; also read by `residual_identity`
    stride2::Int            # stride of `conv2`
    pad1::Int               # padding of `conv1`
    pad2::Int               # padding of `conv2`
end

# Expose only the four sub-layers to Functors.jl; the activation and the integer
# hyper-parameters are left out of the child list.
@functor ResNetLayer (conv1, conv2, bn1, bn2)


In [4]:
"""
    residual_identity(layer::ResNetLayer, x::AbstractArray{T,4}) where {T<:Number}

Build the shortcut (skip-connection) tensor for `layer` from the 4-D input `x`.

- If `layer.stride1 > 1`, `x` is subsampled by keeping every `stride1`-th element along
  its first two dimensions.
- If `layer.in_channels < layer.channels`, zero-filled channels with the element type of
  `x` are appended along dimension 3 until there are `layer.channels` of them.

Only `layer.stride1` is considered; `layer.stride2` is not read here.

# Throws
- `AssertionError` if `stride1 > 1` and the first two dimensions of `x` are not both
  divisible by `stride1`.
- `ErrorException` if `layer.in_channels > layer.channels`.
"""
function residual_identity(layer::ResNetLayer, x::AbstractArray{T, 4}) where {T<:Number}
    stride = layer.stride1
    x_id = x
    if stride > 1
        w, h = size(x, 1), size(x, 2)
        @assert (w % stride == 0 && h % stride == 0) "Spatial dimensions are not divisible by `stride`"

        # Strided downsample. The step must be `stride` itself (it was hard-coded to 2),
        # otherwise the shortcut only matches the convolution output for stride 2.
        # Range indexing already returns a fresh array, so no extra `copy` is needed.
        x_id = x[begin:stride:end, begin:stride:end, :, :]
    end

    channels = layer.channels
    in_channels = layer.in_channels
    if in_channels < channels
        # Zero padding on extra channels. Allocate with the input's element type so
        # that e.g. Float32 inputs are not promoted to Float64 by the concatenation.
        # NOTE(review): `zeros` allocates a CPU `Array`; confirm behaviour if `x` lives
        # on another device.
        (w, h, _, b) = size(x_id)
        pad = zeros(T, w, h, channels - in_channels, b)
        x_id = cat(x_id, pad; dims=3)
    elseif in_channels > channels
        error("in_channels > out_channels not supported")
    end
    return x_id
end

residual_identity (generic function with 1 method)

In [5]:
"""
    ResNetLayer(in_channels::Int, channels::Int;
                stride1=1, stride2=1, f=relu, pad1=0, pad2=0)

Construct a residual block with two 3×3 convolutions and two batch-normalisation layers.

# Arguments
- `in_channels`: number of channels of the input; also the size of the first batch norm.
- `channels`: number of output channels of both convolutions.

# Keywords
- `stride1`, `stride2`: strides of the first and second convolution.
- `pad1`, `pad2`: padding of the first and second convolution.
- `f`: activation stored on the layer and applied by the forward pass.
"""
function ResNetLayer(
    in_channels::Int, channels::Int; stride1=1, stride2=1, f=relu, pad1=0, pad2=0
)
    # The convolutions are built WITHOUT an activation: the forward pass already applies
    # `f` after each batch norm (BN -> f -> conv). Passing `f` to `Conv` as well applied
    # it a second time after every convolution, and with `relu` forced the residual
    # branch to be non-negative right before the skip addition.
    bn1 = Flux.BatchNorm(in_channels)
    conv1 = Flux.Conv((3, 3), in_channels => channels; stride=stride1, pad=pad1)
    bn2 = Flux.BatchNorm(channels)
    conv2 = Flux.Conv((3, 3), channels => channels; stride=stride2, pad=pad2)

    return ResNetLayer(
        conv1, conv2, bn1, bn2, f, in_channels, channels, stride1, stride2, pad1, pad2
    )
end

ResNetLayer

In [6]:
# Forward pass of a `ResNetLayer`.
#
# Computes the shortcut with `residual_identity(self, x)`, runs `x` through
# bn1 -> f -> conv1 -> bn2 -> f -> conv2, and returns the sum of the two.
#
# The activation is broadcast (`self.f.(...)`), as Flux layers do with their own
# activation, so that any scalar function (e.g. `tanh` or an anonymous function) can be
# used as `f`, not only functions that happen to accept whole arrays.
function (self::ResNetLayer)(x::AbstractArray)
    # Named `shortcut` rather than `identity` to avoid shadowing `Base.identity`.
    shortcut = residual_identity(self, x)

    branch = self.conv1(self.f.(self.bn1(x)))
    branch = self.conv2(self.f.(self.bn2(branch)))

    return branch + shortcut
end

In [8]:
# Example block: 3 input channels -> 10 output channels, first convolution with stride 2,
# both convolutions with padding 1.
l = ResNetLayer(3, 10; stride1=2, stride2=1, pad1=1, pad2=1);

In [9]:
# Smoke test: push a random Float32 array of size 64×64×3×2 through the block and
# inspect the size of the result.
x = randn(Float32, (64, 64, 3, 2));
y = l(x);
size(y)

(32, 32, 10, 2)