In [99]:
import numpy as np
import pickle
import gzip
import random
import matplotlib.pyplot as plt

# Convolutions

In [100]:


class conv():
    """Shape descriptor for a 2-D convolution layer (no padding).

    Attributes:
        H, W: input feature-map height and width.
        C: number of input channels.
        N: minibatch size.
        K: number of output channels.
        R, S: filter height and width.
        stride: step between consecutive filter windows (same in both axes).
        P, Q: output feature-map height and width, derived from the values
            above as the number of windows that fit entirely in the input.
    """

    def __init__(self, H, W, C, N, K, R, S, stride):
        """Store the layer dimensions and derive the output size.

        Raises:
            ValueError: if ``stride`` is not a positive number.
        """
        if stride < 1:
            raise ValueError("stride must be >= 1, got %r" % (stride,))
        self.H = H
        self.W = W
        self.C = C
        self.N = N
        self.K = K
        self.R = R
        self.S = S
        self.stride = stride
        # Previously hard-coded to 0; a window starting at p*stride is valid
        # only while p*stride + R <= H (likewise for the width).
        self.P = max((H - R) // stride + 1, 0)
        self.Q = max((W - S) // stride + 1, 0)

In [101]:
# conv(height, width, input channels, minibatch, output channels, filter height, filter width, stride)
c = conv(H=32, W=32, C=8, N=16, K=64, R=2, S=2, stride=2)

In [102]:
# Random input feature map laid out as (H, W, C, N) and random weights as (R, S, C, K).
ifm = np.random.random_sample((c.H, c.W, c.C, c.N))
filter = np.random.random_sample((c.R, c.S, c.C, c.K))

In [103]:
# Show the input feature-map dimensions.
ifm.shape

(32, 32, 8, 16)

In [104]:
# Show the filter dimensions.
filter.shape

(2, 2, 8, 64)

In [105]:
def fprop_conv(c, ifm, filter):
    """Forward-propagate a strided 2-D convolution (no padding), one GEMM per output pixel.

    Args:
        c: layer descriptor exposing ``H, W, C, N, K, R, S`` and ``stride``.
        ifm: input feature map indexed as (H, W, C, N).
        filter: weights indexed as (R, S, C, K).

    Returns:
        Array of shape (P, Q, K, N) with ``P = (H - R) // stride + 1`` and
        ``Q = (W - S) // stride + 1`` (clamped at 0 when the filter is larger
        than the input).
    """
    # Count only windows that fit entirely inside the input.  The former
    # int(H / stride) over-counted whenever R > stride, making the last
    # windows run off the edge and the reshape below fail.
    P = max((c.H - c.R) // c.stride + 1, 0)
    Q = max((c.W - c.S) // c.stride + 1, 0)
    ofm = np.empty([P, Q, c.K, c.N])

    # Loop-invariant: flatten the filter to (R*S*C, K) and transpose to (K, R*S*C).
    weights = filter[0:c.R, 0:c.S].reshape(c.R * c.S * c.C, c.K).T

    for p in range(P):
        row0 = p * c.stride
        for q in range(Q):
            col0 = q * c.stride
            # The R x S window flattens to (R*S*C, N), so weights @ window is (K, N).
            window = ifm[row0:row0 + c.R, col0:col0 + c.S].reshape(c.R * c.S * c.C, c.N)
            ofm[p, q] = np.dot(weights, window)
    return ofm

In [106]:
# Reference (unsplit) forward pass; the cell displays the output dimensions.
ofm = fprop_conv(c, ifm, filter)
ofm.shape

(16, 16, 64, 16)

## GEMM parallelization

In [107]:
# Device ids used for the splits below; n is the number of devices.
tpus = list(range(4))
n = len(tpus)

### split on K

In [108]:
# conv(height, width, input channels, minibatch, output channels, filter height, filter width, stride)
c = conv(32, 32, 8, 16, 64, 2, 2, 2)
ifm = np.random.rand(c.H, c.W, c.C, c.N)
filter = np.random.rand(c.R, c.S, c.C, c.K)

# Each device gets an equal share of the output channels; refuse to silently
# drop a remainder when K is not a multiple of the device count.
assert c.K % n == 0, "K must be divisible by the number of devices"
Kslice = c.K // n
# Per-device layer: identical to c except for the reduced output-channel count.
cKslice = conv(c.H, c.W, c.C, c.N, Kslice, c.R, c.S, c.stride)

# filter is (R, S, C, K), so slicing the last axis hands device t its K/n output
# channels.  Equivalent to the reshape -> transpose -> slice -> transpose -> reshape
# round-trip, and to np.take(filter, range(t*Kslice, (t+1)*Kslice), axis=3).
ofm = np.concatenate(
    [fprop_conv(cKslice, ifm, filter[:, :, :, t * Kslice:(t + 1) * Kslice]) for t in range(n)],
    axis=2,  # the output is (P, Q, K, N): stitch the partial results along K
)

# The stitched result must match the unsplit convolution.
np.testing.assert_allclose(ofm, fprop_conv(c, ifm, filter))
print(np.shape(ofm))

(16, 16, 64, 16)


### split on N

In [109]:
# conv(height, width, input channels, minibatch, output channels, filter height, filter width, stride)
c = conv(32, 32, 8, 16, 64, 2, 2, 2)
ifm = np.random.rand(c.H, c.W, c.C, c.N)
filter = np.random.rand(c.R, c.S, c.C, c.K)

# Each device gets an equal share of the minibatch; refuse to silently drop a
# remainder when N is not a multiple of the device count.
assert c.N % n == 0, "N must be divisible by the number of devices"
Nslice = c.N // n
# Per-device layer: identical to c except for the reduced minibatch size.
cNslice = conv(c.H, c.W, c.C, Nslice, c.K, c.R, c.S, c.stride)

# ifm is (H, W, C, N), so slicing the last axis hands device t its N/n samples.
# Equivalent to the reshape -> transpose -> slice -> transpose -> reshape round-trip.
ofm = np.concatenate(
    [fprop_conv(cNslice, ifm[:, :, :, t * Nslice:(t + 1) * Nslice], filter) for t in range(n)],
    axis=3,  # the output is (P, Q, K, N): stitch the partial results along N
)

# The stitched result must match the unsplit convolution.
np.testing.assert_allclose(ofm, fprop_conv(c, ifm, filter))
print(np.shape(ofm))

(16, 16, 64, 16)


### split on C

In [110]:
# conv(height, width, input channels, minibatch, output channels, filter height, filter width, stride)
c = conv(32, 32, 8, 16, 64, 2, 2, 2)
ifm = np.random.rand(c.H, c.W, c.C, c.N)
filter = np.random.rand(c.R, c.S, c.C, c.K)

# Each device gets an equal share of the input channels; refuse to silently
# drop a remainder when C is not a multiple of the device count.
assert c.C % n == 0, "C must be divisible by the number of devices"
Cslice = c.C // n
# Per-device layer: identical to c except for the reduced input-channel count.
# (Named cCslice so it no longer overwrites the N-split configuration.)
cCslice = conv(c.H, c.W, Cslice, c.N, c.K, c.R, c.S, c.stride)

# C is the reduction axis of the GEMM, so each device produces a full-size
# partial output from its channel slice (axis 2 of both ifm and filter) and
# the partial outputs are summed rather than concatenated.
partial_ofms = [
    fprop_conv(
        cCslice,
        ifm[:, :, t * Cslice:(t + 1) * Cslice, :],
        filter[:, :, t * Cslice:(t + 1) * Cslice, :],
    )
    for t in range(n)
]
ofm = np.sum(partial_ofms, axis=0)

# The reduced result must match the unsplit convolution (up to rounding,
# because the summation order differs).
np.testing.assert_allclose(ofm, fprop_conv(c, ifm, filter))
print(np.shape(ofm))

(16, 16, 64, 16)


# Depthwise separable convolutions

# softmax

In [111]:


def softmax(x, axis=None):
    """Numerically stable softmax.

    Args:
        x: array-like of scores.
        axis: axis along which to normalise.  The default ``None`` normalises
            over every element of ``x``.

    Returns:
        Array with the same shape as ``x`` whose entries are non-negative and
        sum to 1 along ``axis``.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return x.copy()
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing to inf (which would give inf/inf = nan).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

c = conv(32, 32, 8, 16, 64, 2, 2, 2)
ofm = np.random.rand(c.H, c.W, c.C, c.N)

# fully connected layer: reduce channels and all pixels to L classes
L = 10
# Ad-hoc divisor for the logits.  Presumably chosen to keep them O(1): each
# logit sums H*W*C = 8192 products of two U[0, 1) samples, about 2048 on average.
FC_SCALE = 2000
filter = np.random.rand(L, c.H*c.W*c.C)
# Flatten every sample to one column so the layer is a single (L, HWC) x (HWC, N) GEMM.
ofm = ofm.reshape([c.H*c.W*c.C, c.N])

# No preallocation needed: np.dot returns a fresh (L, N) array.
fc_ofm = np.dot(filter, ofm) / FC_SCALE

print(np.shape(fc_ofm))


(10, 16)


In [112]:
# Class probabilities per sample: one row per minibatch entry, one column per class.
sm = np.empty([c.N, L])

# fc_ofm is (L, N), so iterating over its transpose yields one logit vector per
# sample.  The loop variable is deliberately not called `n`, which would
# overwrite the global device count used by the GEMM-split cells.
for i, logits in enumerate(fc_ofm.T):
    # Store the probability vector itself; np.sum(softmax(...)) is always 1.0
    # and would just broadcast that constant across the whole row.
    sm[i] = softmax(logits)

# Every row is a probability distribution over the L classes.
assert np.allclose(sm.sum(axis=1), 1.0)
print(np.shape(sm))

(16, 10)


# batchnorm

In [113]:

ofm = np.random.rand(c.H, c.W, c.C, c.N)
# Mean over the minibatch axis (last axis) for every (h, w, channel) position;
# a single vectorised reduction replaces the triple Python loop.
mean = ofm.mean(axis=3)
print(np.shape(mean))

(32, 32, 8)


In [114]:
# Biased (population) variance over the minibatch axis for every (h, w, channel)
# position.  mean is (H, W, C); the trailing new axis lets it broadcast against
# the (H, W, C, N) feature map.
variance = np.square(ofm - mean[..., np.newaxis]).mean(axis=3)
print(np.shape(variance))

(32, 32, 8)


In [115]:
gamma = 1
beta = 1
# Stabiliser added to the variance before the square root.  Kept at 0 so the
# numbers match the rest of the notebook; use a small positive value
# (e.g. 1e-5) on real data, where a variance can be exactly zero.
epsilon = 0

# Batch norm: gamma * (x - mean) / sqrt(variance + epsilon) + beta.
# epsilon must be ADDED to the variance; subtracting it can push the argument
# of the square root below zero.  The trailing new axis broadcasts the
# (H, W, C) statistics over the minibatch axis of the (H, W, C, N) feature map.
bn_ofm = gamma*np.divide(ofm - mean[..., np.newaxis], np.sqrt(variance[..., np.newaxis] + epsilon)) + beta
print(np.shape(bn_ofm))



(32, 32, 8, 16)


In [116]:
# Fold batch norm into one add followed by one multiply:
#   gamma * (x - mean) / std + beta  ==  scale * (x + shift)
# with scale = gamma / std and shift = -mean + (beta / gamma) * std.
# epsilon is added to the variance (not subtracted) before the square root.
std = np.sqrt(variance + epsilon)
scale = np.divide(gamma, std)
shift = -1*mean + (beta/gamma)*std

print(np.shape(scale))
print(np.shape(shift))


(32, 32, 8)
(32, 32, 8)


In [117]:
# Flatten the per-position statistics to column vectors so they broadcast
# across the N columns of the flattened feature map.
scale = scale.reshape([c.H*c.W*c.C, 1])
shift = shift.reshape([c.H*c.W*c.C, 1])
ofm = ofm.reshape([c.H*c.W*c.C, c.N])
print(np.shape(scale))
print(np.shape(shift))

# Apply the folded batch norm: scale * (x + shift).  Previously the np.add
# result was discarded and bnss_ofm was left as uninitialised np.empty memory.
bnss_ofm = np.multiply(scale, np.add(ofm, shift))
bnss_ofm = bnss_ofm.reshape([c.H, c.W, c.C, c.N])

# The scale/shift form must reproduce the direct batch-norm result; the small
# atol covers outputs that happen to land very close to zero.
np.testing.assert_allclose(bnss_ofm, bn_ofm, rtol=1e-7, atol=1e-9)
print(np.shape(bnss_ofm))


(8192, 1)
(8192, 1)
(32, 32, 8, 16)
