# Machine Problem 3: NumPy CNN

### Import Libraries

In [1]:
import numpy as np
import os
import time
from scipy import signal
from imageio import imread
from random import shuffle
from matplotlib import pyplot as plt

%matplotlib inline

### Preprocessing Functions

In [2]:
# load_images
    # Read in images and makes a list for each set in the form: [images, labels]
    # images: np array with dims [N x img_height x img width x num_channels]
    # labels: np array with dims [N x 1]. elephant = 0, lionfish = 1
    #
    # Returns:  train_set: The list [train_images, train_labels]
    #           val_set: The list [val_images, val_labels] 

def load_images():
    
    sets = ['train', 'val']
    
    data_sets = []
    for dset in sets:
        img_path = './bin_dataset/' + dset + '/ele'
        ele_list = [imread(os.path.join(img_path, img)) for img in os.listdir(img_path)]

        img_path = './bin_dataset/' + dset + '/lio'
        lio_list = [imread(os.path.join(img_path, img)) for img in os.listdir(img_path)]

        set_images = np.stack(ele_list + lio_list)
        N = set_images.shape[0]
        labels = np.ones((N,1))
        labels[0:int(N/2)] = 0
        data_sets.append([set_images, labels])

    train_set, val_set = data_sets

    print("Loaded", len(train_set[0]), "training images")
    print("Loaded", len(val_set[0]), "validation images")
    
    return train_set, val_set



# batchify
    # Inputs:    train_set: List containing images and labels
    #            batch size: The desired size of each batch
    #
    # Returns:   image_batches: A list of shuffled training image batches, each with size batch_size
    #            label_batches: A list of shuffled training label batches, each with size batch_size 

# def batchify(train_set, batch_size):   
#     # YOUR CODE HERE
#     indices = np.linspace(0, len(train_set[0])-1, num=len(train_set[0]), dtype=np.int16)
#     shuffle(indices)
#     image_batches = []
#     label_batches = []
#     b = 0;
#     image_b = np.zeros((batch_size,len(train_set[0][0]),len(train_set[0][0][0]),len(train_set[0][0][0][0])))
#     for i in range(0,int(len(train_set[0])/batch_size)):
#         for x in range(0, len(train_set[0])):
#             for b in range(0, batch_size):
#                 image_b[b][:][:][:]=train_set[0][indices[x]]
#                 label_batches[b]=train_set[1][indices[x]]              
#         image_batches.append(np.array(image_b))
#         label_batches.append(np.array(label_b)) 
#     return image_batches, label_batches
def batchify(train_set, batch_size):
    
    # YOUR CODE HERE
    indices = np.linspace(0, len(train_set[0])-1, num=len(train_set[0]), dtype=np.int16)
    shuffle(indices)
    image_batches = []
    label_batches = []
    b = 0;
    image_b = []
    label_b = []
    for x in range(0, len(train_set[0])):
        image_b.append(train_set[0][indices[x]])
        label_b.append(train_set[1][indices[x]])
        if(b == batch_size - 1):
            image_batches.append(np.array(image_b))
            label_batches.append(np.array(label_b))
            b = 0
            image_b = []
            label_b = []
        else:
            b = b + 1
    image_batches.append(np.array(image_b))
    label_batches.append(np.array(label_b))  
    return image_batches, label_batches
def data_normalization(data_set):
    data, label = data_set
    data = data.astype(float)
    data /= 256.0
    return [data,label]


In [222]:
print('size of train_set:',len(train_set),len(train_set[0]),len(train_set[0][0]),len(train_set[0][0][0]),len(train_set[0][0][0][0]))
print('size of train_set:',len(train_set),len(train_set[1]),len(train_set[1][0]))


size of train_set: 2 2000 100 100 3
size of train_set: 2 2000 1


### Network Functions

#### Activation Functions

In [223]:
# relu
    # Inputs:   x: Multi-dimensional array with size N along the first axis
    # 
    # Returns:  out: Multi-dimensional array with same size of x 

def relu(x):
    
    # YOUR CODE HERE
    out = np.maximum(0,x)
    return out

# sigmoid
    # Inputs:    x: Multi-dimensional array with size N along the first axis
    # 
    # Returns:   out: Multi-dimensional array with same size of x 

def sigmoid(x):
    # YOUR CODE HERE
    out = x
    for i in range(0,len(x)):
        out[i]=1/(1+np.exp(-x[i]));
    return out


# unit_step
    # Inputs:    x: Multi-dimensional array with size N along the first axis 
    # 
    # Returns:   out: Multi-dimensional array with same size of x 

def unit_step(x):
    
    # YOUR CODE HERE
    out=np.heaviside(x,1)
    return out 
# temp= np.random.rand(5,5)-0.2
# out=unit_step(temp)
# print(out)

#### Layer Functions

In [3]:
# convolve2D
    # Inputs:    X: [N x height x width x num_channels]
    #            filters: [num_filters x filter_height x filter_width x num_input_channels]
    # 
    # Returns:   Xc: output array by convoling X and filters. [N x output_height x output_width x num_filters]

def convolve2D(X0, filters):
   
    N, X0_len, _, num_ch = X0.shape
    num_out_ch, filter_len, _, _ = filters.shape
    F0_side = X0_len - filter_len + 1
    F0 = np.zeros((N, F0_side, F0_side, num_out_ch))
    
    for n in range(N):
        for o_ch in range(num_out_ch):
            for ch in range(num_ch):
#                 print(temp.shape)
#                 print(F0[n][:,:,o_ch].shape)
#                 print(X0[n][:,:,ch].shape)
#                 print(filters[o_ch][:,:,ch].shape)
                F0[n,0:,0:,o_ch] += signal.convolve2d(X0[n,0:,0:,ch], filters[o_ch,0:,0:,ch],mode='valid')
                #print(F0[n][:][:][o_ch])
    return F0
# print((W0[0][:][:][0]))
# print(W0)
weights = np.load('weights.npz')
W0 = weights['W0']
W1 = weights['W1']
W2 = weights['W2']
F0=convolve2D(val_set[0], W0)
print(len(F0),len(F0[0]),len(F0[0][0]))
print(F0[0])

# maxPool
    # Inputs:    R0: [N x height x width x num_channels]
    #            mp_len: size of max pool window, also the stride for this MP
    # 
    # Returns:   p_out: output of pooling R0. [N x output_height x output_width x num_channels]
    #            R0_mask: A binary mask with the same size as R0. Indicates which index was chosen to be the max
    #            for each max pool window. This will be used for backpropagation.

def maxPool(R0, mp_len):

    N, R0_len, _, num_ch = R0.shape
    p_out_len = int((R0_len-mp_len)/mp_len + 1)

    R0_mask = np.zeros(R0.shape)
    p_out = np.zeros((N, p_out_len, p_out_len, num_ch))
    
    for n in range(N):
        for ch in range(num_ch):
            for row in range(p_out_len): 
                for col in range(p_out_len):
                    # YOUR CODE HERE
                    max = 0;
                    a_max = 0;
                    b_max = 0;
                    for a in range(mp_len*row,mp_len*(row+1)):
                        for b in range(mp_len*col, mp_len*(col+1)):
                            if (R0[n, a, b, ch] > max):
                                max_ = R0[n, a, b, ch]
                                a_max = a
                                b_max = b
                            if ((a == mp_len*(row+1) -1) and (b == mp_len*(col+1) -1)):
                                R0_mask[n, a_max, b_max, ch]=1
                                
                    p_out[n, row, col, ch]=max_

    return p_out, R0_mask
# fc
    # Inputs:    X: [N x num_input_features]
    #            W: [num_input_features x num_fc_nodes]
    # 
    # Returns:   out: Linear combination of X and W. [N x num_fc_nodes]

def fc(X, W):
    
    # YOUR CODE HERE
    out = np.matmul(X, W)
    print(X.shape,W.shape)
    return out

# flatten
    # Inputs:    X1: [N x output_height x output_width x num_channels]
    # Return:    f_out: [N x (output_height x output_width x num_channels)]
def flatten(X1):
    N, num_rows, num_cols, num_ch = X1.shape
    f_out = np.zeros((N, (num_rows * num_cols * num_ch)))
    for n in range(N):
        f_out[n] = X1[n].flatten()
    return f_out

NameError: name 'val_set' is not defined

#### CNN Functions

In [225]:
# cnn_fwd
    # Inputs:    X0: batch of images. [N x img_height x img_width x num_channels]
    #            W0, W1, W2: Parameters of the CNN
    #            mp_len: the length of one side of the max pool window
    # 
    # Returns:   sig: vector containing the output for each sample. [N x 1]
    #            cache: a dict containing the relevant output layer calculations that will be
    #            used in backpropagation
    
def cnn_fwd(X0, W0, W1, W2, mp_len):
    
    # F0 
    # YOUR CODE HERE
    F0 = convolve2D(X0, W0)
    # X1p 
    # YOUR CODE HERE
    R0 = relu(F0)
    X1p,R0_mask = maxPool(R0, mp_len)
    # X1 (flatten)
    # YOUR CODE HERE
    X1 = flatten(X1p)
    # FC Layers
    # YOUR CODE HERE
    F1 = fc(X1, W1)
    X2 = relu(F1)
    F2 = fc(X2,W2)
    sig = sigmoid(F2)
    # Output
    # YOUR CODE HERE
    
    # Save outputs of functions for backward pass
    cache = {
        "F0":F0,
        "R0":R0,
        "X1p":X1p,
        "R0m":R0_mask,
        "X1":X1,
        "F1":F1,
        "X2":X2,
        "F2":F2      
    }
    
    return sig, cache


# loss
    # Inputs:    sig: vector containing the CNN output for each sample. [N x 1]
    #            Y: vector containing the ground truth label for each sample. [N x 1]
    # 
    # Returns:   L: Loss/error criterion for the model. 

def loss(sig, Y):
    
    # YOUR CODE HERE
    N = len(sig)
    L = 0.0
    for i in range(N):
        L += (-Y[i])*np.log(sig[i])-(1.0-Y[i])*np.log(1.0-sig[i])
        #print(L,sig[i],Y[i])
    L=L/N
    return L


### Backprop Functions

In [6]:
# convolve2DBwd
    # Inputs:    X0: batch of images. [N x height x width x num_channels]
    #            dL_dF0: Gradient at the output of the conv layer. 
    # 
    # Returns:   dL_dW0. gradient of loss L wrt W0. Same size as W0

def convolve2DBwd(X0, dL_dF0):
    
    N, X0_len, _, num_ch = X0.shape
    _, dL_dF0_len, _, num_out_ch  = dL_dF0.shape
    filter_len = X0_len - dL_dF0_len + 1
    
    dL_dW0 = np.zeros((num_out_ch, filter_len, filter_len, num_ch))
    
    for n in range(N):
        for o_ch in range(num_out_ch):
            for ch in range(num_ch):
                # YOUR CODE HERE 
    
    return dL_dW0


# maxPoolBwd
    # Inputs:    dL_dX1p: Gradient at the output of the MaxPool layer
    #            R0_mask: A binary mask with the same size as R0. Defined in maxPool
    #            mp_len: the length of one side of the max pool window
    # 
    # Returns:   dL_dR0: Gradient at the output of ReLu
    
def maxPoolBwd(dL_dX1p, R0_mask,  mp_len):
    
    N, H, W, C = R0_mask.shape
    N, dH, dW, C = dL_dX1p.shape
    
    dL_dR0 = np.zeros(R0_mask.shape)
    
    for n in range(N):
        for ch in range(C):
            for row in range(dH):
                for col in range(dW):
                    # YOUR CODE HERE
                    
    return dL_dR0


# dL_dW2
    # Inputs:    Y: vector containing the ground truth label for each sample. [N x 1]
    #            cache: a dict containing the relevant output layer calculations 
    # 
    # Returns:   dL_dW2: Gradient of the Loss wrt W2
    
def dL_dW2(Y, cache):
   
    # YOUR CODE HERE
    
    return dL_dW2


# dL_dW1
    # Inputs:    Y: vector containing the ground truth label for each sample. [N x 1]
    #            W2: Weight matrix for the second FC layer
    #            cache: a dict containing the relevant output layer calculations 
    # 
    # Returns:   dL_dW1: Gradient of the Loss wrt W1
    
def dL_dW1(Y, W2, cache):
    
    # YOUR CODE HERE
    
    return dL_dW1


# dL_dW0
    # Inputs:    X0: batch of images. [N x height x width x num_channels]
    #            Y: vector containing the ground truth label for each sample. [N x 1]
    #            W1: Weight matrix for the first FC layer
    #            W2: Weight matrix for the second FC layer
    #            mp_len: the length of one side of the max pool window
    #            cache: a dict containing the relevant output layer calculations 
    # 
    # Returns:   dL_dW0: Gradient of the Loss wrt W0

def dL_dW0(X0, Y, W1, W2, mp_len, cache):
    
    N, X1p_len, _, no_out_ch  = cache['X1p'].shape
    F2 = cache['F2']
    F1 = cache['F1']
    R0m = cache['R0m']
    F0 = cache['F0']
    
    #dL_dF2
    # YOUR CODE HERE
    
    #dL_dF1
    # YOUR CODE HERE
    
    #dL_dX1
    # YOUR CODE HERE
    
    # dL_dX1p (unflatten)
    # YOUR CODE HERE
    
    # dL_dR0 (unpool)
    # YOUR CODE HERE
    
    # dL_dF0 (relu_bwd)
    # YOUR CODE HERE
    
    # dL_dW0
    # YOUR CODE HERE
    
    return dL_dW0
    
        

### Training

#### Load Images

In [4]:
# Load images and scale them
# YOUR CODE HERE
train_set, val_set= load_images()
print(val_set[0][0])
train_set=data_normalization(train_set)
val_set =data_normalization(val_set)

Loaded 2000 training images
Loaded 800 validation images
[[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [218 236 248]
  [ 98  83  76]
  [ 98  74  72]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [143 129 162]
  [122 103 123]
  [ 72  59 102]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [155 133 119]
  [ 86  67  86]
  [124 102  81]]

 ...

 [[144 136 115]
  [115 102  96]
  [162 145 127]
  ...
  [172 168 133]
  [222 202 152]
  [140 111  71]]

 [[129 119 107]
  [200 175 134]
  [215 196 163]
  ...
  [187 163 119]
  [198 166 117]
  [163 124  85]]

 [[239 219 192]
  [236 212 174]
  [178 153 113]
  ...
  [249 240 207]
  [170 135 113]
  [190 166 128]]]


#### Config

In [227]:
# Hyperparameters
epochs = 20
lr = 0.1
batch_size = 16
filter_len = 5
num_out_ch = 3
mp_len = 12
fc_nodes = 2

# Declare weights
# YOUR CODE HERE


In [None]:
for i in range(epochs):
    
    # make set of batches
    # YOUR CODE HERE
    
    for b_idx in range(num_batches):
        X = img_batches[b_idx]
        Y = label_batches[b_idx]
        
        # Forward pass
        # YOUR CODE HERE
        
        # Calculate gradients
        # YOUR CODE HERE
        
        # Update gradients
        # YOUR CODE HERE
      

### Test Correctness of Forward and Backward Pass

#### Forward

In [228]:
weights = np.load('weights.npz')
W0 = weights['W0']
W1 = weights['W1']
W2 = weights['W2']

sig, cache = cnn_fwd(val_set[0], W0, W1, W2, mp_len)
train_acc = len(np.where(np.round(sig) == val_set[1])[0])/len(val_set[1])

print("train_loss:", loss(sig, val_set[1]), "train_acc:", train_acc)

(800, 192) (192, 2)
(800, 2) (2, 1)
train_loss: [0.55778204] train_acc: 0.7625


In [236]:

print(len(val_set),len(val_set[0]), len(val_set[0][0]),len(val_set[0][0][0]),len(val_set[0][0][0][0]))
#print(cache['F0'][0])
print(val_set[0][0])
print("train_loss:", loss(sig, val_set[1]))

2 800 100 100 3
[[[0.1875     0.23046875 0.16796875]
  [0.22265625 0.234375   0.25390625]
  [0.41015625 0.39453125 0.3984375 ]
  ...
  [0.78125    0.84375    0.79296875]
  [0.19140625 0.2578125  0.1953125 ]
  [0.5234375  0.58984375 0.52734375]]

 [[0.3828125  0.48046875 0.328125  ]
  [0.34765625 0.40234375 0.34375   ]
  [0.40625    0.4375     0.39453125]
  ...
  [0.62890625 0.69921875 0.64453125]
  [0.30859375 0.37890625 0.32421875]
  [0.43359375 0.50390625 0.44921875]]

 [[0.140625   0.26953125 0.0859375 ]
  [0.2421875  0.34765625 0.2265625 ]
  [0.30078125 0.375      0.296875  ]
  ...
  [0.24609375 0.32421875 0.27734375]
  [0.5        0.5859375  0.53515625]
  [0.35546875 0.44140625 0.39453125]]

 ...

 [[0.515625   0.68359375 0.50390625]
  [0.421875   0.6171875  0.41015625]
  [0.4140625  0.640625   0.40234375]
  ...
  [0.4765625  0.61328125 0.30078125]
  [0.5234375  0.66015625 0.33203125]
  [0.60546875 0.7421875  0.421875  ]]

 [[0.34765625 0.51953125 0.37890625]
  [0.4453125  0.62109

#### Backward

In [None]:
# Make backprop testing batch
X_bp = np.vstack([train_set[0][0:8,:,:,:], train_set[0][-9:-1,:,:,:]])
Y_bp = np.vstack([train_set[1][0:8], train_set[1][-9:-1]])

# Initialize weights to all ones
# YOUR CODE HERE

# Update weights once
# YOUR CODE HERE


print("W2 value:", np.sum(W2))
print("W1 value:", np.sum(W1))
print("W0 value:", np.sum(W0))