In [124]:
from scipy import signal as sig

# imports from custom library
import sys
sys.path.append('../../')
import matplotlib.pyplot as plt
plt.rc('text', usetex=True)
from mlrefined_libraries import convnets_library as convlib
from mlrefined_libraries import basics_library as baslib
from mlrefined_libraries import superlearn_library as superlearn
from mlrefined_libraries import multilayer_perceptron_library as network_lib

import autograd.numpy as np
from autograd import grad as compute_grad   

import autograd.numpy as np
import numpy as npo

import pandas as pd
import cv2
import csv
import pickle
import glob
import time
import copy
from datetime import datetime 

#this is needed to compensate for matplotlib notebook's tendancy to blow up images when plotted inline
%matplotlib notebook
from matplotlib import rcParams
rcParams['figure.autolayout'] = True

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## True convolution function

In [125]:
import scipy.signal as signal

def ScipyConv(image, kernel):
    
    # flip kernel
    kernel = np.flipud(np.fliplr(kernel))
    
    # compute convolution
    conv = signal.convolve2d(image, kernel, boundary='fill', fillvalue=0, mode='same')
    return conv  

# Slow but careful way - image version

In [67]:
# sliding window for image augmentation
def sliding_window_image(image, kernel, stride):
    windowed_image = []
    for i in np.arange(0, np.shape(image)[0]-kernel.shape[0]+1, stride):
        for j in np.arange(0, np.shape(image)[1]-kernel.shape[1]+1, stride):
             windowed_image.append(image[i:i+kernel.shape[0], j:j+kernel.shape[1]].flatten())
            
    return np.asarray(windowed_image)

# pad image with appropriate number of zeros for convolution
def pad_image(image,kernel_size):
    odd_nums = np.asarray([int(2*n + 1) for n in range(100)])
    val = kernel_size[0]
    pad_val = np.argwhere(odd_nums == val)[0][0]
    image_padded = np.zeros((np.shape(image) + 2*pad_val))
    image_padded[pad_val:-pad_val,pad_val:-pad_val] = image
    return image_padded          

In [4]:
# generate test image and kernel
image = np.random.randn(5,5)
kernel = np.ones((3,3))
kernel_size = kernel.shape

In [5]:
# pad image
padded_image = pad_image(image,kernel_size)

# window image
wind_img = sliding_window_image(padded_image,kernel,stride = 1)

# produce matrix multiplication convolution 
conv2 = np.dot(wind_img,kernel.flatten()[:,np.newaxis])

# reshape convolution into array
conv2.shape = (np.shape(image))

In [6]:
print(np.shape(image))
print(np.shape(padded_image))
print(np.shape(wind_img))
print(np.shape(conv2))

(5, 5)
(7, 7)
(25, 9)
(5, 5)


In [7]:
#### baseline convolution #####
conv1 = ScipyConv(image,kernel)

In [8]:
#### compare convolutions ####
error = np.linalg.norm(conv1 - conv2)
print (error)
print (np.shape(conv1),np.shape(conv2))

1.84611094726e-15
(5, 5) (5, 5)


In [9]:
# activation 
def activation(t):
    return np.maximum(0,t)

# output of activation
a_conv = activation(conv2)

In [10]:
# pool operation
kernel2 = np.ones((2,2))
wind_conv = sliding_window(a_conv,kernel2,stride = 2)

# max pooling
max_pool = np.max(wind_conv,axis = 1)

In [11]:
print (np.shape(wind_conv))
print (np.shape(max_pool))

(4, 4)
(4,)


In [12]:
# back to an image
max_pool.shape = (int(np.size(max_pool)**(0.5)),int(np.size(max_pool)**(0.5)))

In [13]:
np.shape(max_pool)

(2, 2)

# Tensor version

In [14]:
# sliding window for image augmentation
def sliding_window_tensor(tensor, kernel, stride):
    windowed_tensor = []
    for i in np.arange(0, np.shape(tensor)[1]-kernel.shape[0]+1, stride):
        for j in np.arange(0, np.shape(tensor)[2]-kernel.shape[1]+1, stride):
            sock = copy.deepcopy(tensor[:,i:i+kernel.shape[0], j:j+kernel.shape[1]])
            windowed_tensor.append(sock)
    
    # re-shape properly
    windowed_tensor = np.asarray(windowed_tensor)
    windowed_tensor = windowed_tensor.swapaxes(0,1)
    windowed_tensor = windowed_tensor.reshape(np.shape(windowed_tensor)[0]*np.shape(windowed_tensor)[1],np.shape(windowed_tensor)[2]*np.shape(windowed_tensor)[3])    
    return windowed_tensor

# pad image with appropriate number of zeros for convolution
def pad_tensor(tensor,kernel):
    odd_nums = np.asarray([int(2*n + 1) for n in range(100)])
    val = kernel.shape[0]
    pad_val = np.argwhere(odd_nums == val)[0][0]
    tensor_padded = np.zeros((np.shape(tensor)[0], np.shape(tensor)[1] + 2*pad_val,np.shape(tensor)[2] + 2*pad_val))
    tensor_padded[:,pad_val:-pad_val,pad_val:-pad_val] = tensor
    return tensor_padded    

In [15]:
# create test tensor
tensor = np.random.randn(3,5,5)
kernel = np.ones((3,3))

In [16]:
# pad tensor
padded_tensor = pad_tensor(tensor,kernel)

# window tensor
wind_tensor = sliding_window_tensor(padded_tensor,kernel,stride = 1)

# # produce matrix multiplication convolution 
conv2 = np.dot(wind_tensor,kernel.flatten()[:,np.newaxis])

# # reshape convolution into array
conv2.shape = (np.shape(tensor))

In [17]:
print(np.shape(tensor))
print(np.shape(padded_tensor))
print(np.shape(wind_tensor))
print(np.shape(conv2))

(3, 5, 5)
(3, 7, 7)
(75, 9)
(3, 5, 5)


In [18]:
#### baseline convolution #####
convs = []
for i in range(np.shape(tensor)[0]):
    conv1 = ScipyConv(tensor[i,:,:],kernel)
    convs.append(conv1)
convs = np.asarray(convs)

In [19]:
#### compare convolutions ####
error = np.linalg.norm(convs - conv2)
print (error)
print (np.shape(convs),np.shape(conv2))

2.53553677697e-15
(3, 5, 5) (3, 5, 5)


In [20]:
# output of activation
a_conv = activation(conv2)

In [21]:
# pool operation
kernel2 = np.ones((2,2))
wind_conv = sliding_window_tensor(a_conv,kernel2,stride = 2)

# max pooling
max_pool = np.max(wind_conv,axis = 1)

In [22]:
print (np.shape(wind_conv))
print (np.shape(max_pool))

(12, 4)
(12,)


In [23]:
# reshape into tensor
max_pool.shape = (np.shape(tensor)[0],int(np.shape(wind_conv)[1]**(0.5)),int(np.shape(wind_conv)[1]**(0.5)))

In [24]:
np.shape(max_pool)

(3, 2, 2)

# Test with tensor and multiple kernels

In [25]:
# create test tensor
tensor = np.random.randn(3,28,28)
kernels = np.random.randn(8,3,3)
kernel = kernels[0]

# pad tensor
padded_tensor = pad_tensor(tensor,kernel)

# window tensor
wind_tensor = sliding_window_tensor(padded_tensor,kernel,stride = 1)

In [26]:
#### baseline convolution #####
convs = []
for kernel in kernels:
    temp = []
    for i in range(np.shape(tensor)[0]):
        conv1 = ScipyConv(tensor[i,:,:],kernel)
        temp.append(conv1)
    temp = np.asarray(temp)
    convs.append(temp)
convs = np.asarray(convs)

##### use tensor calculation from above ######

startTime= datetime.now() 

conv2 = []
padded_tensor = pad_tensor(tensor,kernel)
wind_tensor = sliding_window_tensor(padded_tensor,kernel,stride = 1)
for kernel in kernels:
    # # produce matrix multiplication convolution 
    conv = np.dot(wind_tensor,kernel.flatten()[:,np.newaxis])

    # # reshape convolution into array
    conv.shape = (np.shape(tensor))
    conv = np.asarray(conv)
    conv2.append(conv)
conv2 = np.asarray(conv2)

timeElapsed=datetime.now()-startTime 

print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:00.008210


In [27]:
# # SUPER COMPACT VERSION
# # produce matrix multiplication convolution 
num_kernels = np.shape(kernels)[0]

startTime= datetime.now() 

conv3 = np.dot(wind_tensor,kernels.reshape(np.shape(kernels)[0],np.shape(kernels)[1]*np.shape(kernels)[2]).T).T

# # reshape convolution into array
a = np.shape(tensor)
a = list(a)
a.insert(0,num_kernels)
a = tuple(a)
conv3.shape = a

timeElapsed=datetime.now()-startTime 

print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:00.000375


In [28]:
#### compare convolutions  - either scipy or one kernel at a time ####
error = np.linalg.norm(conv2 - convs)
print (error)
print (np.shape(convs),np.shape(conv3))

5.95018652155e-14
(8, 3, 28, 28) (8, 3, 28, 28)


At the end of convolution we have `num_kernels` number of feature maps for our input data.

In [29]:
np.shape(conv2)

(8, 3, 28, 28)

Now to shove through activation and pool.

In [30]:
# output of activation
transformed_feature_maps = activation(conv2)

In [31]:
np.shape(transformed_feature_maps)

(8, 3, 28, 28)

In [32]:
# pool operation
kernel2 = np.ones((6,6))
stride = 3
new_tensors = []
for feature_map in transformed_feature_maps:
    # move over feature map and gather patches
    wind_conv = sliding_window_tensor(feature_map,kernel2,stride = stride)
    
    # max pool on each collected patch
    max_pool = np.max(wind_conv,axis = 1)
    
    # reshape into new tensor
    max_pool.shape = (np.shape(tensor)[0],int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)),int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)))
    
    # reshape into new downsampled pooled feature map
    new_tensors.append(max_pool)
    
# turn into array
new_tensors = np.asarray(new_tensors)

In [33]:
np.shape(new_tensors)

(8, 3, 8, 8)

Final step - as we feed into fully connected network component, make sure everything is reshaped correctly.  What the final output shape should be - one long vectorized sequence of 'feature maps' for each input image.  So here we are

In [34]:
# reshape into final feature vector to touch fully connected layer(s)
new_tensors = new_tensors.swapaxes(0,1)
new_tensors = new_tensors.reshape(np.shape(new_tensors)[0],np.shape(new_tensors)[1]*np.shape(new_tensors)[2]*np.shape(new_tensors)[3]).T
print (np.shape(new_tensors))

(512, 3)


All together we have.

In [35]:
# create test tensor
tensor = np.random.randn(3,28,28)
kernels = np.random.randn(8,3,3)
kernel = kernels[0]

# pad tensor
padded_tensor = pad_tensor(tensor,kernel)

# window tensor
wind_tensor = sliding_window_tensor(padded_tensor,kernel,stride = 1)

#### create convolution feature maps ####
feature_maps = np.dot(wind_tensor,kernels.reshape(np.shape(kernels)[0],np.shape(kernels)[1]*np.shape(kernels)[2]).T).T

# reshape feature maps back into arrays
shapes = np.shape(tensor)
shapes = list(shapes)
shapes.insert(0,num_kernels)
shapes = tuple(shapes)
feature_maps.shape = shapes

# push feature maps through activation
transformed_feature_maps = activation(feature_maps)

#### downsample via pooling ####
kernel2 = np.ones((6,6))
stride = 3
new_tensors = []
for feature_map in transformed_feature_maps:
    # move over feature map and gather patches
    wind_conv = sliding_window_tensor(feature_map,kernel2,stride = stride)
    
    # max pool on each collected patch
    max_pool = np.max(wind_conv,axis = 1)
    
    # reshape into new tensor
    max_pool.shape = (np.shape(tensor)[0],int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)),int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)))
    
    # reshape into new downsampled pooled feature map
    new_tensors.append(max_pool)
    
# turn into array
new_tensors = np.asarray(new_tensors)

# reshape into final feature vector to touch fully connected layer(s)
new_tensors = new_tensors.swapaxes(0,1)
new_tensors = new_tensors.reshape(np.shape(new_tensors)[0],np.shape(new_tensors)[1]*np.shape(new_tensors)[2]*np.shape(new_tensors)[3]).T
print (np.shape(new_tensors))

(512, 3)


# Test transformation on face images

Load in data, transform via original method, transform via new method, compare features to make sure everything looks good.

## Old way

In [36]:
 def load_data(csvname):
    # load in data
    datapath = '../../mlrefined_datasets/convnet_datasets/feat_face_data.csv'
    data = np.loadtxt(datapath,delimiter = ',')

    # import data and reshape appropriately
    X = data[:,:-1]
    y = data[:,-1]
    y.shape = (len(y),1)
    
    X_square = np.zeros((len(y),28,28))
    for i in range(0,len(y)):
        X_square[i,:,:] = np.reshape(X[i,:],(28,28),1)
    
    # pad data with ones for more compact gradient computation
    o = np.ones((np.shape(X)[0],1))
    X = np.concatenate((o,X),axis = 1)
    X = X.T
    
    return X,X_square,y

In [37]:
# load data
X,X_square, y = load_data('../../mlrefined_datasets/convnet_datasets/feat_face_data.csv')

In [113]:
# load kernels
kernels = convlib.image_viz.load_kernels()

# params
sliding_window_size = (6,6) 
stride=3
pooling_func= 'max'

# get number of images in the dataset
num_images = np.shape(X_square)[0]
        
# a test run to find the number of features with the params above
test = convlib.image_viz.make_feat(X_square[0,:,:], kernels, sliding_window_size=sliding_window_size, stride=stride)
num_features = np.shape(test)[0]

In [106]:
# start timer
startTime= datetime.now() 

# run old method
feat = []
for i in range(0,num_images):
    # extract features
    f = convlib.image_viz.make_feat(X_square[i,:,:], kernels, sliding_window_size=sliding_window_size,
                                            stride=stride, pooling_func=pooling_func) 
    # store it
    feat.append(f)
    
# convert to array
feat = np.asarray(feat)

# time for measurment
timeElapsed=datetime.now()-startTime 

print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:31.737743


## New way - non-compact image version

Re-make original exposing all looping structures etc.,

Far too slow - takes over a minute for just 4000 images - can't use.

In [114]:
# copy over test kernels
new_kernels = []
kernels = convlib.image_viz.load_kernels()
for ind, kernel in kernels.items():
    new_kernels.append(kernel)
new_kernels = np.asarray(new_kernels)
kernels = copy.deepcopy(new_kernels)

In [118]:
# assign tensor name
tensor = X_square[:4000]

In [119]:
# start timer
startTime = datetime.now() 

#### loop over each image, shove through filters and make feature maps, then downsample and pool
new_tensors = []
kernel_size = kernels[0].shape

#### loop over images
for image in tensor:
    # pad image with zeros
    padded_image = pad_image(image,kernel_size)

    #### loop over kernels and construct feature map for each kernel
    downsampled_feature_maps = []
    for kernel in kernels:
        # window image
        wind_img = sliding_window_image(padded_image,kernel,stride = 1)
    
        # make convolution feature map - via matrix multiplication over windowed tensor 
        feature_map = np.dot(wind_img,kernel.flatten()[:,np.newaxis])
        
        # reshape convolution feature map into array
        feature_map.shape = (np.shape(image))
        feature_map = np.asarray(feature_map)

        # now shove result through nonlinear activation
        feature_map = activation(feature_map)

        #### now pool / downsample feature map, first window then pool on each window
        wind_featmap = sliding_window_image(feature_map,kernel2,stride = stride)

        # max pool on each collected patch
        max_pool = np.max(wind_featmap,axis = 1)

        # reshape into new tensor
        max_pool.shape = (int((np.size(max_pool))**(0.5)),int((np.size(max_pool))**(0.5)))

        # reshape into new downsampled pooled feature map
        downsampled_feature_maps.append(max_pool)
        
    ## re-shape downsampled_feature_maps and store
    new_tensors.append(downsampled_feature_maps)

# reshape new tensor properly
new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],np.shape(new_tensors)[1],np.shape(new_tensors)[2]*np.shape(new_tensors)[3]))
new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],np.shape(new_tensors)[1]*np.shape(new_tensors)[2]),order = 'F')

# time for measurment
timeElapsed=datetime.now()-startTime 

print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:01:28.811969


In [109]:
np.linalg.norm(new_tensors - feat)

2.1149511218721878e-14

## New way - non-compact tensor version

#### A somewhat effecient version of the feature transform code that is still somewhat understandable, about 4 times faster than most naive version.

Convert kernel dictionary to tensor.

In [120]:
# copy over test kernels
new_kernels = []
kernels = convlib.image_viz.load_kernels()
for ind, kernel in kernels.items():
    new_kernels.append(kernel)
new_kernels = np.asarray(new_kernels)
kernels = copy.deepcopy(new_kernels)

# assign tensor name
tensor = X_square

Now run.

In [121]:
# start timer
startTime= datetime.now() 

# pad tensor
padded_tensor = pad_tensor(tensor,kernel)

# window tensor
wind_tensor = sliding_window_tensor(padded_tensor,kernel,stride = 1)

#### compute convolution feature maps / downsample via pooling one map at a time over entire tensor #####
kernel2 = np.ones((6,6))
stride = 3
new_tensors = []
for kernel in kernels:
    #### make convolution feature map - via matrix multiplication over windowed tensor 
    feature_map = np.dot(wind_tensor,kernel.flatten()[:,np.newaxis])

    # reshape convolution feature map into array
    feature_map.shape = (np.shape(tensor))
    feature_map = np.asarray(feature_map)
    
    # now shove result through nonlinear activation
    feature_map = activation(feature_map)
    
    #### now pool / downsample feature map, first window then pool on each window
    wind_featmap = sliding_window_tensor(feature_map,kernel2,stride = stride)
    
    # max pool on each collected patch
    max_pool = np.max(wind_featmap,axis = 1)
    
    # reshape into new tensor
    max_pool.shape = (np.shape(tensor)[0],int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)),int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)))

    # reshape into new downsampled pooled feature map
    new_tensors.append(max_pool)
    
# turn into array
new_tensors = np.asarray(new_tensors)

# reshape into final feature vector to touch fully connected layer(s), otherwise keep as is in terms of shape
new_tensors = new_tensors.swapaxes(0,1)
new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],np.shape(new_tensors)[1],np.shape(new_tensors)[2]*np.shape(new_tensors)[3]))
new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],np.shape(new_tensors)[1]*np.shape(new_tensors)[2]),order = 'F')

# time for measurment
timeElapsed=datetime.now()-startTime 

print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:07.618043


In [122]:
np.linalg.norm(new_tensors - feat)

2.1149511218721878e-14

## New way - compact tensor version

#### More compact version - all kernel multiplications done at once - strangely is a bit slower than the one above, all the re-shaping must not be worth it, at least for this instance

In [43]:
# start timer
startTime= datetime.now() 

# pad tensor
kernel = kernels[0]
num_kernels = np.shape(kernels)[0]
padded_tensor = pad_tensor(tensor,kernel)

# window tensor
wind_tensor = sliding_window_tensor(padded_tensor,kernel,stride = 1)

#### create convolution feature maps ####
feature_maps = np.dot(wind_tensor,kernels.reshape(np.shape(kernels)[0],np.shape(kernels)[1]*np.shape(kernels)[2]).T).T

# reshape feature maps back into arrays
shapes = np.shape(tensor)
shapes = list(shapes)
shapes.insert(0,num_kernels)
shapes = tuple(shapes)
feature_maps.shape = shapes

# push feature maps through activation
# activation 
def activation(t):
    return np.maximum(0,t)

transformed_feature_maps = activation(feature_maps)

#### downsample via pooling ####
kernel2 = np.ones((6,6))
stride = 3
new_tensors = []
for feature_map in transformed_feature_maps:    
    # move over feature map and gather patches
    wind_featmap = sliding_window_tensor(feature_map,kernel2,stride = stride)
    
    # max pool on each collected patch
    max_pool = np.max(wind_featmap,axis = 1)

    # reshape into new tensor
    max_pool.shape = (np.shape(tensor)[0],int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)),int((np.shape(max_pool)[0]/float(np.shape(tensor)[0]))**(0.5)))

    # reshape into new downsampled pooled feature map
    new_tensors.append(max_pool)
    
# turn into array
new_tensors = np.asarray(new_tensors)

# reshape into final feature vector to touch fully connected layer(s), otherwise keep as is in terms of shape
new_tensors = new_tensors.swapaxes(0,1)
new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],np.shape(new_tensors)[1],np.shape(new_tensors)[2]*np.shape(new_tensors)[3]))
new_tensors = np.reshape(new_tensors, (np.shape(new_tensors)[0],np.shape(new_tensors)[1]*np.shape(new_tensors)[2]),order = 'F')

# time for measurment
timeElapsed=datetime.now()-startTime 

print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:07.301212


In [45]:
np.linalg.norm(new_tensors - feat)

2.4085930089667896e-14

# Really compact version - push all tensors together for compute

##### to do: probably not necessary for now, but intellectually appealing 