[View in Colaboratory](https://colab.research.google.com/github/clee1994/DLclass/blob/master/HW02_Convolution.ipynb)

In [99]:
import numpy as np
from scipy import signal as signal
import matplotlib.pyplot as plt
import time
import tensorflow  as tf

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [0]:
def ClemensConv(matrix, kernel):

  size = np.array(matrix.shape) + np.array(kernel.shape) - 1
  fsize = 2 ** np.ceil(np.log2(size)).astype(int)
  fslice = tuple([slice(0, int(sz)) for sz in size])



  new_x = np.fft.fft2(test_matrix , fsize)
  new_y = np.fft.fft2(test_kernel , fsize)

  result = np.fft.ifft2(new_x*new_y)[fslice]
  #in case you want to have look at the result, use the next line
  #result = np.round(np.array(result, dtype="float"),3)[1:-1,1:-1]

  return result


In [0]:
def tf2dConv(matrix_reshaped,kernel_reshaped, gpu=False):
  config = tf.ConfigProto(allow_soft_placement = True)
  config.gpu_options.allow_growth = True
  
  if gpu:
    device_string = '/gpu:0'
  else:
    device_string = '/cpu:0'

  with tf.device(device_string):
    
    input_matrix =  tf.Variable(matrix_reshaped.astype(float))
    kernel_tf = tf.constant(kernel_reshaped.astype(float))

    convoluted_matrix = tf.nn.convolution(input=input_matrix,filter=kernel_tf,strides=[1,1],padding="VALID")
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
      sess.run(init)
      time1 = time.time()
      output_matrix = sess.run(convoluted_matrix)
      time2 = time.time()
  return output_matrix[0],(time2-time1)



In [0]:
def padWith(vector, pad_width, iaxis, kwargs):
  pad_value = kwargs.get('padder', 10)
  vector[:pad_width[0]] = pad_value
  vector[-pad_width[1]:] = pad_value
  return vector

In [0]:
def joshi2dpatch(patch,kernel):
  flippedKernel = np.fliplr(np.flipud(kernel))
  appliedKernel = np.multiply(patch,flippedKernel)
  outPixel = np.sum(np.sum(appliedKernel))
  
  return outPixel

In [0]:
def joshi2dConv(inMatrix,kernel):
  
  kSize = kernel.shape
  pSize = inMatrix.shape
  outputH = (pSize[0]-kSize[0])+1
  outputW = (pSize[1]-kSize[1])+1
  outputPatch = np.zeros([outputH,outputW])
  
  
  for outer in range(outputH):
    for inner in range(outputW):
      outputPatch[outer,inner] = joshi2dpatch(inMatrix[outer:(outer+kSize[0]),inner:(inner+kSize[1])],kernel) 
  
  return outputPatch

In [0]:
def runConvs(matrix, kernel):
  iters = 100
  
  
  mshape = matrix.shape
  kshape = kernel.shape
  temp_mat = np.repeat(matrix,iters)
  matrix_reshaped = temp_mat.reshape(iters, mshape[0], mshape[1], 1)
  kernel_reshaped = kernel.reshape(kshape[0], kshape[1], 1, 1)
  
  
  results = np.ones([iters,5])*np.nan
  
  
  #tensorflow with CPU/GPU
  testConvTf, tftime = tf2dConv(matrix_reshaped,kernel_reshaped,gpu=False)
  results[0,2] = 1/(tftime/100)
  testConvTf, tftime = tf2dConv(matrix_reshaped,kernel_reshaped,gpu=True)
  results[0,3] = 1/(tftime/100)
  
  for i in range(iters):
    #Joshi
    time1 = time.time()
    testConvJoshi = joshi2dConv(matrix,kernel)
    time2 = time.time()
    results[i,0] = 1/(time2-time1)
    
    #Clemens
    time1 = time.time()
    testConvClemens = ClemensConv(matrix,kernel)
    time2 = time.time()
    results[i,4] = 1/(time2-time1)

    #Scipy
    time1 = time.time()
    testConvScipy = signal.convolve2d(matrix,kernel,mode='valid')
    time2 = time.time()
    results[i,1] = 1/(time2-time1)
    


    
    
  
  print('Joshi Convolutions per second: {:.3f} +/- {:.3f}'.format(np.average(results[:,0]), np.std(results[:,0])))
  print('Clemens Convolutions per second: {:.3f} +/- {:.3f}'.format(np.average(results[:,4]), np.std(results[:,4])))
  print('Scipy Convolutions per second: {:.3f} +/- {:.3f}'.format(np.average(results[:,1]), np.std(results[:,1])))
  print('Tensorflow Convolutions per second: {:.3f} '.format(results[0,2]))
  print('Tensorflow with GPU Convolutions per second: {:.3f} '.format(results[0,3]))
  

In [107]:
if __name__=='__main__':
  
  ksizes = [3,5,7]
  msizes = [28,32]
 
  
  for i in ksizes:
    for j in msizes:
      print("# matrix size: {}, kernel size: {}".format(i,j))
      kernel = np.eye(i)
      matrix = np.random.randn(j,j)*100
      runConvs(matrix,kernel)
      print("-"*25)
  
  
  



# matrix size: 3, kernel size: 28
Joshi Convolutions per second: 115.961 +/- 3.951
Clemens Convolutions per second: 2155.646 +/- 203.192
Scipy Convolutions per second: 11133.053 +/- 2484.986
Tensorflow Convolutions per second: 401.494 
Tensorflow with GPU Convolutions per second: 351.871 
-------------------------
# matrix size: 3, kernel size: 32
Joshi Convolutions per second: 86.742 +/- 5.903
Clemens Convolutions per second: 1468.316 +/- 151.376
Scipy Convolutions per second: 9688.382 +/- 2048.841
Tensorflow Convolutions per second: 374.379 
Tensorflow with GPU Convolutions per second: 362.790 
-------------------------
# matrix size: 5, kernel size: 28
Joshi Convolutions per second: 132.751 +/- 10.366
Clemens Convolutions per second: 2206.460 +/- 215.957
Scipy Convolutions per second: 9995.557 +/- 1530.169
Tensorflow Convolutions per second: 363.513 
Tensorflow with GPU Convolutions per second: 373.767 
-------------------------
# matrix size: 5, kernel size: 32
Joshi Convolutions p