In [None]:
from pynq import Overlay
from pynq import allocate
import numpy as np
import math
import time

# Download the convolution bitstream onto the FPGA fabric
ol = Overlay("/home/xilinx/jupyter_notebooks/bit_files/Conv_no_opt.bit")

# Look up the IPs contained in the overlay (not displayed, since this is not
# the last expression of the cell)
ol.ip_dict

# Grab the DMA engine once and take both stream channels from that handle:
# the send channel feeds data to the IP, the receive channel reads results back
dma = ol.axi_dma_0
dma_send, dma_recv = dma.sendchannel, dma.recvchannel


In [None]:

# Defining matrix and convolution parameters (all matrices must be square)

# Inputs given as flat lists; they are reshaped into square matrices later on
image = [1, 2, 3, 4, 5, 6, 7, 8, 9]          # 3x3
kernal = [-1, -2, -1, 0, 0, 0, 1, 2, 1]      # 3x3

# Element counts are derived from the data so they can never disagree with it
image_size = len(image)
kernal_size = len(kernal)

# Padding = 1, Stride = 1 (can be changed in Vivado HLS).
# For a 3x3 kernel with these settings the output has as many elements as the image.
output_size = image_size


In [None]:

# Allocate the buffers used for streaming: two inputs and one result
# NOTE(review): dtype=float is NumPy's 64-bit float — confirm this matches the
# element width the HLS IP expects on its streams.
Image_buffer = allocate(shape=(image_size,), dtype=float)
Kernal_buffer = allocate(shape=(kernal_size,), dtype=float)
output_buffer = allocate(shape=(output_size,), dtype=float)

# Copy the image and kernel elements into their buffers with one slice
# assignment each. Unlike an element-by-element loop, this raises ValueError
# if a list's length differs from its buffer instead of silently truncating.
Image_buffer[:] = image
Kernal_buffer[:] = kernal
    

In [None]:

# Sanity check: dump both input buffers, one element per line, each under its heading
for title, buffer, count in (
    ('Image Matrix', Image_buffer, image_size),
    ('Kernal Matrix', Kernal_buffer, kernal_size),
):
    print(title)
    for idx in range(count):
        print(buffer[idx])
    

In [None]:

# Sending the inputs via the AXI DMA send channel
start = time.time()

# `idle` is only a status property: reading it does not block. Use wait() so
# the image transfer has completed before the same channel is reused.
dma_send.transfer(Image_buffer)
dma_send.wait()

# Same for the kernel: block until the DMA reports the transfer as done
dma_send.transfer(Kernal_buffer)
dma_send.wait()


In [None]:

# Receive the output data from the FPGA:
# start a transfer into output_buffer on the receive channel, then wait on the
# DMA's receive channel before reading the clock.
dma_recv.transfer(output_buffer)
dma.recvchannel.wait()

# `start` was taken in the cell that sends the inputs, so this interval also
# includes any delay between running that cell and this one.
end = time.time()
fpga_run_time = end - start

In [None]:
# Dump the result buffer returned by the FPGA, one element per line
print('Output Image')
for value in output_buffer[:output_size]:
    print(value)

In [None]:
# Performing convolution using a Python script

# Reshape each flat buffer into a square matrix whose side is the integer
# square root of its element count
A_matrix, B_matrix, Output_Matrix = (
    flat.reshape((int(math.sqrt(count)), int(math.sqrt(count))))
    for flat, count in (
        (Image_buffer, image_size),
        (Kernal_buffer, kernal_size),
        (output_buffer, output_size),
    )
)

def convolution(input_matrix, kernel_matrix, stride, padding):
    """Slide a 2-D kernel over a zero-padded 2-D input and return the result.

    The kernel is applied as-is (it is not flipped): each output element is
    the sum of the element-wise product of the kernel and the input window
    beneath it.

    Args:
        input_matrix: 2-D array to filter.
        kernel_matrix: 2-D array of weights.
        stride: step, in elements, between consecutive windows on both axes.
        padding: number of zero rows/columns added on every side of the input.

    Returns:
        A new 2-D array (as created by ``np.zeros``) with
        ``(in + 2 * padding - k) // stride + 1`` elements along each axis.
    """
    in_rows, in_cols = input_matrix.shape
    k_rows, k_cols = kernel_matrix.shape

    # Output extent along each axis once padding and stride are accounted for
    out_rows = (in_rows - k_rows + 2 * padding) // stride + 1
    out_cols = (in_cols - k_cols + 2 * padding) // stride + 1

    # Surround the input with `padding` zeros on all four sides
    border = (padding, padding)
    padded = np.pad(input_matrix, (border, border), mode='constant')

    output_matrix = np.zeros((out_rows, out_cols))

    for r in range(out_rows):
        top = r * stride
        # Rows of the padded input covered by the kernel for this output row
        band = padded[top:top + k_rows]
        for c in range(out_cols):
            left = c * stride
            window = band[:, left:left + k_cols]
            output_matrix[r, c] = np.sum(window * kernel_matrix)

    return output_matrix


# Stride and padding for the software reference (1 and 1, the same values
# noted for the hardware design)
stride, padding = 1, 1

# Time the pure-Python convolution on the ARM processor
start = time.time()
result = convolution(A_matrix, B_matrix, stride, padding)
end = time.time()
ps_run_time = end - start

# Show the operands followed by the software result
for heading, matrix in (
    ("Input Matrix:", A_matrix),
    ("\nKernel Matrix:", B_matrix),
    ("\nConvolution result by python script:", result),
):
    print(heading)
    print(matrix)


In [None]:
# Check whether the FPGA output matches the Python reference.
# The values are floating point, and hardware may accumulate them in a
# different order or precision than NumPy, so compare within a tolerance
# instead of bit-for-bit. The shape check keeps a size mismatch from being
# hidden by broadcasting.
if Output_Matrix.shape == result.shape and np.allclose(Output_Matrix, result):
    print("The matrices are equal.")
else:
    print("Both matrix doesnt match")

In [None]:
# Report the wall-clock time measured for the FPGA path and for the Python reference
for label, seconds in (
    ('FPGA run time: ', fpga_run_time),
    ('ARM PS run time: ', ps_run_time),
):
    print(label, seconds)