In [None]:
# Everything this cell needs from PYNQ, plus pprint for readable output.
from pynq import PL, Overlay, Clocks
import pprint

# Reset the programmable logic before loading anything: this has been
# needed to work around caching issues that have popped up.
PL.reset()

# Load the overlay from the bit file sitting next to this notebook.
ol = Overlay('./design_1_wrapper.bit')

# Pretty-print the overlay's ip_dict so the names it exposes can be checked.
pprint.pprint(ol.ip_dict)

# The DMA in this design was named "dma", so it is reachable as an attribute.
dma = ol.dma

# Bring fclk0 up to 100 MHz in case it did not start there.
Clocks.fclk0_mhz = 100

In [None]:
import numpy as np
from scipy.signal import lfilter
import time
%matplotlib notebook
import matplotlib.pyplot as plt
from scipy.signal import lfilter
 
def plot_to_notebook(time_sec,in_signal,n_samples,out_signal=None):
    """Draw the leading part of a signal (and optionally a second one) in a new figure.

    Parameters
    ----------
    time_sec
        Time axis values. The first ``n_samples`` entries are multiplied by
        1e6 and used as the x axis (labelled 'Time (usec)'), so the sliced
        object must support multiplication by a float (e.g. a numpy array).
    in_signal
        Signal drawn as a yellow line labelled 'Input signal'.
    n_samples
        Number of leading samples of each sequence to draw.
    out_signal
        Optional second signal; when given it is drawn on the same axes as a
        thicker green line labelled 'Module output'.

    Returns
    -------
    None
    """
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel('Time (usec)')
    ax.grid()

    # Same leading window for every sequence; the x axis is shown in microseconds.
    window = slice(None, n_samples)
    time_usec = time_sec[window] * 1e6

    ax.plot(time_usec, in_signal[window], 'y-', label='Input signal')
    if out_signal is not None:
        ax.plot(time_usec, out_signal[window], 'g-', linewidth=2, label='Module output')
    ax.legend()
 
# Total time of signal (seconds).
# Just create 20 ms of signal in Python for testing and demo:
T = 0.02
# Sampling frequency (Hz)
fs = 100e6
# Number of samples. T * fs is a floating-point product, so round to the
# nearest integer rather than truncating: a product that lands just below a
# whole number (e.g. 0.29 * 100 == 28.999999999999996) would otherwise
# silently drop one sample.
n = int(round(T * fs))
# Time vector in seconds (n evenly spaced points in [0, T), end point excluded)
t = np.linspace(0, T, n, endpoint=False)
# Samples of the signal: a 0.2 MHz sine plus 46 MHz and 20 MHz components
samples = 10000*np.sin(0.2e6*2*np.pi*t) + 1500*np.cos(46e6*2*np.pi*t) + 2000*np.sin(20e6*2*np.pi*t)
# Convert samples to 32-bit integers (astype drops the fractional part)
samples = samples.astype(np.int32)
print('Number of samples: ',len(samples))
 
# Show the first 1000 samples of the generated input in the notebook
plot_to_notebook(time_sec=t, in_signal=samples, n_samples=1000)
 
 
# My FIR coefficients... maybe yours are different.
# Change if different!
coeffs = np.array([-2,-3,-4,0,9,21,32,36,32,21,9,0,-4,-3,-2])

# Time the software version. perf_counter() is a monotonic, high-resolution
# clock intended for measuring short intervals; time.time() is wall-clock
# time and can jump or have coarse resolution.
start_time = time.perf_counter() #just before operation run

# For the first part just do the simple math operation we wanted.
# samples is a numpy array, so this is evaluated element-wise in a
# pretty efficient manner.
swresult = 3*samples+10000

# lfilter is a decently fast way to run a FIR in Python. Despite this being
# "Python", it is really just Python calling highly optimized C routines
# (numpy and scipy), so this is a pretty decent test of what a computer can
# do on the software side.
#   - first argument: the feed-forward (zeros) coefficients, i.e. the FIR taps
#   - second argument: the feedback (poles) coefficients; just 1 here since
#     this is an FIR
#   - third argument: the samples
# Uncomment for part 2 (and comment out the earlier 3*samples+10000):
#swresult = lfilter(coeffs, [1.0], samples)

stop_time = time.perf_counter() #just after operation run
sw_exec_time = stop_time - start_time
print('Software execution time: ',sw_exec_time)

# Plot the result to notebook
plot_to_notebook(t,samples,1000,out_signal=swresult)
 
# HARDWARE TIME
# Now it is time to run the same operation on hardware:
from pynq import allocate
import numpy as np

# Allocate buffers for the input and output signals. The try/finally blocks
# guarantee that every buffer that was allocated is closed again even if the
# copy or the DMA transfer raises, so a failed run does not leak buffers.
in_buffer = allocate(shape=(n,), dtype=np.int32)
try:
    out_buffer = allocate(shape=(n,), dtype=np.int32)
    try:
        # Copy the samples to the in_buffer
        np.copyto(in_buffer,samples)

        # Trigger the DMA transfer and wait for the result.
        # perf_counter() is a monotonic, high-resolution clock meant for
        # timing short intervals.
        start_time = time.perf_counter()
        dma.sendchannel.transfer(in_buffer)
        dma.recvchannel.transfer(out_buffer)
        dma.sendchannel.wait()
        dma.recvchannel.wait()
        stop_time = time.perf_counter()

        # Take an independent copy of the result (outside the timed region)
        # so the data stays usable after the buffers are closed.
        hwresult = np.array(out_buffer)
    finally:
        out_buffer.close()
finally:
    # Free the input buffer
    in_buffer.close()

hw_exec_time = stop_time-start_time
print('Hardware execution time: ',hw_exec_time)
print('Hardware acceleration factor: ',sw_exec_time / hw_exec_time)
# Plot to the notebook
plot_to_notebook(t,samples,1000,out_signal=hwresult)