## Import necessary headers

In [17]:
from pynq import Overlay
from pynq import MMIO
from pynq import allocate
import random
import string
import numpy as np
import sys
import hashlib
import timeit
print("User Current Version:-", sys.version)

User Current Version:- 3.6.5 (default, Apr  1 2018, 05:46:30) 
[GCC 7.3.0]


In [18]:
Lab_Design = Overlay("./design_1.bit")
BRAM0_ADDR = Lab_Design.ip_dict['axi_bram_ctrl_0']['phys_addr']     # input bram 0
BRAM0_ADDR_range = Lab_Design.ip_dict['axi_bram_ctrl_0']['addr_range']
BRAM0_MMIO = MMIO(BRAM0_ADDR, BRAM0_ADDR_range)
BRAM1_ADDR = Lab_Design.ip_dict['axi_bram_ctrl_1']['phys_addr']     # input bram 1
BRAM1_ADDR_range = Lab_Design.ip_dict['axi_bram_ctrl_1']['addr_range']
BRAM1_MMIO = MMIO(BRAM1_ADDR, BRAM1_ADDR_range)
BRAM2_ADDR = Lab_Design.ip_dict['axi_bram_ctrl_2']['phys_addr']     # input bram 2
BRAM2_ADDR_range = Lab_Design.ip_dict['axi_bram_ctrl_2']['addr_range']
BRAM2_MMIO = MMIO(BRAM2_ADDR, BRAM2_ADDR_range)
BRAM3_ADDR = Lab_Design.ip_dict['axi_bram_ctrl_3']['phys_addr']     # input bram 3
BRAM3_ADDR_range = Lab_Design.ip_dict['axi_bram_ctrl_3']['addr_range']
BRAM3_MMIO = MMIO(BRAM3_ADDR, BRAM3_ADDR_range)
BRAM4_ADDR = Lab_Design.ip_dict['axi_bram_ctrl_4']['phys_addr']     # output bram
BRAM4_ADDR_range = Lab_Design.ip_dict['axi_bram_ctrl_4']['addr_range']
BRAM4_MMIO = MMIO(BRAM4_ADDR, BRAM4_ADDR_range)

GPIO0_ADDR = Lab_Design.ip_dict['axi_gpio_0']['phys_addr']           # size_ins
GPIO0_ADDR_range = Lab_Design.ip_dict['axi_gpio_0']['addr_range']
GPIO1_ADDR = Lab_Design.ip_dict['axi_gpio_1']['phys_addr']           # start / clear
GPIO1_ADDR_range = Lab_Design.ip_dict['axi_gpio_1']['addr_range']
GPIO2_ADDR = Lab_Design.ip_dict['axi_gpio_2']['phys_addr']           # valid
GPIO2_ADDR_range = Lab_Design.ip_dict['axi_gpio_2']['addr_range']
GPIO0_MMIO = MMIO(GPIO0_ADDR, GPIO0_ADDR_range)
GPIO1_MMIO = MMIO(GPIO1_ADDR, GPIO1_ADDR_range)
GPIO2_MMIO = MMIO(GPIO2_ADDR, GPIO2_ADDR_range)

bram_addrs = [BRAM0_ADDR, BRAM1_ADDR, BRAM2_ADDR, BRAM3_ADDR, BRAM4_ADDR]
bram_ranges = [BRAM0_ADDR_range, BRAM1_ADDR_range, BRAM2_ADDR_range, BRAM3_ADDR_range, BRAM4_ADDR_range]
bram_mmio = [BRAM0_MMIO, BRAM1_MMIO, BRAM2_MMIO, BRAM3_MMIO, BRAM4_MMIO]

## Generate input

In [19]:
random.seed(0)
def gen_input(num, one=False, mine=False):
    '''
    Generate num random inputs
    @Output rand_inputs: random generated ndarray of uint32
    @Output frags: number of fragments
    '''
    
    origin_inputs = []
    rand_inputs = []
    frags = []
    
    for i in range(num):
        if one:
            N = 1
            rand_bytes = [1,]
        elif mine:
            N = 10
            rand_bytes = [random.randint(0, sys.maxsize) for i in range(N)]
        else:
            N = random.randint(1, 998)
            rand_bytes = [random.randint(0, sys.maxsize) for i in range(N)]
        origin_inputs.append(np.array(rand_bytes.copy()))
        
        rand_bytes.append(0x80000000)
        zeros = 14 - (N + 1) % 16
        zeros = zeros if zeros >= 0 else zeros + 16
        rand_bytes += [0 for i in range(zeros)]
        rand_bytes.append((N * 32) // (2**32))
        rand_bytes.append((N * 32) % 2**32)
        
        rand_bytes = np.array(rand_bytes)
        
    
        rand_inputs.append(rand_bytes)
        frags.append((len(rand_bytes)) // 16)
    
    return rand_inputs, origin_inputs, frags


## Software SHA-256

In [20]:
k = np.array([
   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2], dtype=np.uint32)

def rightRotate(n, d):
    return (n >> d) | (n << (32 - d)) & 0xFFFFFFFF

def sha_256(sha_input, frags):
    start = timeit.default_timer()
    acc = np.array([0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
                    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19], dtype=np.uint32)
    for iteration in range(frags):
        w = []
        for i in range(16):
            w.append(sha_input[i + iteration * 16])
        for i in range(16, 64):
            s0 = rightRotate(w[i - 15], 7) ^ rightRotate(w[i - 15], 18) ^ (w[i-15] >> 3)
            s1 = rightRotate(w[i - 2], 17) ^ rightRotate(w[i - 2], 19) ^ (w[i-2] >> 10)
            w.append((w[i - 16] + s0 + w[i - 7] + s1) & 0xffffffff)
        #print(w)
        tmp = np.copy(acc)
        for i in range(64):
            s0 = rightRotate(tmp[0], 2) ^ rightRotate(tmp[0], 13) ^ rightRotate(tmp[0], 22)
            maj = (tmp[0] & tmp[1]) ^ (tmp[0] & tmp[2]) ^ (tmp[1] & tmp[2])
            t2 = s0 + maj
            s1 = rightRotate(tmp[4], 6) ^ rightRotate(tmp[4], 11) ^ rightRotate(tmp[4], 25)
            ch = (tmp[4] & tmp[5]) ^ (~tmp[4] & tmp[6])
            t1 = tmp[7] + s1 + ch + k[i] + w[i]
            
            tmp[7], tmp[6], tmp[5], tmp[4], tmp[3], tmp[2], tmp[1], tmp[0] = \
            tmp[6], tmp[5], tmp[4], (tmp[3] + t1) & 0xffffffff, tmp[2], tmp[1], tmp[0], (t1 + t2) & 0xffffffff
        acc += tmp
    return acc, timeit.default_timer() - start

## PL SHA-256 Functions

In [21]:
start_sigs = [0, 0, 0, 0]
sizes = [0, 0, 0, 0]
start_time = [0, 0, 0, 0]

total_time = 0
sv_time = 0

ready_q = []

def send_size_sig(n, fragments):
    
    sizes[n] = fragments - 1
    output = 0
    # [23:18] [17:12] [11:6] [5:0]
    # size3   size2   size1  size0
    for i in range(3, -1, -1):  # construct size signal (24-bit)
        output <<= 6
        output += sizes[i]
    GPIO0_MMIO.write(0, output)

def send_start_sig():
    output = 0
    # [3] [2] [1] [0]
    # s3  s2  s1  s0
    for i in range(3, -1, -1):
        output <<= 1
        output += start_sigs[i]
    GPIO1_MMIO.write(0, output)
    
def parse_valid_sig(valid):
    global sv_time
    has_valid = 0
    for i in range(4):
        x = valid & 0x1
        if (x == 1 and start_sigs[i] == 1):
            ready_q.append(i)  # i is ready for next input
            start_sigs[i] = 0
            
            has_valid = 1
            send_start_sig()
        valid >>= 1
    if has_valid:
        sv_time += (timeit.default_timer() - start_time[ready_q[len(ready_q)-1]])
    return has_valid

def get_hash_result(n, mine=False):
    base_addr = 32 * n
    output = ''
    for i in range(0, 32, 4):
        x = bram_mmio[4].read(base_addr + i)
        output += hex(x)[2:].zfill(8)
        if mine and i == 0 and output[:2] != "00":
            return
    print(output)

## Test Correctness

In [22]:
num = 1
inputs, origins, frags = gen_input(num, one = True)
total_time = 0
print("Python calculation result:")
for i in range(len(frags)):
    hash_val, interval = sha_256(inputs[i], frags[i])
    output = ''
    for i in range(8):
        output += str(hex(hash_val[i]))[2:].zfill(8)
    print(output)
    total_time += interval
print("Time elapsed: ", total_time)

print("\nFPGA calculation result:")
cur_input = 0
finish_cnt = 0
start  = timeit.default_timer()
for i in range(4):  # first time filling 4 banks of bram
    for j in range(len(inputs[cur_input])): 
        bram_mmio[i].write(j * 4, int(inputs[cur_input][j]))
    start_sigs[i] = 1
    start_time[i] = timeit.default_timer()
    send_size_sig(i, frags[cur_input])
    send_start_sig()
    cur_input += 1
    if cur_input >= num:
        while (finish_cnt < num):
            while (not parse_valid_sig(GPIO2_MMIO.read(0))):
                continue
            for i in ready_q:
                get_hash_result(i)
                finish_cnt += 1
            ready_q = []
        break;


while (cur_input < num):
    while (not parse_valid_sig(GPIO2_MMIO.read(0))):
        continue
    for i in ready_q:
        get_hash_result(i)
        finish_cnt += 1
        if (cur_input < num):
            for j in range(len(inputs[cur_input])): 
                bram_mmio[i].write(j * 4, int(inputs[cur_input][j]))
            start_sigs[i] = 1
            start_time[i] = timeit.default_timer()
            send_size_sig(i, frags[cur_input])
            send_start_sig()
            cur_input += 1
    ready_q = []

while (finish_cnt < num):
    while (not parse_valid_sig(GPIO2_MMIO.read(0))):
        continue
    for i in ready_q:
        get_hash_result(i)
        finish_cnt += 1

print("Time elapsed:", timeit.default_timer() - start)

Python calculation result:
b40711a88c7039756fb8a73827eabe2c0fe5a0346ca7e0a104adc0fc764f528d
Time elapsed:  0.06890017699879536

FPGA calculation result:
b40711a88c7039756fb8a73827eabe2c0fe5a0346ca7e0a104adc0fc764f528d
Time elapsed: 0.006471335998867289


## Test Performance

In [23]:
num = 20
inputs, origins, frags = gen_input(num)
total_time = 0
print("Python calculation result:")
for i in range(len(frags)):
    hash_val, interval = sha_256(inputs[i], frags[i])
    output = ''
    for i in range(8):
        output += str(hex(hash_val[i]))[2:].zfill(8)
    print(output)
    total_time += interval
print("Time elapsed: ", total_time)

print("\nFPGA calculation result:")
cur_input = 0
finish_cnt = 0
start  = timeit.default_timer()
for i in range(4):  # first time filling 4 banks of bram
    for j in range(len(inputs[cur_input])): 
        bram_mmio[i].write(j * 4, int(inputs[cur_input][j]))
    start_sigs[i] = 1
    start_time[i] = timeit.default_timer()
    send_size_sig(i, frags[cur_input])
    send_start_sig()
    cur_input += 1
    if cur_input >= num:
        while (finish_cnt < num):
            while (not parse_valid_sig(GPIO2_MMIO.read(0))):
                continue
            for i in ready_q:
                get_hash_result(i)
                finish_cnt += 1
            ready_q = []
        break;


while (cur_input < num):
    while (not parse_valid_sig(GPIO2_MMIO.read(0))):
        continue
    for i in ready_q:
        get_hash_result(i)
        finish_cnt += 1
        if (cur_input < num):
            for j in range(len(inputs[cur_input])): 
                bram_mmio[i].write(j * 4, int(inputs[cur_input][j]))
            start_sigs[i] = 1
            start_time[i] = timeit.default_timer()
            send_size_sig(i, frags[cur_input])
            send_start_sig()
            cur_input += 1
    ready_q = []

while (finish_cnt < num):
    while (not parse_valid_sig(GPIO2_MMIO.read(0))):
        continue
    for i in ready_q:
        get_hash_result(i)
        finish_cnt += 1

print("Time elapsed:", timeit.default_timer() - start)

Python calculation result:
a339a6729f75f637993e76fff72f4af9cb3fc3ebd07dc66bc2c1579d648d5af8
b167fc0f221a4110d89786c733bad58752f9a2c23fa28bfe10a9b5d79a75d351
9349b95fa9770630d08acd3533525221fa91ff45dcb125699c2373194c4e7b1f
95dd37526b7b10700ebfa7257e3bfda5d1b62202ca93988da2ce455d5091328a
c765e586499a959f9f785001e751eb5f6abd532173ab9b4c188dbbfb95878db7
c4d08435b93b2292584f8850b22a766d731d67ac2781859be91109f5e435e177
1fc97d04abc1a769a5313254af5b80ce91613f166ff465f4048ae9ed298537ec
1ec829bef6bf7a9c9c87249d172b855d73950aec1d00b15e02dc2b3f2f44b61a
f727c8db6877fbe0f7d0135d391ed5dde11aa289f94676442569a90c952020e6
fddb79e684ec78cc73f6dbe9421f246bca6892aed133ea72307cad0f188e8e1c
4c676b14b688f13245667c8000b80cab02c2871bdf4f36423ff4c3f2d4088a02
3e5565bf02faecd9ecc1ea457692724ab3b78c4b8fc46b902e9aa274bff6636a
38754bd8c1a7117ab7f0ff8178f18b7daf6ff1f35985a3890c8c72519c490ad3
7cc69dda16528ce07f2668e171f99a4b0e5302a586daaacf83a3ed7f10284bd1
5a9abfa3b1cc1dd2b9e1053551b15b739f9a1d3435ab3bb58041bd9231a0c99

##  PoW Testing

In [24]:
num = 5000
inputs, origins, frags = gen_input(num, mine=True)
print("FPGA calculation result:")
cur_input = 0
finish_cnt = 0
start  = timeit.default_timer()
for i in range(4):  # first time filling 4 banks of bram
    for j in range(len(inputs[cur_input])): 
        bram_mmio[i].write(j * 4, int(inputs[cur_input][j]))
    start_sigs[i] = 1
    start_time[i] = timeit.default_timer()
    send_size_sig(i, frags[cur_input])
    send_start_sig()
    cur_input += 1
    if cur_input >= num:
        while (finish_cnt < num):
            while (not parse_valid_sig(GPIO2_MMIO.read(0))):
                continue
            for i in ready_q:
                get_hash_result(i)
                finish_cnt += 1
            ready_q = []
        break;


while (cur_input < num):
    while (not parse_valid_sig(GPIO2_MMIO.read(0))):
        continue
    for i in ready_q:
        get_hash_result(i, mine=True)
        finish_cnt += 1
        if (cur_input < num):
            for j in range(len(inputs[cur_input])): 
                bram_mmio[i].write(j * 4, int(inputs[cur_input][j]))
            start_sigs[i] = 1
            start_time[i] = timeit.default_timer()
            send_size_sig(i, frags[cur_input])
            send_start_sig()
            cur_input += 1
    ready_q = []

while (finish_cnt < num):
    while (not parse_valid_sig(GPIO2_MMIO.read(0))):
        continue
    for i in ready_q:
        get_hash_result(i,mine=True)
        finish_cnt += 1

print("Time elapsed:", timeit.default_timer() - start)

FPGA calculation result:
0078ff9559687873a7392b0d6ca772bbc66084c4c2a5abae9c563ada6a1869c7
00d9cb11f75ddc4fc578d9f17df8936aa086f541a3aeaea69ef8b7488cc1eff3
00cc0cbf087f205dfc512f7a830ae797bdf0fd9222c6d1e85a1d41320b429279
009d07ea624152c7988a8ce9fb6e7b2d14582e90b9757433c467329e96b150f7
003ab42992d98636e3d4e0cd4ebf2325c1689d0daa6f47e5b1c699c4ba339de5
002efd9b948f6dbd342fabe40d31b84b0cbdb92e94de2a5086a3cd4b702c343f
001a6e83a39d37f5eaa7227ff262be96e4dfc9f660f1e1140e5737b4b942080a
00839b45660fff295dcfa8850f5822755cae6839aabc40cee75cdcf6171b37d2
003e8cd64ed5e7dc4ca6015ec74fb3ec3917c4dbab2c288c151117d4fb66eb70
001c2800449f503a913519f8d522df82103f90dae0b1c6355629dc272420b48e
00b208d3d3ee3166190d15d15224c98220c6734677d38782b578ce6f22f99b4c
00a01c682e0abd9b019441588c249021dd96e5664635f2c8e188be8fd5fe8059
009cc6ccb827ac97f9413904e78cdfd12c5e2c066803f52096ae746a413e6920
00f3fd4ac7ff054f62c45c0206cb96714a2aa49ab3c9ea3fc7e917369d29ea2a
00ad9fd2b63c1a6eaff3b3e7d9d12c32c6905c39f0284ccc25232ff2cb708def
