In [251]:
import numpy as np
class memory_model:
    """Simulation model of a small single-port-per-direction memory.

    The model holds a 1-D numpy array of cells and enforces, via assertions,
    that it is read at most once and written at most once between two calls
    to ``clock()``.

    Args:
        size: number of cells; only used when ``copy`` is None.
        copy: optional other ``memory_model`` whose ``data`` is duplicated.
            When None, the cells are filled with random 0/1 values.
    """
    def __init__(self, size=4, copy=None):
        if copy is None:
            # randomize data (each cell is 0 or 1; `high` is exclusive)
            self.data = np.random.randint(low=0, high=2, size=size)
        else:
            # copy data from another memory (independent array, not a view)
            self.data = np.copy(copy.data)
        # per-clock access tracking, cleared by clock()
        self.already_wrote_this_clock = False
        self.already_read_this_clock = False
        self.read_accesses = 0

    def write(self, addr, data):
        """Store ``data`` at ``addr``; asserts on a second write in the same clock."""
        # validate that any individual memory is not written more than once per clock
        assert not self.already_wrote_this_clock, "memory written more than once in one clock"
        self.data[addr] = data
        self.already_wrote_this_clock = True

    def read(self, addr):
        """Return the cell at ``addr``; asserts on a second read in the same clock."""
        # validate that any individual memory is not read more than once per clock
        assert self.read_accesses == 0, "memory read more than once in one clock"
        self.read_accesses += 1
        # keep the boolean flag in step with the counter so it is never stale
        self.already_read_this_clock = True
        return self.data[addr]

    def observe(self, addr):
        """Return the cell at ``addr`` without counting as a read access."""
        # fake read, do not use for hardware modeling, can be used for observing/printing state in sim
        return self.data[addr]

    def clock(self):
        """Advance one clock: clear all per-clock assertion tracking info."""
        self.already_wrote_this_clock = False
        self.already_read_this_clock = False
        self.read_accesses = 0


Memories are ordered like:

X0Y0(A) X1Y0(B) X2Y0(C) \\
X0Y1(D) X1Y1(E) X2Y1(F) \\
X0Y2(G) X1Y2(H) X2Y2(I)

A full screen is arranged like:

ABCABC... \\ 
DEFDEF \\
GHIGHI \\
ABCABC \\
DEFDEF \\
GHIGHI

Each memory in the array can be read once per clock and written once per clock. This pixel arrangement means that for any 3x3 window on the screen, no single memory is read more than once per clock, and we can read all 9 of the pixels in the window every clock.

Two copies of this memory array are implemented, so that one can stream its results into the other, effectively forming a double buffer. After a full frame pass is complete, the buffers are swapped so that the result is fed back into the next computation.

In [391]:
# golden patterns for control counters, hand-coded for a 6x6 screen.

# the submemory for the center pixel of the conway calculation
# center_gold[i] is the [y, x] index of the memory that holds the center pixel
# on clock i; one entry per pixel of the 6x6 screen, in row-major screen order.
center_gold = np.array(
              [[0,0], [0,1], [0,2], [0,0], [0,1], [0,2],
               [1,0], [1,1], [1,2], [1,0], [1,1], [1,2], 
               [2,0], [2,1], [2,2], [2,0], [2,1], [2,2], 
               [0,0], [0,1], [0,2], [0,0], [0,1], [0,2], 
               [1,0], [1,1], [1,2], [1,0], [1,1], [1,2], 
               [2,0], [2,1], [2,2], [2,0], [2,1], [2,2]]
)

# the subaddress of the center pixel of the conway calculation, within its memory
# indexed as write_gold[y][x][i]: the address written in memory (y, x) on clock i,
# or None on clocks where that memory is not written. The mix of ints and None
# makes this an object-dtype array.
write_gold = np.array(
             [[[0,    None, None, 1,    None, None,   None, None, None, None, None, None,   None, None, None, None, None, None,   2,    None, None, 3,    None, None,   None, None, None, None, None, None,   None, None, None, None, None, None],
               [None, 0,    None, None, 1,    None,   None, None, None, None, None, None,   None, None, None, None, None, None,   None, 2,    None, None, 3,    None,   None, None, None, None, None, None,   None, None, None, None, None, None],
               [None, None, 0,    None, None, 1,      None, None, None, None, None, None,   None, None, None, None, None, None,   None, None, 2,    None, None, 3,      None, None, None, None, None, None,   None, None, None, None, None, None]],
              [[None, None, None, None, None, None,   0,    None, None, 1,    None, None,   None, None, None, None, None, None,   None, None, None, None, None, None,   2,    None, None, 3,    None, None,   None, None, None, None, None, None],
               [None, None, None, None, None, None,   None, 0,    None, None, 1,    None,   None, None, None, None, None, None,   None, None, None, None, None, None,   None, 2,    None, None, 3,    None,   None, None, None, None, None, None],
               [None, None, None, None, None, None,   None, None, 0,    None, None, 1,      None, None, None, None, None, None,   None, None, None, None, None, None,   None, None, 2,    None, None, 3,      None, None, None, None, None, None]],
              [[None, None, None, None, None, None,   None, None, None, None, None, None,   0,    None, None, 1,    None, None,   None, None, None, None, None, None,   None, None, None, None, None, None,   2,    None, None, 3,    None, None],
               [None, None, None, None, None, None,   None, None, None, None, None, None,   None, 0,    None, None, 1,    None,   None, None, None, None, None, None,   None, None, None, None, None, None,   None, 2,    None, None, 3,    None],
               [None, None, None, None, None, None,   None, None, None, None, None, None,   None, None, 0,    None, None, 1,      None, None, None, None, None, None,   None, None, None, None, None, None,   None, None, 2,    None, None, 3   ]]]
)

# Gold write-enable flags: True wherever write_gold holds an address, False
# where it holds None. Same [y][x][clock] layout as write_gold.
write_enable_gold = np.array([
    [[addr is not None for addr in per_clock] for per_clock in mem_row]
    for mem_row in write_gold
])

# the subaddress of all pixels which are to be read for the conway calculation, within each memory
# indexed as read_gold[y][x][i]: the address read from memory (y, x) on clock i,
# or None on clocks where that memory is not read. The mix of ints and None
# makes this an object-dtype array.
read_gold = np.array(
            [[[0,    0,    1,    1,    1,    None,   0,    0,    1,    1,    1,    None,   2,    2,    3,    3,    3,    None,   2,    2,    3,    3,    3,    None,   2,    2,    3,    3,    3,    None,   None, None, None, None, None, None],
              [0,    0,    0,    1,    1,    1,      0,    0,    0,    1,    1,    1,      2,    2,    2,    3,    3,    3,      2,    2,    2,    3,    3,    3,      2,    2,    2,    3,    3,    3,      None, None, None, None, None, None],
              [None, 0,    0,    0,    1,    1,      None, 0,    0,    0,    1,    1,      None, 2,    2,    2,    3,    3,      None, 2,    2,    2,    3,    3,      None, 2,    2,    2,    3,    3,      None, None, None, None, None, None]],
             [[0,    0,    1,    1,    1,    None,   0,    0,    1,    1,    1,    None,   0,    0,    1,    1,    1,    None,   2,    2,    3,    3,    3,    None,   2,    2,    3,    3,    3,    None,   2,    2,    3,    3,    3,    None],
              [0,    0,    0,    1,    1,    1,      0,    0,    0,    1,    1,    1,      0,    0,    0,    1,    1,    1,      2,    2,    2,    3,    3,    3,      2,    2,    2,    3,    3,    3,      2,    2,    2,    3,    3,    3   ],
              [None, 0,    0,    0,    1,    1,      None, 0,    0,    0,    1,    1,      None, 0,    0,    0,    1,    1,      None, 2,    2,    2,    3,    3,      None, 2,    2,    2,    3,    3,      None, 2,    2,    2,    3,    3   ]],
             [[None, None, None, None, None, None,   0,    0,    1,    1,    1,    None,   0,    0,    1,    1,    1,    None,   0,    0,    1,    1,    1,    None,   2,    2,    3,    3,    3,    None,   2,    2,    3,    3,    3,    None],
              [None, None, None, None, None, None,   0,    0,    0,    1,    1,    1,      0,    0,    0,    1,    1,    1,      0,    0,    0,    1,    1,    1,      2,    2,    2,    3,    3,    3,      2,    2,    2,    3,    3,    3   ],
              [None, None, None, None, None, None,   None, 0,    0,    0,    1,    1,      None, 0,    0,    0,    1,    1,      None, 0,    0,    0,    1,    1,      None, 2,    2,    2,    3,    3,      None, 2,    2,    2,    3,    3   ]]]
)

# Gold read-enable flags, derived from read_gold: memory (y, x) is read on
# clock i exactly when read_gold[y, x, i] holds an address rather than None.
#
# The flags are stored in a dict keyed by (clock, y, x), the same layout as the
# `read_enable` dict built by the counter simulation, so the two can be compared
# directly with `==`. (An ndarray compared against that dict would not give a
# meaningful element-wise result.)
read_enable_gold = {}
for y in range(len(read_gold)):
    for x in range(len(read_gold[y])):
        # Iterate over the gold table itself. The previous version looped over
        # the freshly appended, still-empty flag list, so no flag was ever
        # generated and the later comparison passed vacuously.
        for i in range(len(read_gold[y][x])):
            read_enable_gold[(i, y, x)] = read_gold[y, x, i] is not None

In [392]:
# base class for hardware models. use updated flag and stored last_state to allow any-order updates when one model depends on another. state can store whatever
class hardware_model:
    """Base class for clocked hardware models that may be updated in any order.

    Subclasses are expected to provide a ``state`` attribute and an
    ``update_state()`` method; this base class reads/calls both but defines
    neither. During a clock, ``update()`` snapshots ``state`` into
    ``last_state`` before advancing it, and ``get_state()`` keeps returning
    that snapshot until ``clock()`` is called, so models that depend on each
    other all observe pre-update values.
    """

    def __init__(self, name):
        self.updated = False
        self.name = name

    def get_state(self):
        """Return the state as it was before this clock's update, if any."""
        return self.last_state if self.updated else self.state

    def update(self):
        """Advance the state once; repeated calls within a clock are no-ops."""
        if not self.updated:
            # snapshot first so get_state() still reports the old value
            self.last_state = self.state
            self.updated = True
            self.update_state()

    def clock(self):
        """Finish the clock: the updated state becomes the visible one."""
        self.updated = False

# counter model implements terminal count output, enable input, and count output. max_value parameter. do nothing when not enabled, else increment by one each clock cycle, resetting when one less than max_value is achieved.
# note: inputs are wired up via lambda callbacks so that they can be defined at initialization
class counter_model(hardware_model):
    """Wrapping up-counter with enable and clear inputs and a carry output.

    Args:
        name: label stored on the model.
        max_value: exclusive upper bound; the count wraps to 0 instead of
            reaching it.
        f_get_enable: zero-argument callable; the counter only changes on
            updates where it returns a truthy value.
        f_get_clear: zero-argument callable, consulted only when enabled;
            a truthy value forces the count to 0.
        increment: step added on each enabled, non-cleared update.
        reset_value: initial count.
    """
    def __init__(self, name, max_value, f_get_enable, f_get_clear, increment = 1, reset_value = 0):
        # the base class initialises `name` and the `updated` flag
        super().__init__(name)
        self.max_value = max_value
        self.state = reset_value
        self.f_get_enable = f_get_enable
        self.f_get_clear = f_get_clear
        self.increment = increment

    def update_state(self):
        """Advance the count by one step, honouring enable and clear."""
        if not self.f_get_enable():
            return
        if self.f_get_clear():
            self.state = 0
            return
        next_state = self.state + self.increment
        self.state = next_state if next_state < self.max_value else 0

    def get_count(self):
        """Return the current (pre-update) count."""
        return self.get_state()

    def get_carry(self):
        """Return True when the next enabled step would wrap the count to 0.

        Uses the configured increment so the carry agrees with the wrap test
        in update_state(); for increment == 1 this is the same as
        ``count + 1 >= max_value``.
        """
        return self.get_state() + self.increment >= self.max_value
    


In [408]:
# screen size in pixels
width, height = 6, 6
# exclusive upper bound used for the per-memory address counters:
# one address per 3x3 tile of the screen
addr_high = (width // 3) * (height // 3)
# number of simulated clocks: one per screen pixel
clocks = width * height

def get_read_enable(y, x):
    """Return whether memory (y, x) is read on the current clock.

    Reads are suppressed in four cases, decided from the module-level
    position counters (presumably these are the cases where the neighbour
    pixel would lie outside the screen -- confirm against read_gold):

    * memory row 0 while both y counters are at carry (last screen row),
    * memory column 0 while both x counters are at carry (last screen column),
    * memory row 2 while both y counters are 0 (first screen row),
    * memory column 2 while both x counters are 0 (first screen column).
    """
    at_last_row = y_counter.get_carry() and y2_counter.get_carry()
    at_last_col = x_counter.get_carry() and x2_counter.get_carry()
    at_first_row = y_counter.get_state() == 0 and y2_counter.get_state() == 0
    at_first_col = x_counter.get_state() == 0 and x2_counter.get_state() == 0

    suppressed = (
        (at_last_row and y == 0)
        or (at_last_col and x == 0)
        or (at_first_row and y == 2)
        or (at_first_col and x == 2)
    )
    return not suppressed

# Number of 3x3 tiles across / down the screen. Addresses inside each memory
# are laid out as tile_row * x_tiles + tile_col.
x_tiles = width // 3
y_tiles = height // 3

def end_of_line():
    """True while the position counters sit on the last pixel of a screen row."""
    return x2_counter.get_carry() and x_counter.get_carry()

def end_of_tile_row():
    """True on the last pixel of the last screen row of a row of tiles."""
    return end_of_line() and y_counter.get_carry()

# Position counters: x/y count the pixel inside a tile (0..2), x2/y2 count tiles.
x_counter  = counter_model('x_counter',  3,       lambda: True,                  lambda: False, increment=1)
x2_counter = counter_model('x2_counter', x_tiles, lambda: x_counter.get_carry(), lambda: False, increment=1)
y_counter  = counter_model('y_counter',  3,       end_of_line,                   lambda: False, increment=1)
y2_counter = counter_model('y2_counter', y_tiles, end_of_tile_row,               lambda: False, increment=1)

# Write address = tile column + tile row * x_tiles. The row part therefore steps
# by x_tiles (not y_tiles; the two only coincide on a square screen).
x_write_addr_counter = counter_model('x_write_addr_counter', x_tiles, lambda: x_counter.get_carry(), lambda: False, increment=1)
y_write_addr_counter = counter_model('y_write_addr_counter', addr_high, end_of_tile_row, lambda: False, increment=x_tiles)
get_write_addr = lambda: x_write_addr_counter.get_state() + y_write_addr_counter.get_state()

# Per-memory-column read address counters. Each advances on a different pixel
# phase of x_counter. Column 2 starts on the last tile column so that its first
# enabled step wraps it to 0.
read_enable_x_counters = [
    counter_model('read_enable_x_counters_0', x_tiles, lambda: x_counter.get_count() == 1, lambda: False, increment=1),
    counter_model('read_enable_x_counters_1', x_tiles, lambda: x_counter.get_count() == 2, lambda: False, increment=1),
    counter_model('read_enable_x_counters_2', x_tiles, lambda: x_counter.get_count() == 0, lambda: False, increment=1, reset_value=x_tiles - 1)
]
# Per-memory-row read address counters, stepping by one tile row (x_tiles
# addresses) at the end of a screen line on a different phase of y_counter.
# Row 2 starts on the last tile row so that its first enabled step wraps it to 0.
read_enable_y_counters = [
    counter_model('read_enable_y_counters_0', addr_high, lambda: end_of_line() and y_counter.get_count() == 1, lambda: False, increment=x_tiles),
    counter_model('read_enable_y_counters_1', addr_high, lambda: end_of_line() and y_counter.get_count() == 2, lambda: False, increment=x_tiles),
    counter_model('read_enable_y_counters_2', addr_high, lambda: end_of_line() and y_counter.get_count() == 0, lambda: False, increment=x_tiles, reset_value=(y_tiles - 1) * x_tiles)
]
get_read_addr = lambda y, x: read_enable_x_counters[x].get_count() + read_enable_y_counters[y].get_count()

# every model that must be updated and clocked once per simulated clock
models = [x_counter, y_counter, x2_counter, y2_counter, x_write_addr_counter, y_write_addr_counter, *read_enable_x_counters, *read_enable_y_counters]

# Traces recorded by the simulation, later compared with the gold tables.
center = []                                   # (y, x) of the center memory, one per clock
write_enable = np.zeros(shape=(3, 3, clocks))  # [y, x, clock], 1 where memory (y, x) is written
read_enable = {}                              # (clock, y, x) -> bool
write_addr = [[[None] * clocks for _ in range(3)] for _ in range(3)]  # [y][x][clock], None = no write
read_addr = [[[None] * clocks for _ in range(3)] for _ in range(3)]   # [y][x][clock], None = no read

for i in range(clocks):
    # sample the pixel-in-tile position once per clock
    center_y, center_x = y_counter.get_count(), x_counter.get_count()
    center.append((center_y, center_x))

    for y in range(3):
        for x in range(3):
            # only the memory holding the center pixel is written
            is_center = center_y == y and center_x == x
            write_enable[y, x, i] = is_center
            if is_center:
                write_addr[y][x][i] = get_write_addr()  # else leave None

            reads_now = get_read_enable(y, x)
            read_enable[(i, y, x)] = reads_now
            if reads_now:
                read_addr[y][x][i] = get_read_addr(y, x)

    # update every model first, then release them all for the next clock
    for model in models:
        model.update()
    for model in models:
        model.clock()

# Prints for debugging:

# # Print write_enable
# for y in range(3):
#     for x in range(3):
#         for i in range(clocks):
#             print(f'{"1" if write_enable[y,x,i] else "_"}', end='')
#         print()
#         for i in range(clocks):
#             print(f'{"1" if not write_gold[y,x,i] is None else "_"}', end='')
#         print()

# Print read_enable
# for y in range(3):
#     for x in range(3):
#         for i in range(clocks):
#             print(f'{"1" if read_enable[(i,y,x)] else "_"}', end='')
#         print()
#         for i in range(clocks):
#             print(f'{"1" if not read_gold[y,x,i] is None else "_"}', end='')
#         print()
#         print()

# # Print write_addr
# for y in range(3):
#     for x in range(3):
#         for i in range(clocks):
#             print(f'{write_addr[y][x][i] if not write_addr[y][x][i] is None else "_"}', end='')
#         print()
#         for i in range(clocks):
#             print(f'{write_gold[y,x,i] if not write_gold[y,x,i] is None else "_"}', end='')
#         print()
# print()

# # Print read_addr
# for y in range(3):
#     for x in range(3):
#         for i in range(clocks):
#             print(f'{read_addr[y][x][i] if not read_addr[y][x][i] is None else "_"}', end='')
#         print()
#         for i in range(clocks):
#             print(f'{read_gold[y,x,i] if not read_gold[y,x,i] is None else "_"}', end='')
#         print()

assert np.all(center_gold == np.array(center)), "center does not match gold"
assert np.all(write_enable_gold == write_enable), "write_enable does not match gold"
# read_enable is a dict keyed by (clock, y, x), so it is checked entry by entry
# against the None-mask of read_gold: a read is enabled exactly where the gold
# table holds an address. The size check makes sure every table cell is covered.
read_enable_matches_gold = len(read_enable) == read_gold.size and all(
    enabled == (read_gold[y, x, i] is not None)
    for (i, y, x), enabled in read_enable.items()
)
assert read_enable_matches_gold, "read_enable does not match gold"
assert np.all(write_gold == np.array(write_addr)), "write_addr does not match gold"
assert np.all(read_gold == np.array(read_addr)), "read_addr does not match gold"
print("successfully generated correct sequences")

successfully generated correct sequences


In [409]:
# basic function definitions and memory initialization
def conway(center, others):
    """Return the next state (0 or 1) of a cell under Conway's Game of Life.

    Args:
        center: current state of the cell (truthy = alive).
        others: iterable with the states of the neighbouring cells; their
            sum is taken as the number of live neighbours.
    """
    live_neighbors = sum(others)
    if center:
        # a live cell survives with two or three live neighbours
        return 1 if 2 <= live_neighbors <= 3 else 0
    # a dead cell comes alive with exactly three live neighbours
    return 1 if live_neighbors == 3 else 0

def print_memory_contents(memories, primary):
    """Print both memory banks side by side as 6x6 screens.

    The bank selected by ``primary`` is printed on the left and the other
    bank (``flip(primary)``) on the right, with an arrow between them on the
    fourth screen row. Cells are fetched with ``observe`` so printing does not
    count as a read access.
    """
    def screen_row(bank, tile_row, mem_row):
        # one screen line: two tiles across, three memories per tile;
        # the address inside each memory is tile_row * 2 + tile_col
        return ''.join(
            str(bank[mem_row][mem_col].observe(tile_row * 2 + tile_col))
            for tile_col in range(2)
            for mem_col in range(3)
        )

    border = "======    ======"
    print(border)
    for tile_row in range(2):
        for mem_row in range(3):
            gap = ' -> ' if tile_row * 3 + mem_row == 3 else '    '
            left = screen_row(memories[primary], tile_row, mem_row)
            right = screen_row(memories[flip(primary)], tile_row, mem_row)
            print(left + gap + right)
    print(border)

def flip(val):
    """Return the index of the other buffer: 1 for 0, and 0 for anything else."""
    if val == 0:
        return 1
    return 0

# Buffer 0: a 3x3 grid of randomly initialised 4-cell memories.
memories = [[[memory_model(4) for x in range(3)] for y in range(3)]]
# Buffer 1: a cell-for-cell copy of buffer 0.
memories.append([
    [memory_model(4, copy=source) for source in source_row]
    for source_row in memories[0]
])

# simulation bookkeeping
clock = 0
trace_y, trace_x = 0, 0
primary = 0  # index of the buffer currently being read
n = 0

print_memory_contents(memories, primary)


000110    000110
101011    101011
011100    011100
101111 -> 101111
110110    110110
100101    100101


In [420]:
# per clock-cycle calculations

def do_clock(memories, clock):
    """Simulate one clock of the Conway pipeline and return the next clock.

    On each clock one pixel is processed: the center pixel and every enabled
    neighbour are read from the ``primary`` buffer using the gold control
    tables, the Conway rule is applied, and the result is written to the same
    memory and address in the other buffer.

    Args:
        memories: two buffers, each a 3x3 nested list of memory models.
        clock: absolute clock number; it is reduced modulo the frame length
            (the number of entries in ``center_gold``) to index the tables.

    Returns:
        ``clock + 1``.
    """
    # frame length comes from the gold table rather than a hard-coded 36
    frame_clocks = len(center_gold)
    local_clock = clock % frame_clocks

    center_mem = center_gold[local_clock]
    center_y, center_x = center_mem[0], center_mem[1]
    center_addr = write_gold[center_y][center_x][local_clock]

    source = memories[primary]
    target = memories[flip(primary)]

    center = source[center_y][center_x].read(center_addr)

    # read every other memory whose gold read address is set on this clock
    others = []
    for y in range(3):
        for x in range(3):
            if y == center_y and x == center_x:
                continue  # the center memory was already read above
            addr = read_gold[y][x][local_clock]
            if addr is not None:
                others.append(source[y][x].read(addr))

    result = conway(center, others)
    target[center_y][center_x].write(center_addr, result)

    # report once per frame, on its last clock
    if local_clock == frame_clocks - 1:
        print(f"=== clock {clock}; result {result}; neighbors {sum(others)}; center {center_mem} ===")
        print_memory_contents(memories, primary)

    # end of clock: clear the per-clock access tracking of every memory
    for y in range(3):
        for x in range(3):
            source[y][x].clock()
            target[y][x].clock()

    return clock + 1



print(f'primary {primary}')
# do a whole frame: one clock per pixel of the 6x6 screen
for _ in range(36):
    clock = do_clock(memories, clock)
n = 36  # number of clocks simulated in this frame
# swap buffers so the freshly written frame is read on the next pass
primary = flip(primary)

primary 0
=== clock 395; result 0; neighbors 0; center [2 2] ===
000011    000011
000011    000011
000000    000000
010000 -> 010000
101000    101000
010000    010000


In [271]:
## generate testbench golden vectors
def _emit_enable_vectors(signal, gold):
    """Print a Verilog initial block driving one enable bit per memory per cycle.

    The bit is 1 where ``gold[y][x][cycle]`` holds an address and 0 where it
    holds None.
    """
    print("initial begin")
    for cycle in range(len(gold[0][0])):
        # `//` starts a Verilog comment; `#` would be parsed as a delay control
        print(f"@(posedge clk); // cycle {cycle}")
        for y in range(3):
            for x in range(3):
                bit = '0' if gold[y][x][cycle] is None else '1'
                print(f"{signal}_x{x}_y{y} <= 1'b{bit};")
    print("end")
    print("")


def _emit_addr_vectors(signal, gold):
    """Print a Verilog initial block driving the addresses that are set in ``gold``.

    Memories whose entry is None on a cycle get no assignment on that cycle.
    """
    print("initial begin")
    for cycle in range(len(gold[0][0])):
        print(f"@(posedge clk); // cycle {cycle}")
        for y in range(3):
            for x in range(3):
                addr = gold[y][x][cycle]
                if addr is not None:
                    print(f"{signal}_x{x}_y{y} <= 'd{addr};")
    print("end")
    print("")


# The loops live in helpers so their indices stay local and do not overwrite
# the module-level simulation `clock`.
_emit_enable_vectors("read_en", read_gold)
_emit_addr_vectors("read_addr", read_gold)
_emit_enable_vectors("write_en", write_gold)
_emit_addr_vectors("write_addr", write_gold)

initial begin
@(posedge clk); # cycle 0
write_addr_x0_y0 <= 'd0;
@(posedge clk); # cycle 1
write_addr_x1_y0 <= 'd0;
@(posedge clk); # cycle 2
write_addr_x2_y0 <= 'd0;
@(posedge clk); # cycle 3
write_addr_x0_y0 <= 'd1;
@(posedge clk); # cycle 4
write_addr_x1_y0 <= 'd1;
@(posedge clk); # cycle 5
write_addr_x2_y0 <= 'd1;
@(posedge clk); # cycle 6
write_addr_x0_y1 <= 'd0;
@(posedge clk); # cycle 7
write_addr_x1_y1 <= 'd0;
@(posedge clk); # cycle 8
write_addr_x2_y1 <= 'd0;
@(posedge clk); # cycle 9
write_addr_x0_y1 <= 'd1;
@(posedge clk); # cycle 10
write_addr_x1_y1 <= 'd1;
@(posedge clk); # cycle 11
write_addr_x2_y1 <= 'd1;
@(posedge clk); # cycle 12
write_addr_x0_y2 <= 'd0;
@(posedge clk); # cycle 13
write_addr_x1_y2 <= 'd0;
@(posedge clk); # cycle 14
write_addr_x2_y2 <= 'd0;
@(posedge clk); # cycle 15
write_addr_x0_y2 <= 'd1;
@(posedge clk); # cycle 16
write_addr_x1_y2 <= 'd1;
@(posedge clk); # cycle 17
write_addr_x2_y2 <= 'd1;
@(posedge clk); # cycle 18
write_addr_x0_y0 <= 'd2;
@(posedg