In [1]:
#
# Parallel brute-force search using Numba's JIT. The number of batches is derived from the range and batch size;
# prange then distributes those batches across the worker threads.
#
from numba import jit, njit, cuda , prange
import numba
import numpy as np

# registers = [729, 0, 0]
# program = [0,1,5,4,3,0]

# Positions of the A, B and C registers inside the 3-element register array
register_a, register_b, register_c = 0, 1, 2

# enabling the JIT causes print statements to not print lists as expected
@njit(parallel=True)
def loop_em(start, end, batch_size, program):
    print(program)

    total_range = end - start
    # handle if not exactly divisible
    num_batches = (total_range // batch_size) + 1
    print(f'total_range={total_range} batch_size={batch_size} num_batches={num_batches} start={start} end={end} program = {program}')

    # iterate across each batch
    # prange sizes to the number of virtual cores
    for batch_index in prange(num_batches):
        # iterate across each iteration of the batch
        print(f'starting batch {batch_index}')
        for in_batch_index in range(batch_size):
            current = start + (batch_index*batch_size) + in_batch_index
            # handle the fact the batch size is not an exact factor of the total range
            if (current <= end):
                # This program is 8 octal operands so the A register needs to be 8 octal digits.
                registers = np.array([current, 0, 0])
                output = []
                resolve_operand = lambda operand: operand if (operand<4) else registers[operand-4]
                # if (i%1000000 == 0):
                #     print(f'at: {current} : {i}/{loop_iterations}')
                
                # print(f'{in_batch_index}:{current}:{end} program: {program} registers: {registers}')
                address_ptr = 0
                while (address_ptr < len(program)):
                    # numba says these are int64
                    operator = program[address_ptr]
                    operand = program[address_ptr+1]
                    next_address_ptr = address_ptr+2
                    # print (f'address: {address_ptr} operator: {operator} operand: {operand} registers: {registers} ')
                    match (operator):
                        case 0: # adv division register_a ~/ 2^comboOperand
                            registers[register_a] = registers[register_a] // 2 ** resolve_operand(operand)
                        case 1: # bxl bitwise XOR (registerB , operand)
                            registers[register_b] = registers[register_b] ^ operand
                        case 2: # bst operand modulo 8
                            registers[register_b] = resolve_operand(operand) % 8
                        case 3: # jnz jump not zero
                            if (registers[register_a] != 0):
                                next_address_ptr =  operand
                        case 4: #bxc bitwise xor reg b, reg c
                            registers[register_b] = registers[register_b] ^ registers[register_c]
                        case 5: # out % modulo 8
                            output.append(resolve_operand(operand) %8)
                        case 6: # BDV integer division on A , stored in B
                            divisor = 2 ** resolve_operand(operand)
                            registers[register_b] = registers[register_a] // divisor
                        case 7: # CDV
                            divisor = 2 ** resolve_operand(operand)
                            registers[register_c] = registers[register_a] // divisor
                        case _:
                            print('oh no')
                            result = -1
                    
                    address_ptr = next_address_ptr
                    # print(f'now at: {address_ptr} output after {output}')
                    # print(f'final registers: {registers} output {np.array(output)} 
                    # This exists because I played with different lengths while experimenting
                    if (len(output) >= 15
                        and output[0]==program[0] 
                        and output[1]==program[1] 
                        and output[2]==program[2] 
                        and output[3]==program[3]
                        and output[4]==program[4]
                        and output[5]==program[5]
                        and output[6]==program[6]
                        and output[7]==program[7]
                        and output[8]==program[8]
                        and output[9]==program[9]
                        and output[10]==program[10]
                        and output[11]==program[11]
                        and output[12]==program[12]
                        and output[13]==program[13]
                        and output[14]==program[14]
                        #and output[15]==program[15]
                        ):
                        # will not print if njit is enabled
                        #print(f'matches {oct(current)} - {output} -{current}')
                        # use with njit
                        print(f'matches {current}')
                        print(output)
                        # from before we put the loop in
                        # return current
                #print(f'{oct(current)} - {output}')
        print(f'finished batch {batch_index}')
        # return output
    print('done');


In [2]:
%%time
# Arguments, in order: first candidate for register A, last candidate (inclusive),
# number of candidates per batch (all written as octal literals), and the program to run.
loop_em(int(0o1000000000000000),
        int(0o1000077777777777),
        int(0o0000000100000000),
        np.array([2,4,1,3,7,5,4,7,0,3,1,5,5,5,3,0]))

# 16 digits octal
# 16th digit must be 1 otherwise the return is shorter than the program


[2 4 1 3 7 5 4 7 0 3 1 5 5 5 3 0]
total_range=8589934591 batch_size=16777216 num_batches=512 start=35184372088832 end=35192962023423 program = <object type:array(int64, 1d, C)>
starting batch 0
starting batch 344
starting batch 22
starting batch 302
starting batch 281
starting batch 260
starting batch 428
starting batch 449
starting batch 176
starting batch 470
starting batch 239
starting batch 154
starting batch 110
starting batch 365
starting batch 386
starting batch 407
starting batch 66
starting batch 491
starting batch 44
starting batch 88
starting batch 323
starting batch 132
starting batch 197
starting batch 218
finished batch 218
starting batch 219
finished batch 132
starting batch 133
finished batch 449
starting batch 450
finished batch 386
starting batch 387
finished batch 344
starting batch 345
finished batch 323
starting batch 324
finished batch 88
starting batch 89
finished batch 66
starting batch 67
finished batch 491
starting batch 492
finished batch 176
starting batch 1