In [1]:
import awkward.forth

In [2]:
from numba import jit
import numba
import numpy as np

In [3]:
import timeit

In [4]:
@jit(nopython=True, nogil=True)
def formula(x):
    """Evaluate ``((x + 123) * x - x) * x`` one in-place step at a time.

    Each statement applies a single arithmetic operation to the running
    value, in the same order as the push/pop sequence in ``formula_2``.
    """
    acc = x
    acc += 123  # x + 123
    acc *= x    # (x + 123) * x
    acc -= x    # (x + 123) * x - x
    acc *= x    # ((x + 123) * x - x) * x
    return acc

@jit(nopython=True, nogil=True)
def f_implicit(x, n_iter=1_000_000_000):
    """Repeatedly feed ``x`` through ``formula`` and sum every result.

    Parameters
    ----------
    x
        Seed value; it is replaced by ``formula(x)`` on every iteration.
    n_iter : int, optional
        Number of iterations to run. Defaults to 1_000_000_000, the trip
        count that was previously hard-coded, so ``f_implicit(x)`` behaves
        as before.

    Returns
    -------
    The running sum of the successive ``formula`` results.
    """
    out = 0
    for _ in range(n_iter):
        # Each iteration depends on the previous one, so the chain cannot be
        # reordered; the accumulated sum keeps the loop from being dead code.
        x = formula(x)
        out += x
    return out

In [5]:
@jit(nopython=True, nogil=True)
def stack_push(stack, elem, cur):
    """Write ``elem`` into ``stack[cur]`` and return the index following it.

    ``cur`` is the index of the next free slot; the caller must keep the
    returned value, since the index itself is not stored anywhere else.
    """
    stack[cur] = elem
    return cur + 1

@jit(nopython=True, nogil=True)
def stack_pop(stack, cur):
    """Return ``(stack[cur - 1], cur - 1)``: the top element and the new index.

    The slot is not cleared; only the index moves back by one.
    """
    top = cur - 1
    return stack[top], top

@jit(nopython=True, nogil=True)
def formula_2(x, stack, cur):
    """Compute ``((123 + x) * x - x) * x`` using explicit stack operations.

    Every arithmetic step pushes its operands onto ``stack``, pops them back
    and pushes the result. Pushes and pops are balanced, so the index ends
    where it started; only the final value is returned (the updated index is
    not).
    """
    # x 123 +
    cur = stack_push(stack, x, cur)
    cur = stack_push(stack, 123, cur)
    top, cur = stack_pop(stack, cur)
    below, cur = stack_pop(stack, cur)
    cur = stack_push(stack, top + below, cur)

    # x *
    cur = stack_push(stack, x, cur)
    top, cur = stack_pop(stack, cur)
    below, cur = stack_pop(stack, cur)
    cur = stack_push(stack, top * below, cur)

    # x -   (operand order matters: earlier value minus x)
    cur = stack_push(stack, x, cur)
    top, cur = stack_pop(stack, cur)
    below, cur = stack_pop(stack, cur)
    cur = stack_push(stack, below - top, cur)

    # x *
    cur = stack_push(stack, x, cur)
    top, cur = stack_pop(stack, cur)
    below, cur = stack_pop(stack, cur)
    cur = stack_push(stack, top * below, cur)

    # Take the final value off the stack, leaving it as it was on entry.
    result, cur = stack_pop(stack, cur)
    return result

@jit(nopython=True, nogil=True)
def f_explicit(x, n_iter=1_000_000_000):
    """Repeatedly feed ``x`` through ``formula_2`` and sum every result.

    Same computation as ``f_implicit``, but each arithmetic step goes through
    an explicit array-backed stack.

    Parameters
    ----------
    x
        Seed value; it is replaced by ``formula_2(x, ...)`` on every iteration.
    n_iter : int, optional
        Number of iterations to run. Defaults to 1_000_000_000, the trip
        count that was previously hard-coded, so ``f_explicit(x)`` behaves
        as before.

    Returns
    -------
    The running sum of the successive ``formula_2`` results.
    """
    out = 0
    cur = np.uint64(0)
    # formula_2 never holds more than two values at once; 100 slots is
    # simply a generous fixed capacity.
    stack = np.zeros(100, dtype=np.int64)
    # No initial push here: an earlier `stack_push(stack, out, cur)` threw
    # away the returned index and only stored 0 into an already-zero slot,
    # so it had no effect on the computation.
    for _ in range(n_iter):
        # formula_2 leaves the stack balanced, so `cur` is reused unchanged.
        x = formula_2(x, stack, cur)
        out += x
    return out

In [6]:
# Forth version of the benchmark loop: `1000000 0 do ... loop` is a counted
# loop whose body pushes literal 100s, applies + + * - * in turn, and then
# drops the result so the stack stays balanced.
# NOTE(review): assumes standard Forth `do ... loop` semantics - confirm
# against the awkward.forth documentation.
forth_code = "1000000 0 do 100 100 + 100 + 100 * 100 - 100 * drop loop"
# Build the machine once here; the timing cell below only calls machine.run().
machine = awkward.forth.ForthMachine64(forth_code)

In [7]:
# Sanity check: both variants should return the same value for the same seed.
# Calling them here also means the functions have already been invoked once
# before the timing cells run.
f_implicit(100), f_explicit(100)

(3267123752616544224, 3267123752616544224)

In [8]:
%%timeit
# Time the plain-arithmetic variant with its default trip count.
f_implicit(100)

2.02 s ± 23.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit
# Time the explicit-stack variant with its default trip count.
f_explicit(100)

1.98 s ± 17.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
%%timeit
# Time one run of the Forth program.
# NOTE(review): forth_code loops 1000000 times, whereas f_implicit and
# f_explicit loop 1000000000 times, so this timing covers 1000x fewer
# iterations than the two cells above - scale before comparing.
machine.run()

57.4 ms ± 5.58 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
# inspect_asm() returns a mapping of generated assembly; print the first entry
# (only one is expected here, since f_implicit has been called with a single
# argument type so far).
print(list(f_implicit.inspect_asm().values())[0])

	.text
	.file	"<string>"
	.globl	_ZN8__main__10f_implicitB2v1B40c8tJTIcFHzwl2ILiXkcBV0KBSgP9CGUhpQmwiCYAEx
	.p2align	4, 0x90
	.type	_ZN8__main__10f_implicitB2v1B40c8tJTIcFHzwl2ILiXkcBV0KBSgP9CGUhpQmwiCYAEx,@function
_ZN8__main__10f_implicitB2v1B40c8tJTIcFHzwl2ILiXkcBV0KBSgP9CGUhpQmwiCYAEx:
	movl	$1000000000, %ecx
	xorl	%eax, %eax
	.p2align	4, 0x90
.LBB0_1:
	leaq	123(%rdx), %rsi
	imulq	%rdx, %rsi
	subq	%rdx, %rsi
	imulq	%rdx, %rsi
	addq	%rsi, %rax
	leaq	123(%rsi), %rdx
	imulq	%rsi, %rdx
	subq	%rsi, %rdx
	imulq	%rsi, %rdx
	addq	%rdx, %rax
	leaq	123(%rdx), %rsi
	imulq	%rdx, %rsi
	subq	%rdx, %rsi
	imulq	%rdx, %rsi
	addq	%rsi, %rax
	leaq	123(%rsi), %rdx
	imulq	%rsi, %rdx
	subq	%rsi, %rdx
	imulq	%rsi, %rdx
	addq	%rdx, %rax
	leaq	123(%rdx), %rsi
	imulq	%rdx, %rsi
	subq	%rdx, %rsi
	imulq	%rdx, %rsi
	addq	%rsi, %rax
	leaq	123(%rsi), %rdx
	imulq	%rsi, %rdx
	subq	%rsi, %rdx
	imulq	%rsi, %rdx
	addq	%rdx, %rax
	leaq	123(%rdx), %rsi
	imulq	%rdx, %rsi
	subq	%rdx, %rsi
	imulq	%rdx, %rsi
	addq	%rsi, %r

[20]

[0, 10, 38, 0, 10, 0, 10, 44]