hanabi1224
diff --git a/‎.github/workflows/bench.yml‎
Lines changed: 11 additions & 0 deletions b/‎.github/workflows/bench.yml‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎bench/algorithm/binarytrees/4.lua‎
Lines changed: 105 additions & 0 deletions b/‎bench/algorithm/binarytrees/4.lua‎
Lines changed: 105 additions & 0 deletions
diff --git a/‎bench/algorithm/fannkuch-redux/2.jl‎
Lines changed: 175 additions & 0 deletions b/‎bench/algorithm/fannkuch-redux/2.jl‎
Lines changed: 175 additions & 0 deletions
@@ -23,6 +23,7 @@ jobs:
             javascript,
             julia,
             kotlin,
+            lua,
             nim,
             python,
             ruby,
@@ -33,6 +34,16 @@ jobs:
           ]
     steps:
       - uses: actions/checkout@v2
+      - name: Install lua
+        if: matrix.lang == 'lua'
+        run: |
+          # Refresh the package index first so the install does not hit stale mirrors.
+          sudo apt-get update
+          sudo apt-get install lua5.3 -y
+          lua -v
+          # Pin the checkout to the release whose binary name is hard-coded in the
+          # symlink below; an unpinned clone installs a differently named binary.
+          git clone --depth 1 --branch v2.1.0-beta3 https://github.com/LuaJIT/LuaJIT
+          cd LuaJIT
+          make && sudo make install
+          sudo ln -sf luajit-2.1.0-beta3 /usr/local/bin/luajit
+          luajit -v
       - name: Install ruby
         if: matrix.lang == 'ruby'
         uses: ruby/setup-ruby@v1
 
@@ -0,0 +1,105 @@
+-- The Computer Language Benchmarks Game
+-- https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
+-- contributed by Mike Pall
+-- modified by Sebastian Engel to be parallel, derived from mandelbrot-lua-6
+
+-- called with the following arguments on the command line;
+-- 1: Initial depth of the tree 
+-- 2: number of children to spawn (defaults to 4; 6 is reported to work well on 4-way)
+-- If this is a child, then there will be additional parameters;
+-- 3: current tree depth 
+-- 4: chunk start
+-- 5: chunk end
+
+
+-- Command-line parameters, described in the header comment above.
+-- The fallback table guards against `arg` being nil.
+local argv       = arg or { }
+local N          = tonumber(argv[1]) or 0
+local children   = tonumber(argv[2]) or 4
+local cdepth     = tonumber(argv[3])
+local chunkstart = tonumber(argv[4])
+local chunkend   = tonumber(argv[5])
+
+-- Local alias for the output function used by the rest of the script.
+local write = io.write
+
+-- Builds a complete binary tree with `depth` levels below the root.
+-- Interior nodes are arrays { left, right }; leaves are empty tables.
+local function BottomUpTree(depth)
+    if not (depth > 0) then
+        return { }
+    end
+    local below = depth - 1
+    local left = BottomUpTree(below)
+    local right = BottomUpTree(below)
+    return { left, right }
+end
+
+-- Counts the nodes of a tree: one for this node, plus the counts of both
+-- subtrees when the first child slot is set.
+local function ItemCheck(tree)
+    local left = tree[1]
+    if not left then
+        return 1
+    end
+    return 1 + ItemCheck(left) + ItemCheck(tree[2])
+end
+
+if not chunkstart then
+    -- We are the parent process:
+    -- emit the header, then for every depth spawn the children and sum
+    -- the partial checksums they print.
+
+    local mindepth = 4
+    local maxdepth = mindepth + 2
+    if maxdepth < N then maxdepth = N end
+
+    do
+        -- Scoped block: the stretch tree is only needed for this one report.
+        local stretchdepth = maxdepth + 1
+        local stretchtree = BottomUpTree(stretchdepth)
+        write(string.format("stretch tree of depth %d\t check: %d\n",
+                stretchdepth, ItemCheck(stretchtree)))
+    end
+
+    local longlivedtree = BottomUpTree(maxdepth)
+
+    for depth=mindepth,maxdepth,2 do
+        local iterations = 2 ^ (maxdepth - depth + mindepth)
+        local check = 0
+
+        -- Every child gets `workunit` iterations; the last child also takes
+        -- whatever remains when the division is not exact.
+        local workunit = math.floor(iterations / children )
+        local handles = { }
+
+        -- Start all children before reading any output so they run concurrently.
+        for i = 1,children do
+            local cs = (workunit * (i-1)) + 1
+            local ce
+            if i == children then
+                ce = iterations
+            else
+                ce = cs + workunit - 1
+            end
+
+            -- arg[-1] and arg[0] are the first two words of the child's
+            -- command line; fail loudly if it cannot be started.
+            handles[i] = assert(io.popen(("%s %s %d %d %d %d %d"):format(
+            arg[-1], arg[0], N, children, depth, cs, ce)))
+        end
+
+        -- collect answers, and emit
+        for i = 1, children do
+            local handle = handles[i]
+            local output = handle:read "*a"
+            -- Close the pipe so the child is reaped and the descriptor released.
+            handle:close()
+
+            local partial = tonumber(output)
+            if not partial then
+                error(("child %d for depth %d returned no checksum: %q"):format(
+                        i, depth, tostring(output)))
+            end
+            check = check + partial
+        end
+
+        write(string.format("%d\t trees of depth %d\t check: %d\n",
+                iterations, depth, check))
+    end
+
+    write(string.format("long lived tree of depth %d\t check: %d\n",
+            maxdepth, ItemCheck(longlivedtree)))
+
+else
+    -- We are a child process:
+    -- do the work allocated to us and print the partial checksum, which the
+    -- parent reads from the pipe.
+    local partialcheck = 0
+
+    for _=chunkstart,chunkend do
+        partialcheck = partialcheck + ItemCheck(BottomUpTree(cdepth))
+    end
+
+    write(partialcheck)
+end
@@ -0,0 +1,175 @@
+# The Computer Language Benchmarks Game
+# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
+
+# based on Oleg Mazurov's Java Implementation and Jeremy Zerfas' C implementation
+# transliterated and modified by Hamza Yusuf Çakır
+
+global const preferred_num_blocks = 24  # divisor the Fannkuch constructor applies to factorial(n) to get blocksz
+
+# State for one run: the problem size, the number of permutation indices per
+# block, and two accumulator vectors with `nthreads` slots each.
+struct Fannkuch
+    n::Int64
+    blocksz::Int64
+    maxflips::Vector{Int32}
+    chksums::Vector{Int32}
+
+    function Fannkuch(n, nthreads)
+        total = factorial(n)
+        # Fewer permutations than preferred blocks: keep everything in one block.
+        nblocks = total < preferred_num_blocks ? 1 : preferred_num_blocks
+
+        return new(n, total ÷ nblocks, zeros(Int32, nthreads), zeros(Int32, nthreads))
+    end
+end
+
+# Working buffers for one permutation walk: the current permutation `p`, a
+# scratch copy `pp`, and the per-position counters `count`, all of length n
+# and zero-initialised.
+struct Perm
+    p::Vector{Int8}
+    pp::Vector{Int8}
+    count::Vector{Int32}
+
+    function Perm(n)
+        return new(zeros(Int8, n), zeros(Int8, n), zeros(Int32, n))
+    end
+end
+
+# Sets perm.p to the permutation with index `idx` and fills perm.count[2:end]
+# with the matching rotation counts. perm.pp is used as scratch space.
+# Returns nothing.
+Base.@propagate_inbounds @inline function first_permutation(perm::Perm, idx)
+    p = perm.p
+    pp = perm.pp
+
+    # Start from the identity 0, 1, ..., n-1. Slot 1 is written as well, so the
+    # result does not depend on `perm` being freshly zero-initialised.
+    for i = 1:length(p)
+        p[i] = (i - 1) % Int8
+    end
+
+    for i = length(p):-1:2
+        # Digit of idx in the factorial number system for position i.
+        ifact = factorial(i-1)
+        d = idx ÷ ifact
+        perm.count[i] = d
+        idx = idx % ifact
+
+        # Rotate the first i entries left by d positions via the scratch copy.
+        for j = 1:i
+            pp[j] = p[j]
+        end
+
+        for j = 1:i
+            p[j] = j+d <= i ? pp[j+d] : pp[j+d-i]
+        end
+    end
+end
+
+# Advances perm.p in place to the next permutation and updates the counters
+# in perm.count. Returns nothing.
+Base.@propagate_inbounds @inline function next_permutation(perm::Perm)
+    p = perm.p
+    count = perm.count
+
+    # Swap the first two entries, remembering the value that moved to the front.
+    head = p[2]
+    p[1], p[2] = head, p[1]
+
+    i = 2
+    while count[i] >= i - 1
+        # Counter i is exhausted: reset it and carry into the next position.
+        count[i] = 0
+
+        carry = p[2]
+
+        # Shift entries down by one; the j = 1 step stores p[2] into p[1].
+        for j = 1:i
+            p[j] = p[j+1]
+        end
+
+        i += 1
+        p[i] = head
+        head = carry
+    end
+    count[i] += 1
+    nothing
+end
+
+# Returns the flip count (as Int32) for the permutation in perm.p. All flipping
+# is done on the scratch buffer perm.pp; perm.p is only read.
+Base.@propagate_inbounds @inline function count_flips(perm::Perm)
+    src = perm.p
+    work = perm.pp
+
+    flips = Int32(1)
+    head = src[1] + 1
+
+    # Done already when the slot selected by the leading value holds 0.
+    src[head] == 0 && return flips
+
+    unsafe_copyto!(work, 2, src, 2, length(src) - 1)
+
+    while true
+        flips += one(flips)
+        next_head = work[head]
+        work[head] = (head - 1) % Int8
+
+        if head > 3
+            lo = 2
+            hi = head - 1
+            # see the note in Jeremy Zerfas' C implementation for
+            # this loop (at most 14 swaps)
+            for _ in 1:14
+                work[lo], work[hi] = work[hi], work[lo]
+                (hi < lo + 3) && break
+                lo += 1
+                hi -= 1
+            end
+        end
+
+        head = next_head + 1
+        work[head] == 0 && break
+    end
+
+    return flips
+end
+
+# Walks the permutations with indices idxmin..idxmax, starting from the state
+# already stored in `perm`, then folds this block's checksum and maximum flip
+# count into the slot of `f` selected by the current thread id.
+Base.@propagate_inbounds function run_task(f::Fannkuch, perm::Perm, idxmin, idxmax)
+    best = Int32(0)
+    acc = Int32(0)
+
+    idx = idxmin
+    while true
+        # Permutations whose first entry is 0 are skipped.
+        if perm.p[1] != 0
+            flips = count_flips(perm)
+            if flips > best
+                best = flips
+            end
+            # Even indices add to the checksum, odd indices subtract.
+            acc += iseven(idx) ? flips : -flips
+        end
+        idx == idxmax && break
+        idx += 1
+        next_permutation(perm)
+    end
+
+    # NOTE(review): indexing by threadid() presumes the id is a valid index into
+    # the accumulators and stays fixed for this task; confirm for the Julia
+    # version and @threads schedule in use.
+    slot = Threads.threadid()
+    if best > f.maxflips[slot]
+        f.maxflips[slot] = best
+    end
+    f.chksums[slot] += acc
+    nothing
+end
+
+# Splits the index range 0:factorial(f.n)-1 into blocks of f.blocksz and
+# processes each block in a Threads.@threads loop with its own Perm buffers.
+function runf(f::Fannkuch)
+    last_idx = factorial(f.n) - 1
+
+    Threads.@threads for start = 0:f.blocksz:last_idx
+        state = Perm(f.n)
+        @inbounds first_permutation(state, start)
+        @inbounds run_task(f, state, start, start + f.blocksz - 1)
+    end
+end
+
+# Runs the benchmark for size `n` and prints the checksum followed by the
+# "Pfannkuchen(n) = maxflips" line.
+function fannkuchredux(n)
+    state = Fannkuch(n, Threads.nthreads())
+    runf(state)
+
+    # Combine the per-slot results into the two reported numbers.
+    checksum = sum(state.chksums)
+    most_flips = maximum(state.maxflips)
+
+    println(checksum, "\nPfannkuchen(", n, ") = ", most_flips)
+end
+
+n = parse(Int, ARGS[1])  # problem size, read from the first command-line argument
+fannkuchredux(n)  # prints the checksum and the maximum flip count