Skip to content

Commit 4ba9d23

Browse files
authored
Lua (#52)
* lua * lua
1 parent 9a725ea commit 4ba9d23

File tree

12 files changed

+956
-0
lines changed

12 files changed

+956
-0
lines changed

.github/workflows/bench.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ jobs:
2323
javascript,
2424
julia,
2525
kotlin,
26+
lua,
2627
nim,
2728
python,
2829
ruby,
@@ -33,6 +34,16 @@ jobs:
3334
]
3435
steps:
3536
- uses: actions/checkout@v2
37+
- name: Install lua
  if: matrix.lang == 'lua'
  run: |
    # Refresh the package index so the install does not fail on stale mirrors.
    sudo apt-get update
    sudo apt-get install lua5.3 -y
    lua -v
    # Pin the clone to the release tag whose binary name is symlinked below;
    # building an unpinned default branch can produce a differently named
    # binary and leave the luajit symlink dangling.
    git clone --depth 1 --branch v2.1.0-beta3 https://github.com/LuaJIT/LuaJIT
    cd LuaJIT
    make && sudo make install
    sudo ln -sf luajit-2.1.0-beta3 /usr/local/bin/luajit
    luajit -v
3647
- name: Install ruby
3748
if: matrix.lang == 'ruby'
3849
uses: ruby/setup-ruby@v1

bench/algorithm/binarytrees/4.lua

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
-- The Computer Language Benchmarks Game
2+
-- https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
3+
-- contributed by Mike Pall
4+
-- modified by Sebastian Engel to be parallel, derived from mandelbrot-lua-6
5+
6+
-- called with the following arguments on the command line;
7+
-- 1: Initial depth of the tree
8+
-- 2: number of children to spawn (defaults to 4; an earlier note said 6 works well on 4-way)
9+
-- If this is a child, then there will be additional parameters;
10+
-- 3: current tree depth
11+
-- 4: chunk start
12+
-- 5: chunk end
13+
14+
15+
-- Command-line parameters (positions described in the header comment).
-- `arg` may be absent when the chunk is not started by the standalone
-- interpreter, so fall back to an empty table.
local argv = arg or {}
local N = tonumber(argv[1]) or 0
local children = tonumber(argv[2]) or 4
-- Only present when this process was spawned as a child worker.
local cdepth = tonumber(argv[3])
local chunkstart = tonumber(argv[4])
local chunkend = tonumber(argv[5])

-- Local alias for the output routine used throughout the script.
local write = io.write
22+
23+
--- Build a complete binary tree.
-- Interior nodes are two-element tables `{ left, right }`; once `depth`
-- is no longer greater than zero an empty table is returned as a leaf.
-- @param depth number of levels to build below the returned node
-- @return the root table of the new tree
local function BottomUpTree(depth)
  if not (depth > 0) then
    return { }
  end
  local below = depth - 1
  local left = BottomUpTree(below)
  local right = BottomUpTree(below)
  return { left, right }
end
32+
33+
--- Count the nodes of a tree built by BottomUpTree.
-- A node without a first child counts as a single leaf; otherwise the
-- node itself plus both subtrees are counted.
-- @param tree node table (`{ left, right }` or an empty leaf)
-- @return number of nodes in the subtree rooted at `tree`
local function ItemCheck(tree)
  local left = tree[1]
  if not left then
    return 1
  end
  return 1 + ItemCheck(left) + ItemCheck(tree[2])
end
40+
41+
if not chunkstart then
  -- Parent process: print the stretch-tree line, split each depth's
  -- iterations across `children` child interpreters, sum their partial
  -- checks, and finally print the long-lived-tree line.

  local mindepth = 4
  local maxdepth = mindepth + 2
  if maxdepth < N then maxdepth = N end

  do
    -- Scoped block: the stretch tree is unreachable once this block ends.
    local stretchdepth = maxdepth + 1
    local stretchtree = BottomUpTree(stretchdepth)
    write(string.format("stretch tree of depth %d\t check: %d\n",
      stretchdepth, ItemCheck(stretchtree)))
  end

  local longlivedtree = BottomUpTree(maxdepth)

  -- Command line used to re-run this script (arg[0]) with the same
  -- interpreter (arg[-1]) as a child worker.
  -- NOTE(review): both paths are interpolated unquoted, so a path that
  -- contains spaces or shell metacharacters will break the command.
  local childcmd = "%s %s %d %d %d %d %d"

  for depth = mindepth, maxdepth, 2 do
    local iterations = 2 ^ (maxdepth - depth + mindepth)
    local workunit = math.floor(iterations / children)
    local handles = { }

    for i = 1, children do
      -- Every child gets `workunit` consecutive iterations; the last
      -- child also absorbs the remainder up to `iterations`.
      local cs = workunit * (i - 1) + 1
      local ce = (i == children) and iterations or (cs + workunit - 1)

      handles[i] = assert(io.popen(childcmd:format(
        arg[-1], arg[0], N, children, depth, cs, ce)))
    end

    -- Collect answers. Each pipe is closed as soon as it has been read so
    -- that finished children are reaped and no handles pile up across
    -- depths.
    local check = 0
    for i = 1, children do
      local handle = handles[i]
      local output = handle:read("*a")
      handle:close()
      -- Convert explicitly so a child that printed nothing (or garbage)
      -- is reported clearly instead of failing inside the addition.
      check = check + assert(tonumber(output),
        "child process did not return a numeric check value")
    end

    write(string.format("%d\t trees of depth %d\t check: %d\n",
      iterations, depth, check))
  end

  write(string.format("long lived tree of depth %d\t check: %d\n",
    maxdepth, ItemCheck(longlivedtree)))

else
  -- Child process: build and check one tree of depth `cdepth` for every
  -- index in chunkstart..chunkend, then print the summed check so the
  -- parent can read it from the pipe.
  local partialcheck = 0

  for i = chunkstart, chunkend do
    partialcheck = partialcheck + ItemCheck(BottomUpTree(cdepth))
  end

  write(partialcheck)
end
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
# The Computer Language Benchmarks Game
2+
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
3+
4+
# based on Oleg Mazurov's Java Implementation and Jeremy Zerfas' C implementation
5+
# transliterated and modified by Hamza Yusuf Çakır
6+
7+
# Number of blocks the permutation index range is divided into when it
# contains at least this many permutations (see the Fannkuch constructor).
const preferred_num_blocks = 24
8+
9+
# Shared state of one benchmark run.
#   n        - permutation size
#   blocksz  - number of consecutive permutation indices handled per block
#   maxflips - one slot per thread, largest flip count seen
#   chksums  - one slot per thread, running signed checksum
struct Fannkuch
    n::Int64
    blocksz::Int64
    maxflips::Vector{Int32}
    chksums::Vector{Int32}

    # Split the n! permutations into `preferred_num_blocks` blocks, or
    # keep them as a single block when there are fewer permutations than
    # that. Both per-thread accumulators start zeroed with `nthreads` slots.
    function Fannkuch(n, nthreads)
        total = factorial(n)
        nblocks = total < preferred_num_blocks ? 1 : preferred_num_blocks
        return new(n, total ÷ nblocks, zeros(Int32, nthreads), zeros(Int32, nthreads))
    end
end
25+
26+
# Per-block working buffers, each of length n and zero-initialised:
#   p     - the current permutation
#   pp    - scratch copy used while building and flipping permutations
#   count - per-position counters that drive next_permutation
struct Perm
    p::Vector{Int8}
    pp::Vector{Int8}
    count::Vector{Int32}

    Perm(n) = new(zeros(Int8, n), zeros(Int8, n), zeros(Int32, n))
end
39+
40+
# Set `perm.p` and `perm.count` to the state that corresponds to
# permutation index `idx`.
#
# Slots 2..n of `p` are reset to 1..n-1 (slot 1 is left untouched). Then,
# from the highest position down to 2, `idx` is split by (i-1)!: the
# quotient is stored in `count[i]` and the first `i` elements of `p` are
# rotated left by that amount, while the remainder carries on to the next
# position.
Base.@propagate_inbounds @inline function first_permutation(perm::Perm, idx)
    p = perm.p
    scratch = perm.pp
    n = length(p)

    for i in 2:n
        p[i] = (i - 1) % Int8
    end

    for i in n:-1:2
        d, idx = divrem(idx, factorial(i - 1))
        perm.count[i] = d

        # snapshot the prefix, then write it back rotated left by d
        for j in 1:i
            scratch[j] = p[j]
        end
        for j in 1:i
            src = j + d
            if src > i
                src -= i
            end
            p[j] = scratch[src]
        end
    end
end
63+
64+
# Advance `perm.p` in place to the next permutation and update the
# counters in `perm.count` accordingly. Returns `nothing`.
Base.@propagate_inbounds @inline function next_permutation(perm::Perm)
    p = perm.p
    counts = perm.count

    # exchange the two leading elements; `carried` keeps the old p[2]
    carried = p[2]
    p[2] = p[1]
    p[1] = carried

    level = 2
    # While the counter at this level is exhausted, reset it, shift the
    # prefix one slot to the left and move on to the next level.
    while counts[level] >= level - 1
        counts[level] = 0

        upcoming = p[2]
        p[1] = upcoming

        for j in 1:level
            p[j] = p[j + 1]
        end

        level += 1
        p[level] = carried
        carried = upcoming
    end
    counts[level] += 1
    return nothing
end
89+
90+
# Return the flip count (as Int32) for the permutation in `perm.p`.
# `perm.p` itself is only read; all flipping happens in `perm.pp`.
Base.@propagate_inbounds @inline function count_flips(perm::Perm)
    p = perm.p
    scratch = perm.pp

    flips = Int32(1)

    # 1-based slot addressed by the leading element
    head = p[1] + 1

    # If that slot already holds 0 the count stays at 1.
    p[head] == 0 && return flips

    # copy slots 2..end into the scratch buffer before mutating anything
    unsafe_copyto!(scratch, 2, p, 2, length(p) - 1)

    while true
        flips += one(flips)
        next_head = scratch[head]
        scratch[head] = (head - 1) % Int8

        if head > 3
            lo = 2
            hi = head - 1
            # see the note in Jeremy Zerfas' C implementation for
            # this fixed-trip-count loop
            for _ in 0:13
                scratch[lo], scratch[hi] = scratch[hi], scratch[lo]
                hi < lo + 3 && break
                lo += 1
                hi -= 1
            end
        end

        head = next_head + 1
        scratch[head] == 0 && break
    end

    return flips
end
128+
129+
# Process the permutations with indices idxmin..idxmax, starting from the
# state already held in `perm`, and fold the block's results into the
# slot of `f` selected by the current thread id. Returns `nothing`.
Base.@propagate_inbounds function run_task(f::Fannkuch, perm::Perm, idxmin, idxmax)
    best = Int32(0)
    signed_sum = Int32(0)

    idx = idxmin
    while true
        # permutations whose leading element is 0 contribute nothing
        if perm.p[1] != 0
            flips = count_flips(perm)
            best = max(best, flips)
            # flips are added for even indices and subtracted for odd ones
            if iseven(idx)
                signed_sum += flips
            else
                signed_sum -= flips
            end
        end
        idx == idxmax && break
        idx += 1
        next_permutation(perm)
    end

    slot = Threads.threadid()
    if best > f.maxflips[slot]
        f.maxflips[slot] = best
    end
    f.chksums[slot] += signed_sum
    return nothing
end
150+
151+
# Walk the permutation index range 0..n!-1 in steps of `f.blocksz`,
# handling each block inside a `Threads.@threads` loop. Every block gets
# its own `Perm` buffers, positioned at the block's first index.
function runf(f::Fannkuch)
    last_idx = factorial(f.n) - 1

    Threads.@threads for idxmin in 0:f.blocksz:last_idx
        perm = Perm(f.n)
        @inbounds first_permutation(perm, idxmin)
        @inbounds run_task(f, perm, idxmin, idxmin + f.blocksz - 1)
    end
end
161+
162+
# Run the benchmark for permutations of size `n` and print the checksum
# followed by the "Pfannkuchen(n) = maxflips" line.
function fannkuchredux(n)
    state = Fannkuch(n, Threads.nthreads())
    runf(state)

    # combine the per-thread slots into the final results
    checksum = sum(state.chksums)
    most_flips = maximum(state.maxflips)

    println(checksum, "\nPfannkuchen(", n, ") = ", most_flips)
end
173+
174+
# Script entry point: the first command-line argument is parsed as an Int
# and handed to fannkuchredux below.
n = parse(Int, ARGS[1])
175+
fannkuchredux(n)

0 commit comments

Comments
 (0)