You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Created attachment 1220
A small raytracer
This is a benchmark program that shows the difference in run-time between a
struct with and without an explicit constructor:
import core.stdc.stdio: printf;
import core.stdc.stdlib: atoi;
// Three-component vector of doubles WITH a user-written constructor.
// This is the variant exercised by spam1.
struct V3a {
double x, y, z;
// Explicit constructor: assigns each argument to the field of the same name.
this(in double x_, in double y_, in double z_)pure nothrow { this.x = x_; this.y = y_; this.z = z_;}
}
// Same three double fields as V3a, but with no user-written constructor.
// This is the variant exercised by spam2.
struct V3b {
double x, y, z;
}
// Benchmark kernel for the struct WITH an explicit constructor.
// Builds a V3a(i, i, i) for every i in [0, N) and returns the sum of the
// y fields; returns 0.0 when N is 0.
double spam1(in uint N) pure nothrow {
double total = 0.0;
// Only v.y is read; x and z are constructed but never used.
for (uint i = 0; i < N; i++) { immutable v = V3a(i, i, i); total += v.y;}return total;
}
// Benchmark kernel for the struct WITHOUT an explicit constructor.
// Identical to spam1 except that it builds V3b(i, i, i): sums the y fields
// for every i in [0, N) and returns the total (0.0 when N is 0).
double spam2(in uint N) pure nothrow {
double total = 0.0;
// Only v.y is read; x and z are initialized but never used.
for (uint i = 0; i < N; i++) { immutable v = V3b(i, i, i); total += v.y;}return total;
}
/**
 * Benchmark driver.
 *
 * args[1] (optional): iteration count N, parsed with C `atoi`;
 *                     defaults to 1_000 when absent.
 * args[2] (optional): the string "1" selects spam1 (V3a, explicit
 *                     constructor); anything else, or no argument,
 *                     runs spam2 (V3b, no constructor).
 *
 * Prints the returned total followed by a newline.
 */
void main(in string[] args) {
    // atoi expects a NUL-terminated C string, so '\0' is appended before
    // taking the pointer.
    immutable uint N = (args.length >= 2) ?
                       atoi((args[1] ~ '\0').ptr) :
                       1_000;

    // The format string uses the "\n" escape instead of a literal line
    // break inside the string literal; the bytes printed are the same.
    if (args.length >= 3 && args[2] == "1")
        printf("%f\n", spam1(N));
    else
        printf("%f\n", spam2(N));
}
If you run it you see a performance difference between creating V3a and V3b.
This is the asm generated by dmd (dmd 2.064alpha, -O -release -inline
-noboundscheck):
; dmd output for spam1 (the V3a / explicit-constructor variant).
; 32-bit x86, destination-first operands.
; Register roles as used in this listing:
;   EAX on entry is copied to EDX and serves as the loop bound (N)
;   ECX = loop counter i
;   034h[ESP] (after the three pushes) = the 8-byte double `total`
_D5test25spam1FNaNbxkZd:
sub ESP,030h
mov EDX,EAX
xor ECX,ECX
push EBX
test EDX,EDX
push ESI
push EDI
; total = 0.0 (both dwords cleared).
mov dword ptr 034h[ESP],0
mov dword ptr 038h[ESP],0
; Branch on the result of `test EDX,EDX` above: skip the loop when N == 0.
je L52
; Loop body. Each iteration first copies the V3a init image to the local
; struct at 01Ch[ESP] with six 4-byte string moves (24 bytes). This copy
; has no counterpart in the spam2 listing.
L1E: mov ESI,offset FLAT:_D5test23V3a6__initZ
lea EDI,01Ch[ESP]
movsd
movsd
movsd
movsd
movsd
movsd
; Build a 64-bit integer at 0Ch[ESP] (low dword = i, high dword = 0),
; advance i and compare it against N in between.
xor EBX,EBX
mov EAX,ECX
mov 0Ch[ESP],ECX
inc ECX
cmp ECX,EDX
mov 010h[ESP],EBX
; Convert i to double and store it at 024h[ESP], i.e. 8 bytes into the
; struct copy (the slot of the second field). No stores to the first or
; third slot appear in this listing.
fild long64 ptr 0Ch[ESP]
fstp qword ptr 024h[ESP]
; total += the value just stored.
fld qword ptr 024h[ESP]
fadd qword ptr 034h[ESP]
fstp qword ptr 034h[ESP]
; Loop while i < N (unsigned), using the flags from `cmp ECX,EDX`.
jb L1E
; Exit: load total onto the x87 stack as the result, restore the saved
; registers and release the frame.
L52: fld qword ptr 034h[ESP]
pop EDI
pop ESI
pop EBX
add ESP,030h
ret
; dmd output for spam2 (the V3b / no-constructor variant).
; 32-bit x86, destination-first operands.
; Register roles as used in this listing:
;   EAX on entry is copied to EDX and serves as the loop bound (N)
;   ECX = loop counter i
;   02Ch[ESP] (after the push) = the 8-byte double `total`
; Unlike the spam1 listing, there is no init-image copy in the loop.
_D5test25spam2FNaNbxkZd:
sub ESP,030h
mov EDX,EAX
xor ECX,ECX
push EBX
test EDX,EDX
; total = 0.0 (both dwords cleared).
mov dword ptr 02Ch[ESP],0
mov dword ptr 030h[ESP],0
; Branch on the result of `test EDX,EDX` above: skip the loop when N == 0.
je L63
; Loop body. 4[ESP]/8[ESP] hold a 64-bit integer temp (low = i, high = 0);
; EAX keeps the pre-increment i so the temp can be rebuilt below.
L1C: mov 4[ESP],ECX
xor EBX,EBX
mov EAX,ECX
mov 8[ESP],EBX
inc ECX
cmp ECX,EDX
; First conversion of i to double, stored at 014h[ESP].
fild long64 ptr 4[ESP]
fstp qword ptr 014h[ESP]
; Second conversion of the same i, stored at 01Ch[ESP].
mov 4[ESP],EAX
mov 8[ESP],EBX
fild long64 ptr 4[ESP]
fstp qword ptr 01Ch[ESP]
; Third conversion; the result stays on the x87 stack for now.
mov 4[ESP],EAX
mov 8[ESP],EBX
fild long64 ptr 4[ESP]
; Reload the value at 01Ch[ESP], swap so the third conversion is on top,
; store it at 024h[ESP], then add the reloaded value to total.
; The three 8-byte slots 014h/01Ch/024h are consecutive, which matches the
; x, y, z layout of the struct.
fld qword ptr 01Ch[ESP]
fxch ST1
fstp qword ptr 024h[ESP]
fadd qword ptr 02Ch[ESP]
fstp qword ptr 02Ch[ESP]
; Loop while i < N (unsigned), using the flags from `cmp ECX,EDX`.
jb L1C
; Exit: load total onto the x87 stack as the result, restore EBX and
; release the frame.
L63: fld qword ptr 02Ch[ESP]
pop EBX
add ESP,030h
ret
One visible difference is the block of six movsd (string-move) instructions in spam1, which copies the V3a init data on every iteration:
movsd
movsd
movsd
movsd
movsd
movsd
Compiling with ldc2 (V. 0.11.0, based on DMD v2.062 and LLVM 3.3svn, -O5
-release -profile-verifier-noassert):
# ldc2 output for spam1 (V3a variant). 32-bit x86, AT&T syntax (source, dest).
# Register roles as used in this listing:
#   %eax  = remaining iteration count (N on entry, counted down to 0)
#   %xmm0 = running double that starts at 0.0 and grows by the constant
#   %xmm1 = accumulated total
#   %xmm2 = constant loaded from LCPI1_0 (value not shown in the listing;
#           presumably 1.0 -- TODO confirm)
# No struct is materialized in memory here.
__D5test25spam1FNaNbxkZd:
# Set up a frame, align the stack to 8 bytes and reserve an 8-byte slot.
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $8, %esp
xorps %xmm0, %xmm0
# N == 0: go to LBB1_1, which zeroes the total.
testl %eax, %eax
je LBB1_1
movsd LCPI1_0, %xmm2
xorps %xmm1, %xmm1
# Pad to a 16-byte boundary with 0x90 bytes before the loop head.
.align 16, 0x90
LBB1_3:
# total += current value; current value += constant; one fewer iteration left.
addsd %xmm0, %xmm1
addsd %xmm2, %xmm0
decl %eax
jne LBB1_3
jmp LBB1_4
LBB1_1:
xorps %xmm1, %xmm1
LBB1_4:
# Spill the total to the stack slot and reload it onto the x87 stack
# before returning.
movsd %xmm1, (%esp)
fldl (%esp)
movl %ebp, %esp
popl %ebp
ret
# ldc2 output for spam2 (V3b variant). 32-bit x86, AT&T syntax (source, dest).
# Register roles as used in this listing:
#   %eax  = remaining iteration count (N on entry, counted down to 0)
#   %xmm0 = accumulated total (starts at 0.0)
#   %xmm2 = running double that starts at 0.0 and grows by the constant
#   %xmm1 = constant loaded from LCPI2_0 (value not shown in the listing;
#           presumably 1.0 -- TODO confirm)
# The loop has the same shape as the spam1 listing from the same compiler.
__D5test25spam2FNaNbxkZd:
# Set up a frame, align the stack to 8 bytes and reserve an 8-byte slot.
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $8, %esp
xorps %xmm0, %xmm0
# N == 0: skip the loop; the total in %xmm0 is already 0.0.
testl %eax, %eax
je LBB2_3
movsd LCPI2_0, %xmm1
xorps %xmm2, %xmm2
# Pad to a 16-byte boundary with 0x90 bytes before the loop head.
.align 16, 0x90
LBB2_2:
# total += current value; current value += constant; one fewer iteration left.
addsd %xmm2, %xmm0
addsd %xmm1, %xmm2
decl %eax
jne LBB2_2
LBB2_3:
# Spill the total to the stack slot and reload it onto the x87 stack
# before returning.
movsd %xmm0, (%esp)
fldl (%esp)
movl %ebp, %esp
popl %ebp
ret
You can see that ldc2 compiles the two functions in the same way, and indeed their
run-time is the same.
But the problem is not limited to DMD. As soon as your program becomes a little
longer than such a tiny benchmark, the performance difference between structs
with constructors and structs without constructors becomes clearly visible even
with ldc2.
In the attachment you will find a small single-module ray tracer that contains a V3 struct.
The struct has a basic constructor like this:
this(in double x_, in double y_, in double z_) pure nothrow { this.x = x_; this.y = y_; this.z = z_;}
If you run the program with or without that constructor, using both ldc2 and
dmd, you see a significant performance difference.
!!!There are attachments in the bugzilla issue that have not been copied over!!!
The text was updated successfully, but these errors were encountered:
bearophile_hugs reported this on 2013-06-06T17:35:31Z
Transferred from https://issues.dlang.org/show_bug.cgi?id=10286
CC List
Description
Created attachment 1220 A small raytracer This is a benchmark program that shows the difference in run-time between a struct with and without explicit constructor: import core.stdc.stdio: printf; import core.stdc.stdlib: atoi; struct V3a { double x, y, z; this(in double x_, in double y_, in double z_) pure nothrow { this.x = x_; this.y = y_; this.z = z_; } } struct V3b { double x, y, z; } double spam1(in uint N) pure nothrow { double total = 0.0; for (uint i = 0; i < N; i++) { immutable v = V3a(i, i, i); total += v.y; } return total; } double spam2(in uint N) pure nothrow { double total = 0.0; for (uint i = 0; i < N; i++) { immutable v = V3b(i, i, i); total += v.y; } return total; } void main(in string[] args) { immutable uint N = (args.length >= 2) ? atoi((args[1] ~ '\0').ptr) : 1_000; if (args.length >= 3 && args[2] == "1") printf("%f ", spam1(N)); else printf("%f ", spam2(N)); } If you run it you see a performance difference between creating V3a and V3b. This is the asm generated by dmd (dmd 2.064alpha, -O -release -inline -noboundscheck): _D5test25spam1FNaNbxkZd: sub ESP,030h mov EDX,EAX xor ECX,ECX push EBX test EDX,EDX push ESI push EDI mov dword ptr 034h[ESP],0 mov dword ptr 038h[ESP],0 je L52 L1E: mov ESI,offset FLAT:_D5test23V3a6__initZ lea EDI,01Ch[ESP] movsd movsd movsd movsd movsd movsd xor EBX,EBX mov EAX,ECX mov 0Ch[ESP],ECX inc ECX cmp ECX,EDX mov 010h[ESP],EBX fild long64 ptr 0Ch[ESP] fstp qword ptr 024h[ESP] fld qword ptr 024h[ESP] fadd qword ptr 034h[ESP] fstp qword ptr 034h[ESP] jb L1E L52: fld qword ptr 034h[ESP] pop EDI pop ESI pop EBX add ESP,030h ret _D5test25spam2FNaNbxkZd: sub ESP,030h mov EDX,EAX xor ECX,ECX push EBX test EDX,EDX mov dword ptr 02Ch[ESP],0 mov dword ptr 030h[ESP],0 je L63 L1C: mov 4[ESP],ECX xor EBX,EBX mov EAX,ECX mov 8[ESP],EBX inc ECX cmp ECX,EDX fild long64 ptr 4[ESP] fstp qword ptr 014h[ESP] mov 4[ESP],EAX mov 8[ESP],EBX fild long64 ptr 4[ESP] fstp qword ptr 01Ch[ESP] mov 4[ESP],EAX mov 8[ESP],EBX fild long64 ptr 
4[ESP] fld qword ptr 01Ch[ESP] fxch ST1 fstp qword ptr 024h[ESP] fadd qword ptr 02Ch[ESP] fstp qword ptr 02Ch[ESP] jb L1C L63: fld qword ptr 02Ch[ESP] pop EBX add ESP,030h ret One visible difference is that block of movsd: movsd movsd movsd movsd movsd movsd Compiling with ldc2 (V. 0.11.0, based on DMD v2.062 and LLVM 3.3svn, -O5 -release -profile-verifier-noassert): __D5test25spam1FNaNbxkZd: pushl %ebp movl %esp, %ebp andl $-8, %esp subl $8, %esp xorps %xmm0, %xmm0 testl %eax, %eax je LBB1_1 movsd LCPI1_0, %xmm2 xorps %xmm1, %xmm1 .align 16, 0x90 LBB1_3: addsd %xmm0, %xmm1 addsd %xmm2, %xmm0 decl %eax jne LBB1_3 jmp LBB1_4 LBB1_1: xorps %xmm1, %xmm1 LBB1_4: movsd %xmm1, (%esp) fldl (%esp) movl %ebp, %esp popl %ebp ret __D5test25spam2FNaNbxkZd: pushl %ebp movl %esp, %ebp andl $-8, %esp subl $8, %esp xorps %xmm0, %xmm0 testl %eax, %eax je LBB2_3 movsd LCPI2_0, %xmm1 xorps %xmm2, %xmm2 .align 16, 0x90 LBB2_2: addsd %xmm2, %xmm0 addsd %xmm1, %xmm2 decl %eax jne LBB2_2 LBB2_3: movsd %xmm0, (%esp) fldl (%esp) movl %ebp, %esp popl %ebp ret You see that ldc2 compiles the two functions at the same way, and indeed their run-time is the same. But the problem is not limited to DMD. As soon as your program becomes a little longer than such tiny benchmark, the performance difference between structs with constructors and struct without constructors becomes well visible even with ldc2. In attach you find a small single-module ray tracer, it contains a V3 struct. It contains a basic constructor like this: this(in double x_, in double y_, in double z_) pure nothrow { this.x = x_; this.y = y_; this.z = z_; } If you run the program with or without that constructor, using both ldc2 and dmd, you see a significant performance difference.!!!There are attachements in the bugzilla issue that have not been copied over!!!
The text was updated successfully, but these errors were encountered: