-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -248,29 +248,20 @@ module Crystal | |
|
||
private def codegen(program, units : Array(CompilationUnit), lib_flags, output_filename, output_dir) | ||
object_names = units.map &.object_filename | ||
multithreaded = LLVM.start_multithreaded | ||
|
||
# First write bitcodes: it breaks if we paralellize it | ||
unless multithreaded | ||
Crystal.timing("Codegen (crystal)", @stats) do | ||
units.each &.write_bitcode | ||
end | ||
end | ||
|
||
msg = multithreaded ? "Codegen (bc+obj)" : "Codegen (obj)" | ||
target_triple = target_machine.triple | ||
|
||
Crystal.timing(msg, @stats) do | ||
Crystal.timing("Codegen (bc+obj)", @stats) do | ||
if units.size == 1 | ||
first_unit = units.first | ||
|
||
codegen_single_unit(program, first_unit, target_triple, multithreaded) | ||
codegen_single_unit(program, first_unit, target_triple) | ||
|
||
if emit = @emit | ||
first_unit.emit(emit, emit_base_filename || output_filename) | ||
end | ||
else | ||
codegen_many_units(program, units, target_triple, multithreaded) | ||
codegen_many_units(program, units, target_triple) | ||
end | ||
end | ||
|
||
|
@@ -288,37 +279,28 @@ module Crystal | |
end | ||
end | ||
|
||
private def codegen_many_units(program, units, target_triple, multithreaded) | ||
private def codegen_many_units(program, units, target_triple) | ||
jobs_count = 0 | ||
wait_channel = Channel(Nil).new(@n_threads) | ||
|
||
while unit = units.pop? | ||
fork_and_codegen_single_unit(program, unit, target_triple, multithreaded, wait_channel) | ||
units.each_slice(Math.max(units.size / @n_threads, 1)) do |slice| | ||
jobs_count += 1 | ||
|
||
if jobs_count >= @n_threads | ||
wait_channel.receive | ||
jobs_count -= 1 | ||
spawn do | ||
codegen_process = fork do | ||
slice.each do |unit| | ||
codegen_single_unit(program, unit, target_triple) | ||
end | ||
end | ||
codegen_process.wait | ||
wait_channel.send nil | ||
end | ||
end | ||
|
||
while jobs_count > 0 | ||
wait_channel.receive | ||
jobs_count -= 1 | ||
end | ||
end | ||
|
||
private def fork_and_codegen_single_unit(program, unit, target_triple, multithreaded, wait_channel) | ||
spawn do | ||
codegen_process = fork { codegen_single_unit(program, unit, target_triple, multithreaded) } | ||
codegen_process.wait | ||
wait_channel.send nil | ||
end | ||
jobs_count.times { wait_channel.receive } | ||
end | ||
|
||
private def codegen_single_unit(program, unit, target_triple, multithreaded) | ||
private def codegen_single_unit(program, unit, target_triple) | ||
unit.llvm_mod.target = target_triple | ||
unit.write_bitcode if multithreaded | ||
unit.compile | ||
end | ||
|
||
|
@@ -420,16 +402,11 @@ module Crystal | |
end | ||
end | ||
|
||
def write_bitcode | ||
llvm_mod.write_bitcode(bc_name_new) | ||
end | ||
|
||
def compile | ||
bc_name = self.bc_name | ||
bc_name_new = self.bc_name_new | ||
object_name = self.object_name | ||
|
||
must_compile = true | ||
memory_buffer = llvm_mod.write_bitcode | ||
|
||
# To compile a file we first generate a `.bc` file and then | ||
# create an object file from it. These `.bc` files are stored | ||
|
@@ -442,18 +419,26 @@ module Crystal | |
# the `.o` file will also be the same, so we simply reuse the | ||
# old one. Generating an `.o` file is what takes most time. | ||
if !compiler.emit && !@bc_flags_changed && File.exists?(bc_name) && File.exists?(object_name) | ||
if FileUtils.cmp(bc_name, bc_name_new) | ||
# If the user cancelled a previous compilation it might be that | ||
# the .o file is empty | ||
if File.size(object_name) > 0 | ||
File.delete bc_name_new | ||
must_compile = false | ||
end | ||
memory_io = IO::Memory.new(memory_buffer.to_slice) | ||
changed = File.open(bc_name) { |bc_file| !FileUtils.cmp(bc_file, memory_io) } | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
asterite
Author
Member
|
||
|
||
# If the user cancelled a previous compilation | ||
# it might be that the .o file is empty | ||
if !changed && File.size(object_name) > 0 | ||
# We can skip compilation | ||
memory_buffer.dispose | ||
memory_buffer = nil | ||
else | ||
# We need to compile, so we'll write the memory buffer to file | ||
end | ||
end | ||
|
||
if must_compile | ||
File.rename(bc_name_new, bc_name) | ||
# If there's a memory buffer, it means we must create a .o from it | ||
if memory_buffer | ||
# Create the .bc file (for next compilations) | ||
File.write(bc_name, memory_buffer.to_slice) | ||
memory_buffer.dispose | ||
|
||
compiler.optimize llvm_mod if compiler.release? | ||
compiler.target_machine.emit_obj_to_file llvm_mod, object_name | ||
end | ||
|
@@ -492,10 +477,6 @@ module Crystal | |
"#{@output_dir}/#{@name}.bc" | ||
end | ||
|
||
def bc_name_new | ||
"#{@output_dir}/#{@name}.new.bc" | ||
end | ||
|
||
def ll_name | ||
"#{@output_dir}/#{@name}.ll" | ||
end | ||
|
6 comments
on commit c792139
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From what I've seen after compiling Kemal 5 times, this brings great improvements after the first compile.
0.20.1
crystal build --release --stats src/kemal.cr 3.59s user 0.15s system 101% cpu 3.683 total
Master
/crystal/bin/crystal build --release --stats 0.83s user 0.14s system 106% cpu
π
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@sdogruyol Cool! What if you compile many times without `--release`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@luislavena Nice! Is this on Linux or Mac? I expect performance on Linux to improve a lot more than on Mac, mostly because now only 8 forks are done instead of thousands.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@asterite Linux (4.2.0 x86_64) (sorry for the delayed response)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some quick compilations of Crystal (master) on Linux (Ubuntu 14.04, x86_64) without optimizations:
# crystal 0.20.1 (empty cache):
real 0m44.628s
user 0m36.692s
sys 0m42.833s
# master (empty cache):
real 0m30.164s
user 0m48.214s
sys 0m2.035s
# master (cache):
real 0m19.821s
user 0m22.709s
sys 0m1.577s
I note that `sys` time goes from above 40s to below 2s! There go the slow Linux forks. With a cache, compiling Crystal gets down from 44s to 20s, which is more than twice as fast overall. Good job!
Would it be better to store a checksum in the filename, or other metadata, then simply checksum the module in-memory? I imagine it would be faster.