Compiler: optimize codegen performance
asterite committed Dec 16, 2016
1 parent 0ef2c6f commit c792139
Showing 1 changed file with 33 additions and 52 deletions.
85 changes: 33 additions & 52 deletions src/compiler/crystal/compiler.cr
@@ -248,29 +248,20 @@ module Crystal

     private def codegen(program, units : Array(CompilationUnit), lib_flags, output_filename, output_dir)
       object_names = units.map &.object_filename
-      multithreaded = LLVM.start_multithreaded
-
-      # First write bitcodes: it breaks if we paralellize it
-      unless multithreaded
-        Crystal.timing("Codegen (crystal)", @stats) do
-          units.each &.write_bitcode
-        end
-      end
-
-      msg = multithreaded ? "Codegen (bc+obj)" : "Codegen (obj)"
       target_triple = target_machine.triple

-      Crystal.timing(msg, @stats) do
+      Crystal.timing("Codegen (bc+obj)", @stats) do
         if units.size == 1
           first_unit = units.first

-          codegen_single_unit(program, first_unit, target_triple, multithreaded)
+          codegen_single_unit(program, first_unit, target_triple)

           if emit = @emit
             first_unit.emit(emit, emit_base_filename || output_filename)
           end
         else
-          codegen_many_units(program, units, target_triple, multithreaded)
+          codegen_many_units(program, units, target_triple)
         end
       end

@@ -288,37 +279,28 @@ module Crystal
         end
       end

-    private def codegen_many_units(program, units, target_triple, multithreaded)
+    private def codegen_many_units(program, units, target_triple)
       jobs_count = 0
       wait_channel = Channel(Nil).new(@n_threads)

-      while unit = units.pop?
-        fork_and_codegen_single_unit(program, unit, target_triple, multithreaded, wait_channel)
+      units.each_slice(Math.max(units.size / @n_threads, 1)) do |slice|
         jobs_count += 1

-        if jobs_count >= @n_threads
-          wait_channel.receive
-          jobs_count -= 1
+        spawn do
+          codegen_process = fork do
+            slice.each do |unit|
+              codegen_single_unit(program, unit, target_triple)
+            end
+          end
+          codegen_process.wait
+          wait_channel.send nil
         end
       end

-      while jobs_count > 0
-        wait_channel.receive
-        jobs_count -= 1
-      end
-    end
-
-    private def fork_and_codegen_single_unit(program, unit, target_triple, multithreaded, wait_channel)
-      spawn do
-        codegen_process = fork { codegen_single_unit(program, unit, target_triple, multithreaded) }
-        codegen_process.wait
-        wait_channel.send nil
-      end
+      jobs_count.times { wait_channel.receive }
     end

-    private def codegen_single_unit(program, unit, target_triple, multithreaded)
+    private def codegen_single_unit(program, unit, target_triple)
       unit.llvm_mod.target = target_triple
-      unit.write_bitcode if multithreaded
       unit.compile
     end

@@ -420,16 +402,11 @@ module Crystal
       end
     end

-    def write_bitcode
-      llvm_mod.write_bitcode(bc_name_new)
-    end
-
     def compile
       bc_name = self.bc_name
-      bc_name_new = self.bc_name_new
       object_name = self.object_name

-      must_compile = true
+      memory_buffer = llvm_mod.write_bitcode

       # To compile a file we first generate a `.bc` file and then
       # create an object file from it. These `.bc` files are stored
@@ -442,18 +419,26 @@
       # the `.o` file will also be the same, so we simply reuse the
       # old one. Generating an `.o` file is what takes most time.
       if !compiler.emit && !@bc_flags_changed && File.exists?(bc_name) && File.exists?(object_name)
-        if FileUtils.cmp(bc_name, bc_name_new)
-          # If the user cancelled a previous compilation it might be that
-          # the .o file is empty
-          if File.size(object_name) > 0
-            File.delete bc_name_new
-            must_compile = false
-          end
+        memory_io = IO::Memory.new(memory_buffer.to_slice)
+        changed = File.open(bc_name) { |bc_file| !FileUtils.cmp(bc_file, memory_io) }
+
RX14 (Contributor) commented on Dec 16, 2016:
Would it be better to store a checksum in the filename, or other metadata, then simply checksum the module in-memory? I imagine it would be faster.
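
For illustration, here is a rough sketch of that idea in Crystal, not code from the commit: store a small digest file next to the cached object file and compare it against a digest of the freshly generated bitcode. The checksum_name helper and the choice of MD5 are assumptions for the sketch, not part of the compiler.

require "crypto/md5"

# Hypothetical helper: true when the cached .o can be reused because the stored
# digest matches the digest of the bitcode we just generated.
def object_reusable?(checksum_name : String, object_name : String, bitcode : Bytes) : Bool
  return false unless File.exists?(checksum_name) && File.exists?(object_name)
  stored = File.read(checksum_name).strip
  fresh = Crypto::MD5.hex_digest(String.new(bitcode))
  stored == fresh && File.size(object_name) > 0
end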

asterite (Author, Member) commented on Dec 16, 2016:
We can store a separate checksum. The problem is, the case where we can optimize is when the contents are exactly the same, and an equal checksum doesn't imply the contents are identical... (that case is very unlikely, but possible)

RX14 (Contributor) commented on Dec 16, 2016:
Well, if you get a sha256sum collision then you're pretty much the luckiest person in the universe.

lribeiro commented on Dec 17, 2016:
@RX14 I second that: the odds of a sha256 collision are insanely low. And on the same file with just a few changes, it would be practically impossible.

refi64 (Contributor) commented on Dec 17, 2016:
And, for comparison, it's over 40 times more likely that an asteroid will hit and kill everything on earth than for an SHA-256 collision to happen.

asterite (Author, Member) commented on Dec 17, 2016:
I can try storing an MD5 instead of the bitcode. I never said sha256, where did that come from? :-)

We probably can't use sha256 in the compiler because then we'll depend on openssl, something we are avoiding right now.

I guess MD5 will work well too?

RX14 (Contributor) commented on Dec 17, 2016:
@asterite We don't need a cryptographic hash, so maybe it would be best to pick a hash function based on speed/simplicity.
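
As a concrete illustration of the kind of simple, non-cryptographic hash being suggested (written with current Crystal's wrapping-multiply operator; nothing in the commit uses this):

# FNV-1a, 64-bit: a tiny, fast, non-cryptographic hash over a byte slice.
def fnv1a_64(data : Bytes) : UInt64
  hash = 0xcbf29ce484222325_u64
  data.each do |byte|
    hash ^= byte
    hash = hash &* 0x100000001b3_u64 # wrapping multiply by the FNV-1a prime
  end
  hash
end

fnv1a_64("hello world".to_slice) # => a 64-bit digest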

asterite (Author, Member) commented on Dec 17, 2016:
Well, I just tried using MD5 checksums but it's about 3 times slower than what we have now. Computing MD5 is slower than just comparing raw bytes from a previous compilation...

RX14 (Contributor) commented on Dec 17, 2016:
@asterite that doesn't sound right to me. I would have thought that hashing a value in memory should be much faster than reading it from disk to compare...

asterite (Author, Member) commented on Dec 17, 2016:
Hm, I forgot to compile the compiler in release mode to test this. However...

require "file_utils"
require "crypto/md5"

time = Time.now
eq = File.open("big.log") do |f1|
  File.open("big.log") do |f2|
    FileUtils.cmp(f1, f2)
  end
end
puts "compare read: #{Time.now - time}"
p eq

string = File.read("big.log")
time = Time.now
Crypto::MD5.hex_digest(string)
puts "md5: #{Time.now - time}"

$ du -h big.log
109M	big.log

Output, with --release:

compare read: 00:00:00.0552930
true
md5: 00:00:01.3015940

So it does seem to be slower to compute an md5 hash than to compare files.

RX14 (Contributor) commented on Dec 17, 2016:
@asterite That's an unfair test: it uses streaming IO on one side but reads the whole file into memory on the other.

RX14 (Contributor) commented on Dec 17, 2016:
Oh, but it doesn't include the time of reading into memory...

asterite (Author, Member) commented on Dec 17, 2016:
Yes. Also, LLVM just writes everything to a memory buffer and then you have to write it back out. Maybe if we could stream from LLVM directly into an md5 hash it would be faster...

I must say that I was kind of surprised that hashing with md5 turned out to be slower than comparing files... maybe I'm doing something wrong and not noticing it.

RX14 (Contributor) commented on Dec 17, 2016:
Using openssl speed md5 I'm getting around 700MB/s on md5, so this just seems to be Crystal being super slow.

The 'numbers' are in 1000s of bytes per second processed.
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5              69275.87k   205260.59k   452296.87k   648269.82k   742446.42k
sha1             77490.51k   223112.04k   491776.77k   709430.27k   821138.77k

+        # If the user cancelled a previous compilation
+        # it might be that the .o file is empty
+        if !changed && File.size(object_name) > 0
+          # We can skip compilation
+          memory_buffer.dispose
+          memory_buffer = nil
+        else
+          # We need to compile, so we'll write the memory buffer to file
         end
       end

-      if must_compile
-        File.rename(bc_name_new, bc_name)
+      # If there's a memory buffer, it means we must create a .o from it
+      if memory_buffer
+        # Create the .bc file (for next compilations)
+        File.write(bc_name, memory_buffer.to_slice)
+        memory_buffer.dispose
+
         compiler.optimize llvm_mod if compiler.release?
         compiler.target_machine.emit_obj_to_file llvm_mod, object_name
       end
@@ -492,10 +477,6 @@ module Crystal
       "#{@output_dir}/#{@name}.bc"
     end

-    def bc_name_new
-      "#{@output_dir}/#{@name}.new.bc"
-    end
-
     def ll_name
       "#{@output_dir}/#{@name}.ll"
     end
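
To summarize the caching scheme the diff above introduces, here is a condensed, self-contained sketch; it is simplified (the emit and --release paths are omitted) and the helper name is made up:

require "file_utils"

# Simplified restatement of the new compile path: the bitcode stays in memory,
# is compared against the cached .bc on disk, and the .o is only regenerated
# when the bitcode actually changed.
def object_file_reusable?(bc_name : String, object_name : String, bitcode : Bytes) : Bool
  return false unless File.exists?(bc_name) && File.exists?(object_name)
  memory_io = IO::Memory.new(bitcode)
  unchanged = File.open(bc_name) { |bc_file| FileUtils.cmp(bc_file, memory_io) }
  # An empty .o means a previous compilation was cancelled half-way.
  unchanged && File.size(object_name) > 0
end

When this returns false, the buffer is written out as the new .bc and the object file is emitted again, which is the expensive step.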

6 comments on commit c792139

sdogruyol (Member) commented on c792139, Dec 16, 2016:

From what I've seen over 5 compilations of Kemal, this brings great improvements after the first compile.

0.20.1

crystal build --release --stats src/kemal.cr  3.59s user 0.15s system 101% cpu 3.683 total

Master

/crystal/bin/crystal build --release --stats   0.83s user 0.14s system 106% cpu 

πŸŽ‰

asterite (Author, Member) commented:

@sdogruyol Cool! What if you compile many times without --release?

luislavena (Contributor) commented:

asterite (Author, Member) commented:

@luislavena Nice! Is this on Linux or Mac? I expect performance on Linux to improve a lot more than on Mac, mostly because now only 8 forks are done instead of thousands.
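
For reference, a self-contained sketch of the fork-per-slice pattern described here, written against the commit-era stdlib (top-level fork with a block, integer division); the names are illustrative, not the compiler's own:

def compile_in_slices(units : Array(String), n_threads : Int32)
  wait_channel = Channel(Nil).new(n_threads)
  jobs_count = 0

  # Split the units into at most n_threads slices; each slice becomes one
  # child process instead of one process per unit.
  units.each_slice(Math.max(units.size / n_threads, 1)) do |slice|
    jobs_count += 1
    spawn do
      child = fork do
        slice.each { |unit| puts "codegen #{unit}" } # stand-in for real codegen
      end
      child.wait
      wait_channel.send nil
    end
  end

  # Wait until every forked slice has finished.
  jobs_count.times { wait_channel.receive }
end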

luislavena (Contributor) commented:

@asterite Linux (4.2.0 x86_64) (sorry for delayed response πŸ˜„)

ysbaddaden (Contributor) commented on c792139, Dec 18, 2016:

Some quick compilations of Crystal (master) on Linux (Ubuntu 14.04, x86_64) without optimizations:

# crystal 0.20.1 (empty cache):
real    0m44.628s
user    0m36.692s
sys     0m42.833s

# master (empty cache):
real    0m30.164s
user    0m48.214s
sys     0m2.035s

# master (cache):
real    0m19.821s
user    0m22.709s
sys     0m1.577s

I note that sys time goes from above 40s to below 2s! There go the slow Linux forks. With a cache, compiling Crystal goes down from 44s to 20s, which is more than twice as fast overall. Good job!
