Timeout support for 'heroku run'

heroku · Jan 24, 2023 · 8fa0e45 · 8fa0e45
1 parent eaa3eff
commit 8fa0e45
Show file tree

Hide file tree

Showing 4 changed files with 88 additions and 33 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,7 @@
 - Add support for GitHub Actions env vars (https://github.com/heroku/hatchet/pull/189)
 - Change HerokuRun#call to use exceptions for empty output retries in preparation of related work
 - Refactor run_shell! to use Open3.popen3 in preparation for timeout handling
+- Timeout support for 'heroku run' (in case of networking, hanging or "server boot" tests etc), defaults to 60 s, global override `$HATCHET_DEFAULT_RUN_TIMEOUT`, per-test option `:timeout`
 
 ## 7.4.0
 

diff --git a/lib/hatchet/app.rb b/lib/hatchet/app.rb
@@ -194,7 +194,8 @@ def run(cmd_type, command = DefaultCommand, options = {}, &block)
         app: self,
         retry_on_empty: options.fetch(:retry_on_empty, !ENV["HATCHET_DISABLE_EMPTY_RUN_RETRY"]),
         heroku: options[:heroku],
-        raw: options[:raw]
+        raw: options[:raw],
+        timeout: options.fetch(:timeout, (ENV["HATCHET_DEFAULT_RUN_TIMEOUT"] || 60).to_i)
       ).call
 
       return run_obj.output
@@ -248,7 +249,8 @@ def run_multi(command, options = {}, &block)
           app: self,
           retry_on_empty: options.fetch(:retry_on_empty, !ENV["HATCHET_DISABLE_EMPTY_RUN_RETRY"]),
           heroku: options[:heroku],
-          raw: options[:raw]
+          raw: options[:raw],
+          timeout: options.fetch(:timeout, (ENV["HATCHET_DEFAULT_RUN_TIMEOUT"] || 60).to_i)
         ).call
 
         yield run_obj.output, run_obj.status

diff --git a/lib/hatchet/heroku_run.rb b/lib/hatchet/heroku_run.rb
@@ -1,19 +1,33 @@
 require 'open3'
+require 'timeout'
 
 module Hatchet
   class BashResult
-    attr_reader :stdout, :stderr, :status
+    attr_reader :stdout, :stderr, :status, :status_obj
 
     def initialize(stdout:, stderr:, status:, set_global_status: false)
       @stdout = stdout
       @stderr = stderr
-      @status = status.respond_to?(:exitstatus) ? status.exitstatus : status.to_i
-      `exit #{@status}` if set_global_status
+      @status_obj = status
+      @status = @status_obj.exitstatus
+      return unless set_global_status
+      # we're now populating `$?` by hand for any callers that rely on that
+      if @status_obj.signaled?
+        # termination of 'heroku run' from our TERM signal
+        # a signaled program will not have an exit status
+        # the shell just represents that case as 128+$signal, so e.g. 128+15=143 for SIGTERM
+        # the correct behavior for a program is to terminate itself using the signal it received
+        # this will also produce the correct $? contents (signaled? again, termsig set, no exitstatus)
+        `kill -#{@status_obj.termsig} $$`
+      else
+        # the dyno exited and the CLI is reporting that back as an exit status
+        `exit #{@status}`
+      end
     end
 
     # @return [Boolean]
     def success?
-      @status == 0
+      @status_obj.success?
     end
 
     def failed?
@@ -49,6 +63,7 @@ def match(value)
   #
   class HerokuRun
     class HerokuRunEmptyOutputError < RuntimeError; end
+    class HerokuRunTimeoutError < RuntimeError; end
 
     attr_reader :command
 
@@ -58,15 +73,19 @@ def initialize(
       heroku: {},
       retry_on_empty: !ENV["HATCHET_DISABLE_EMPTY_RUN_RETRY"],
       raw: false,
-      stderr: $stderr)
+      stderr: $stderr,
+      timeout: 0)
 
       @raw = raw
       @app = app
+      @timeout = timeout
       @command = build_heroku_command(command, heroku || {})
       @retry_on_empty = retry_on_empty
       @stderr = stderr
       @result = nil
+      @dyno_id = nil
       @empty_fail_count = 0
+      @timeout_fail_count = 0
     end
 
     def result
@@ -89,14 +108,27 @@ def call
       rescue HerokuRunEmptyOutputError => e
         if @retry_on_empty and @empty_fail_count < 3
           @empty_fail_count += 1
-          message = String.new("Empty output from command #{@command}, retrying the command.")
+          message = String.new("Empty output from run #{@dyno_id} with command #{@command}, retrying...")
           message << "\nTo disable pass in `retry_on_empty: false` or set HATCHET_DISABLE_EMPTY_RUN_RETRY=1 globally"
           message << "\nfailed_count: #{@empty_fail_count}"
           message << "\nreleases: #{@app.releases}"
           message << "\n#{caller.join("\n")}"
           @stderr.puts message
           retry
         end
+      rescue HerokuRunTimeoutError => e
+        if @timeout_fail_count < 3
+          @timeout_fail_count += 1
+          message = String.new("Run #{@dyno_id} with command #{@command} timed out after #{@timeout} seconds, retrying...")
+          message << "\nstderr until moment of termination was: #{@result.stderr}"
+          message << "\nstdout until moment of termination was: #{@result.stdout}"
+          message << "\nTo disable pass in `timeout: 0` or set HATCHET_DEFAULT_RUN_TIMEOUT=0 globally"
+          message << "\nfailed_count: #{@timeout_fail_count}"
+          message << "\nreleases: #{@app.releases}"
+          message << "\n#{caller.join("\n")}"
+          @stderr.puts message
+          retry
+        end
       end
 
       self
@@ -106,34 +138,46 @@ def call
       ShellThrottle.new(platform_api: @app.platform_api).call do |throttle|
         run_shell!
         throw(:throttle) if @result.stderr.match?(/reached the API rate limit/)
+        raise HerokuRunTimeoutError if @result.status_obj.signaled? # program got terminated by our SIGTERM, raise
         raise HerokuRunEmptyOutputError if @result.stdout.empty?
       end
     end
 
     private def run_shell!
       r_stdout = ""
       r_stderr = ""
+      r_status = nil
+      @dyno_id = nil
       Open3.popen3(@command) do |stdin, stdout, stderr, wait_thread|
-        Thread.new do
-          begin
-            until stdout.eof? do
-              r_stdout += stdout.gets
+        begin
+          Timeout.timeout(@timeout) do
+            Thread.new do
+              begin
+                until stdout.eof? do
+                  r_stdout += stdout.gets
+                end
+              rescue IOError # eof? and gets race condition
+              end
             end
-          rescue IOError # eof? and gets race condition
-          end
-        end
-        Thread.new do
-          begin
-            until stderr.eof? do
-              r_stderr += stderr.gets
+            Thread.new do
+              begin
+                until stderr.eof? do
+                  r_stderr += line = stderr.gets
+                  if !@dyno_id and run = line.match(/, (run\.\d+)/)
+                    @dyno_id = run.captures.first
+                  end
+                end
+              rescue IOError # eof? and gets race condition
+              end
             end
-          rescue IOError # eof? and gets race condition
           end
+          r_status = wait_thread.value # wait for termination
+        rescue Timeout::Error
+          Process.kill("TERM", wait_thread.pid)
+          r_status = wait_thread.value # wait for termination
         end
-        r_status = wait_thread.value # wait for termination
       end
       @result = BashResult.new(stdout: r_stdout, stderr: r_stderr, status: r_status, set_global_status: true)
-      # FIXME: usage of $? does not allow a test to distinguish between a TERM of the 'heroku run' itself, or inside the dyno
       @status = $?
     end
 

diff --git a/spec/unit/heroku_run_spec.rb b/spec/unit/heroku_run_spec.rb
@@ -50,23 +50,27 @@ def app.name; "fake_app"; end
       run_obj = Hatchet::HerokuRun.new("ruby -v", app: @app, stderr: stderr)
 
       def run_obj.run_shell!
-        @result = Hatchet::BashResult.new(stdout: "", stderr: "", status: 1)
-        @status = Object.new
+        # to populate $? with a correct status variable
+        `exit 1`
+        @result = Hatchet::BashResult.new(stdout: "", stderr: "", status: $?)
+        @status = $?
       end
 
       run_obj.call
 
       expect(run_obj.instance_variable_get(:@empty_fail_count)).to eq(3)
-      expect(stderr.string).to include("retrying the command.")
+      expect(stderr.string).to include("retrying...")
     end
 
     it "retries 0 times on NON empty result" do
       stderr = StringIO.new
       run_obj = Hatchet::HerokuRun.new("ruby -v", app: @app, stderr: stderr)
 
       def run_obj.run_shell!
-        @result = Hatchet::BashResult.new(stdout: "not empty", stderr: "", status: 1)
-        @status = Object.new
+        # to populate $? with a correct status variable
+        `exit 1`
+        @result = Hatchet::BashResult.new(stdout: "not empty", stderr: "", status: $?)
+        @status = $?
       end
 
       run_obj.call
@@ -80,14 +84,16 @@ def run_obj.run_shell!
       run_obj = Hatchet::HerokuRun.new("ruby -v", app: @app, stderr: stderr, retry_on_empty: false)
 
       def run_obj.run_shell!
-        @result = Hatchet::BashResult.new(stdout: "", stderr: "", status: 1)
-        @status = Object.new
+        # to populate $? with a correct status variable
+        `exit 1`
+        @result = Hatchet::BashResult.new(stdout: "", stderr: "", status: $?)
+        @status = $?
       end
 
       run_obj.call
 
       expect(run_obj.instance_variable_get(:@empty_fail_count)).to eq(0)
-      expect(stderr.string).to_not include("retrying the command.")
+      expect(stderr.string).to_not include("retrying...")
     end
 
     it "retries work when message is delivered via stderr" do
@@ -116,14 +122,16 @@ def run_obj.run_shell!
         run_obj = Hatchet::HerokuRun.new("ruby -v", app: @app, stderr: stderr)
 
         def run_obj.run_shell!
-          @result = Hatchet::BashResult.new(stdout: "", stderr: "", status: 1)
-          @status = Object.new
+          # to populate $? with a correct status variable
+          `exit 1`
+          @result = Hatchet::BashResult.new(stdout: "", stderr: "", status: $?)
+          @status = $?
         end
 
         run_obj.call
 
         expect(run_obj.instance_variable_get(:@empty_fail_count)).to eq(0)
-        expect(stderr.string).to_not include("retrying the command.")
+        expect(stderr.string).to_not include("retrying...")
       ensure
         ENV["HATCHET_DISABLE_EMPTY_RUN_RETRY"] = original_env
       end