diff --git a/app/shared/github_handler.rb b/app/shared/github_handler.rb
index e8bc4a1a..11dc23d6 100644
--- a/app/shared/github_handler.rb
+++ b/app/shared/github_handler.rb
@@ -1,4 +1,5 @@
 require "octokit"
+require "faraday"
 require_relative "logging_module"
 
 # TODO: eventually we'll want to consider handling all these:
@@ -40,33 +41,59 @@ def self.included(klass)
     end
 
     def github_action(client, &block)
-      # `retry` retains the variables through iterations so we assign to 0 the first time.
-      retry_count ||= 0
       if client.kind_of?(Octokit::Client)
+        # `rate_limit_retry_count` retains its value across `retry` iterations so we assign to 0 the first time.
+        rate_limit_retry_count ||= 0
         begin
           if client.rate_limit!.remaining.zero?
-            sleep_time = client.rate_limit!.resets_in
-            logger.debug("Rate Limit exceeded, sleeping for #{sleep_time} seconds")
-            sleep(sleep_time)
+            rate_limit_reset_time_length = client.rate_limit!.resets_in
+            logger.debug("Rate Limit exceeded, sleeping for #{rate_limit_reset_time_length} seconds")
+            sleep(rate_limit_reset_time_length)
           end
-        rescue Octokit::TooManyRequests => ex
-          logger.error(ex)
-          raise ex
         rescue Octokit::Unauthorized => ex # Maybe the token does not give access to rate limits.
+          logger.error("Your GitHub Personal Auth Token is not authorized to check the rate_limit")
           logger.error(ex)
+          # We want to die now, since this is a server config issue
+          # Ultimately, this shouldn't kill the server, but rather, send a notification
+          # TODO: accomplish the above ^
+          raise ex
+        rescue Octokit::ServerError, Octokit::TooManyRequests, Faraday::ConnectionFailed => ex
+          if (rate_limit_retry_count += 1) < 5
+            rate_limit_sleep_length = 2**rate_limit_retry_count
+            logger.debug("Unable to get rate limit, sleeping for #{rate_limit_sleep_length} seconds and retrying")
+            logger.debug(ex)
+            sleep(rate_limit_sleep_length)
+            retry
+          end
+          logger.debug("Unable to get rate limit after retrying multiple times, failing")
+          # Ultimately, this shouldn't kill the server, but rather, send a notification
+          # TODO: accomplish the above ^
+          raise ex
         end
       end
+
+      # `retry_count` retains its value across `retry` iterations so we assign to 0 the first time.
+      retry_count ||= 0
       begin
         return block.call(client)
-      rescue Octokit::ServerError => ex
+      rescue Octokit::ServerError, Octokit::TooManyRequests, Faraday::ConnectionFailed => ex
         if (retry_count += 1) < 5
           # exponential backoff
           sleep_length = 2**retry_count
           logger.debug("A GitHub action failed, sleeping for #{sleep_length} seconds and retrying")
+          logger.debug(ex)
           sleep(sleep_length)
           retry
         end
-
+        logger.debug("Unable to perform GitHub action after retrying multiple times, failing")
+        # Ultimately, this shouldn't kill the server, but rather, send a notification
+        # TODO: accomplish the above ^
+        raise ex
+      rescue Octokit::Unauthorized => ex # Maybe the token is not authorized to perform this action.
+        logger.error("Your GitHub Personal Auth Token is not authorized to perform the GitHub action")
+        logger.error(ex)
+        # Ultimately, this shouldn't kill the server, but rather, send a notification
+        # TODO: accomplish the above ^
         raise ex
       end
     end
diff --git a/app/workers/worker_base.rb b/app/workers/worker_base.rb
index 78c75f25..9c88bf52 100644
--- a/app/workers/worker_base.rb
+++ b/app/workers/worker_base.rb
@@ -17,23 +17,39 @@ def thread_id
     end
 
     def initialize
+      # TODO: do we need a thread here to do the work or does `scheduler.schedule` handle that?
       @thread = Thread.new do
         begin
           # We have the `work` inside a `begin rescue`
           # so that if something fails, the thread still is alive
           scheduler.schedule do
-            work
-            # If we're running in debug mode, don't run these things continuously
-            if ENV["FASTLANE_CI_THREAD_DEBUG_MODE"]
-              logger.debug("Stopping worker after this work unit")
-              die!
+            begin
+              # This can cause an exception; in production mode we don't re-raise the exception,
+              # in development mode we re-raise so we can catch it and understand how to handle it
+              work
+              # If we're running in debug mode, don't run these things continuously
+              if ENV["FASTLANE_CI_THREAD_DEBUG_MODE"]
+                logger.debug("Stopping worker after this work unit")
+                die!
+              end
+            rescue StandardError => ex
+              logger.error("[#{self.class} Exception], work unit caused exception: #{ex}")
+              logger.error(ex.backtrace.join("\n"))
+              if Thread.abort_on_exception == true
+                logger.error("[#{self.class}] Thread.abort_on_exception is `true`, killing task and re-raising exception")
+                die!
+                raise ex
+              end
             end
           end
         rescue StandardError => ex
-          logger.error("[#{self.class} Exception]: #{ex}: ")
+          logger.error("Worker scheduler had a problem")
           logger.error(ex.backtrace.join("\n"))
-          logger.error("[#{self.class}] Killing thread #{thread_id} due to exception\n")
-          die!
+          if Thread.abort_on_exception == true
+            logger.error("[#{self.class}] Thread.abort_on_exception is `true`, killing task and re-raising exception")
+            die!
+            raise ex
+          end
         end
       end
     end
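
Both new `rescue Octokit::ServerError, Octokit::TooManyRequests, Faraday::ConnectionFailed` branches implement the same strategy: exponential backoff capped at five attempts, sleeping 2, 4, 8, then 16 seconds before re-raising. A standalone sketch of that pattern follows; the `with_backoff` helper, its parameter names, and the commented usage are illustrative and not part of the diff.

# Retry the block on the given transient errors, sleeping 2**attempt seconds
# between tries (2, 4, 8, 16), and re-raise after the fifth failed attempt,
# mirroring the `if (retry_count += 1) < 5` checks in the diff above.
def with_backoff(transient_errors, max_attempts: 5)
  attempt = 0
  begin
    yield
  rescue *transient_errors => ex
    attempt += 1
    raise ex if attempt >= max_attempts
    sleep(2**attempt)
    retry
  end
end

# Example (assumes an authenticated Octokit::Client in `client`):
# with_backoff([Octokit::ServerError, Octokit::TooManyRequests, Faraday::ConnectionFailed]) do
#   client.combined_status("fastlane/ci", "master")
# end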
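For context on how the wrapper is consumed: every Octokit call is meant to go through `github_action`, so callers get the rate-limit wait and the backoff retries transparently. A minimal usage sketch follows, assuming the mixin is named `GitHubHandler` (taken from the file name, not the diff), that it provides `logger` via the required logging module, and that the `CommitStatusUpdater` class, token, and repo slug are hypothetical.

require "octokit"

# Hypothetical consumer: only the `github_action(client) { ... }` call shape comes from the diff.
class CommitStatusUpdater
  include GitHubHandler # assumed module name, based on app/shared/github_handler.rb

  def initialize(api_token)
    @client = Octokit::Client.new(access_token: api_token)
  end

  def mark_pending(repo_slug, sha)
    # Rate-limit sleeps and exponential-backoff retries apply here automatically.
    github_action(@client) do |client|
      client.create_status(repo_slug, sha, "pending", context: "fastlane.ci")
    end
  end
end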