From 4f5edf9a7ca5a341528e8ab785ce515e2e95fce9 Mon Sep 17 00:00:00 2001 From: Erez Freiberger Date: Thu, 30 Mar 2017 16:29:57 +0300 Subject: [PATCH] container ssa: use readiness probe Instead of accessing the /healthz endpoint we can use kubernetes's readinessProbe to do this for us and use the standard API instead of the pod proxy which is less documented. This also simplifies the code. --- .../container_manager/scanning/job.rb | 54 ++++++++++--------- .../container_manager/scanning/job_spec.rb | 31 ++++++----- .../container_manager/scanning/job.yml | 41 -------------- 3 files changed, 45 insertions(+), 81 deletions(-) delete mode 100644 spec/vcr_cassettes/manageiq/providers/kubernetes/container_manager/scanning/job.yml diff --git a/app/models/manageiq/providers/kubernetes/container_manager/scanning/job.rb b/app/models/manageiq/providers/kubernetes/container_manager/scanning/job.rb index f89659c1ab4..e4e42628efd 100644 --- a/app/models/manageiq/providers/kubernetes/container_manager/scanning/job.rb +++ b/app/models/manageiq/providers/kubernetes/container_manager/scanning/job.rb @@ -80,34 +80,32 @@ def start queue_signal(:pod_wait) end + def poll_pod_wait + queue_signal(:pod_wait, :deliver_on => POD_POLL_INTERVAL.seconds.from_now.utc) + end + def pod_wait _log.info("waiting for pod #{pod_full_name} to be available") - client = kubernetes_client - health_url = pod_proxy_url(client, INSPECTOR_HEALTH_PATH) - http_options = { - :use_ssl => health_url.scheme == 'https', - :verify_mode => ext_management_system.verify_ssl_mode, - :cert_store => ext_management_system.ssl_cert_store, - } - - # TODO: move this to a more appropriate place (lib) - response = pod_health_poll(client, health_url, http_options) - - case response - when Net::HTTPOK + begin + statuses = kubernetes_client.get_pod(options[:pod_name], options[:pod_namespace])[:status].try(:containerStatuses) + unless statuses + _log.info("No containerStatuses for pod #{options[:pod_name]}") + return poll_pod_wait + end + 
ready = statuses[0][:ready] + rescue SocketError, KubeException => e + msg = "unknown access error to pod #{pod_full_name}: [#{e.message}]" + _log.info(msg) + return queue_signal(:abort_job, msg, "error") + end + if ready _log.info("pod #{pod_full_name} is ready and accessible") - queue_signal(:analyze) - when Net::HTTPServiceUnavailable - # TODO: check that the pod wasn't terminated (exit code) - # continue: pod is still not up and running - _log.info("pod #{pod_full_name} is not available") - queue_signal(:pod_wait, - :deliver_on => POD_POLL_INTERVAL.seconds.from_now.utc) + return queue_signal(:analyze) else - msg = "unknown access error to pod #{pod_full_name}: #{response}" - _log.info(msg) - queue_signal(:abort_job, msg, "error") + # continue: pod is still not up and running + _log.info("pod #{pod_full_name} is not ready") + return poll_pod_wait end end @@ -394,7 +392,15 @@ def pod_definition(inspector_admin_secret_name) :name => "docker-socket" } ], - :env => inspector_proxy_env_variables + :env => inspector_proxy_env_variables, + :readinessProbe => { + "initialDelaySeconds" => 15, + "periodSeconds" => 5, + "httpGet" => { + "path" => "/healthz", + "port" => options[:pod_port] + } + } } ], :volumes => [ diff --git a/spec/models/manageiq/providers/kubernetes/container_manager/scanning/job_spec.rb b/spec/models/manageiq/providers/kubernetes/container_manager/scanning/job_spec.rb index f2cf98904b5..2394173e875 100644 --- a/spec/models/manageiq/providers/kubernetes/container_manager/scanning/job_spec.rb +++ b/spec/models/manageiq/providers/kubernetes/container_manager/scanning/job_spec.rb @@ -19,6 +19,11 @@ def get_pod(*_args) :annotations => { 'manageiq.org/jobid' => '5' } + }, + :status => { + :containerStatuses => [ + { :ready => true }, + ] } ) end @@ -137,10 +142,8 @@ def fetch_oscap_arf before(:each) do allow_any_instance_of(described_class).to receive_messages(:collect_compliance_data) unless OpenscapResult.openscap_available? 
- VCR.use_cassette(described_class.name.underscore, :record => :none) do # needed for health check - expect(@job.state).to eq 'waiting_to_start' - @job.signal(:start) - end + expect(@job.state).to eq 'waiting_to_start' + @job.signal(:start) end it 'should report success' do @@ -258,21 +261,17 @@ def fetch_oscap_arf allow_any_instance_of(described_class).to receive_messages(:collect_compliance_data) unless OpenscapResult.openscap_available? allow_any_instance_of(described_class).to receive_messages( :image_inspector_client => MockImageInspectorClient.new(MODIFIED_IMAGE_ID, IMAGE_ID)) - VCR.use_cassette(described_class.name.underscore, :record => :none) do # needed for health check - @job.signal(:start) - expect(@job.state).to eq 'finished' - expect(@job.status).to eq 'ok' - end + @job.signal(:start) + expect(@job.state).to eq 'finished' + expect(@job.status).to eq 'ok' end it 'should report the error' do - VCR.use_cassette(described_class.name.underscore, :record => :none) do # needed for health check - @job.signal(:start) - expect(@job.state).to eq 'finished' - expect(@job.status).to eq 'error' - expect(@job.message).to eq "cannot analyze image #{IMAGE_NAME} with id #{IMAGE_ID[0..11]}:"\ - " detected ids were #{MODIFIED_IMAGE_ID[0..11]}" - end + @job.signal(:start) + expect(@job.state).to eq 'finished' + expect(@job.status).to eq 'error' + expect(@job.message).to eq "cannot analyze image #{IMAGE_NAME} with id #{IMAGE_ID[0..11]}:"\ + " detected ids were #{MODIFIED_IMAGE_ID[0..11]}" end end diff --git a/spec/vcr_cassettes/manageiq/providers/kubernetes/container_manager/scanning/job.yml b/spec/vcr_cassettes/manageiq/providers/kubernetes/container_manager/scanning/job.yml deleted file mode 100644 index d0782432a40..00000000000 --- a/spec/vcr_cassettes/manageiq/providers/kubernetes/container_manager/scanning/job.yml +++ /dev/null @@ -1,41 +0,0 @@ ---- -http_interactions: -- request: - method: get - uri: https://test.com/healthz - body: - encoding: US-ASCII - string: '' 
- headers: - Accept-Encoding: - - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 - Accept: - - "*/*" - User-Agent: - - Ruby - response: - status: - code: 200 - message: Moved Temporarily - headers: - Server: - - nginx/1.7.12 - Date: - - Mon, 07 Dec 2015 14:16:18 GMT - Content-Type: - - text/html - Content-Length: - - '161' - Connection: - - keep-alive - Keep-Alive: - - timeout=20 - Location: - - https://www.test.com/ - body: - encoding: UTF-8 - string: "\r\n302 Found\r\n\r\n

302 - Found

\r\n
nginx/1.7.12
\r\n\r\n\r\n" - http_version: - recorded_at: Mon, 07 Dec 2015 14:16:18 GMT -recorded_with: VCR 2.9.3