From 7c352e2d1a1f92855bcaf27c50be171eb20bafde Mon Sep 17 00:00:00 2001 From: Anatolii Bazko Date: Wed, 21 Oct 2020 10:02:24 +0300 Subject: [PATCH] feat: Detects issues with downloading images and starting containers (#908) * Detect issues while deploying Eclipse Che Signed-off-by: Anatolii Bazko * Revert .gitignore Signed-off-by: Anatolii Bazko * Fixed remarks Signed-off-by: Anatolii Bazko * Fix importing Signed-off-by: Anatolii Bazko --- README.md | 2 +- src/api/kube.ts | 76 ++++++++-------- src/tasks/che.ts | 17 ++-- src/tasks/installers/operator.ts | 8 ++ src/tasks/kube.ts | 147 ++++++++++++++++++++++++++----- test/api/kube.test.ts | 53 +---------- 6 files changed, 177 insertions(+), 126 deletions(-) diff --git a/README.md b/README.md index c413b6489..de2f128c1 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ If you're using linux or macOS, here is how to install chectl by using one singl ``` $ bash <(curl -sL https://www.eclipse.org/che/chectl/) ``` - + - For `next` channel: ``` $ bash <(curl -sL https://www.eclipse.org/che/chectl/) --channel=next diff --git a/src/api/kube.ts b/src/api/kube.ts index e7108bff5..56ba9ee0d 100644 --- a/src/api/kube.ts +++ b/src/api/kube.ts @@ -8,7 +8,7 @@ * SPDX-License-Identifier: EPL-2.0 **********************************************************************/ -import { ApiextensionsV1beta1Api, ApisApi, AppsV1Api, AuthorizationV1Api, BatchV1Api, CoreV1Api, CustomObjectsApi, ExtensionsV1beta1Api, ExtensionsV1beta1IngressList, KubeConfig, Log, PortForward, RbacAuthorizationV1Api, V1beta1CustomResourceDefinition, V1ClusterRole, V1ClusterRoleBinding, V1ConfigMap, V1ConfigMapEnvSource, V1Container, V1Deployment, V1DeploymentList, V1DeploymentSpec, V1EnvFromSource, V1Job, V1JobSpec, V1LabelSelector, V1NamespaceList, V1ObjectMeta, V1PersistentVolumeClaimList, V1Pod, V1PodList, V1PodSpec, V1PodTemplateSpec, V1PolicyRule, V1Role, V1RoleBinding, V1RoleRef, V1Secret, V1SelfSubjectAccessReview, V1SelfSubjectAccessReviewSpec, 
V1Service, V1ServiceAccount, V1ServiceList, V1Subject, Watch } from '@kubernetes/client-node' +import { ApiextensionsV1beta1Api, ApisApi, AppsV1Api, AuthorizationV1Api, BatchV1Api, CoreV1Api, CustomObjectsApi, ExtensionsV1beta1Api, ExtensionsV1beta1IngressList, KubeConfig, Log, PortForward, RbacAuthorizationV1Api, V1beta1CustomResourceDefinition, V1ClusterRole, V1ClusterRoleBinding, V1ConfigMap, V1ConfigMapEnvSource, V1Container, V1ContainerStateWaiting, V1Deployment, V1DeploymentList, V1DeploymentSpec, V1EnvFromSource, V1Job, V1JobSpec, V1LabelSelector, V1NamespaceList, V1ObjectMeta, V1PersistentVolumeClaimList, V1Pod, V1PodCondition, V1PodList, V1PodSpec, V1PodTemplateSpec, V1PolicyRule, V1Role, V1RoleBinding, V1RoleRef, V1Secret, V1SelfSubjectAccessReview, V1SelfSubjectAccessReviewSpec, V1Service, V1ServiceAccount, V1ServiceList, V1Subject, Watch } from '@kubernetes/client-node' import { Cluster, Context } from '@kubernetes/client-node/dist/config_types' import axios, { AxiosRequestConfig } from 'axios' import { cli } from 'cli-ux' @@ -348,7 +348,7 @@ export class KubeHelper { } } - async getPodListByLabel(namespace= '', labelSelector: string): Promise { + async getPodListByLabel(namespace: string, labelSelector: string): Promise { const k8sCoreApi = KubeHelper.KUBE_CONFIG.makeApiClient(CoreV1Api) try { const { body: podList } = await k8sCoreApi.listNamespacedPod(namespace, undefined, undefined, undefined, undefined, labelSelector) @@ -659,24 +659,51 @@ export class KubeHelper { return (res.body.items.length > 0) } - async getPodPhase(labelSelector: string, namespace = ''): Promise { + /** + * Returns pod waiting state. 
+ */ + async getPodWaitingState(namespace: string, selector: string, desiredPhase: string): Promise<V1ContainerStateWaiting | undefined> { + const pods = await this.getPodListByLabel(namespace, selector) + if (!pods.length) { + return + } + + for (const pod of pods) { + if (pod.status && pod.status.phase === desiredPhase && pod.status.containerStatuses) { + for (const status of pod.status.containerStatuses) { + if (status.state && status.state.waiting && status.state.waiting.message && status.state.waiting.reason) { + return status.state.waiting + } + } + } + } + } + + async getPodCondition(namespace: string, selector: string, conditionType: string): Promise<V1PodCondition[]> { const k8sCoreApi = KubeHelper.KUBE_CONFIG.makeApiClient(CoreV1Api) let res try { - res = await k8sCoreApi.listNamespacedPod(namespace, undefined, undefined, undefined, undefined, labelSelector) + res = await k8sCoreApi.listNamespacedPod(namespace, undefined, undefined, undefined, undefined, selector) } catch (e) { throw this.wrapK8sClientError(e) } - if (!res || !res.body || !res.body.items || res.body.items.length !== 1) { - return + if (!res || !res.body || !res.body.items) { + return [] } - if (!res.body.items[0].status || !res.body.items[0].status.phase) { - return + const conditions: V1PodCondition[] = [] + for (const pod of res.body.items) { + if (pod.status && pod.status.conditions) { + for (const condition of pod.status.conditions) { + if (condition.type === conditionType) { + conditions.push(condition) + } + } + } } - return res.body.items[0].status.phase + return conditions } async getPodReadyConditionStatus(selector: string, namespace = ''): Promise { @@ -714,37 +741,6 @@ export class KubeHelper { } } - async waitForPodPhase(selector: string, targetPhase: string, namespace = '', intervalMs = 500, timeoutMs = this.podWaitTimeout) { - const iterations = timeoutMs / intervalMs - for (let index = 0; index < iterations; index++) { - let currentPhase = await this.getPodPhase(selector, namespace) - if (targetPhase === currentPhase) { - 
return - } - await cli.wait(intervalMs) - } - throw new Error(`ERR_TIMEOUT: Timeout set to pod wait timeout ${this.podWaitTimeout}`) - } - - async waitForPodPending(selector: string, namespace = '', intervalMs = 500, timeoutMs = this.podWaitTimeout) { - const iterations = timeoutMs / intervalMs - let podExist - let currentPhase - for (let index = 0; index < iterations; index++) { - podExist = await this.podsExistBySelector(selector, namespace) - if (podExist) { - currentPhase = await this.getPodPhase(selector, namespace) - if (currentPhase === 'Pending' || currentPhase === 'Running') { - return - } else { - throw new Error(`ERR_UNEXPECTED_PHASE: ${currentPhase} (Pending expected) `) - } - } - await cli.wait(intervalMs) - } - throw new Error(`ERR_TIMEOUT: Timeout set to pod wait timeout ${this.podWaitTimeout}. podExist: ${podExist}, currentPhase: ${currentPhase}`) - } - async waitForPodReady(selector: string, namespace = '', intervalMs = 500, timeoutMs = this.podReadyTimeout) { const iterations = timeoutMs / intervalMs for (let index = 0; index < iterations; index++) { diff --git a/src/tasks/che.ts b/src/tasks/che.ts index 87b9f494d..abf7ada7f 100644 --- a/src/tasks/che.ts +++ b/src/tasks/che.ts @@ -17,6 +17,7 @@ import { OpenShiftHelper } from '../api/openshift' import { VersionHelper } from '../api/version' import { DOC_LINK, DOC_LINK_OBTAIN_ACCESS_TOKEN, DOC_LINK_OBTAIN_ACCESS_TOKEN_OAUTH, DOC_LINK_RELEASE_NOTES } from '../constants' +import { OperatorTasks } from './installers/operator' import { KubeTasks } from './kube' /** @@ -46,8 +47,6 @@ export class CheTasks { pluginRegistryDeploymentName = 'plugin-registry' pluginRegistrySelector = 'app=che,component=plugin-registry' - cheOperatorSelector = 'app=che-operator' - cheConsoleLinkName = 'che' constructor(flags: any) { @@ -68,34 +67,34 @@ export class CheTasks { * * @see che.checkIfCheIsInstalledTasks */ - waitDeployedChe(flags: any, command: Command): ReadonlyArray { + waitDeployedChe(flags: any, _command: 
Command): ReadonlyArray { return [ { title: 'PostgreSQL pod bootstrap', skip: () => !flags.multiuser, enabled: ctx => ctx.isPostgresDeployed && !ctx.isPostgresReady, - task: () => this.kubeTasks.podStartTasks(command, this.postgresSelector, this.cheNamespace) + task: () => this.kubeTasks.podStartTasks(this.postgresSelector, this.cheNamespace) }, { title: 'Keycloak pod bootstrap', skip: () => !flags.multiuser, enabled: ctx => ctx.isKeycloakDeployed && !ctx.isKeycloakReady, - task: () => this.kubeTasks.podStartTasks(command, this.keycloakSelector, this.cheNamespace) + task: () => this.kubeTasks.podStartTasks(this.keycloakSelector, this.cheNamespace) }, { title: 'Devfile registry pod bootstrap', enabled: ctx => ctx.isDevfileRegistryDeployed && !ctx.isDevfileRegistryReady, - task: () => this.kubeTasks.podStartTasks(command, this.devfileRegistrySelector, this.cheNamespace) + task: () => this.kubeTasks.podStartTasks(this.devfileRegistrySelector, this.cheNamespace) }, { title: 'Plugin registry pod bootstrap', enabled: ctx => ctx.isPluginRegistryDeployed && !ctx.isPluginRegistryReady, - task: () => this.kubeTasks.podStartTasks(command, this.pluginRegistrySelector, this.cheNamespace) + task: () => this.kubeTasks.podStartTasks(this.pluginRegistrySelector, this.cheNamespace) }, { title: 'Eclipse Che pod bootstrap', enabled: ctx => !ctx.isCheReady, - task: () => this.kubeTasks.podStartTasks(command, this.cheSelector, this.cheNamespace) + task: () => this.kubeTasks.podStartTasks(this.cheSelector, this.cheNamespace) }, ...this.checkEclipseCheStatus() ] @@ -523,7 +522,7 @@ export class CheTasks { title: `${follow ? 
'Start following' : 'Read'} Operator logs`, skip: () => flags.installer !== 'operator' && flags.installer !== 'olm', task: async (ctx: any, task: any) => { - await this.che.readPodLog(flags.chenamespace, this.cheOperatorSelector, ctx.directory, follow) + await this.che.readPodLog(flags.chenamespace, OperatorTasks.CHE_OPERATOR_SELECTOR, ctx.directory, follow) task.title = `${task.title}...done` } }, diff --git a/src/tasks/installers/operator.ts b/src/tasks/installers/operator.ts index dd675ab2b..69f9c8fd6 100644 --- a/src/tasks/installers/operator.ts +++ b/src/tasks/installers/operator.ts @@ -17,10 +17,13 @@ import * as Listr from 'listr' import { KubeHelper } from '../../api/kube' import { CHE_CLUSTER_CRD } from '../../constants' import { isStableVersion } from '../../util' +import { KubeTasks } from '../kube' import { copyOperatorResources, createEclipseCheCluster, createNamespaceTask, updateEclipseCheCluster } from './common-tasks' export class OperatorTasks { + public static CHE_OPERATOR_SELECTOR = 'app=che-operator' + operatorServiceAccount = 'che-operator' operatorRole = 'che-operator' operatorClusterRole = 'che-operator' @@ -36,6 +39,7 @@ export class OperatorTasks { const clusterRoleName = `${flags.chenamespace}-${this.operatorClusterRole}` const clusterRoleBindingName = `${flags.chenamespace}-${this.operatorClusterRoleBinding}` const kube = new KubeHelper(flags) + const kubeTasks = new KubeTasks(flags) if (isStableVersion(flags)) { command.warn('Consider using the more reliable \'OLM\' installer when deploying a stable release of Eclipse Che (--installer=olm).') } @@ -149,6 +153,10 @@ export class OperatorTasks { } } }, + { + title: 'Operator pod bootstrap', + task: () => kubeTasks.podStartTasks(OperatorTasks.CHE_OPERATOR_SELECTOR, flags.chenamespace) + }, createEclipseCheCluster(flags, kube) ], { renderer: flags['listr-renderer'] as any }) } diff --git a/src/tasks/kube.ts b/src/tasks/kube.ts index 93ea4378e..b3b6a546e 100644 --- a/src/tasks/kube.ts +++ 
b/src/tasks/kube.ts @@ -7,52 +7,151 @@ * * SPDX-License-Identifier: EPL-2.0 **********************************************************************/ -import { Command } from '@oclif/command' +import { V1ContainerStateWaiting, V1PodCondition } from '@kubernetes/client-node' +import { cli } from 'cli-ux' import * as Listr from 'listr' import { KubeHelper } from '../api/kube' export class KubeTasks { - kube: KubeHelper - debug = require('debug') + kubeHelper: KubeHelper constructor(flags?: any) { - this.kube = new KubeHelper(flags) + this.kubeHelper = new KubeHelper(flags) } - podStartTasks(_command: Command, selector: string, namespace = ''): Listr { + podStartTasks(selector: string, namespace: string): Listr { return new Listr([ { - title: 'scheduling', + title: 'Scheduling', task: async (_ctx: any, task: any) => { - let phase - const title = task.title - try { - phase = await this.kube.getPodPhase(selector, namespace) - } catch (err) { - // not able to grab current phase - this.debug(err) - } - // wait only if not yet running - if (phase !== 'Running') { - await this.kube.waitForPodPending(selector, namespace) + // poll every 500 ms, up to 600 attempts (600 * 500 ms = 5 minutes of waiting) + for (let i = 1; i <= 600; i++) { + const failedCondition = await this.getFailedPodCondition(namespace, selector, 'PodScheduled') + if (failedCondition) { + task.title = `${task.title}...failed` + throw new Error(`Failed to schedule a pod, reason: ${failedCondition.reason}, message: ${failedCondition.message}`) + } + + const allScheduled = await this.isPodConditionStatusPassed(namespace, selector, 'PodScheduled') + if (allScheduled) { + task.title = `${task.title}...done.` + return + } + + await cli.wait(500) } - task.title = `${title}...done.` + + throw new Error(`Failed to schedule a pod: ${await this.getTimeOutErrorMessage(namespace, selector)}`) } }, { - title: 'downloading images', + title: 'Downloading images', task: async (_ctx: any, task: any) => { - await this.kube.waitForPodPhase(selector, 
'Running', namespace) - task.title = `${task.title}...done.` + // poll every 500 ms, up to 600 attempts (600 * 500 ms = 5 minutes of waiting) + for (let i = 1; i <= 600; i++) { + const failedState = await this.getFailedWaitingState(namespace, selector, 'Pending') + if (failedState) { + task.title = `${task.title}...failed` + throw new Error(`Failed to download image, reason: ${failedState.reason}, message: ${failedState.message}`) + } + + const pods = await this.kubeHelper.getPodListByLabel(namespace, selector) + const allRunning = !pods.some(value => !value.status || value.status.phase !== 'Running') + if (pods.length && allRunning) { + task.title = `${task.title}...done.` + return + } + + await cli.wait(500) + } + + throw new Error(`Failed to download image: ${await this.getTimeOutErrorMessage(namespace, selector)}`) } }, { - title: 'starting', + title: 'Starting', task: async (_ctx: any, task: any) => { - await this.kube.waitForPodReady(selector, namespace) - task.title = `${task.title}...done.` + // poll every 500 ms, up to 600 attempts (600 * 500 ms = 5 minutes of waiting) + for (let i = 1; i <= 600; i++) { + const failedState = await this.getFailedWaitingState(namespace, selector, 'Running') + if (failedState) { + task.title = `${task.title}...failed` + throw new Error(`Failed to start a pod, reason: ${failedState.reason}, message: ${failedState.message}`) + } + + const allStarted = await this.isPodConditionStatusPassed(namespace, selector, 'Ready') + if (allStarted) { + task.title = `${task.title}...done.` + return + } + + await cli.wait(500) + } + + throw new Error(`Failed to start a pod: ${await this.getTimeOutErrorMessage(namespace, selector)}`) } } ]) } + + private async getFailedPodCondition(namespace: string, selector: string, conditionType: string): Promise<V1PodCondition | undefined> { + const status = await this.kubeHelper.getPodCondition(namespace, selector, conditionType) + const failedPod = status.find(s => s.status === 'False' && s.message && s.reason) + if (failedPod) { + // wait 10 sec, check again and only then 
fail + await cli.wait(10000) + + const condition = await this.kubeHelper.getPodCondition(namespace, selector, conditionType) + return condition.find(s => s.status === 'False' && s.message && s.reason) + } + } + + private async isPodConditionStatusPassed(namespace: string, selector: string, conditionType: string): Promise<boolean> { + const status = await this.kubeHelper.getPodCondition(namespace, selector, conditionType) + const allScheduled = !status.some(s => s.status !== 'True') + return !!status.length && allScheduled + } + + /** + * Checks whether a container of a pod in the given phase is stuck waiting; if it still is after a 10-second re-check, returns that waiting state (reason and message). + */ + private async getFailedWaitingState(namespace: string, selector: string, state: string): Promise<V1ContainerStateWaiting | undefined> { + const waitingState = await this.kubeHelper.getPodWaitingState(namespace, selector, state) + if (waitingState && waitingState.reason && waitingState.message) { + // wait 10 sec, check again and only then fail + await cli.wait(10000) + + const waitingState = await this.kubeHelper.getPodWaitingState(namespace, selector, state) + if (waitingState && waitingState.reason && waitingState.message) { + return waitingState + } + } + } + + /** + * Returns extended timeout error message explaining a failure. + */ + private async getTimeOutErrorMessage(namespace: string, selector: string): Promise<string> { + const pods = await this.kubeHelper.getPodListByLabel(namespace, selector) + if (!pods.length) { + return 'Timeout: there are no pods.' + } + + let errorMessage = 'Timeout:' + for (const pod of pods) { + errorMessage += `\nPod: ${pod.metadata!.name}` + if (pod.status) { + if (pod.status.containerStatuses) { + errorMessage += `\n\t\tstatus: ${JSON.stringify(pod.status.containerStatuses, undefined, ' ')}` + } + if (pod.status.conditions) { + errorMessage += `\n\t\tconditions: ${JSON.stringify(pod.status.conditions, undefined, ' ')}` + } + } else { + errorMessage += ', status not found.' 
+ } + } + + return errorMessage + } } diff --git a/test/api/kube.test.ts b/test/api/kube.test.ts index 89dec5c79..776a9a9a0 100644 --- a/test/api/kube.test.ts +++ b/test/api/kube.test.ts @@ -8,6 +8,7 @@ * SPDX-License-Identifier: EPL-2.0 **********************************************************************/ import { expect, fancy } from 'fancy-test' + import { KubeHelper } from '../../src/api/kube' const namespace = 'che' @@ -33,58 +34,6 @@ const kube = new KubeHelper({}) KubeHelper.KUBE_CONFIG.loadFromString(kubeContext) describe('Kube API helper', () => { - fancy - .nock(kubeClusterURL, api => api - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .replyWithFile(200, __dirname + '/replies/get-pod-by-selector-running.json', { 'Content-Type': 'application/json' })) - .it('retrieves the phase of a pod', async () => { - const selector = 'app=che' - const res = await kube.getPodPhase(selector, namespace) - expect(res).to.equal('Running') - }) - fancy - .nock(kubeClusterURL, api => api - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .replyWithFile(200, __dirname + '/replies/get-pod-by-selector-pending.json', { 'Content-Type': 'application/json' }) - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .replyWithFile(200, __dirname + '/replies/get-pod-by-selector-pending.json', { 'Content-Type': 'application/json' }) - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .replyWithFile(200, __dirname + '/replies/get-pod-by-selector-running.json', { 'Content-Type': 'application/json' })) - .it('waits until the pod is in the "Running" phase', async () => { - const selector = 'app=che' - const phase = 'Running' - const interval = 10 - const timeout = 1000 - await kube.waitForPodPhase(selector, phase, namespace, interval, timeout) - }) - fancy - .nock(kubeClusterURL, api => api - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .times(4) - .replyWithFile(200, __dirname 
+ '/replies/get-pod-by-selector-pending.json', { 'Content-Type': 'application/json' })) - .do(async () => { - const selector = 'app=che' - const phase = 'Running' - const interval = 10 - const timeout = 40 - await kube.waitForPodPhase(selector, phase, namespace, interval, timeout) - }) - .catch(err => expect(err.message).to.match(/ERR/)) - .it('fails if timeout is reached waiting for a pod "Running" phase') - fancy - .nock(kubeClusterURL, api => api - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .times(2) - .replyWithFile(200, __dirname + '/replies/get-pod-by-selector-not-existing.json', { 'Content-Type': 'application/json' }) - .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`) - .times(2) - .replyWithFile(200, __dirname + '/replies/get-pod-by-selector-pending.json', { 'Content-Type': 'application/json' })) - .it('waits until the pod is in the "Pending" phase', async () => { - const selector = 'app=che' - const interval = 10 - const timeout = 1000 - await kube.waitForPodPending(selector, namespace, interval, timeout) - }) fancy .nock(kubeClusterURL, api => api .get(`/api/v1/namespaces/${namespace}/pods?labelSelector=app%3Dche`)