Skip to content

Commit

Permalink
feat: Detects issues with downloading images and starting containers (#…
Browse files Browse the repository at this point in the history
…908)

* Detect issues while deploying Eclipse Che

Signed-off-by: Anatolii Bazko <abazko@redhat.com>

* Revert .gitignore

Signed-off-by: Anatolii Bazko <abazko@redhat.com>

* Fixed remarks

Signed-off-by: Anatolii Bazko <abazko@redhat.com>

* Fix importing

Signed-off-by: Anatolii Bazko <abazko@redhat.com>
  • Loading branch information
tolusha committed Oct 21, 2020
1 parent 6ee93eb commit 7c352e2
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 126 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ If you're using linux or macOS, here is how to install chectl by using one singl
```
$ bash <(curl -sL https://www.eclipse.org/che/chectl/)
```

- For `next` channel:
```
$ bash <(curl -sL https://www.eclipse.org/che/chectl/) --channel=next
Expand Down
76 changes: 36 additions & 40 deletions src/api/kube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
* SPDX-License-Identifier: EPL-2.0
**********************************************************************/

import { ApiextensionsV1beta1Api, ApisApi, AppsV1Api, AuthorizationV1Api, BatchV1Api, CoreV1Api, CustomObjectsApi, ExtensionsV1beta1Api, ExtensionsV1beta1IngressList, KubeConfig, Log, PortForward, RbacAuthorizationV1Api, V1beta1CustomResourceDefinition, V1ClusterRole, V1ClusterRoleBinding, V1ConfigMap, V1ConfigMapEnvSource, V1Container, V1Deployment, V1DeploymentList, V1DeploymentSpec, V1EnvFromSource, V1Job, V1JobSpec, V1LabelSelector, V1NamespaceList, V1ObjectMeta, V1PersistentVolumeClaimList, V1Pod, V1PodList, V1PodSpec, V1PodTemplateSpec, V1PolicyRule, V1Role, V1RoleBinding, V1RoleRef, V1Secret, V1SelfSubjectAccessReview, V1SelfSubjectAccessReviewSpec, V1Service, V1ServiceAccount, V1ServiceList, V1Subject, Watch } from '@kubernetes/client-node'
import { ApiextensionsV1beta1Api, ApisApi, AppsV1Api, AuthorizationV1Api, BatchV1Api, CoreV1Api, CustomObjectsApi, ExtensionsV1beta1Api, ExtensionsV1beta1IngressList, KubeConfig, Log, PortForward, RbacAuthorizationV1Api, V1beta1CustomResourceDefinition, V1ClusterRole, V1ClusterRoleBinding, V1ConfigMap, V1ConfigMapEnvSource, V1Container, V1ContainerStateWaiting, V1Deployment, V1DeploymentList, V1DeploymentSpec, V1EnvFromSource, V1Job, V1JobSpec, V1LabelSelector, V1NamespaceList, V1ObjectMeta, V1PersistentVolumeClaimList, V1Pod, V1PodCondition, V1PodList, V1PodSpec, V1PodTemplateSpec, V1PolicyRule, V1Role, V1RoleBinding, V1RoleRef, V1Secret, V1SelfSubjectAccessReview, V1SelfSubjectAccessReviewSpec, V1Service, V1ServiceAccount, V1ServiceList, V1Subject, Watch } from '@kubernetes/client-node'
import { Cluster, Context } from '@kubernetes/client-node/dist/config_types'
import axios, { AxiosRequestConfig } from 'axios'
import { cli } from 'cli-ux'
Expand Down Expand Up @@ -348,7 +348,7 @@ export class KubeHelper {
}
}

async getPodListByLabel(namespace= '', labelSelector: string): Promise<V1Pod[]> {
async getPodListByLabel(namespace: string, labelSelector: string): Promise<V1Pod[]> {
const k8sCoreApi = KubeHelper.KUBE_CONFIG.makeApiClient(CoreV1Api)
try {
const { body: podList } = await k8sCoreApi.listNamespacedPod(namespace, undefined, undefined, undefined, undefined, labelSelector)
Expand Down Expand Up @@ -659,24 +659,51 @@ export class KubeHelper {
return (res.body.items.length > 0)
}

async getPodPhase(labelSelector: string, namespace = ''): Promise<string | undefined> {
/**
 * Finds a container waiting state that explains why a pod is stuck in a given phase.
 *
 * Pods are looked up with `getPodListByLabel(namespace, selector)`. Only pods whose
 * `status.phase` equals `desiredPhase` are inspected, and only waiting states that
 * carry both a `message` and a `reason` are considered.
 *
 * @param namespace namespace passed to `getPodListByLabel`
 * @param selector label selector passed to `getPodListByLabel`
 * @param desiredPhase pod phase a pod must report to be inspected
 * @returns the first matching waiting state, or `undefined` when there is none
 */
async getPodWaitingState(namespace: string, selector: string, desiredPhase: string): Promise<V1ContainerStateWaiting | undefined> {
  const matchingPods = await this.getPodListByLabel(namespace, selector)

  for (const { status: podStatus } of matchingPods) {
    // Skip pods without a status, in another phase, or without container statuses.
    if (!podStatus || podStatus.phase !== desiredPhase || !podStatus.containerStatuses) {
      continue
    }

    for (const containerStatus of podStatus.containerStatuses) {
      const waiting = containerStatus.state && containerStatus.state.waiting
      if (waiting && waiting.message && waiting.reason) {
        return waiting
      }
    }
  }

  return undefined
}

async getPodCondition(namespace: string, selector: string, conditionType: string): Promise<V1PodCondition[]> {
const k8sCoreApi = KubeHelper.KUBE_CONFIG.makeApiClient(CoreV1Api)
let res
try {
res = await k8sCoreApi.listNamespacedPod(namespace, undefined, undefined, undefined, undefined, labelSelector)
res = await k8sCoreApi.listNamespacedPod(namespace, undefined, undefined, undefined, undefined, selector)
} catch (e) {
throw this.wrapK8sClientError(e)
}

if (!res || !res.body || !res.body.items || res.body.items.length !== 1) {
return
if (!res || !res.body || !res.body.items) {
return []
}

if (!res.body.items[0].status || !res.body.items[0].status.phase) {
return
const conditions: V1PodCondition[] = []
for (const pod of res.body.items) {
if (pod.status && pod.status.conditions) {
for (const condition of pod.status.conditions) {
if (condition.type === conditionType) {
conditions.push(condition)
}
}
}
}

return res.body.items[0].status.phase
return conditions
}

async getPodReadyConditionStatus(selector: string, namespace = ''): Promise<string | undefined> {
Expand Down Expand Up @@ -714,37 +741,6 @@ export class KubeHelper {
}
}

/**
 * Polls `getPodPhase(selector, namespace)` until it reports `targetPhase`.
 *
 * @param selector label selector passed to `getPodPhase`
 * @param targetPhase phase value that ends the wait
 * @param namespace namespace passed to `getPodPhase`
 * @param intervalMs pause between two polls, in milliseconds
 * @param timeoutMs used to derive the number of polls (`timeoutMs / intervalMs`)
 * @throws Error starting with `ERR_TIMEOUT` when no poll reported `targetPhase`
 */
async waitForPodPhase(selector: string, targetPhase: string, namespace = '', intervalMs = 500, timeoutMs = this.podWaitTimeout) {
  const iterations = timeoutMs / intervalMs
  for (let index = 0; index < iterations; index++) {
    const currentPhase = await this.getPodPhase(selector, namespace)
    if (targetPhase === currentPhase) {
      return
    }
    await cli.wait(intervalMs)
  }
  // Report the timeout that was actually applied; it differs from
  // this.podWaitTimeout whenever the caller passes an explicit timeoutMs.
  throw new Error(`ERR_TIMEOUT: Timeout set to pod wait timeout ${timeoutMs}`)
}

/**
 * Polls until a pod matching `selector` exists and reports the `Pending` or `Running` phase.
 *
 * Each poll first asks `podsExistBySelector`; only when a pod exists is the phase read
 * through `getPodPhase`.
 *
 * @param selector label selector passed to `podsExistBySelector` and `getPodPhase`
 * @param namespace namespace passed to `podsExistBySelector` and `getPodPhase`
 * @param intervalMs pause between two polls, in milliseconds
 * @param timeoutMs used to derive the number of polls (`timeoutMs / intervalMs`)
 * @throws Error starting with `ERR_UNEXPECTED_PHASE` when a pod exists but its phase is
 *         neither `Pending` nor `Running`
 * @throws Error starting with `ERR_TIMEOUT` when no pod showed up within the polls
 */
async waitForPodPending(selector: string, namespace = '', intervalMs = 500, timeoutMs = this.podWaitTimeout) {
  const iterations = timeoutMs / intervalMs
  let podExist
  let currentPhase
  for (let index = 0; index < iterations; index++) {
    podExist = await this.podsExistBySelector(selector, namespace)
    if (podExist) {
      currentPhase = await this.getPodPhase(selector, namespace)
      if (currentPhase === 'Pending' || currentPhase === 'Running') {
        return
      }
      throw new Error(`ERR_UNEXPECTED_PHASE: ${currentPhase} (Pending expected) `)
    }
    await cli.wait(intervalMs)
  }
  // Report the timeout that was actually applied; it differs from
  // this.podWaitTimeout whenever the caller passes an explicit timeoutMs.
  throw new Error(`ERR_TIMEOUT: Timeout set to pod wait timeout ${timeoutMs}. podExist: ${podExist}, currentPhase: ${currentPhase}`)
}

async waitForPodReady(selector: string, namespace = '', intervalMs = 500, timeoutMs = this.podReadyTimeout) {
const iterations = timeoutMs / intervalMs
for (let index = 0; index < iterations; index++) {
Expand Down
17 changes: 8 additions & 9 deletions src/tasks/che.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { OpenShiftHelper } from '../api/openshift'
import { VersionHelper } from '../api/version'
import { DOC_LINK, DOC_LINK_OBTAIN_ACCESS_TOKEN, DOC_LINK_OBTAIN_ACCESS_TOKEN_OAUTH, DOC_LINK_RELEASE_NOTES } from '../constants'

import { OperatorTasks } from './installers/operator'
import { KubeTasks } from './kube'

/**
Expand Down Expand Up @@ -46,8 +47,6 @@ export class CheTasks {
pluginRegistryDeploymentName = 'plugin-registry'
pluginRegistrySelector = 'app=che,component=plugin-registry'

cheOperatorSelector = 'app=che-operator'

cheConsoleLinkName = 'che'

constructor(flags: any) {
Expand All @@ -68,34 +67,34 @@ export class CheTasks {
*
* @see che.checkIfCheIsInstalledTasks
*/
waitDeployedChe(flags: any, command: Command): ReadonlyArray<Listr.ListrTask> {
waitDeployedChe(flags: any, _command: Command): ReadonlyArray<Listr.ListrTask> {
return [
{
title: 'PostgreSQL pod bootstrap',
skip: () => !flags.multiuser,
enabled: ctx => ctx.isPostgresDeployed && !ctx.isPostgresReady,
task: () => this.kubeTasks.podStartTasks(command, this.postgresSelector, this.cheNamespace)
task: () => this.kubeTasks.podStartTasks(this.postgresSelector, this.cheNamespace)
},
{
title: 'Keycloak pod bootstrap',
skip: () => !flags.multiuser,
enabled: ctx => ctx.isKeycloakDeployed && !ctx.isKeycloakReady,
task: () => this.kubeTasks.podStartTasks(command, this.keycloakSelector, this.cheNamespace)
task: () => this.kubeTasks.podStartTasks(this.keycloakSelector, this.cheNamespace)
},
{
title: 'Devfile registry pod bootstrap',
enabled: ctx => ctx.isDevfileRegistryDeployed && !ctx.isDevfileRegistryReady,
task: () => this.kubeTasks.podStartTasks(command, this.devfileRegistrySelector, this.cheNamespace)
task: () => this.kubeTasks.podStartTasks(this.devfileRegistrySelector, this.cheNamespace)
},
{
title: 'Plugin registry pod bootstrap',
enabled: ctx => ctx.isPluginRegistryDeployed && !ctx.isPluginRegistryReady,
task: () => this.kubeTasks.podStartTasks(command, this.pluginRegistrySelector, this.cheNamespace)
task: () => this.kubeTasks.podStartTasks(this.pluginRegistrySelector, this.cheNamespace)
},
{
title: 'Eclipse Che pod bootstrap',
enabled: ctx => !ctx.isCheReady,
task: () => this.kubeTasks.podStartTasks(command, this.cheSelector, this.cheNamespace)
task: () => this.kubeTasks.podStartTasks(this.cheSelector, this.cheNamespace)
},
...this.checkEclipseCheStatus()
]
Expand Down Expand Up @@ -523,7 +522,7 @@ export class CheTasks {
title: `${follow ? 'Start following' : 'Read'} Operator logs`,
skip: () => flags.installer !== 'operator' && flags.installer !== 'olm',
task: async (ctx: any, task: any) => {
await this.che.readPodLog(flags.chenamespace, this.cheOperatorSelector, ctx.directory, follow)
await this.che.readPodLog(flags.chenamespace, OperatorTasks.CHE_OPERATOR_SELECTOR, ctx.directory, follow)
task.title = `${task.title}...done`
}
},
Expand Down
8 changes: 8 additions & 0 deletions src/tasks/installers/operator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ import * as Listr from 'listr'
import { KubeHelper } from '../../api/kube'
import { CHE_CLUSTER_CRD } from '../../constants'
import { isStableVersion } from '../../util'
import { KubeTasks } from '../kube'

import { copyOperatorResources, createEclipseCheCluster, createNamespaceTask, updateEclipseCheCluster } from './common-tasks'

export class OperatorTasks {
public static CHE_OPERATOR_SELECTOR = 'app=che-operator'

operatorServiceAccount = 'che-operator'
operatorRole = 'che-operator'
operatorClusterRole = 'che-operator'
Expand All @@ -36,6 +39,7 @@ export class OperatorTasks {
const clusterRoleName = `${flags.chenamespace}-${this.operatorClusterRole}`
const clusterRoleBindingName = `${flags.chenamespace}-${this.operatorClusterRoleBinding}`
const kube = new KubeHelper(flags)
const kubeTasks = new KubeTasks(flags)
if (isStableVersion(flags)) {
command.warn('Consider using the more reliable \'OLM\' installer when deploying a stable release of Eclipse Che (--installer=olm).')
}
Expand Down Expand Up @@ -149,6 +153,10 @@ export class OperatorTasks {
}
}
},
{
title: 'Operator pod bootstrap',
task: () => kubeTasks.podStartTasks(OperatorTasks.CHE_OPERATOR_SELECTOR, flags.chenamespace)
},
createEclipseCheCluster(flags, kube)
], { renderer: flags['listr-renderer'] as any })
}
Expand Down
147 changes: 123 additions & 24 deletions src/tasks/kube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,52 +7,151 @@
*
* SPDX-License-Identifier: EPL-2.0
**********************************************************************/
import { Command } from '@oclif/command'
import { V1ContainerStateWaiting, V1PodCondition } from '@kubernetes/client-node'
import { cli } from 'cli-ux'
import * as Listr from 'listr'

import { KubeHelper } from '../api/kube'

export class KubeTasks {
kube: KubeHelper
debug = require('debug')
kubeHelper: KubeHelper
constructor(flags?: any) {
this.kube = new KubeHelper(flags)
this.kubeHelper = new KubeHelper(flags)
}

podStartTasks(_command: Command, selector: string, namespace = ''): Listr {
podStartTasks(selector: string, namespace: string): Listr {
return new Listr([
{
title: 'scheduling',
title: 'Scheduling',
task: async (_ctx: any, task: any) => {
let phase
const title = task.title
try {
phase = await this.kube.getPodPhase(selector, namespace)
} catch (err) {
// not able to grab current phase
this.debug(err)
}
// wait only if not yet running
if (phase !== 'Running') {
await this.kube.waitForPodPending(selector, namespace)
// any way use 5 minutes (600*500=5*60*1000 ms) timeout
for (let i = 1; i <= 600; i++) {
const failedCondition = await this.getFailedPodCondition(namespace, selector, 'PodScheduled')
if (failedCondition) {
task.title = `${task.title}...failed`
throw new Error(`Failed to schedule a pod, reason: ${failedCondition.reason}, message: ${failedCondition.message}`)
}

const allScheduled = await this.isPodConditionStatusPassed(namespace, selector, 'PodScheduled')
if (allScheduled) {
task.title = `${task.title}...done.`
return
}

await cli.wait(500)
}
task.title = `${title}...done.`

throw new Error(`Failed to schedule a pod: ${await this.getTimeOutErrorMessage(namespace, selector)}`)
}
},
{
title: 'downloading images',
title: 'Downloading images',
task: async (_ctx: any, task: any) => {
await this.kube.waitForPodPhase(selector, 'Running', namespace)
task.title = `${task.title}...done.`
// any way use 5 minutes (600*500=5*60*1000 ms) timeout
for (let i = 1; i <= 600; i++) {
const failedState = await this.getFailedWaitingState(namespace, selector, 'Pending')
if (failedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to download image, reason: ${failedState.reason}, message: ${failedState.message}`)
}

const pods = await this.kubeHelper.getPodListByLabel(namespace, selector)
const allRunning = !pods.some(value => !value.status || value.status.phase !== 'Running')
if (pods.length && allRunning) {
task.title = `${task.title}...done.`
return
}

await cli.wait(500)
}

throw new Error(`Failed to download image: ${await this.getTimeOutErrorMessage(namespace, selector)}`)
}
},
{
title: 'starting',
title: 'Starting',
task: async (_ctx: any, task: any) => {
await this.kube.waitForPodReady(selector, namespace)
task.title = `${task.title}...done.`
// any way use 5 minutes (600*500=5*60*1000 ms) timeout
for (let i = 1; i <= 600; i++) {
const failedState = await this.getFailedWaitingState(namespace, selector, 'Running')
if (failedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to start a pod, reason: ${failedState.reason}, message: ${failedState.message}`)
}

const allStarted = await this.isPodConditionStatusPassed(namespace, selector, 'Ready')
if (allStarted) {
task.title = `${task.title}...done.`
return
}

await cli.wait(500)
}

throw new Error(`Failed to start a pod: ${await this.getTimeOutErrorMessage(namespace, selector)}`)
}
}
])
}

/**
 * Looks for a failed pod condition of the given type and confirms it after a grace period.
 *
 * A condition counts as failed when its status is `'False'` and it carries both a
 * message and a reason. When such a condition is seen, the conditions are fetched
 * again 10 seconds later and only the result of that second look is returned.
 *
 * @param namespace namespace passed to `kubeHelper.getPodCondition`
 * @param selector label selector passed to `kubeHelper.getPodCondition`
 * @param conditionType condition type passed to `kubeHelper.getPodCondition`
 * @returns the failed condition found by the second look, or `undefined`
 */
private async getFailedPodCondition(namespace: string, selector: string, conditionType: string): Promise<V1PodCondition | undefined> {
  const isFailed = (condition: V1PodCondition) => condition.status === 'False' && condition.message && condition.reason

  const firstLook = await this.kubeHelper.getPodCondition(namespace, selector, conditionType)
  if (!firstLook.some(isFailed)) {
    return undefined
  }

  // wait 10 sec, check again and only then fail
  await cli.wait(10000)

  const secondLook = await this.kubeHelper.getPodCondition(namespace, selector, conditionType)
  return secondLook.find(isFailed)
}

/**
 * Tells whether every condition returned by `kubeHelper.getPodCondition` has status `'True'`.
 *
 * @param namespace namespace passed to `kubeHelper.getPodCondition`
 * @param selector label selector passed to `kubeHelper.getPodCondition`
 * @param conditionType condition type passed to `kubeHelper.getPodCondition`
 * @returns `true` when at least one condition was returned and all of them are `'True'`
 */
private async isPodConditionStatusPassed(namespace: string, selector: string, conditionType: string): Promise<boolean> {
  const conditions = await this.kubeHelper.getPodCondition(namespace, selector, conditionType)
  if (conditions.length === 0) {
    // Nothing reported yet, so the condition cannot be considered passed.
    return false
  }
  return conditions.every(condition => condition.status === 'True')
}

/**
 * Returns the waiting state reported by `kubeHelper.getPodWaitingState` for the given
 * pod phase, but only when it is still reported after a 10 second grace period.
 *
 * @param namespace namespace passed to `kubeHelper.getPodWaitingState`
 * @param selector label selector passed to `kubeHelper.getPodWaitingState`
 * @param state pod phase passed to `kubeHelper.getPodWaitingState`
 * @returns the waiting state (with reason and message) from the second check, or `undefined`
 */
private async getFailedWaitingState(namespace: string, selector: string, state: string): Promise<V1ContainerStateWaiting | undefined> {
  const firstCheck = await this.kubeHelper.getPodWaitingState(namespace, selector, state)
  if (!firstCheck || !firstCheck.reason || !firstCheck.message) {
    return undefined
  }

  // wait 10 sec, check again and only then fail
  await cli.wait(10000)

  const secondCheck = await this.kubeHelper.getPodWaitingState(namespace, selector, state)
  if (!secondCheck || !secondCheck.reason || !secondCheck.message) {
    return undefined
  }
  return secondCheck
}

/**
 * Returns extended timeout error message explaining a failure.
 *
 * Lists every pod returned by `kubeHelper.getPodListByLabel` together with its
 * container statuses and conditions (JSON-formatted) when they are present.
 *
 * @param namespace namespace passed to `kubeHelper.getPodListByLabel`
 * @param selector label selector passed to `kubeHelper.getPodListByLabel`
 * @returns a multi-line message starting with `Timeout:`
 */
private async getTimeOutErrorMessage(namespace: string, selector: string): Promise<string> {
  const pods = await this.kubeHelper.getPodListByLabel(namespace, selector)
  if (!pods.length) {
    return 'Timeout: there no pods.'
  }

  let errorMessage = 'Timeout:'
  for (const pod of pods) {
    // This helper only runs while reporting a failure, so it must not throw itself:
    // fall back to a placeholder instead of asserting that metadata is present.
    const podName = pod.metadata && pod.metadata.name ? pod.metadata.name : '<unknown>'
    errorMessage += `\nPod: ${podName}`
    if (pod.status) {
      if (pod.status.containerStatuses) {
        errorMessage += `\n\t\tstatus: ${JSON.stringify(pod.status.containerStatuses, undefined, ' ')}`
      }
      if (pod.status.conditions) {
        errorMessage += `\n\t\tconditions: ${JSON.stringify(pod.status.conditions, undefined, ' ')}`
      }
    } else {
      errorMessage += ', status not found.'
    }
  }

  return errorMessage
}
}
Loading

0 comments on commit 7c352e2

Please sign in to comment.