Skip to content

Commit

Permalink
rh-che eclipse-che#557: Adding property and handler for stopping k8s …
Browse files Browse the repository at this point in the history
…/ openshift runtime if unrecoverable event occurs during workspace startup

Signed-off-by: Ilya Buziuk <ibuziuk@redhat.com>
  • Loading branch information
ibuziuk authored and hbhargav committed Dec 4, 2018
1 parent d21beaf commit 85d8456
Show file tree
Hide file tree
Showing 7 changed files with 257 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,10 @@ che.infra.kubernetes.workspace_start_timeout_min=8
# Defines the timeout, in minutes, within which a Kubernetes Ingress must become ready
che.infra.kubernetes.ingress_start_timeout_min=5

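# Comma-separated list of unrecoverable events. If, during workspace startup, an event occurs
# whose reason equals, or whose message starts with, one of these values, the workspace is
# terminated immediately instead of waiting until the start timeout expires.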
che.infra.kubernetes.workspace_unrecoverable_events=Failed Mount,Failed Scheduling,Failed to pull image

che.infra.kubernetes.bootstrapper.binary_url=http://${CHE_HOST}:${CHE_PORT}/agent-binaries/linux_amd64/bootstrapper/bootstrapper
che.infra.kubernetes.bootstrapper.installer_timeout_sec=180
che.infra.kubernetes.bootstrapper.server_check_period_sec=3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import static java.lang.String.format;
import static java.util.Collections.emptyMap;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.inject.assistedinject.Assisted;
import io.fabric8.kubernetes.api.model.Container;
Expand All @@ -22,7 +23,9 @@
import io.fabric8.kubernetes.api.model.extensions.Ingress;
import io.fabric8.kubernetes.client.Watcher.Action;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
Expand Down Expand Up @@ -89,6 +92,7 @@ public class KubernetesInternalRuntime<

private final int workspaceStartTimeout;
private final int ingressStartTimeout;
private final String unrecoverableEvents;
private final ServersCheckerFactory serverCheckerFactory;
private final KubernetesBootstrapperFactory bootstrapperFactory;
private final ProbeScheduler probeScheduler;
Expand All @@ -104,6 +108,7 @@ public class KubernetesInternalRuntime<
public KubernetesInternalRuntime(
@Named("che.infra.kubernetes.workspace_start_timeout_min") int workspaceStartTimeout,
@Named("che.infra.kubernetes.ingress_start_timeout_min") int ingressStartTimeout,
@Named("che.infra.kubernetes.workspace_unrecoverable_events") String unrecoverableEvents,
NoOpURLRewriter urlRewriter,
KubernetesBootstrapperFactory bootstrapperFactory,
ServersCheckerFactory serverCheckerFactory,
Expand All @@ -123,6 +128,7 @@ public KubernetesInternalRuntime(
this.volumesStrategy = volumesStrategy;
this.workspaceStartTimeout = workspaceStartTimeout;
this.ingressStartTimeout = ingressStartTimeout;
this.unrecoverableEvents = unrecoverableEvents;
this.probeScheduler = probeScheduler;
this.probesFactory = probesFactory;
this.namespace = namespace;
Expand Down Expand Up @@ -183,6 +189,8 @@ protected void internalStart(Map<String, String> startOptions) throws Infrastruc
throw new InfrastructureException("Kubernetes environment start was interrupted");
}
wrapAndRethrow(e);
} finally {
namespace.pods().stopWatch();
}
}

Expand Down Expand Up @@ -418,7 +426,10 @@ protected void startMachines() throws InfrastructureException {

// TODO https://github.com/eclipse/che/issues/7653
// namespace.pods().watch(new AbnormalStopHandler());
// namespace.pods().watchContainers(new MachineLogsPublisher());
namespace.pods().watchContainers(new MachineLogsPublisher());
if (!Strings.isNullOrEmpty(unrecoverableEvents)) {
namespace.pods().watchContainers(new UnrecoverableEventHanler());
}

final KubernetesServerResolver serverResolver =
new KubernetesServerResolver(createdServices, readyIngresses);
Expand Down Expand Up @@ -600,8 +611,69 @@ public void accept(ProbeResult probeResult) {
}
}

/**
 * Listens to container events and stops the workspace runtime as soon as an unrecoverable event
 * occurs, instead of waiting until the start timeout expires.
 *
 * <p>The set of unrecoverable events is read from the comma separated value of the
 * 'che.infra.kubernetes.workspace_unrecoverable_events' property. Each value is trimmed and blank
 * values are ignored.
 */
public class UnrecoverableEventHanler implements ContainerEventHandler {
  /** Normalized (trimmed, non-empty) values of the unrecoverable events property. */
  private final List<String> events;

  public UnrecoverableEventHanler() {
    final List<String> configured = new ArrayList<>();
    if (!Strings.isNullOrEmpty(unrecoverableEvents)) {
      for (String value : unrecoverableEvents.split(",")) {
        final String trimmed = value.trim();
        // Blank entries (e.g. "A,,B" or a leading comma) must be dropped: every message starts
        // with the empty string, so such an entry would mark every event as unrecoverable.
        if (!trimmed.isEmpty()) {
          configured.add(trimmed);
        }
      }
    }
    this.events = Collections.unmodifiableList(configured);
  }

  /**
   * Stops the runtime and publishes a runtime stopped event when the given event is unrecoverable.
   * Recoverable events are ignored.
   *
   * @param event container event to inspect
   */
  @Override
  public void handle(ContainerEvent event) {
    if (!isUnrecoverable(event)) {
      return;
    }
    String reason = event.getReason();
    String message = event.getMessage();
    String podName = event.getPodName();
    try {
      internalStop(emptyMap());
    } catch (InfrastructureException e) {
      String workspaceId = getContext().getIdentity().getWorkspaceId();
      // The exception is passed as the trailing argument so that the cause of the failed stop
      // is not lost.
      LOG.error(
          "Unrecoverable event occured during workspace '{}' startup: {}, {}, {}",
          workspaceId,
          reason,
          message,
          podName,
          e);
    } finally {
      // The stopped event is published whether or not the stop itself succeeded.
      eventPublisher.sendRuntimeStoppedEvent(
          format("Unrecoverable event occured: '%s', '%s', '%s'", reason, message, podName),
          getContext().getIdentity());
    }
  }

  /**
   * Checks whether the given event is one of the configured unrecoverable events.
   *
   * @param event container event to check
   * @return true if the event reason equals, or the event message starts with, one of the comma
   *     separated values defined in 'che.infra.kubernetes.workspace_unrecoverable_events', false
   *     otherwise
   */
  private boolean isUnrecoverable(ContainerEvent event) {
    // Consider unrecoverable if event reason 'equals' one of the property values e.g. "Failed
    // Mount"
    if (events.contains(event.getReason())) {
      return true;
    }
    String message = event.getMessage();
    if (message == null) {
      return false;
    }
    // Consider unrecoverable if event message 'startsWith' one of the property values e.g.
    // "Failed to pull image"
    for (String unrecoverable : events) {
      if (message.startsWith(unrecoverable)) {
        return true;
      }
    }
    return false;
  }
}

/** Listens container's events and publish them as machine logs. */
class MachineLogsPublisher implements ContainerEventHandler {
public class MachineLogsPublisher implements ContainerEventHandler {

@Override
public void handle(ContainerEvent event) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ public void eventReceived(Action action, Event event) {
new ContainerEvent(
podName,
containerName,
event.getReason(),
event.getMessage(),
event.getMetadata().getCreationTimestamp());
containerEventsHandlers.forEach(h -> h.handle(containerEvent));
Expand All @@ -346,7 +347,7 @@ public void onClose(KubernetesClientException ignored) {}
}

/** Stops watching the pods inside Kubernetes namespace. */
void stopWatch() {
public void stopWatch() {
try {
if (podWatch != null) {
podWatch.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
public class ContainerEvent {
private final String podName;
private final String containerName;
private final String reason;
private final String message;
private final String time;

public ContainerEvent(String podName, String containerName, String message, String time) {
/**
 * Creates a container event; every argument is stored as-is in the field of the same name.
 *
 * @param podName name of the pod the event belongs to
 * @param containerName name of the container the event belongs to
 * @param reason reason of the event
 * @param message contents of the event
 * @param time time of the event (the visible call site passes the event's creation timestamp)
 */
public ContainerEvent(
String podName, String containerName, String reason, String message, String time) {
this.podName = podName;
this.containerName = containerName;
this.reason = reason;
this.message = message;
this.time = time;
}
Expand All @@ -41,6 +44,11 @@ public String getContainerName() {
return containerName;
}

/** Returns the reason of the event, as it was passed to the constructor. */
public String getReason() {
return reason;
}

/** Returns the contents of the event. */
public String getMessage() {
return message;
Expand All @@ -62,13 +70,14 @@ public boolean equals(Object o) {
ContainerEvent that = (ContainerEvent) o;
return Objects.equals(podName, that.podName)
&& Objects.equals(containerName, that.containerName)
&& Objects.equals(reason, that.reason)
&& Objects.equals(message, that.message)
&& Objects.equals(time, that.time);
}

@Override
public int hashCode() {
return Objects.hash(podName, containerName, message, time);
return Objects.hash(podName, containerName, reason, message, time);
}

@Override
Expand All @@ -80,6 +89,9 @@ public String toString() {
+ ", containerName='"
+ containerName
+ '\''
+ ", reason='"
+ reason
+ '\''
+ ", message='"
+ message
+ '\''
Expand Down
Loading

0 comments on commit 85d8456

Please sign in to comment.