Skip to content

Commit

Permalink
fix(build-sync): randomly choose pod for rsync
Browse files Browse the repository at this point in the history
Turns out it was not random enough in our cluster - it always
picked the same pod for us. This ensures that we do not pick
the same pod over and over again if k8s api ends up returning
the pods in the same order
  • Loading branch information
swist authored and edvald committed Jun 21, 2020
1 parent dbc601b commit be81679
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 37 deletions.
Expand Up @@ -31,7 +31,7 @@ import { waitForResources } from "../status/status"
import { execInWorkload } from "../container/exec"
import { dedent, deline } from "../../../util/string"
import { execInPod, getDeploymentPodName, BuilderExecParams, buildSyncDeploymentName } from "../container/build"
import { getPods } from "../util"
import { getRunningPodInDeployment } from "../util"
import { getSystemNamespace } from "../namespace"

const workspaceSyncDirTtl = 0.5 * 86400 // 0.5 days (12 hours) if the unit is seconds. NOTE(review): this comment used to say "2 days", which does not match 0.5 * 86400 — confirm the intended TTL
Expand Down Expand Up @@ -388,14 +388,22 @@ async function cleanupBuildSyncVolume(provider: KubernetesProvider, log: LogEntr
status: "active",
})

const podName = await getBuildSyncPodName(provider, log)
const pod = await getRunningPodInDeployment(buildSyncDeploymentName, provider, log)
const systemNamespace = await getSystemNamespace(provider, log)
if (!pod) {
throw new PluginError(`Could not find running image builder`, {
builderDeploymentName: buildSyncDeploymentName,
systemNamespace,
})
}

const statArgs = ["sh", "-c", 'stat /data/* -c "%n %X"']
const stat = await execInBuildSync({
provider,
log,
args: statArgs,
timeout: 30,
podName,
podName: pod.metadata.name,
containerName: dockerDaemonContainerName,
})

Expand All @@ -422,33 +430,13 @@ async function cleanupBuildSyncVolume(provider: KubernetesProvider, log: LogEntr
log,
args: deleteArgs,
timeout: 300,
podName,
podName: pod.metadata.name,
containerName: dockerDaemonContainerName,
})

log.setSuccess()
}

/**
 * Resolves the name of one of the build-sync pods in the system namespace.
 *
 * Reads the build-sync Deployment, lists the pods matching its label selector and
 * uses the first entry of that list. Which pod is used doesn't matter, since they
 * all use the same volume.
 *
 * @param provider the Kubernetes provider used to create the API client and resolve the system namespace
 * @param log log entry passed through to the API client and namespace lookup
 * @returns the `metadata.name` of the first pod in the list
 * @throws PluginError when the pod lookup yields no pod
 */
async function getBuildSyncPodName(provider: KubernetesProvider, log: LogEntry) {
  const api = await KubeApi.factory(log, provider)
  const systemNamespace = await getSystemNamespace(provider, log)

  // The Deployment's own label selector tells us which pods belong to it.
  const deployment = await api.apps.readNamespacedDeployment(buildSyncDeploymentName, systemNamespace)
  const podSelector = deployment.spec.selector.matchLabels
  const candidates = await getPods(api, systemNamespace, podSelector)
  const chosen = candidates[0]

  if (chosen) {
    return chosen.metadata.name
  }

  throw new PluginError(`Could not find running image builder`, {
    builderDeploymentName: buildSyncDeploymentName,
    systemNamespace,
  })
}

async function execInBuildSync({ provider, log, args, timeout, podName }: BuilderExecParams) {
const execCmd = ["exec", "-i", podName, "--", ...args]
const systemNamespace = await getSystemNamespace(provider, log)
Expand Down
25 changes: 12 additions & 13 deletions garden-service/src/plugins/kubernetes/container/build.ts
Expand Up @@ -222,14 +222,25 @@ const remoteBuild: BuildHandler = async (params) => {
return {}
}

const buildSyncPod = await getRunningPodInDeployment(buildSyncDeploymentName, provider, log)

// TODO: remove this after a few releases (from 0.10.15), since this is only necessary for environments initialized
// with 0.10.14 or earlier.
if (!buildSyncPod) {
throw new PluginError(`Could not find running build sync Pod`, {
deploymentName: buildSyncDeploymentName,
systemNamespace,
})
}

// Sync the build context to the remote sync service
// -> Get a tunnel to the service
log.setState("Syncing sources to cluster...")
const syncFwd = await getPortForward({
ctx,
log,
namespace: systemNamespace,
targetResource: `Deployment/${buildSyncDeploymentName}`,
targetResource: `Pod/${buildSyncPod.metadata.name}`,
port: RSYNC_PORT,
})

Expand Down Expand Up @@ -257,18 +268,6 @@ const remoteBuild: BuildHandler = async (params) => {
]

log.debug(`Syncing from ${src} to ${destination}`)

// TODO: remove this after a few releases (from 0.10.15), since this is only necessary for environments initialized
// with 0.10.14 or earlier.
const buildSyncPod = await getRunningPodInDeployment(buildSyncDeploymentName, provider, log)

if (!buildSyncPod) {
throw new PluginError(`Could not find running build sync Pod`, {
deploymentName: buildSyncDeploymentName,
systemNamespace,
})
}

// We retry a couple of times, because we may get intermittent connection issues or concurrency issues
await pRetry(() => exec("rsync", syncArgs), {
retries: 3,
Expand Down

0 comments on commit be81679

Please sign in to comment.