Skip to content

Commit

Permalink
Cloud runner terminate improvements (#653)
Browse files Browse the repository at this point in the history
* wip

* waitCompletedPipelineJobs

* on exit proc

* spotnotifier

* remove unused

* spotnotifier

* notRetry SpotNotifier

* remove status runner

* pipelineJobs interface

* test sha issue

* 40000
  • Loading branch information
DavidGOrtega committed Jul 20, 2021
1 parent af5e78a commit 0c6abf3
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
GITHUB_TOKEN: ${{ github.token }}
TEST_GITHUB_TOKEN: ${{ secrets.TEST_GITHUB_TOKEN }}
TEST_GITHUB_REPO: https://github.com/iterative/cml_qa_tests_dummy
TEST_GITHUB_SHA: 62edc8b3f46a60b3fe1e5c08fd3e0046d350ee29
TEST_GITHUB_SHA: 0cd16da26e35f8e5d57b2549a97e22618abf08f6
TEST_GITLAB_TOKEN: ${{ secrets.TEST_GITLAB_TOKEN }}
TEST_GITLAB_REPO: https://gitlab.com/iterative.ai/cml_qa_tests_dummy
TEST_GITLAB_SHA: c4c13286e78dc252dd2611f31a755f10d343fbd4
Expand Down
50 changes: 41 additions & 9 deletions bin/cml-runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const { homedir } = require('os');

const fs = require('fs').promises;
const yargs = require('yargs');
const { SpotNotifier } = require('ec2-spot-notification');

const { exec, randid, sleep } = require('../src/utils');
const tf = require('../src/terraform');
Expand Down Expand Up @@ -234,12 +235,34 @@ const runLocal = async (opts) => {
} else if (log && log.status === 'job_ended') {
const { job } = log;

/**
 * Resolve with the ids of the running jobs that have finished.
 *
 * When exactly one job is tracked in RUNNER_JOBS_RUNNING its id is returned
 * immediately without querying the CI provider. Otherwise `cml.pipelineJobs`
 * is polled until at least one job reports the status 'completed'.
 *
 * @param {number} [pollIntervalMs=5000] - Delay between two polls, in
 *   milliseconds.
 * @returns {Promise<Array>} Ids of the completed jobs.
 * @throws Rejects with the error raised by `cml.pipelineJobs`; polling stops
 *   as soon as that happens.
 */
const waitCompletedPipelineJobs = (pollIntervalMs = 5 * 1000) => {
  return new Promise((resolve, reject) => {
    if (RUNNER_JOBS_RUNNING.length === 1) {
      resolve([RUNNER_JOBS_RUNNING[0].id]);
      return;
    }

    // setInterval does not wait for the async callback, so a slow request
    // could otherwise overlap with the next tick.
    let polling = false;

    const watcher = setInterval(async () => {
      if (polling) return;
      polling = true;

      try {
        const pipelineJobs = await cml.pipelineJobs({
          jobs: RUNNER_JOBS_RUNNING
        });
        const jobs = pipelineJobs
          .filter((job) => job.status === 'completed')
          .map((job) => job.id);

        if (jobs.length) {
          clearInterval(watcher);
          resolve(jobs);
        }
      } catch (err) {
        // A try/catch around setInterval itself cannot see errors thrown
        // inside this async callback; handle them here so the promise
        // settles and the timer does not keep firing forever.
        clearInterval(watcher);
        reject(err);
      } finally {
        polling = false;
      }
    }, pollIntervalMs);
  });
};

if (!RUNNER_SHUTTING_DOWN) {
const jobs = job
? [job]
: (await cml.pipelineJobs({ jobs: RUNNER_JOBS_RUNNING }))
.filter((job) => job.status === 'completed')
.map((job) => job.id);
const jobs = job ? [job] : await waitCompletedPipelineJobs();

RUNNER_JOBS_RUNNING = RUNNER_JOBS_RUNNING.filter(
(job) => !jobs.includes(job.id)
Expand All @@ -251,13 +274,22 @@ const runLocal = async (opts) => {
proc.stderr.on('data', dataHandler);
proc.stdout.on('data', dataHandler);
proc.on('uncaughtException', () => shutdown(opts));
proc.on('SIGINT', () => shutdown(opts));
proc.on('SIGTERM', () => shutdown(opts));
proc.on('SIGQUIT', () => shutdown(opts));
proc.on('disconnect', () => shutdown(opts));
proc.on('exit', () => shutdown(opts));

// Subscribe to the SpotNotifier 'termination' event so the runner shuts down
// when it fires. This is only attempted when `noRetry` is falsy.
if (!noRetry) {
try {
// Awaiting instanceId() may throw; the catch below then reports that the
// notifier could not be started and the runner carries on without it.
// NOTE(review): presumably this fails when not running on EC2 — confirm.
console.log(`EC2 id ${await SpotNotifier.instanceId()}`);
SpotNotifier.on('termination', () => shutdown(opts));
SpotNotifier.start();
} catch (err) {
// Best effort: the error itself is not logged, only a generic message.
console.log('SpotNotifier can not be started.');
}
}

if (parseInt(idleTimeout) !== 0) {
const watcher = setInterval(() => {
RUNNER_TIMEOUT_TIMER >= idleTimeout &&
RUNNER_TIMEOUT_TIMER > idleTimeout &&
shutdown(opts) &&
clearInterval(watcher);

Expand Down
Loading

0 comments on commit 0c6abf3

Please sign in to comment.