Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for running inside Docker containers #56

Merged
merged 21 commits into from
Aug 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
language: rust
rust:
- 1.33.0
- 1.34.0
- stable
- beta
- nightly
before_script:
- if [ "$TRAVIS_RUST_VERSION" != "nightly" ]; then rustup component add rustfmt clippy; fi
- if [ "$TRAVIS_RUST_VERSION" == "stable" ]; then rustup component add rustfmt clippy; fi
script:
- cargo build
- cargo test --all
- if [ "$TRAVIS_RUST_VERSION" != "nightly" ]; then cargo fmt --all -- --check; fi
- if [ "$TRAVIS_RUST_VERSION" != "nightly" ]; then cargo clippy -- -D warnings; fi
- if [ "$TRAVIS_RUST_VERSION" == "stable" ]; then cargo fmt --all -- --check; fi
- if [ "$TRAVIS_RUST_VERSION" == "stable" ]; then cargo clippy -- -D warnings; fi

# See https://levans.fr/rust_travis_cache.html
cache:
Expand Down
971 changes: 506 additions & 465 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ structopt = "0.2.15"
tokio = "0.1.18"
tokio-signal = "0.2.7"
toml = "0.5.0"
openssl = "0.10.23"
openssh-keys = "0.4.1"
tokio-ctrlc-error = "0.1.0"

[dependencies.failure_ext]
version = "0.1.1"
Expand Down
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,7 @@ CircleCI status: [![status](https://circleci.com/gh/golemfactory/gumpi.svg?style

Known to work with [this GU version](https://github.com/golemfactory/golem-unlimited/tree/gumpi-freeze), commit 93c9f37e1765ad743a6b16209561e6374fb88e84.

Minimum supported version:
* Rust: 1.33
* OpenMPI: 3.0

# Docker image

gumpi requires at least OpenMPI 3.0 on the provider machine. Since the current LTS version of Ubuntu only has OpenMPI 2.x, you can find a compatible Docker image [here](https://github.com/marmistrz/docker-openmpi).
Minimum supported Rust version: 1.34

# Example usage

Expand All @@ -29,6 +23,15 @@ args = ["-a"]

See [examples/Tutorial.md](examples/Tutorial.md) for a more details.

# Directories inside the Docker image
The structure of the directories:
* `/app` contains the sources and the built binary of the application
* `/input` contains the uploaded input data
* `/output` is the working directory of the app

In particular, when providing input data, you should refer to it as either
`../input/file.dat` or `/input/file.dat`.

# Known issues and limitations
## Connectivity
If you want to run the application over LAN, you may need to specify your IP address space, e.g.
Expand Down
4 changes: 3 additions & 1 deletion examples/game-life.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ args = ["2", "1", "12000", "10"]
[output]
# the path to the directory on the PROVIDER node containing the relevant output,
# relatively to the application working directory
source = "output"
#
# Defaults to: /output
source = "/output"

# desired path to the file containing the outputs from the provider,
# relative to the program working directory
Expand Down
4 changes: 3 additions & 1 deletion examples/heat_solver.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# the name of the binary that will be produced
progname = "heat_solver"
# the command line arguments that should be passed to the program
args = ["--size", "480", "--initial", "input.txt", "--noresults"]
args = ["--size", "480", "--initial", "/input/input.txt", "--noresults"]
# (optional) extra arguments that should be passed to mpirun
# mpiargs = ["--mca", "btl_tcp_if_include", "10.30.8.0/22"]

Expand All @@ -27,6 +27,8 @@ source = "heat_solver_input.tar"
# [output]
# the path to the directory on the PROVIDER node containing the relevant output,
# relatively to the application working directory
#
# Defaults to: /output
# source = "output"

# desired path to the file containing the outputs from the provider,
Expand Down
38 changes: 0 additions & 38 deletions src/async_ctrlc.rs

This file was deleted.

2 changes: 2 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::fmt;
pub enum Error {
ExecutionError(String),
CompilationError(Vec<String>),
KeyDeploymentError(Vec<String>),
}

impl fmt::Display for Error {
Expand All @@ -15,6 +16,7 @@ impl fmt::Display for Error {
let joined = logs.join("\n----------\n");
writeln!(f, "compilation error:\n{}", joined)
}
Error::KeyDeploymentError(e) => writeln!(f, "error deploying keys:\n{:?}", e),
}
}
}
5 changes: 5 additions & 0 deletions src/jobconfig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@ pub struct Sources {
pub mode: BuildType,
}

fn default_output_location() -> PathBuf {
PathBuf::from("/output")
}

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OutputConfig {
#[serde(default = "default_output_location")]
pub source: PathBuf,
pub target: PathBuf,
}
Expand Down
49 changes: 25 additions & 24 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#![warn(clippy::all)]
#![warn(rust_2018_idioms)]

mod async_ctrlc;
mod error;
mod jobconfig;
mod session;

use crate::{
async_ctrlc::{AsyncCtrlc, CtrlcEvent},
jobconfig::{JobConfig, Opt},
session::mpi::{DeploymentInfo, SessionMPI},
session::mpi::SessionMPI,
};
use actix::prelude::*;
use failure::{format_err, Fallible, ResultExt};
Expand All @@ -21,9 +19,10 @@ use futures::{
use log::{debug, error, info};
use std::env;
use structopt::StructOpt;
use tokio_ctrlc_error::{AsyncCtrlc, KeyboardInterrupt};

fn show_error(e: &failure::Error) {
match e.find_root_cause().downcast_ref::<CtrlcEvent>() {
match e.find_root_cause().downcast_ref::<KeyboardInterrupt>() {
Some(_) => eprintln!("Execution interrupted..."),
None => {
eprint!("Error");
Expand Down Expand Up @@ -54,7 +53,6 @@ fn gumpi_async(
opt: Opt,
config: JobConfig,
) -> Fallible<impl Future<Item = (), Error = failure::Error>> {
let progname = config.progname.clone();
let cpus_requested = opt.numproc;
let prov_filter = if opt.providers.is_empty() {
None
Expand Down Expand Up @@ -87,7 +85,8 @@ fn gumpi_async(
}

let future = SessionMPI::init(opt.hub, prov_filter)
.handle_ctrlc()
.ctrlc_as_error() // This is not a bug - we have a second `.ctrlc_as_error()`
// inside the `and_then`
.context("initializing session")
.and_then(move |session| {
use std::rc::Rc;
Expand All @@ -104,32 +103,28 @@ fn gumpi_async(
)));
}
info!("Compiling the sources...");
// deploy_prefix is the location of the folder, where the executable
// resides. See the documentation for SessionMPI::deploy for more
// details
let deploy_prefix = if let Some(sources) = config.sources.clone() {
// impl Future<Item = bool>
// * `true` if we have compiled the sources on the provider node
// * `false` otherwise
let deploy_future = if let Some(sources) = config.sources.clone() {
Either::A(
session
.deploy(jobconfig_dir.clone(), sources, progname)
.deploy(jobconfig_dir.clone(), sources)
.context("deploying the sources")
.and_then(|depl| {
let DeploymentInfo {
logs,
deploy_prefix,
} = depl;
for comp in logs {
for comp in depl.logs {
let logs = comp.logs.join("\n------------------\n");
info!(
"Provider {} compilation output:\n{}",
comp.node.to_string(),
logs
);
}
Ok(Some(deploy_prefix))
Ok(true)
}),
)
} else {
Either::B(future::ok(None))
Either::B(future::ok(false))
};

let upload_input = if let Some(input) = config.input.clone() {
Expand All @@ -139,17 +134,23 @@ fn gumpi_async(
Either::B(future::ok(()))
};

let deploy_keys = session
.deploy_keys()
.into_future()
.flatten()
.context("deploying SSH keys");

Either::B(
deploy_prefix
.join(upload_input)
.and_then(move |(deploy_prefix, ())| {
deploy_future
.join3(upload_input, deploy_keys)
.and_then(move |(deployed, (), ())| {
session
.exec(
cpus_requested,
config.progname,
config.args,
config.mpiargs,
deploy_prefix,
config.mpiargs.unwrap_or_default(),
deployed,
)
.context("program execution")
.join(future::ok(session))
Expand All @@ -162,7 +163,7 @@ fn gumpi_async(
Either::B(future::ok(()))
}
})
.handle_ctrlc()
.ctrlc_as_error()
.then(move |fut| {
// At this point, there should be no other session references
// remaining. If it isn't so, we want to stay on the safe side
Expand Down
Loading