# Adaptive-scheduler example

[Read the documentation](https://adaptive-scheduler.readthedocs.io/en/latest/#what-is-this) to see what this is all about.

## Step 1: define the simulation

Often one wants to sweep a continuous 1D or 2D space for multiple parameters. [Adaptive](http://adaptive.readthedocs.io) is the ideal program to do this. We define a simulation by creating several `adaptive.Learners`. 

We **need** to define the following variables:
* `learners`: a list of learners
* `fnames`: a list of file names, one for each learner

In [None]:
from functools import partial

import adaptive
import adaptive_scheduler


def h(x, width=0.01, offset=0, n_burn=10):
    """Return ``x`` plus a narrow peak centred at ``offset``.

    Parameters
    ----------
    x : float
        Point at which the function is evaluated.
    width : float, optional
        Width parameter of the peak.
    offset : float, optional
        Position of the peak.
    n_burn : int, optional
        Number of throw-away eigendecompositions of a random 1000x1000
        matrix performed before the result is computed. They only simulate
        an expensive function; pass ``0`` to skip the artificial load.

    Returns
    -------
    float
        ``x + width**2 / (width**2 + (x - offset)**2)``.
    """
    # NOTE(review): numpy is imported inside the function, presumably so the
    # function stays self-contained when it is sent to a worker -- confirm.
    import numpy as np

    for _ in range(n_burn):  # Burn some CPU time just because
        np.linalg.eig(np.random.rand(1000, 1000))

    a = width
    return x + a ** 2 / (a ** 2 + (x - offset) ** 2)


# Five peak positions: -0.5, -0.4, ..., -0.1.
offsets = [i / 10 - 0.5 for i in range(5)]

# Every (offset, width) combination becomes one learner.
combos = adaptive.utils.named_product(offset=offsets, width=[0.01, 0.05])

# One 1D learner per parameter combination, each on the domain [-1, 1] ...
learners = [
    adaptive.Learner1D(partial(h, **params), bounds=(-1, 1)) for params in combos
]
# ... and one file name per learner, derived from the same combination.
fnames = [
    adaptive_scheduler.utils.combo2fname(params, folder="data") for params in combos
]

# Step 2: run the `learners`

After defining the `learners` and `fnames` in a file (above), we can start to run these learners.

We split up all learners into separate jobs; all you need to do is specify how many cores per job you want.

## Simple example

In [None]:
import adaptive_scheduler

# Create a run manager for the `learners` and `fnames` defined above and start it.
# NOTE(review): `goal=0.01` is presumably a loss threshold -- confirm in the
# adaptive-scheduler documentation.
run_manager = adaptive_scheduler.slurm_run(learners, fnames, goal=0.01)
run_manager.start()

## Explicit use

Or be explicit and use:

In [None]:
# Shared prefix for the job name and the database file name.
name = "example"

# The scheduler object holds the SLURM-specific settings.
scheduler = adaptive_scheduler.scheduler.SLURM(
    cores_per_node=2,
    nodes=1,
    partition="hb120rsv2-low",  # partition names are cluster-specific; adjust to your cluster
    executor_type="process-pool",
    log_folder="logs",
)

# The run manager ties the learners, their file names and the scheduler together.
# NOTE: unlike the simple example, this cell only constructs the manager;
# it does not call `run_manager.start()`.
run_manager = adaptive_scheduler.RunManager(
    learners=learners,
    fnames=fnames,
    scheduler=scheduler,
    goal=0.01,
    job_name=name,  # `name` is already a string, no f-string needed
    max_fails_per_job=5,
    max_simultaneous_jobs=50,
    db_fname=f"{name}-database.json",
    log_interval=30,
    save_interval=30,
    save_dataframe=True,
    cleanup_first=False,
)

## Queue

In [None]:
# See the current queue with
import pandas as pd

# Ask the scheduler for the queue (restricted via `me_only=True`) and
# show it as a table, transposed relative to `pd.DataFrame(queue)`.
queue = run_manager.scheduler.queue(me_only=True)
df = pd.DataFrame(queue).T
df

In [None]:
# Read the log files and put them in a `pandas.DataFrame`.
# This only returns something when there are log files to parse,
# i.e. after `run_manager.log_interval` has passed.
df = run_manager.parse_log_files()
df.head()

In [None]:
# Inspect the run manager's database and show its first rows.
df = run_manager.get_database()  # or see `run_manager.database_manager.as_dict()`
df.head()

In [None]:
# After the calculation started and some data has been saved, we can display the learners
import adaptive

adaptive.notebook_extension()

# Load the data saved so far into the learners, then wrap them in a single
# `BalancingLearner` (labelled by the parameter combinations) and plot it.
run_manager.load_learners()
learner = adaptive.BalancingLearner(learners, cdims=combos)
learner.plot()

## Simple sequential example
Sometimes you cannot formulate your problem with Adaptive; instead, you just want to run a function on a sequence of parameters.

Surprisingly, this approach with a `SequenceLearner` [is slightly faster than `ipyparallel.Client.map`](https://github.com/python-adaptive/adaptive/pull/193#issuecomment-491062073).

In [None]:
import numpy as np

from adaptive import SequenceLearner
from adaptive_scheduler.utils import split, combo_to_fname


def g(xyz, n_burn=5):
    """Return the squared Euclidean norm of the point ``xyz``.

    Parameters
    ----------
    xyz : tuple
        The three coordinates ``(x, y, z)``.
    n_burn : int, optional
        Number of throw-away eigendecompositions of a random 1000x1000
        matrix performed before the result is computed. They only simulate
        an expensive function; pass ``0`` to skip the artificial load.

    Returns
    -------
    number
        ``x**2 + y**2 + z**2``.
    """
    x, y, z = xyz
    for _ in range(n_burn):  # Burn some CPU time just because
        np.linalg.eig(np.random.rand(1000, 1000))
    return x ** 2 + y ** 2 + z ** 2


# Regular grid with 11 samples along each axis.
xs = np.linspace(0, 10, num=11)
ys = np.linspace(-1, 1, num=11)
zs = np.linspace(-3, 3, num=11)

# All (x, y, z) combinations, with x varying slowest and z fastest.
xyzs = [
    (x, y, z)
    for x in xs
    for y in ys
    for z in zs
]

# A single learner walks through the whole sequence, so there is a single file name.
learners = [SequenceLearner(g, sequence=xyzs)]
fnames = ["data/xyzs"]

In [None]:
import adaptive_scheduler


def goal(learner):
    """Return whatever ``learner.done()`` reports; used as the stopping criterion."""
    finished = learner.done()
    return finished


# `DefaultScheduler` is whichever scheduler class adaptive-scheduler selects.
scheduler = adaptive_scheduler.scheduler.DefaultScheduler(
    cores=10,
    executor_type="ipyparallel",
)  # PBS or SLURM

# Build the run manager for the sequence learner defined above and launch it.
run_manager2 = adaptive_scheduler.server_support.RunManager(
    scheduler,
    learners,
    fnames,
    goal=goal,
    save_interval=30,
    log_interval=30,
)
run_manager2.start()

In [None]:
# Load the data saved so far into the learner and show either the complete
# result or, if the run has not finished yet, the partial data.
run_manager2.load_learners()
learner = learners[0]
try:
    result = learner.result()
# NOTE(review): `SequenceLearner.result()` appears to raise a plain `Exception`
# while values are missing, so nothing narrower can be caught. A bare
# `except:` would also swallow `KeyboardInterrupt` and `SystemExit`.
except Exception:
    print("`learner.result()` is only available when all values are calculated.")
    partial_data = learner.data
    print(partial_data)
else:
    print(result)

## Extended example
This example shows how to split up a list into 100 `SequenceLearner`s and run them in 100 jobs.

In [None]:
import numpy as np

from adaptive import SequenceLearner
from adaptive_scheduler.utils import split, combo2fname
from adaptive.utils import named_product


def g(combo, n_burn=5):
    """Return ``x**2 + y**2 + z**2`` for one parameter combination.

    Parameters
    ----------
    combo : dict
        Mapping with the keys ``"x"``, ``"y"`` and ``"z"``.
    n_burn : int, optional
        Number of throw-away eigendecompositions of a random 1000x1000
        matrix performed before the result is computed. They only simulate
        an expensive function; pass ``0`` to skip the artificial load.

    Returns
    -------
    number
        The sum of the squared ``x``, ``y`` and ``z`` values.
    """
    x, y, z = combo["x"], combo["y"], combo["z"]

    for _ in range(n_burn):  # Burn some CPU time just because
        np.linalg.eig(np.random.rand(1000, 1000))

    return x ** 2 + y ** 2 + z ** 2


# Every (x, y, z) combination of the three `np.linspace` grids.
combos = named_product(
    x=np.linspace(0, 10),
    y=np.linspace(-1, 1),
    z=np.linspace(-3, 3),
)
print(f"Length of combos: {len(combos)}.")

# Everything could run as 1 job with N nodes, but it can also be split up
# over multiple jobs. That is desirable when you don't want to run a single
# job with, for example, 300 nodes.
# Note that
# `adaptive_scheduler.utils.split_sequence_in_sequence_learners(g, combos, 100, "data")`
# does the same!
njobs = 100
split_combos = list(split(combos, njobs))
print(
    f"Length of split_combos: {len(split_combos)} and length of split_combos[0]: {len(split_combos[0])}."
)

# One `SequenceLearner` per chunk; each file name is derived from the
# first combination in its chunk.
learners = [SequenceLearner(g, chunk) for chunk in split_combos]
fnames = [combo2fname(chunk[0], folder="data") for chunk in split_combos]

We now start the `RunManager` with a lot of arguments to showcase some of the options you can use to customize your run.

In [None]:
from functools import partial
import adaptive_scheduler
from adaptive_scheduler.scheduler import DefaultScheduler, PBS, SLURM


def goal(learner):
    """Return ``learner.done()``, the standard goal for a SequenceLearner."""
    finished = learner.done()
    return finished


# The extra scheduler flags are only passed when the default scheduler is SLURM.
if DefaultScheduler is SLURM:
    extra_scheduler = ["--exclusive", "--time=24:00:00"]
else:
    extra_scheduler = []

scheduler = DefaultScheduler(
    cores=10,
    executor_type="ipyparallel",
    log_folder="logs",
    python_executable="~/miniconda3/bin/python",
    extra_env_vars=["PYTHONPATH='my_dir:$PYTHONPATH'"],
    extra_scheduler=extra_scheduler,
)  # PBS or SLURM

# Build the run manager for the split-up sequence learners.
# `max_fails_per_job` and `max_simultaneous_jobs` are passed directly to
# `RunManager`, exactly as in the "Explicit use" cell of this notebook.
run_manager3 = adaptive_scheduler.server_support.RunManager(
    scheduler,
    learners,
    fnames,
    goal=goal,
    log_interval=10,
    save_interval=30,
    runner_kwargs=dict(retries=5, raise_if_retries_exceeded=False),
    kill_on_error="srun: error:",  # cancel a job if this is inside a log
    job_name="example-sequence",  # this is used to generate unique job names
    db_fname="example-sequence.json",  # the database keeps track of job_id <-> (learner, is_done)
    max_fails_per_job=10,  # the RunManager is cancelled after njobs * 10 fails
    max_simultaneous_jobs=300,  # limit the amount of simultaneous jobs
)

In [None]:
# Start the run manager configured above.
run_manager3.start()

In [None]:
# Parse the log files written so far and show the first rows.
df = run_manager3.parse_log_files()
df.head()

In [None]:
run_manager3.load_learners()  # load the data into the learners
# Combine all learners' results into 1 flat list. A single comprehension
# avoids the repeated list copies made by `sum(list_of_lists, [])`.
result = [value for learner in learners for value in learner.result()]