In [None]:
# automatically reload modules if source is modified 
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pathlib
import enum 
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# `easyvvuq` dependencies
import easyvvuq as vvuq
from easyvvuq.decoders import YAMLDecoder
import chaospy as cp

In [None]:
# `isct` dependencies 
from workflow.uq import ISCTEncoder, ISCTDecoder
from workflow.isct_trial import trial as trial_cmd 

User directories: 
- `template_dir`: this directory points towards a patient directory that acts as template, i.e. this directory (and all its content) is duplicated for each new run generated by `EasyVVUQ`. Therefore, it is suggested to perform one analysis of that patient directory before generating the new directories with `EasyVVUQ`, as all initialisation and preprocessing is then simply copied, rather than needing to be repeated. For example, create and run a single patient:
```
isct trial create one -n 1 
isct trial run one -v 
```

- `work_dir`: this is the directory in which `EasyVVUQ` will create the runs and corresponding databases. Every time `EasyVVUQ` is started it generates a new database inside this folder (with a rather obscure random hash attached to the directory name). Be sure to clear out this `work_dir` somewhat regularly, as many directories can accumulate, taking up disk space.

In [None]:
# NOTE: machine-specific absolute paths; adjust both before running.
# patient directory that is duplicated for every generated run
template_dir = "/Users/max/trials/one/patient_000"
# directory in which `EasyVVUQ` creates its databases and run directories
work_dir = "/Users/max/trials/vvuq"

In [None]:
# ensure input is present: the template directory has to exist already,
# whereas the working directory is created on demand
template_dir = pathlib.Path(template_dir)
if not template_dir.is_dir():
    # raise explicitly: `assert` statements are stripped under `python -O`
    raise NotADirectoryError(
        f"template directory is not an existing directory: {template_dir}"
    )

work_dir = pathlib.Path(work_dir)
# `parents=True` mirrors `os.makedirs`; `exist_ok=True` makes the cell safe
# to re-run and still raises `FileExistsError` if the path is a file
work_dir.mkdir(parents=True, exist_ok=True)

### Campaign definition

This table lists a copy of the considered variables for UQ analysis of the one-dimensional bloodflow model. Each variable should be updated through `EasyVVUQ` and requires Encoders/Decoders to do so. The column `supported` indicates whether this is possible yes or no. The file names are all relative to the patient directory, i.e `trial/patient_i`.

These are all _inputs_ towards the bloodflow model, where various outputs are possible, e.g. the flow or related properties for a variety of arteries. TODO: what are the output fields of interest, i.e. which artery and what physical property?

| variable `name` | type | unit | location | supported | range | 
| ------ | ------ | ------ | ------ | ----- | ----- | 
| heart rate `HeartRate` | uncertain | bpm | `patient.yml`, `config.xml` | yes | N(68,20) | 
| stroke volume `StrokeVolume` | uncertain | ml | `bf_sim/Model_parameters.txt` | yes | N(104,21) | 
| blood density `Density` | uncertain | kg.m-3 | `bf_sim/Model_parameters.txt` | yes | U(1019,1061)|
| blood viscosity `BLOOD_VISC` | uncertain | mPa.s | `bf_sim/Model_parameters.txt` | yes | N(62.9,18.1) |
| wall thickness | uncertain | mm | per vessel: `1-D_Anatomy.txt` | no | N(0.44,0.04) |
| wall elasticity | uncertain | mmHg | per vessel: `1-D_Anatomy.txt` | no | N(951,380) |
| vertebral artery diameter | uncertain | mm | `unknown` | no | U(3.2,6.5) |
| systolic pressure `SystolePressure` | certain | mmHg | `bf_sim/Model_parameters.txt` | yes | kept to value from WP2 (`rr_syst`) |
| diastolic pressure `DiastolePressure` | certain | mmHg | `bf_sim/Model_parameters.txt` | yes | kept to hardcoded default value as in `workflow/patient.py` |
| mean right atrial pressure | certain | mmHg | `unknown` | no | |  
| clot location | certain | categorical | `Clots.txt` | no | kept to hardcoded value `R. MCA` as in `workflow/patient.py` until issue #48 is resolved. Should be considered as discrete distribution |
| CoW vessel diameters | certain | mm | `unknown` | no | |
| CoW vessel lengths | certain | mm | `unknown` | no | |
| brain mesh | certain | mm | `unknown` | no | |

TODO:
- Unclear if yet possible to switch brain meshes.
- Many data on vessel diameters: which to vary for UQ?
- Elasticity appears in `1-D_Anatomy.txt` for each vessel: which to vary for UQ?

In [None]:
# create a `campaign`: effectively a directory containing a SQL database 
# and the directories for the various analyses. 
# presumably 'UQ_' is the prefix of the generated campaign directory inside
# `work_dir` (a random hash is appended to the directory name) -- TODO confirm
campaign = vvuq.Campaign('UQ_', work_dir=work_dir)

In [None]:
# create an `encoder`: encode the parameters towards input files
# NOTE(review): `template_fname` receives the template patient *directory*,
# not a single file -- confirm against `workflow.uq.ISCTEncoder`
encoder = ISCTEncoder(template_fname=template_dir)

In [None]:
# output file holding the per-vessel results; presumably relative to each
# run (patient) directory, like the other file names in this notebook
output = "bf_sim/ResultsPerVessel.csv"

# output variables of interest in `ResultsPerVessel.csv`
cols = ["VolumeFlowrate(mL/s)", "Pressure(Pa)"]

# create a `decoder`: decode the output parameters towards the database
# (`header=0`: presumably the first row holds the column names -- TODO confirm)
decoder = vvuq.decoders.SimpleCSV(target_filename=output, output_columns=cols, header=0)

In [None]:
# create a `collater` to aggregate the output data of all runs
# (`average=False`: presumably keeps the individual values instead of
# averaging them -- TODO confirm against the EasyVVUQ documentation)
collater = vvuq.collate.AggregateSamples(average=False)

In [None]:
# define the parameters known to the app and their properties: type,
# admissible range (`min`/`max`) and `default` value. Declaring a parameter
# here does not vary it; the parameters that are actually sampled are
# selected by the `vary` dictionary in the sampling section.
parameters = {
    "HeartRate": {"type": "float", "min": 0, "max": 200, "default": 68},
    "StrokeVolume": {"type": "float", "min": 0, "max": 250, "default": 104},
    # "Density": {"type": "float", "min": 0.0, "max": 3000, "default": 1019},
    "BLOOD_VISC": {"type": "float", "min": 0.0, "max": 1.0, "default": 0.035},
    # currently left to their default values:
    # "SystolePressure": {"type": "float", "min": 40, "max": 220, "default": 100},
    # "DiastolePressure": {"type": "float", "min": 40, "max": 220, "default": 100},
}

In [None]:
# create an `app` for the campaign, by connecting all components: the
# parameter definitions, the encoder (inputs), the decoder (outputs) and
# the collater (aggregation)
campaign.add_app(
    name="blood-visc",
    params=parameters,
    encoder=encoder,
    decoder=decoder,
    collater=collater,
)

### Sampling definition

In [None]:
# the parameters to vary are provided as dict with their 
# corresponding distributions; every key has to be declared in
# `parameters` as well. Commented entries are currently not varied.
# NOTE(review): the normal distributions are unbounded, whereas
# `parameters` declares `min`/`max` bounds -- confirm how out-of-range
# samples are handled
vary = {
    "HeartRate": cp.Normal(68, 20), 
    "StrokeVolume": cp.Normal(104, 21),
    #"Density": cp.Uniform(1019,1061),
    #"BLOOD_VISC": cp.Normal(0.0629, 0.0181),
}

In [None]:
# available methods
class Method(enum.Enum):
    """Sampling methods supported by this notebook.

    The selected member determines both the sampler and the matching
    analysis that are created further below.
    """
    random = "random"
    PCE = "PCE"
    QMC = "QMC"

In [None]:
# pick any from the Enum 
method = Method.QMC

# create a `sampler` matching the method; the members are referenced
# through the `Method` class (not through the `method` instance) and the
# branches are mutually exclusive
if method is Method.random:
    sampler = vvuq.sampling.RandomSampler(vary=vary)
elif method is Method.PCE:
    sampler = vvuq.sampling.PCESampler(vary=vary, polynomial_order=3)
elif method is Method.QMC:
    sampler = vvuq.sampling.QMCSampler(vary=vary, n_mc_samples=10**2) # this is the default
else:
    # fail here rather than with a `NameError` on `sampler` below
    raise ValueError(f"unsupported sampling method: {method}")

# assign the sampler
campaign.set_sampler(sampler)

The `num_samples` variable seems to act as either a limit or an indication of the desired number of samples to be drawn. For the more advanced methods, PCE and QMC, it seems most logical to set the number of samples sufficiently high, such that PCE/QMC can dictate the required number of samples to draw. Note, if PCE/QMC are restricted to too few samples, the corresponding analysis might not be able to execute.

In [None]:
# draw the samples; `num_samples` is deliberately set high so that the
# sampler itself can dictate how many samples are required
num_samples = 100_000
# the number of times a single sample is replicated
replicas = 1
campaign.draw_samples(num_samples=num_samples, replicas=replicas)

In [None]:
# This logs all the runs in the current `campaign` as
# `<run name>: <parameters>`. It is mostly for simple inspection to see
# if the desired parameters are varied and to list all runs. 
# Each entry is indexed as `run[1]` to reach the dictionary with the
# `run_name` and `params` fields.
for run in campaign.list_runs():
    print(f"{run[1]['run_name']}: {run[1]['params']}")

In [None]:
# create all run directories; copies the template and updates the
# parameters using the `ISCTEncoder`. No simulation is started here, the
# runs are evaluated separately in the next section.
campaign.populate_runs_dir()

### Running `isct` for each proposed sample

By populating the campaign, all required subdirectories are created in the directory of the database. These directories represent patient directories for which various ways are available to evaluate their simulations. We can consider the base directory as a trial directory, and invoke `isct trial run` to evaluate the simulations of all subdirectories. Alternatively, and more involved, we could manually invoke the individual runs by `isct patient run`. Either way, the required steps are evaluated and the output is stored within the individual run directories. Afterwards, the collation step will aggregate these results back into the database. 

To run the jobs in parallel, we can do so locally by exploiting GNU parallel with `n` procs (`-jn`):

`isct trial run {run_dir} --gnu-parallel | parallel -jn`

TODO:
- support running notebook on external workstation: evaluate jobs locally on the remote machine
- support remote execution on workstations: send the jobs towards the remote workstation for execution
- support remote execution on HPC systems: send the jobs through a queueing system to HPC systems 
- investigate efficient collation: archive data sets remotely, transport only essential information for analysis

In [None]:
# The campaign directory (including the hash attached to its name) is
# recovered from the database location: keep the part after the last ":"
# and take the directory that holds the database file. The runs are
# located in its `runs` subdirectory.
run_dir = pathlib.Path(campaign.db_location.split(":")[-1]).parent / "runs"

In [None]:
# Three alternatives to evaluate all runs; only the last one is active.

# 1) run the command in-process through the `isct` entry point:
#trial_cmd(f"trial run {run_dir} -v".split())

# 2) evaluate the command in a subprocess:
#import subprocess
#subprocess.run(f"isct trial run {run_dir} -v".split())

# 3) print the command and run it manually in a terminal; the simulations
# have to be finished before continuing with the collation below
print(f"isct trial run {run_dir} -v")

### Collecting output
This step collects the output parameters of interest from the output files and stores the output in the database.

In [None]:
# aggregate the decoded output of all run directories into the database;
# the simulations of the previous section have to be completed first
campaign.collate()

### Analysing output
- The analysis has to match the sampling method, these are directly related.
- The analysis seems to fail for PCE/QMC when too few samples are considered.

In [None]:
# define analysis in line with sampling method; the members are referenced
# through the `Method` class (not through the `method` instance) and the
# branches are mutually exclusive
if method is Method.random:
    analysis = vvuq.analysis.BasicStats(qoi_cols=cols)
elif method is Method.PCE:
    # PCE and QMC analyses need the sampler that generated the samples
    analysis = vvuq.analysis.PCEAnalysis(sampler=sampler, qoi_cols=cols)
elif method is Method.QMC:
    analysis = vvuq.analysis.QMCAnalysis(sampler=sampler, qoi_cols=cols)
else:
    # fail here rather than with a `NameError` on `analysis` later on
    raise ValueError(f"no analysis available for sampling method: {method}")

In [None]:
# apply analysis to current database and fetch its outcome; the contents
# of `results` depend on the chosen sampling and analysis method
campaign.apply_analysis(analysis)
results = campaign.get_last_analysis()

### Results
The results are reported as a dictionary, where the contents are strongly dependent on the chosen sampling and analysis method. Both PCE and QMC report Sobol indices in addition to basic statistical information. From here, we can either perform the post-processing directly on the obtained dictionary or analyse the data in the database directly. Note, it seems to make sense to exploit the analysis methods already provided in EasyVVUQ as much as possible.

In [None]:
# The dictionary keys depend on the analysis; the bare expression is
# displayed as the output of this notebook cell
results.keys()

In [None]:
# The information obtained from the collation can be accessed explicitly. 
# This returns a pandas DataFrame with all the data fields, which can be
# used for any type of analysis as well.
# NOTE(review): `[4::225]` keeps every 225th row starting at row 4;
# presumably 225 is the number of output rows per run, so that one fixed
# row per run is selected -- confirm and derive the stride instead of
# hard-coding it
hr = campaign.get_collation_result()[4::225]['HeartRate']

### Storing and saving results
The state of the campaign can be stored explicitly and from there reloaded elsewhere. This should provide the necessary functionality to serialise the current state of the campaign, together with the database and all subdirectories, and move this archive between systems. Thus, we can move around the database and the runs of all samples between systems, e.g. between remote and local machines.

The `state.json` simply contains the type of samplers, collation, and aggregation methods as well as the details of the database, i.e. its path, and the working directory that contains all subdirectories of the individual samples. From there, we can initialise a new campaign and continue where we previously left off. Thus, the data analysis could be decoupled completely from the scripts that perform the VVUQ analysis, which also saves recomputation compared to reevaluating the database over and over.

In [None]:
# serialise the campaign state into `state.json` inside the working directory
state = work_dir / "state.json"
campaign.save_state(state)

In [None]:
# initialise a new campaign from the stored state file and display its
# collated results to verify that the reload gives access to the same data
reloaded_campaign = vvuq.Campaign(state_file=state, work_dir=work_dir)
reloaded_campaign.get_collation_result()