In [1]:
import json
import os

from pprint import pprint

from aiod_rail_sdk import Configuration
from aiod_rail_sdk.clients import RailClient

# Provide the API key through the environment (presumably read by the SDK client - confirm).
# `setdefault` keeps a key that was already exported before the kernel started, so the
# placeholder below never clobbers a real credential. Replace the placeholder locally
# and never commit a real key.
os.environ.setdefault("AIOD_RAIL_API_KEY", "your-api-key")

# Create RAIL client

In [2]:
# Build the SDK configuration for the API host, then create the client from it.
rail_api_host = "http://localhost/api"
config = Configuration(host=rail_api_host)
my_client = RailClient(config)

# Experiment Templates endpoints

## Prepare template specification

We can either load the whole template specification from a JSON file and create the experiment template from it

In [None]:
# Load the complete template specification from a JSON file.
# The encoding is given explicitly so the file is decoded as UTF-8 regardless of the
# platform's default locale encoding.
with open('your-path-to-template-spec-here.json', encoding="utf-8") as f:
    template = json.load(f)

Or we can define our template in code and use paths to the local files created earlier (script and requirements) together with the name of a base Docker image

In [3]:
# Local files that make up the template, plus the base image it is built on.
script_path, requirements_path = (
    "../backend/dev-scripts/experiments/script.py",
    "../backend/dev-scripts/experiments/requirements.txt",
)
base_image = "python:3.9"

In [4]:
# Template metadata defined in code: identity, task, asset cardinalities,
# required/optional environment variables, offered metrics and visibility.
template_config = {
    "name": "MyExperimentTemplate",
    "description": "I created this on 30.5.2024",
    "task": "TEXT_CLASSIFICATION",
    "datasets_schema": {"cardinality": "1-1"},
    "models_schema": {"cardinality": "1-1"},
    "envs_required": [{"name": "SPLIT_NAME", "description": "name of a subset"}],
    "envs_optional": [],
    "available_metrics": ["accuracy"],
    "public": True,
}

## Create experiment template

In [5]:
# Bundle the script path, requirements path, base image and metadata into the
# tuple passed to the client, then print the response with its own key order.
template_spec = (script_path, requirements_path, base_image, template_config)
resp = my_client.experiments_templates.create_experiment_template(template=template_spec)
pprint(resp.to_dict(), sort_dicts=False)

{'name': 'MyExperimentTemplate',
 'description': 'I created this on 30.5.2024',
 'task': <TaskType.TEXT_CLASSIFICATION: 'TEXT_CLASSIFICATION'>,
 'datasets_schema': {'cardinality': <AssetCardinality.ENUM_1_MINUS_1: '1-1'>},
 'models_schema': {'cardinality': <AssetCardinality.ENUM_1_MINUS_1: '1-1'>},
 'envs_required': [{'name': 'SPLIT_NAME', 'description': 'name of a subset'}],
 'envs_optional': [],
 'script': 'import os\n'
           '\n'
           'os.environ["HF_HOME"] = "."\n'
           '\n'
           'import json\n'
           'import logging\n'
           '\n'
           'import numpy as np\n'
           'import sklearn.metrics as m\n'
           'import torch\n'
           'import wandb\n'
           'from datasets import load_dataset\n'
           'from tqdm import tqdm\n'
           'from transformers import AutoModelForSequenceClassification, '
           'AutoTokenizer\n'
           '\n'
           '\n'
           'def get_device():\n'
           '    return "cuda" if torch

## Update experiment template

In [6]:
# Build the updated specification as a NEW dict. Plain assignment would only alias
# `template_config`, so changing the name/description would silently rewrite the
# original config too and make this cell non-idempotent.
# The copy is shallow: nested values are still shared with `template_config`,
# which is fine here because only top-level keys are replaced.
template_config_updated = {
    **template_config,
    "name": 'MyExperimentTemplateUpdated',
    "description": 'I updated this experiment on 30.5.2024',
}
pprint(template_config_updated)

{'available_metrics': ['accuracy'],
 'base_image': 'python:3.9',
 'datasets_schema': {'cardinality': '1-1'},
 'description': 'I updated this experiment on 30.5.2024',
 'envs_optional': [],
 'envs_required': [{'description': 'name of a subset', 'name': 'SPLIT_NAME'}],
 'models_schema': {'cardinality': '1-1'},
 'name': 'MyExperimentTemplateUpdated',
 'pip_requirements': 'transformers==4.30.2\n'
                     'datasets==2.14.6\n'
                     'numpy==1.25.0\n'
                     'scikit-learn==1.2.2\n'
                     'wandb==0.15.4\n'
                     '--extra-index-url https://download.pytorch.org/whl/cpu\n'
                     'torch==2.0.0+cpu\n',
 'public': True,
 'script': 'import os\n'
           '\n'
           'os.environ["HF_HOME"] = "."\n'
           '\n'
           'import json\n'
           'import logging\n'
           '\n'
           'import numpy as np\n'
           'import sklearn.metrics as m\n'
           'import torch\n'
           'import wa

Let's list the experiment templates that are not approved yet and pick the one we just created, so that we can update it with the template config defined above

In [7]:
# List the templates that are not approved yet and remember the one to update.
experiment_templates = my_client.experiments_templates.get(approved=False)
experiment_template_to_update = None

for experiment_template in experiment_templates:
    print(f"id: {experiment_template.id}\nname: {experiment_template.name}")
    print("-" * 100)
    if experiment_template.name == "MyExperimentTemplate":
        # If several templates share the name, the last one listed is kept.
        experiment_template_to_update = experiment_template

# Fail here with a clear message instead of an AttributeError on `None` in the
# update cell (for example when this cell is re-run after the template was renamed).
assert experiment_template_to_update is not None, (
    'No unapproved experiment template named "MyExperimentTemplate" was found'
)

id: 66599e3fd42a40985a8d26a5
name: MyExperimentTemplate
----------------------------------------------------------------------------------------------------


Let's update the selected experiment template. We will print the template id to verify that it is the same template we selected above

In [8]:
# Send the updated specification for the template selected above and print the
# id and name of the returned template.
updated_template_spec = (script_path, requirements_path, base_image, template_config_updated)
updated_experiment_template = my_client.experiments_templates.update(
    experiment_template_to_update.id,
    template=updated_template_spec,
)
print(f'id: {updated_experiment_template.id},\nname: {updated_experiment_template.name}')

id: 66599e3fd42a40985a8d26a5,
name: MyExperimentTemplateUpdated


Let's visualize experiment templates which are approved and built 

In [9]:
# Query with finalized=True and approved=True, then print one summary line
# (followed by a blank line) per returned template.
experiment_templates = my_client.experiments_templates.get(
    finalized=True,
    approved=True,
)
for experiment_template in experiment_templates:
    summary = f"id: {experiment_template.id} | name: {experiment_template.name} | approved: {experiment_template.approved}\n"
    print(summary)

id: 665998b7d42a40985a8d26a4 | name: ExampleTemplate | approved: True



We can also get just their count if needed

In [10]:
# Same filters as the listing above, but only the count is requested; the bare
# name on the last line makes the notebook display it.
approved_template_count = my_client.experiments_templates.count(finalized=True, approved=True)
approved_template_count

1

Let's get the first experiment template by its id

In [11]:
# Fetch the first template of the previous listing by id and show its archived flag.
first_template_id = experiment_templates[0].id
experiment_template = my_client.experiments_templates.get_by_id(first_template_id)
print(f"id: {experiment_template.id} | name: {experiment_template.name} | archived: {experiment_template.archived}\n")

id: 665998b7d42a40985a8d26a4 | name: ExampleTemplate | archived: False



We will archive this experiment template

In [12]:
# Set the archived flag on the template fetched above.
my_client.experiments_templates.archive(
    experiment_template.id,
    archived=True,
)

We can now verify that experiment template has been archived

In [13]:
# Re-fetch the same template so the printed archived flag reflects the server state.
archived_template_id = experiment_templates[0].id
experiment_template = my_client.experiments_templates.get_by_id(archived_template_id)
print(f"id: {experiment_template.id} | name: {experiment_template.name} | archived: {experiment_template.archived}\n")

id: 665998b7d42a40985a8d26a4 | name: ExampleTemplate | archived: True



# Datasets endpoints

We can also view number of datasets we can browse

In [14]:
# Ask the datasets endpoint for its count and display it as the cell result.
dataset_count = my_client.datasets.count()
dataset_count

411987

Let's get two of them

In [15]:
# Fetch a two-item page of datasets and print (name, description) pairs.
example_datasets = my_client.datasets.get(offset=0, limit=2)
pprint([(dataset.name, dataset.description) for dataset in example_datasets])

[('acronym_identification',
  Text(plain='Acronym identification training and development sets for the acronym identification task at SDU@AAAI-21.', html=None)),
 ('ade_corpus_v2',
  Text(plain=' ADE-Corpus-V2  Dataset: Adverse Drug Reaction Data.\n This is a dataset for Classification if a sentence is ADE-related (True) or not (False) and Relation Extraction between Adverse Drug Event and Drug.\n DRUG-AE.rel provides relations between drugs and adverse effects.\n DRUG-DOSE.rel provides relations between drugs and dosages.\n ADE-NEG.txt provides all sentences in the ADE corpus that DO NOT contain any drug-related adverse effects.', html=None))]


# Experiments endpoints

We can either load experiment description from file

In [None]:
# Load the experiment description from a JSON file.
# The encoding is given explicitly so the file is decoded as UTF-8 regardless of the
# platform's default locale encoding.
with open('your-path-to-experiment-config-json', encoding="utf-8") as f:
    experiment = json.load(f)
experiment

Or we can define it ourselves in code as a dictionary. We have to specify the experiment template from which the experiment will be created; let's pick one that is ready to be used

In [16]:
# Refresh the list of templates matching finalized=True and approved=True;
# the experiment defined next picks its template from this list.
experiment_templates = my_client.experiments_templates.get(
    finalized=True,
    approved=True,
)

for experiment_template in experiment_templates:
    summary = f"id: {experiment_template.id} | name: {experiment_template.name} | approved: {experiment_template.approved}\n"
    print(summary)

id: 665998b7d42a40985a8d26a4 | name: ExampleTemplate | approved: True

id: 66599e3fd42a40985a8d26a5 | name: MyExperimentTemplateUpdated | approved: True



In [17]:
# Experiment specification as a plain dict. The template is taken by position
# (second entry) from the listing produced by the previous cell.
experiment = {
    "name": "MyNewExperiment",
    "description": "I created this experiment on 31.5.2024",
    "publication_ids": [],
    "experiment_template_id": experiment_templates[1].id,
    "dataset_ids": ["1"],
    "model_ids": ["2"],
    "env_vars": [{"key": "SPLIT_NAME", "value": "train"}],
    "public": True,
}
experiment

{'name': 'MyNewExperiment',
 'description': 'I created this experiment on 31.5.2024',
 'publication_ids': [],
 'experiment_template_id': '66599e3fd42a40985a8d26a5',
 'dataset_ids': ['1'],
 'model_ids': ['2'],
 'env_vars': [{'key': 'SPLIT_NAME', 'value': 'train'}],
 'public': True}

Create the experiment from the specification defined above

In [18]:
# Submit the specification. Note that `experiment` is rebound from the input
# dict to the value returned by the client, whose dict form is displayed.
experiment = my_client.experiments.create_experiment(
    experiment=experiment,
)
experiment.to_dict()

{'name': 'MyNewExperiment',
 'description': 'I created this experiment on 31.5.2024',
 'experiment_template_id': '66599e3fd42a40985a8d26a5',
 'publication_ids': [],
 'dataset_ids': ['1'],
 'model_ids': ['2'],
 'env_vars': [{'key': 'SPLIT_NAME', 'value': 'train'}],
 'public': True,
 'id': '6659a023d42a40985a8d26a6',
 'created_at': datetime.datetime(2024, 5, 31, 10, 2, 10, 896985, tzinfo=TzInfo(UTC)),
 'updated_at': datetime.datetime(2024, 5, 31, 10, 2, 10, 896980, tzinfo=TzInfo(UTC)),
 'archived': False,
 'mine': True}

Let's display all experiments. We can see our new experiment there

In [20]:
# List the experiments returned without filters, one summary line each.
experiments = my_client.experiments.get()
for e in experiments:
    line = f'id: {e.id} | name: {e.name} | archived: {e.archived}'
    print(line)

id: 6659a023d42a40985a8d26a6 | name: MyNewExperiment | archived: False
id: 6659a09cd42a40985a8d26a7 | name: SimpleExperiment | archived: True


As we can see from output above, one of the experiments is archived, we can use count to display how many of experiments are archived 

In [21]:
# Count only the experiments matching archived=True and display the number.
archived_experiment_count = my_client.experiments.count(archived=True)
archived_experiment_count

1

### Run experiment

We can now run an experiment, which we select by its id or name. Here we look up the experiment named `SimpleExperiment` via the `query` parameter and run it.

In [29]:
# Query experiments with "SimpleExperiment" and take the first result.
matching_experiments = my_client.experiments.get(query="SimpleExperiment")
experiment_for_run = matching_experiments[0]
experiment_for_run.to_dict()

{'name': 'SimpleExperiment',
 'description': 'SimpleExperiment',
 'experiment_template_id': '665998b7d42a40985a8d26a4',
 'publication_ids': [],
 'dataset_ids': ['1'],
 'model_ids': ['2'],
 'env_vars': [],
 'public': False,
 'id': '6659a09cd42a40985a8d26a7',
 'created_at': datetime.datetime(2024, 5, 31, 10, 4, 12, 462000),
 'updated_at': datetime.datetime(2024, 5, 31, 10, 26, 27, 971000),
 'archived': False,
 'mine': True}

In [30]:
# Start a run of the selected experiment and display the returned run as a dict.
run = my_client.experiments.run_experiment(
    id=experiment_for_run.id,
)
run.to_dict()

{'id': '6659a63dedac6aaf345fd85d',
 'created_at': datetime.datetime(2024, 5, 31, 10, 28, 13, 618646, tzinfo=TzInfo(UTC)),
 'updated_at': datetime.datetime(2024, 5, 31, 10, 28, 13, 618654, tzinfo=TzInfo(UTC)),
 'retry_count': 0,
 'state': <RunState.CREATED: 'CREATED'>,
 'metrics': {},
 'archived': False,
 'public': False,
 'mine': True,
 'experiment_id': '6659a09cd42a40985a8d26a7'}

We can display the number of runs of the experiment as well as the runs themselves

In [31]:
# Number of runs recorded for the selected experiment, displayed as the cell result.
experiment_run_count = my_client.experiments.get_experiment_runs_count(id=experiment_for_run.id)
experiment_run_count

2

In [32]:
# Fetch the runs of the selected experiment and render each one as a dict.
experiment_runs = my_client.experiments.get_experiment_runs(
    id=experiment_for_run.id,
)
for exp_run in experiment_runs:
    run_details = exp_run.to_dict()
    display(run_details)

{'id': '6659a5d5edac6aaf345fd85c',
 'created_at': datetime.datetime(2024, 5, 31, 10, 26, 29, 694000),
 'updated_at': datetime.datetime(2024, 5, 31, 10, 27, 4, 5000),
 'retry_count': 0,
 'state': <RunState.FINISHED: 'FINISHED'>,
 'metrics': {},
 'archived': False,
 'public': False,
 'mine': True,
 'experiment_id': '6659a09cd42a40985a8d26a7'}

{'id': '6659a63dedac6aaf345fd85d',
 'created_at': datetime.datetime(2024, 5, 31, 10, 28, 13, 618000),
 'updated_at': datetime.datetime(2024, 5, 31, 10, 28, 19, 44000),
 'retry_count': 0,
 'state': <RunState.IN_PROGRESS: 'IN_PROGRESS'>,
 'metrics': {},
 'archived': False,
 'public': False,
 'mine': True,
 'experiment_id': '6659a09cd42a40985a8d26a7'}

Let's display logs from the run which we executed on the experiment

In [34]:
# Fetch the logs of the first listed run; they are parsed as JSON before printing.
first_run_id = experiment_runs[0].id
logs = my_client.experiments.logs_experiment_run(id=first_run_id)
parsed_logs = json.loads(logs)
pprint(parsed_logs)

{'engine_specific': None,
 'job_logs': {'f11ec9aa-661e-4409-89d6-9ed0005d01f4': {'backend_job_id': 'reana-run-job-7508088f-3536-4f6a-b52d-52678711130b',
                                                       'cmd': 'set -a && '
                                                              'source .env && '
                                                              'set +a && '
                                                              'python '
                                                              'script.py',
                                                       'compute_backend': 'Kubernetes',
                                                       'docker_img': 'docker.io/aridzik/rail-exp-templates:template-665998b7d42a40985a8d26a4',
                                                       'finished_at': '2024-05-31T10:26:57',
                                                       'job_name': 'Execute '
                                                                   '

We can also download files from an experiment run. This call downloads script.py into the directory given by `to_dir` (here `run_out`)

In [35]:
# Download `script.py` from the first listed run, passing `run_out` as the target directory.
my_client.experiments.download_experiment_run(
    id=experiment_runs[0].id,
    filepath="script.py",
    to_dir="run_out",
)

Let's archive our experiment which we ran earlier

In [36]:
# Set the archived flag on the experiment that was run above.
my_client.experiments.archive(
    id=experiment_for_run.id,
    archived=True,
)

As we can see from the output, our experiment SimpleExperiment has been archived

In [37]:
# List only the experiments matching archived=True, one summary line each.
archived_experiments = my_client.experiments.get(archived=True)

for a_e in archived_experiments:
    line = f"id: {a_e.id} | name: {a_e.name} | archived: {a_e.archived}"
    print(line)

id: 6659a09cd42a40985a8d26a7 | name: SimpleExperiment | archived: True


We can also delete the run which we executed earlier but before that, we have to un-archive the experiment to be able to delete it

In [38]:
# Clear the archived flag again on the same experiment.
my_client.experiments.archive(
    id=experiment_for_run.id,
    archived=False,
)

In [39]:
# Delete the first run from the earlier listing.
my_client.experiments.delete_experiment_run(
    id=experiment_runs[0].id,
)

As we can see the run is deleted

In [41]:
# Re-query the run count for the experiment to confirm the deletion.
remaining_run_count = my_client.experiments.get_experiment_runs_count(id=experiment_for_run.id)
remaining_run_count

1