# integrate.ai API Sample Notebook to run client on Azure Container Instances

This task group implementation is meant to be a POC for running tasks in ACI. Not for production use.

## Set environment variables (or replace inline) with your IAI credentials
### Generate and manage this token on the Tokens page of the integrate.ai UI

In [None]:
import os

# API token for the integrate.ai client. Indexed with [] like the credentials below,
# so a missing variable raises KeyError here instead of silently yielding None and
# failing later when the client connects.
IAI_TOKEN = os.environ["IAI_TOKEN"]

# Credentials of a previously created service principal with limited access; they
# are handed to the remote processes.
IAI_SP = os.environ["IAI_SP"]
IAI_PW = os.environ["IAI_PW"]
IAI_TENANT = os.environ["IAI_TENANT"]

# Export the storage account name through the environment
# (presumably read by the SDK's Azure storage helpers - TODO confirm).
os.environ["IAI_AZURE_BLOB_STORAGE_ACCOUNT"] = "testronstorageaccount"

## Authenticate to the integrate.ai api client

In [None]:
from integrate_ai_sdk.api import connect

# Create the integrate.ai API client, authenticating with the token loaded into IAI_TOKEN.
client = connect(token=IAI_TOKEN)

## Sample model config and data schema
You can find the model config and data schema in the [integrate.ai end user tutorial](https://integrate-ai.gitbook.io/integrate.ai-user-documentation/tutorials/end-user-tutorials/model-training-with-a-sample-local-dataset)

In [None]:
# Model configuration passed as model_config when the training session is created.
model_config = {
    "experiment_name": "test_synthetic_tabular",
    "experiment_description": "test_synthetic_tabular",
    # Federated strategy with no extra parameters.
    "strategy": {
        "name": "FedAvg",
        "params": {},
    },
    # Network dimensions: 15 inputs, three hidden layers of 6 units, 2 outputs.
    "model": {
        "params": {
            "input_size": 15,
            "hidden_layer_sizes": [6, 6, 6],
            "output_size": 2,
        },
    },
    "balance_train_datasets": False,
    "ml_task": {
        "type": "classification",
        "params": {"loss_weights": None},
    },
    "optimizer": {
        "name": "SGD",
        "params": {
            "learning_rate": 0.2,
            "momentum": 0.0,
        },
    },
    "differential_privacy_params": {
        "epsilon": 4,
        "max_grad_norm": 7,
    },
    # To disable this and save the model from the last round instead, set "metric" to None.
    "save_best_model": {
        "metric": "loss",
        "mode": "min",
    },
    # Fixed seed for reproducibility.
    "seed": 23,
}

# Column layout of the dataset: predictor columns x0 through x14 and the target column y.
data_schema = {
    "predictors": [f"x{index}" for index in range(15)],
    "target": "y",
}

## Import Azure SessionTaskGroup

In [None]:
from integrate_ai_sdk.taskgroup.taskbuilder import azure, local
from integrate_ai_sdk.taskgroup.base import SessionTaskGroup
from concurrent.futures import ProcessPoolExecutor

# Pool handed to the local task builder below.
# It is important to use a ProcessPoolExecutor to avoid deadlocks, and max_workers
# must be at least the number of tasks that run concurrently, for the same reason.
executor = ProcessPoolExecutor(max_workers=10)

# Local task builder bound to the executor and the API client.
# It is not referenced again in the rest of this notebook.
local_tb = local.local_python(executor, client)

# Task builder for Azure Container Instances (ACI); every task in this notebook is
# created through it. The subscription, storage account, resource group and image
# names are specific to the test environment - replace them with your own.
tb = azure.aci(
    subscription_id="21cb130d-da92-4bcc-ae6e-f18889c9a637",
    storage_account="testronstorageaccount",
    resource_group="test-ron-resource-group",
    image_repo="iaitestrepo.azurecr.io",
    server_image="edge/fl-server-internal:2.2.20",  # TODO this should be fetched from gateway for default
    client_image="edge/fl-client-internal:2.0.18-cpu",
    # Limited-access service principal loaded from the IAI_SP / IAI_PW / IAI_TENANT variables.
    remote_service_principal=azure.ServicePrincipalCredential(id=IAI_SP, password=IAI_PW, tenant=IAI_TENANT),
)

In [None]:
# session available in group context after submission
from datetime import datetime


def wait_and_print(task_group_context, poll_interval=5, max_count=60):
    """Poll a started task group, printing every task's state, until it finishes.

    Args:
        task_group_context: Context returned by ``SessionTaskGroup.start()``. It must
            expose ``contexts`` (an iterable of tasks with a ``status()`` method) and
            ``wait(timeout)``; a truthy return value from ``wait`` ends the polling.
        poll_interval: Value passed to ``task_group_context.wait`` on each poll
            (presumably seconds - confirm against the SDK).
        max_count: Number of additional polls allowed after the first one.

    Raises:
        TimeoutError: If ``wait`` has not returned a truthy value after
            ``max_count + 1`` polls.
    """
    for _ in range(max_count + 1):
        print(datetime.now())
        for task in task_group_context.contexts:
            # Query the status once per task so both printed fields come from the
            # same snapshot and the call is not issued twice.
            info = task.status()["info"]
            print(f"{info.name}: {info.instance_view.state}")
        if task_group_context.wait(poll_interval):
            return
    # TimeoutError is still an Exception, so existing ``except Exception`` handlers keep working.
    raise TimeoutError("Waited too long!")

### Set up autoreload in notebook (jupyter specific)

This allows modules that are being developed in the Python virtual environment to be reloaded automatically whenever they change.

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2


### Specifying the path to the datasets (Azure Blob Storage)

In [None]:
# Location of the training and test data in Azure Blob Storage.
data_dir = "az://test-ron-blob/synthetic"
# Alternative: point data_dir at a local directory instead, e.g.
# data_dir = '~/Downloads/synthetic'

# Storage location passed to the fls task as storage_path.
storage_path = "azure://test-ron-blob"

# One training file per silo plus a shared test file, all under data_dir.
train_path1, train_path2, test_path = (
    f"{data_dir}/{parquet_name}"
    for parquet_name in ("train_silo0.parquet", "train_silo1.parquet", "test.parquet")
)

## Create and Run EDA Session

In [None]:
# One entry per dataset. The same names ("dataset_one", "dataset_two") are passed
# as dataset_name to the EDA tasks that are added to the task group.
dataset_config = {"dataset_one": [], "dataset_two": []}

# Create the EDA session and start it right away.
# NOTE(review): startup_mode="external" presumably means the tasks are launched by
# the caller (the task group) rather than by the platform - confirm in the SDK docs.
eda_session = client.create_eda_session(
    name="Testing notebook - EDA",
    description="I am testing EDA session creation through a notebook",
    data_config=dataset_config,
    startup_mode="external",
).start()
# Show the session id as the cell output.
eda_session.id

In [None]:
# Task group for the EDA session: one fls task (presumably the FL server - confirm)
# plus one EDA task per dataset, all built with the ACI task builder.
eda_task_group = (
    SessionTaskGroup(eda_session)
    .add_task(tb.fls(storage_path=storage_path))
    .add_task(tb.eda(dataset_name="dataset_one", dataset_path=train_path1))
    .add_task(tb.eda(dataset_name="dataset_two", dataset_path=train_path2))
)

In [None]:
# Start the EDA task group; the returned context is used below to monitor the tasks and read their logs.
eda_task_group_context = eda_task_group.start()

In [None]:
# Poll until the EDA task group finishes, printing each task's state on every poll.
wait_and_print(eda_task_group_context)

In [None]:
# Print the logs of the first task context in the EDA group.
print(eda_task_group_context.contexts[0].logs())

In [None]:
# Print the logs of the second task context in the EDA group.
print(eda_task_group_context.contexts[1].logs())

In [None]:
# Print the logs of the third task context in the EDA group.
print(eda_task_group_context.contexts[2].logs())

In [None]:
# Fetch the results of the EDA session and display their describe() summary as the cell output.
results = eda_session.results()
results.describe()

## Create a Training Session

The documentation for [creating a session](https://integrate-ai.gitbook.io/integrate.ai-user-documentation/tutorials/end-user-tutorials/model-training-with-a-sample-local-dataset#create-and-start-the-session) gives a bit more context into the parameters that are used during training session creation.<br />
For this session we are going to be using two training clients and two rounds. 

In [None]:
# Create the federated training session and start it right away.
# It uses two training clients and two rounds, with the model_config and data_schema
# defined earlier in this notebook.
# NOTE(review): startup_mode="external" presumably means the tasks are launched by
# the caller (the task group) rather than by the platform - confirm in the SDK docs.
training_session = client.create_fl_session(
    name="Testing notebook",
    description="I am testing session creation through a notebook",
    min_num_clients=2,
    num_rounds=2,
    package_name="iai_ffnet",
    model_config=model_config,
    data_config=data_schema,
    startup_mode="external",
).start()

# Show the session id as the cell output.
training_session.id

### Create task_group with appropriate number of tasks
#### The number of client tasks added should match the `min_num_clients` value specified when creating the session

In [None]:
# Task group for the training session: one fls task (presumably the FL server -
# confirm) plus two hfl client tasks, one per training silo, matching min_num_clients=2.
# Both clients evaluate against the same test file.
task_group = (
    SessionTaskGroup(training_session)
    .add_task(tb.fls(storage_path=storage_path))
    .add_task(tb.hfl(train_path=train_path1, test_path=test_path))
    .add_task(tb.hfl(train_path=train_path2, test_path=test_path))
)

In [None]:
# Start the training task group; the returned context is used below to monitor the tasks and read their logs.
hfl_task_group_context = task_group.start()

### Monitor submitted jobs

In [None]:
# Poll until the training task group finishes, printing each task's state on every poll.
wait_and_print(hfl_task_group_context)

In [None]:
# Print the logs of the first task context in the training group.
print(hfl_task_group_context.contexts[0].logs())

In [None]:
# Print the logs of the second task context in the training group.
print(hfl_task_group_context.contexts[1].logs())

In [None]:
# Print the logs of the third task context in the training group.
print(hfl_task_group_context.contexts[2].logs())

In [None]:
# Display the metrics recorded for the training session as the cell output.
training_session.metrics()

## Trained model parameters are accessible from the completed session

Model parameters can be retrieved using the model's state_dict method. These parameters can then be saved with torch.save().

In [None]:
import torch

# Retrieve the trained model from the completed session in its PyTorch form.
model = training_session.model().as_pytorch()

save_state_dict_folder = "./saved_models"
# PyTorch conventional file type; the file is named after the session id.
file_name = f"{training_session.id}.pt"
os.makedirs(save_state_dict_folder, exist_ok=True)
saved_state_dict_path = os.path.join(save_state_dict_folder, file_name)

# torch.save opens the destination itself when it is given a path, so it is called
# directly. The previous surrounding open(path, "w") created a text-mode handle that
# was never written to while the same file was being written through a second handle.
torch.save(model.state_dict(), saved_state_dict_path)

## Load the saved model

To load a model saved previously, a model object needs to be initialized first. This can be done by directly importing one of the IAI-supported packages (e.g., FFNet) or using the model class defined in a custom package. 

In [None]:
from integrate_ai_sdk.packages.FFNet.nn_model import FFNet

# Re-create the network with the same dimensions as model_config["model"]["params"]
# (input_size=15, hidden_layer_sizes=[6, 6, 6], output_size=2) so the saved
# parameters fit the new instance.
model = FFNet(input_size=15, output_size=2, hidden_layer_sizes=[6, 6, 6])

# use torch.load to unpickle the state_dict
# NOTE: torch.load unpickles the file, so only load files from a trusted source.
target_state_dict = torch.load(saved_state_dict_path)

# Copy the saved parameters into the freshly built model.
model.load_state_dict(target_state_dict)

## Load test data

In [None]:
import pandas as pd
from integrate_ai_sdk.utils.util import get_pandas_read_storage_option

# Same file as test_path defined with the dataset paths earlier in this notebook.
test_data_path = f"{data_dir}/test.parquet"
# Read the parquet file into a DataFrame; the storage options are derived from the
# path by the SDK helper (presumably credentials/account settings for az:// paths - confirm).
test_data = pd.read_parquet(test_data_path, storage_options=get_pandas_read_storage_option(test_data_path))
# Show the first rows as the cell output.
test_data.head()

## Convert test data to tensors

In [None]:
# Target column as a tensor. The column name comes from data_schema (the schema the
# session was created with) instead of being hard-coded a second time.
Y = torch.tensor(test_data[data_schema["target"]].values)

In [None]:
# Predictor columns as a tensor. The column list comes from data_schema (the schema
# the session was created with) instead of repeating the 15 names here, so the two
# cannot drift apart.
X = torch.tensor(test_data[data_schema["predictors"]].values)

## Run model predictions

In [None]:
# Run the forward pass on the test predictors and display the raw model outputs.
model(X)

In [None]:
# Predicted label per row: the index of the largest model output along dim 1.
labels = model(X).max(dim=1).indices
labels