# integrate.ai API Sample Notebook to run server on AWS Fargate

## Set environment variables (or replace inline) with your IAI credentials
### Generate and manage this token in the UI, on the Tokens page
### Generate AWS session credentials or use the default profile

In [None]:
import os

# Read the integrate.ai API token from the environment; the value is None when
# the IAI_TOKEN variable is not set.
IAI_TOKEN = os.getenv("IAI_TOKEN")

## Authenticate to the integrate.ai api client

In [None]:
from integrate_ai_sdk.api import connect

# Create the API client, authenticating with the token held in IAI_TOKEN.
client = connect(token=IAI_TOKEN)

## Sample model config and data schema
You can find the model config and data schema in the [integrate.ai end user tutorial](https://integrate-ai.gitbook.io/integrate.ai-user-documentation/tutorials/end-user-tutorials/model-training-with-a-sample-local-dataset)

In [None]:
# Experiment configuration handed to the session at creation time.
model_config = {
    "experiment_name": "test_notebook_glm_external_fargate",
    "experiment_description": "test_notebook_glm_external_fargate",
    # Aggregation strategy, with no extra parameters.
    "strategy": {
        "name": "FedAvg",
        "params": {},
    },
    # input_size equals the number of predictor columns in the data schema.
    "model": {
        "params": {
            "input_size": 15,
            "output_activation": "sigmoid",
        },
    },
    "balance_train_datasets": False,
    "ml_task": {
        "type": "logistic",
        "params": {
            "loss_weights": None,
        },
    },
    "optimizer": {
        "name": "SGD",
        "params": {
            "learning_rate": 0.2,
            "momentum": 0.0,
        },
    },
    "differential_privacy_params": {
        "epsilon": 4,
        "max_grad_norm": 7,
    },
    # Set this entry to None to disable best-model tracking and keep the model
    # from the last round instead.
    "save_best_model": {
        "metric": "loss",
        "mode": "min",
    },
    # Fixed seed for reproducibility.
    "seed": 23,
}

# Column layout of the datasets: fifteen predictor columns named x0..x14 and a
# single target column named y.
data_schema = {
    "predictors": [f"x{index}" for index in range(15)],
    "target": "y",
}

## Create a Training Session

The documentation for [creating a session](https://integrate-ai.gitbook.io/integrate.ai-user-documentation/tutorials/end-user-tutorials/model-training-with-a-sample-local-dataset#create-and-start-the-session) gives more context on the parameters used when creating a training session.<br />
For this session we are going to use two training clients and two rounds.

In [None]:
# Describe the federated learning session: at least two clients, two rounds,
# the iai_glm package, and the model config / data schema defined earlier.
# NOTE(review): startup_mode="external" presumably means the server is launched
# separately (here, on Fargate) -- confirm against the SDK documentation.
new_session = client.create_fl_session(
    name="Testing notebook",
    description="I am testing session creation through a notebook",
    min_num_clients=2,
    num_rounds=2,
    package_name="iai_glm",
    model_config=model_config,
    data_config=data_schema,
    startup_mode="external",
)

# Start the session and keep the object returned by start() for later cells.
training_session = new_session.start()

training_session.id


## Run Training Server on AWS Fargate

### Specifying optional AWS Credentials, Cluster, Task Definition Name and Network Parameters

In [None]:
# AWS credentials are taken from the AWS_* environment variables; an unset
# variable yields None for its entry. Set them if you are generating temporary
# credentials, else use the default profile credentials.
aws_creds = {
    cred_key: os.environ.get(env_name)
    for cred_key, env_name in (
        ("ACCESS_KEY", "AWS_ACCESS_KEY_ID"),
        ("SECRET_KEY", "AWS_SECRET_ACCESS_KEY"),
        ("SESSION_TOKEN", "AWS_SESSION_TOKEN"),
        ("REGION", "AWS_REGION"),
    )
}

In [None]:
# Replace every "{...}" placeholder below with the value for your AWS account.

# Fargate: cluster and task definition given to the Fargate task builder, plus
# the network parameters passed when the server task is created.
fargate_cluster = "{fargate cluster name}"
task_def = "{aws fargate task definition}"
subnet_id = "{aws subnet id}"
security_group = "{aws security group}"

# Batch: job queue and (CPU) job definition given to the Batch task builder.
job_queue = "{job queue}"
job_def = "{job definition}"

# Storage path passed to the server task as storage_path.
model_storage = "{model storage path}"

# Datasets: one training path per Batch task, and a test path shared by both.
train_path1 = "{train path 1}"
train_path2 = "{train path 2}"
test_path = "{test path}"

### Create Fargate and Batch task builder objects

In [None]:
from integrate_ai_sdk.taskgroup.taskbuilder import aws as taskbuilder_aws

In [None]:
# Fargate task builder, configured with the AWS credentials, cluster name and
# task definition set in the earlier cells.
tb = taskbuilder_aws.fargate(aws_credentials=aws_creds, cluster=fargate_cluster, task_definition=task_def)

# Batch task builder, configured with the same credentials plus the job queue
# and CPU job definition.
tb_batch = taskbuilder_aws.batch(aws_credentials=aws_creds, job_queue=job_queue, cpu_job_definition=job_def)

### Create a task group to run the server and the batch tasks, then start it
If `ssm_token_key` is not specified, the key name is generated by `client`. You must pass `client` so that the system can obtain a session token and store it in SSM. If `client` is not passed, the default token key defined in the task definition is used to fetch a JWT token.

In [None]:
from integrate_ai_sdk.taskgroup.base import SessionTaskGroup

# Build the task group for the training session one task at a time: first the
# task from the Fargate builder (tb.fls), then one Batch task (tb_batch.hfl)
# per training dataset, in the same order as the datasets are listed.
task_group = SessionTaskGroup(training_session)
task_group = task_group.add_task(
    tb.fls(subnet_id, security_group, storage_path=model_storage, client=client)
)
for client_train_path in (train_path1, train_path2):
    task_group = task_group.add_task(
        tb_batch.hfl(
            train_path=client_train_path,
            test_path=test_path,
            vcpus="2",
            memory="16384",
            client=client,
        )
    )

# Start the group; the object returned by start() is used for monitoring.
task_group_context = task_group.start()

### Monitor submitted jobs 

In [None]:
# After submission the session is available on the task group context;
# print its ID.
print(task_group_context.session.id)

In [None]:
# Query the status of the submitted tasks and print each entry on its own line.
task_group_status = task_group_context.status()
for task_status in task_group_status:
    print(task_status)

In [None]:
# Use this to monitor whether the session has completed successfully or failed.
# You can modify the time to wait to suit your specific task.
# NOTE(review): the unit and exact meaning of the argument (30) are not visible
# in this notebook -- confirm against the SDK documentation.
task_group_context.wait(30)

### Session Complete!
Now you can view the training metrics and start making predictions

In [None]:
# Show the session's training metrics as a dictionary.
training_session.metrics().as_dict()

In [None]:
# Plot the session's training metrics and keep the returned object in fig.
fig = training_session.metrics().plot()

### Trained model parameters are accessible from the completed session
Model parameters can be retrieved using the model's state_dict method. These parameters can then be saved with torch.save().

In [None]:
import torch

# Retrieve the trained model from the completed session as a PyTorch model.
model = training_session.model().as_pytorch()

save_state_dict_folder = "./saved_models"
# PyTorch conventional file type
file_name = f"{training_session.id}.pt"
os.makedirs(save_state_dict_folder, exist_ok=True)
saved_state_dict_path = os.path.join(save_state_dict_folder, file_name)

# Pass the path straight to torch.save: it opens the destination itself, so
# wrapping the call in a separate text-mode open() of the same file is
# unnecessary and would leave an unused handle open on it.
torch.save(model.state_dict(), saved_state_dict_path)

### Load the saved model
To load a previously saved model, a model object needs to be initialized first. This can be done by directly importing one of the IAI-supported packages (e.g., FFNet, or GLM as in the cell below) or by using the model class defined in a custom package.

In [None]:
from integrate_ai_sdk.packages.GLM.model import GLM

# Instantiate a fresh GLM with the same input_size and output_activation that
# appear in model_config["model"]["params"].
model = GLM(input_size=15, output_activation="sigmoid")

# use torch.load to unpickle the state_dict
# NOTE: because loading unpickles the file, only load state dicts from a
# source you trust.
target_state_dict = torch.load(saved_state_dict_path)

# Copy the saved parameters into the freshly created model.
model.load_state_dict(target_state_dict)

### Load test data

In [None]:
import pandas as pd

# Read the test dataset (Parquet) from test_path and preview the first rows.
test_data = pd.read_parquet(test_path)
test_data.head()

### Convert test data to tensors

In [None]:
# Target column "y" of the test data as a tensor.
Y = torch.tensor(test_data["y"].values)

In [None]:
# Predictor columns x0..x14, selected in order and converted to a tensor.
# NOTE(review): the tensor inherits the dtype of the DataFrame columns;
# confirm that it matches what the model expects (cast if necessary).
predictor_columns = [f"x{index}" for index in range(15)]
X = torch.tensor(test_data[predictor_columns].values)

### Run model predictions

In [None]:
# Run the loaded model on the test features.
preds = model(X)

In [None]:
# Predicted label per row = index of the largest value along dimension 1.
# NOTE(review): if the model emits a single sigmoid output per row, that index
# is always 0 and thresholding the probability may be what is intended --
# confirm the shape of preds.
labels = preds.max(dim=1).indices
labels