# Running a Federated Cycle with Synergos
This notebook shows how to use Synergos to run a simulated federated cycle on your local machine. The cycle goes through three phases (Connect, Train and Evaluate) and uses the [one hundred plant species leaf dataset](https://archive.ics.uci.edu/ml/datasets/One-hundred+plant+species+leaves+data+set).

### About the dataset:
* 1600 leaf samples: 16 samples for each of 100 plant species (100 classes)
* Features: shape, texture and margin, each represented as a 64-element vector
* This demo will use texture data from the data_Tex_64.txt file to perform a classification task

### Prerequisites:
Before running this notebook, you should have:
* Built the required docker images for TTP and Worker
* Installed Synergos in your (virtual) environment

## 1. Data preprocessing

In [None]:
import json
import os

import pandas as pd
import numpy as np
import sklearn.model_selection

# transform original data from data_Tex_64.txt, into format:
#     /TARGET_DATASET_DIR # contains the datasets in this demo ready to be distributed/mounted to the worker containers
#         /data[n] # for each worker participating
#             /train
#                 xxx.csv
#                 metadata.json
#                 schema.json
#             /evaluate
#                 yyy.csv
#                 metadata.json
#                 schema.json
#             /predict
#                 zzz.csv
#                 metadata.json
#                 schema.json

# Input file, output root and worker count used by the preprocessing cells below
ORIGINAL_DATA_FILE = "dataset/data_Tex_64.txt"   # path to the downloaded texture data file
TARGET_DATASET_DIR = "leaf_textures"  # root folder that receives one data[n] subfolder per worker
NUM_WORKERS = 2  # number of per-worker datasets (data1 ... dataN) to generate

# Create the output root up front; exist_ok=True keeps the cell re-runnable
os.makedirs(TARGET_DATASET_DIR, exist_ok=True)
    
# Transform the csv according to the required format.
# Shape and margin features can be included as well by merging their files
# into the same dataframe, if required.
def transform(filepath):
    """Read the headerless feature file and name its first column 'target'.

    filepath: path to a comma-separated text file without a header row.
    Returns a DataFrame in which column 0 has been renamed to 'target';
    all other columns keep their default integer names.
    """
    raw = pd.read_csv(filepath, header=None)
    return raw.rename(columns={0: 'target'})

# Load the texture data; the following cells preprocess this frame
df = transform(ORIGINAL_DATA_FILE)


In [None]:
# Convert the string labels to consecutive integer codes, as only numeric
# labels are supported. The codes are stored as strings ("0", "1", ...),
# in order of first appearance of each label.
mapping = {label: str(code) for code, label in enumerate(df['target'].unique())}

# Only the target column holds labels, so apply the substitution to that
# column alone instead of scanning (and possibly rewriting) every feature column.
df['target'] = df['target'].map(mapping)

# save the mapping for future reference
with open('target_mapping.json', 'w', encoding='utf-8') as f:
    json.dump(mapping, f, indent=4)

In [None]:
# Show every row that contains at least one missing value
rows_with_na = df[df.isna().any(axis=1)]
print("NAs:", rows_with_na)

# Promote all int64 columns to float64; other columns are left untouched
dfs = df.select_dtypes(include="int64")
df = df.astype(dict.fromkeys(dfs.columns, 'float64'))

In [None]:
# Shuffle and create train/evaluate/predict splits

train_proportion = 0.6
evaluate_proportion = 0.2   # whatever remains after train + evaluate becomes the predict split

df = sklearn.utils.shuffle(df)

# Row positions at which the shuffled frame is cut
train_end = int(train_proportion*len(df))
evaluate_end = int((train_proportion+evaluate_proportion)*len(df))

# Slice by position with .iloc rather than np.split(df, ...): NumPy's split
# routines call DataFrame.swapaxes, which pandas has deprecated. The three
# slices are the same rows np.split returned for these cut points.
train_data = df.iloc[:train_end]
evaluate_data = df.iloc[train_end:evaluate_end]
predict_data = df.iloc[evaluate_end:]

# Lookup used when writing each worker's folders
data_dict = {
    "train": train_data,
    "evaluate": evaluate_data,
    "predict": predict_data
}

## 2. Create folder structure and save data and metadata files

In [None]:
# Contents of the schema.json and metadata.json files.
# The schema maps each column name (as a string) to a type name: the label
# column is declared 'category', every other column reports its pandas dtype.
schema = {
    str(name): 'category' if name == "target" else str(dtype)
    for name, dtype in df.dtypes.items()
}

# The same metadata is written next to every dataset
metadata = {"datatype": "tabular", "operations": {}}

In [None]:
# For train, evaluate and predict data, create worker splits
# and write to each worker's directories

# Split row *positions* rather than the DataFrames themselves: np.array_split
# on a DataFrame goes through the deprecated DataFrame.swapaxes, whereas
# splitting an index array yields the same near-equal, order-preserving chunks.
# The chunks are computed once per subcategory instead of once per worker.
row_chunks = {
    subcategory: np.array_split(np.arange(len(frame)), NUM_WORKERS)
    for subcategory, frame in data_dict.items()
}

for i in range(NUM_WORKERS):

    # Worker folders are numbered from 1: data1, data2, ...
    worker_data_dir = os.path.join(TARGET_DATASET_DIR, "data" + str(i+1))
    for subcategory in ["train", "evaluate", "predict"]:
        worker_subdir = os.path.join(worker_data_dir, subcategory)
        os.makedirs(worker_subdir, exist_ok=True)

        # Select this worker's rows of the current subcategory
        current_data = data_dict[subcategory].iloc[row_chunks[subcategory][i]]
        # For example: data1/predict/predict.csv
        current_data.to_csv(os.path.join(worker_subdir, subcategory + '.csv'), index=False)

        # Every dataset folder carries its own copy of the schema and metadata
        with open(os.path.join(worker_subdir, 'schema.json'), 'w', encoding='utf-8') as f:
            json.dump(schema, f, indent=4)
        with open(os.path.join(worker_subdir, 'metadata.json'), 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=4)

## 3. Set up TTP and worker nodes

#### Running on local machine with local participants:

Execute these commands from within your working project directory, in separate terminals

````
docker run -v "$(pwd)"/leaf_textures/data1:/worker/data -v "$(pwd)"/demo_outputs/worker1:/worker/outputs --name worker_1 worker:pysyft_demo

docker run -v "$(pwd)"/leaf_textures/data2:/worker/data -v "$(pwd)"/demo_outputs/worker2:/worker/outputs --name worker_2 worker:pysyft_demo

docker run -p 5000:5000 -p 5678:5678 -p 8020:8020 -p 8080:8080 -v "$(pwd)"/demo_outputs:/ttp/mlflow -v "$(pwd)"/leaf_textures/ttp_data:/ttp/data -v "$(pwd)"/demo_outputs/ttp:/ttp/outputs --name ttp --link worker_1 --link worker_2 ttp:pysyft_demo
````


## 4. Start project

In [None]:
from synergos import Driver

host = "0.0.0.0"    # IP of the TTP service
port = 5000  # port of the TTP service; the TTP docker run command above publishes 5000:5000

# Client object through which every call below is issued
driver = Driver(host=host, port=port)

############################################################
# Phase 1: CONNECT - Submitting TTP & Participant metadata #
############################################################

# 1A. The TTP controller registers a project

project_settings = {
    "project_id": "test_project",
    "action": "classify",     # either regress or classify
    "incentives": {
        'tier_1': [],
        'tier_2': [],
        'tier_3': []
    },
}
driver.projects.create(**project_settings)


# 1B. The TTP controller registers an experiment under the project

# A simple two-layer network: Linear 64 -> 32 with sigmoid, then
# Linear 32 -> 16 with softmax. The 64 inputs match the 64-element
# texture vector of each sample.
# NOTE(review): out_features=16 should equal the number of distinct labels
# saved to target_mapping.json — confirm before training.
input_layer = {
    "activation": "sigmoid",
    "is_input": True,
    "l_type": "Linear",
    "structure": {
        "bias": True,
        "in_features": 64,
        "out_features": 32
    }
}
output_layer = {
    "activation": "softmax",
    "is_input": False,
    "l_type": "Linear",
    "structure": {
        "bias": True,
        "in_features": 32,
        "out_features": 16
    }
}

driver.experiments.create(
    project_id="test_project",
    expt_id="test_experiment",
    model=[input_layer, output_layer]
)


# 1C. The TTP controller registers a run (training settings) for the experiment

# NOTE(review): if "NLLLoss" maps to torch.nn.NLLLoss, that loss expects
# log-probabilities, while the experiment's last activation is "softmax" — confirm.
run_settings = {
    "project_id": "test_project",
    "expt_id": "test_experiment",
    "run_id": "test_run",
    "rounds": 2,
    "epochs": 1,
    "base_lr": 0.0005,
    "max_lr": 0.005,
    "criterion": "NLLLoss",
}
driver.runs.create(**run_settings)


# 1D. Participants register their servers' configurations

# (participant ID, IP of that participant's worker container)
worker_addresses = [
    ("test_participant_1", '172.17.0.2'),
    ("test_participant_2", '172.17.0.3'),
]

# Both participants share the same ports and logging options
for participant_id, worker_ip in worker_addresses:
    driver.participants.create(
        participant_id=participant_id,
        host=worker_ip,
        port=8020,
        f_port=5000,
        log_msgs=True,
        verbose=True
    )


# 1E. Participants register their role in a specific project
# Roles:
#   host  - contributes data
#   guest - has the validation set, may also contribute data

project_roles = [
    ("test_participant_1", "guest"),
    ("test_participant_2", "host"),
]

for participant_id, role in project_roles:
    driver.registrations.create(
        project_id="test_project",
        participant_id=participant_id,
        role=role
    )


# 1F. Participants register their data tags for a specific project

# Both participants use the same tags: "train" and "evaluate" are the
# subfolder names created for each worker during preprocessing.
for participant_id in ("test_participant_1", "test_participant_2"):
    driver.tags.create(
        project_id="test_project",
        participant_id=participant_id,
        train=[["train"]],
        evaluate=[["evaluate"]]
    )




In [None]:
#######################################################
# Phase 2: TRAIN - Alignment, Training & Optimisation #
#######################################################

# 2A. Perform multiple feature alignment, which dynamically configures the
#     datasets and models for cross-grid compatibility

driver.alignments.create(project_id="test_project")

In [None]:
# 2B. Start training across the federated grid

# Identifies the project / experiment / run registered in Phase 1
run_keys = {
    "project_id": "test_project",
    "expt_id": "test_experiment",
    "run_id": "test_run",
}
model_resp = driver.models.create(**run_keys)

In [None]:
################################################
# Phase 3: EVALUATE - Validation & Predictions #
################################################

# 3A. Run validation(s) for the selected project / experiment / run combination

run_keys = {
    "project_id": "test_project",
    "expt_id": "test_experiment",
    "run_id": "test_run",
}
driver.validations.create(**run_keys)

In [None]:

# 3B. Run prediction(s) for the first participant, using its "predict" data

prediction_request = {
    "tags": {"test_project": [["predict"]]},
    "participant_id": "test_participant_1",
    "project_id": "test_project",
    "expt_id": "test_experiment",
    "run_id": "test_run",
}
driver.predictions.create(**prediction_request)

In [None]:
# 3B (continued). Run prediction(s) for the second participant, using its "predict" data

prediction_request = {
    "tags": {"test_project": [["predict"]]},
    "participant_id": "test_participant_2",
    "project_id": "test_project",
    "expt_id": "test_experiment",
    "run_id": "test_run",
}
driver.predictions.create(**prediction_request)