In [1]:
import subprocess
import toml

In [2]:
def run_training():
    """Run one training job by launching the hypha processes and waiting for the scheduler.

    Starts, in order, the gateway, three workers and the scheduler as child
    processes (stdout discarded), then blocks until the scheduler exits.
    Afterwards every child that is still running is terminated and reaped in
    reverse start order.

    The clean-up runs in a ``finally`` block, so the gateway and workers are
    also shut down when the wait is interrupted (e.g. a kernel interrupt) or
    when one of the later processes fails to start.

    Returns:
        None
    """
    support_cmds = [
        ['./hypha-gateway', 'run', '-c', 'gateway.toml'],
        ['./hypha-worker', 'run', '-c', 'worker-a-gcn.toml'],
        ['./hypha-worker', 'run', '-c', 'worker-b.toml'],
        # NOTE(review): this worker was held in a variable named `ps` —
        # presumably the parameter server; confirm against worker-c-gcn.toml.
        ['./hypha-worker', 'run', '-c', 'worker-c-gcn.toml'],
    ]
    scheduler_cmd = ['./hypha-scheduler', 'run', '-c', 'testing_scheduler_gcn.toml']

    started = []
    try:
        for cmd in support_cmds:
            started.append(subprocess.Popen(cmd, stdout=subprocess.DEVNULL))
        scheduler = subprocess.Popen(scheduler_cmd, stdout=subprocess.DEVNULL)
        started.append(scheduler)

        # The scheduler drives the job; its exit marks the end of the run.
        scheduler.wait()
    finally:
        # Signal everything first, then reap, so the children shut down in
        # parallel rather than one after another.
        for proc in reversed(started):
            if proc.poll() is None:
                proc.terminate()
        for proc in reversed(started):
            proc.wait()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def generate_plot(file, peer_ids, agg_window=20):
    """Plot each peer's rolling-mean loss from a metrics CSV and report final accuracies.

    Args:
        file: Path (str) of the metrics CSV. The code reads the columns
            ``peer_id``, ``loss``, ``valid_acc`` and ``test_acc``. When the
            name has the form ``<a>_<b>_<lr>_<momentum>_<samples>.csv`` the
            three hyperparameter fields are echoed below the plot.
        peer_ids: Iterable of values matched against the ``peer_id`` column;
            one curve is drawn per entry.
        agg_window: Window size of the rolling mean applied to the loss.

    Returns:
        Tuple ``(mean_test_acc, mean_valid_acc)``: the last ``test_acc`` /
        ``valid_acc`` row of every peer, averaged over the peers.

    Raises:
        IndexError: If a peer has no rows in the CSV.
    """
    metrics = pd.read_csv(file, delimiter=",")
    final_valid = []
    final_test = []

    fig, ax = plt.subplots()
    for peer in peer_ids:
        peer_rows = metrics[metrics.peer_id == peer].reset_index(drop=True)
        final_valid.append(peer_rows["valid_acc"].iloc[-1])
        final_test.append(peer_rows["test_acc"].iloc[-1])

        # The first row of every peer is left out of the curve.
        # NOTE(review): presumably an initial/warm-up record — confirm.
        ax.plot(peer_rows["loss"].iloc[1:].rolling(agg_window).mean(), label=peer)

    ax.set_title(file)
    ax.set_xlabel("metrics row (per peer)")
    ax.set_ylabel(f"loss (rolling mean, window={agg_window})")
    ax.legend()
    plt.show()
    # Release the figure; this function is called once per grid point.
    plt.close(fig)

    # Split the name once, outside the f-string: reusing the same quote type
    # inside an f-string only parses on Python >= 3.12. The extension is
    # dropped so the last field prints as "16" instead of "16.csv".
    suffix = ".csv"
    stem = file[:-len(suffix)] if file.endswith(suffix) else file
    parts = stem.split("_")
    if len(parts) >= 5:
        print(f"LR: {parts[2]} Momentum: {parts[3]} Update samples {parts[4]}")
    else:
        # Name does not follow the expected pattern; still identify the run.
        print(f"Metrics file: {file}")

    mean_test = np.mean(final_test)
    mean_valid = np.mean(final_valid)
    print(f"Test Acc: {mean_test}, Validation Acc: {mean_valid}")
    return mean_test, mean_valid

# Values matched against the `peer_id` column of each metrics CSV by
# generate_plot; one loss curve and one pair of final accuracies per entry.
# NOTE(review): these look like libp2p peer IDs of the training workers —
# confirm they match the worker configs used by run_training.
peer_ids = [
    "12D3KooWQ6WxZdg2BNFMgy8imjZcXtbA4P4F43SzGNtKUUd4ivgQ",
    "12D3KooWRrp64o43d3CQovjUT52ojtNUTtkUpJ4wSd9CfyZjkRqp"
]

In [31]:
# Hyperparameter grid: every combination of the three lists below is one run.
# Values written to the scheduler config's outer_optimizer "learning-rate".
outer_learning_rate = [1.0, 0.7, 0.4, 0.1]
# Values written to the scheduler config's outer_optimizer "momentum".
outer_momentum = [0.9, 0.6, 0.3, 0.0]
# Values written to rounds "avg_samples_between_updates".
avg_samples_between_updates = [2, 4, 8, 16]

In [None]:
scheduler_toml = "testing_scheduler_gcn.toml"

# Grid search: for every (learning rate, momentum, samples-between-updates)
# combination, rewrite the scheduler config in place, run one training job
# and plot the metrics it produced.
for lr in outer_learning_rate:
    for m in outer_momentum:
        for s in avg_samples_between_updates:
            metrics_file = f"metrics_gcn_{lr}_{m}_{s}.csv"

            content = toml.load(scheduler_toml)
            job = content["scheduler"]["job"]
            job["metrics"][0]["path"] = metrics_file
            job["rounds"]["avg_samples_between_updates"] = s
            # Keeps update_rounds * avg_samples_between_updates at roughly 500.
            # NOTE(review): presumably a fixed sample budget per run — confirm.
            job["rounds"]["update_rounds"] = 500 // s
            job["outer_optimizer"]["learning-rate"] = lr
            job["outer_optimizer"]["momentum"] = m

            # Serialize before opening the file: opening with "w" truncates it,
            # so a failure during serialization would otherwise wipe the config.
            serialized = toml.dumps(content)
            with open(scheduler_toml, "w") as f:
                f.write(serialized)

            run_training()
            generate_plot(metrics_file, peer_ids, 1)

In [None]:
# Re-read the metrics file of every grid point and record its accuracies as
# values[(lr, momentum, samples)] = (test accuracy, validation accuracy).
values = {}
for lr in outer_learning_rate:
    for m in outer_momentum:
        # The lr=1.0 / momentum=0.9 combinations are left out of the comparison.
        if (lr, m) == (1.0, 0.9):
            continue
        for s in avg_samples_between_updates:
            metrics_file = f"metrics_gcn_{lr}_{m}_{s}.csv"
            test_acc, valid_acc = generate_plot(metrics_file, peer_ids, 1)
            values[(lr, m, s)] = (test_acc, valid_acc)

In [59]:
# Seed the running "best" trackers with the first entry of `values`.
# Despite the `min_` prefix, the scan that follows keeps the LARGEST
# accuracies (it replaces a tracker whenever a higher value is seen).
first_key, first_scores = next(iter(values.items()))
min_key_test = first_key
min_val_test = first_scores[0]
min_key_valid = first_key
min_val_valid = first_scores[1]

In [60]:
# Scan all grid points and keep the ones with the highest test accuracy and
# the highest validation accuracy (tracked independently). The `min_*` names
# are historical: a tracker is replaced only by a strictly greater value.
for params, scores in values.items():
    test_score = scores[0]
    valid_score = scores[1]
    if min_val_test < test_score:
        min_key_test, min_val_test = params, test_score
    if min_val_valid < valid_score:
        min_key_valid, min_val_valid = params, valid_score

In [61]:
# Report the (lr, momentum, samples) key and accuracy of the best grid point,
# first by test accuracy, then by validation accuracy.
for best_key, best_val in (
    (min_key_test, min_val_test),
    (min_key_valid, min_val_valid),
):
    print(best_key, best_val)

(1.0, 0.3, 16) 0.70652015
(0.7, 0.6, 8) 0.71547703
