In [3]:
import wandb
from collections import defaultdict
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from IPython import embed

In [4]:
# W&B public API client; used below to query runs.
api = wandb.Api()

# Get best runs from sweeps

In [6]:
# Sweeps included in this analysis are identified by these W&B tags.
IN_TAGS = ["feats8-noise1", "feats8-noise2", "feats8-noise3"]

# Finished runs with pairwise_mode disabled, ordered by dev B3 F1
# (the leading '-' presumably requests descending order -- TODO confirm against the W&B API).
runs = api.runs(
    "dhdhagar/prob-ent-resolution",
    filters={
        "tags": {"$in": IN_TAGS},
        "state": {"$eq": "finished"},
        "config.pairwise_mode": {"$eq": False},
    },
    order="-summary_metrics.best_dev_b3_f1",
)
print(f"Non-MLP runs: {len(runs)}")

# Finished runs with pairwise_mode enabled (the MLP baseline), ordered by dev AUROC instead.
runs_mlp = api.runs(
    "dhdhagar/prob-ent-resolution",
    filters={
        "tags": {"$in": IN_TAGS},
        "state": {"$eq": "finished"},
        "config.pairwise_mode": {"$eq": True},
    },
    order="-summary_metrics.best_dev_auroc",
)
print(f"MLP runs: {len(runs_mlp)}")
print(f"Total runs: {len(runs) + len(runs_mlp)}")

# Known method names; run_key() picks the one that appears among a run's tags.
methods = {'e2e', 'e2e-nosdp', 'frac', 'frac-nosdp', 'mlp'}

def make_key(noise, model, dataset, seed):
    """Build the underscore-separated identifier "noise<noise>_<model>_<dataset>_<seed>"."""
    return "noise{}_{}_{}_{}".format(noise, model, dataset, seed)
def run_key(run):
    """Return the make_key() identifier for a W&B run.

    Dataset, dataset seed and noise level are read from the run's config;
    the method is whichever entry of `methods` appears among the run's tags.
    """
    config = run._attrs['config']
    # Lookups are kept in this order so a malformed run fails on the same field as before.
    dataset = config['dataset']
    seed = config['dataset_random_seed']
    matching_methods = set(run.tags) & methods
    method = matching_methods.pop()
    return make_key(config['noise_std'], method, dataset, seed)
def details_from_key(key):
    """Split a key produced by make_key() back into its components.

    Args:
        key: string of the form "noise<noise>_<model>_<dataset>_<seed>".

    Returns:
        Tuple of strings (noise, model, dataset, seed), where `noise` is the
        text following the "noise" prefix.

    Raises:
        ValueError: if the key has fewer than four underscore-separated parts.
    """
    # The first two fields never contain '_' and the seed is always last, so
    # whatever remains in the middle is the dataset (which may contain '_').
    noise_tag, model, rest = key.split('_', 2)
    dataset, seed = rest.rsplit('_', 1)
    # Strip the literal prefix instead of keeping only the last character,
    # so multi-character noise levels (e.g. "noise10") survive the round trip.
    noise = noise_tag[len("noise"):] if noise_tag.startswith("noise") else noise_tag
    return noise, model, dataset, seed

# For each (noise, method, dataset, seed) key, keep the first run encountered
# (the queries above are already ordered by their dev metric) and count how many
# finished runs share that key.
best_runs = {}
sweep_ids = {}
finished_runs = defaultdict(int)
for run_collection in (runs, runs_mlp):
    for run in tqdm(run_collection):
        # Ignore runs whose summary has fewer than 10 entries.
        if len(dict(run.summary)) < 10:
            continue
        key = run_key(run)
        finished_runs[key] += 1
        if key not in best_runs:
            best_runs[key] = run
            sweep_ids[key] = run.sweepName

print(f"Best runs found for {len(best_runs)} sweeps")
print(f"Total finished runs across sweeps = {sum(finished_runs.values())}")

Non-MLP runs: 131
MLP runs: 46
Total runs: 177


  0%|          | 0/131 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Best runs found for 111 sweeps
Total finished runs across sweeps = 186


# Manage sweeps

In [15]:
# Prefix shared by sweep names; the launch helpers below append the noise level to it.
_SWEEP_PREFIX="feats8-noise"
# Feature-selection flags passed to every launch command. The backslash continuations sit
# inside the string literal, so each following line's leading spaces are part of the value.
_SWEEP_FEATS8_FLAGS = "--keep_feat_idxs=0 --keep_feat_idxs=1 --keep_feat_idxs=2 \
    --keep_feat_idxs=3 --keep_feat_idxs=4 --keep_feat_idxs=5 \
    --keep_feat_idxs=14 --keep_feat_idxs=15"

# Add agents to sweeps with fewer completed runs

def add_agents(max_run_count, launched, finished_runs, sweep_ids, n_agents=1, partition="cpu"):
    """Launch extra agents for every sweep that has too few finished runs.

    Args:
        max_run_count: sweeps with fewer finished runs than this get more agents.
        launched: set of keys already handled; such keys are skipped, and every
            key launched here is added to it.
        finished_runs: mapping from key to number of finished runs.
        sweep_ids: mapping from key to the sweep identifier.
        n_agents: forwarded to add_agent.sh.
        partition: forwarded to add_agent.sh.
    """
    # Select the keys up front and reuse add_agents_by_list for the actual launch,
    # rather than duplicating its command-building logic here.
    keys_to_add = [
        key for key, n_finished in finished_runs.items()
        if key not in launched and n_finished < max_run_count
    ]
    add_agents_by_list(keys_to_add=keys_to_add, launched=launched, sweep_ids=sweep_ids,
                       n_agents=n_agents, partition=partition)

def add_agents_by_list(keys_to_add, launched, sweep_ids, n_agents=1, partition="cpu"):
    for k in keys_to_add:
        noise, model, dataset, seed = details_from_key(k)
        sweep_id = sweep_ids[k]
        SWEEP_FLAGS = f"{_SWEEP_FEATS8_FLAGS} --noise_std={noise}"
        SWEEP_PREFIX = f"{_SWEEP_PREFIX}{noise}"
        print(f'./add_agent.sh {dataset} {seed} {model} dhdhagar/prob-ent-resolution/{sweep_id} {n_agents} {partition} "{SWEEP_FLAGS}" {SWEEP_PREFIX}')
        !./add_agent.sh $dataset $seed $model dhdhagar/prob-ent-resolution/$sweep_id $n_agents $partition "$SWEEP_FLAGS" $SWEEP_PREFIX
        launched.add(k)

def add_sweeps_by_list(keys_to_add, launched, partition="cpu"):
    for k in keys_to_add:
        noise, model, dataset, seed = details_from_key(k)
        SWEEP_FLAGS = f"{_SWEEP_FEATS8_FLAGS} --noise_std={noise}"
        SWEEP_PREFIX = f"{_SWEEP_PREFIX}{noise}"
        print(f'./run_sweep.sh {dataset} {seed} {seed} {model} {partition} "{SWEEP_FLAGS}" {SWEEP_PREFIX}')
        !./run_sweep.sh $dataset $seed $seed $model $partition "$SWEEP_FLAGS" $SWEEP_PREFIX
        launched.add(k)

def resubmit_pending_agents(launched, sweep_ids, n_agents=1, partition="cpu"):
    pending = !sacct --format="JobID,JobName%50,Partition,State" | grep PENDING
    print('\n'.join(pending))
    _pending_agents = set()
    _pending_sweep_job_ids = []
    for pen in pending:
        pen_split = pen.split()
        SWEEP_PREFIX, model, dataset, seed, _ = pen_split[1].split('_')
        assert _SWEEP_PREFIX in SWEEP_PREFIX
        noise = SWEEP_PREFIX[-1]
        seed_split = seed.split('-')  # remove "agentX"
        seed = seed_split[0][-1]
        if len(seed_split) > 1:
            # Sweep agent job is pending
            _pending_agents.add(make_key(noise, model, dataset, seed))
            _pending_sweep_job_ids.append(pen_split[0])
    print(f"PENDING agents: {_pending_agents}\n")
    if len(_pending_agents) > 0:
        # Cancel pending
        _pending_sweep_job_ids = ' '.join(_pending_sweep_job_ids)
        print(f"!scancel {_pending_sweep_job_ids}")
        !scancel $_pending_sweep_job_ids
        # Relaunch
        add_agents_by_list(keys_to_add=_pending_agents, launched=launched,
                           sweep_ids=sweep_ids, n_agents=n_agents, partition=partition)

def resubmit_pending_sweeps(launched, partition="cpu"):
    pending = !sacct --format="JobID,JobName%50,Partition,State" | grep PENDING
    print('\n'.join(pending))
    _pending_agents = set()
    _pending_sweep_job_ids = []
    for pen in pending:
        pen_split = pen.split()
        SWEEP_PREFIX, model, dataset, seed, _ = pen_split[1].split('_')
        assert _SWEEP_PREFIX in SWEEP_PREFIX
        noise = SWEEP_PREFIX[-1]
        seed_split = seed.split('-')  # remove "agentX"
        seed = seed_split[0][-1]
        if len(seed_split) == 1:
            # Sweep init job is pending
            _pending_agents.add(make_key(noise, model, dataset, seed))
            _pending_sweep_job_ids.append(pen_split[0])
    print(f"PENDING agents: {_pending_agents}\n")
    if len(_pending_agents) > 0:
        # Cancel pending
        _pending_sweep_job_ids = ' '.join(_pending_sweep_job_ids)
        print(f"!scancel {_pending_sweep_job_ids}")
        !scancel $_pending_sweep_job_ids
        # Relaunch
        add_sweeps_by_list(keys_to_add=_pending_agents, launched=launched, partition=partition)

In [36]:
# Keys already (re)launched in this session; the launch helpers above add to it.
launched = set()

In [None]:
# Cancel and relaunch pending sweep-init jobs on the default "cpu" partition.
resubmit_pending_sweeps(launched)

In [None]:
# Add 2 agents to every not-yet-launched sweep with fewer than 60 finished runs.
add_agents(max_run_count=60, launched=launched, finished_runs=finished_runs, sweep_ids=sweep_ids, n_agents=2)

In [None]:
# Cancel pending agent jobs and relaunch them with 2 agents each.
resubmit_pending_agents(launched=launched, sweep_ids=sweep_ids, n_agents=2)

# Analyze results

In [None]:
def get_result_dfs(best_runs):
    """Aggregate the test metrics of the best runs across dataset seeds.

    Args:
        best_runs: mapping from a run key ending in "_<seed>" (as built by
            make_key) to a run object whose `summary` can be passed to dict()
            and contains every summary field listed in `res_map` below.

    Returns:
        Tuple (means, stds, comb) of dicts keyed by the run key without its
        seed suffix. Each value maps a metric name to, respectively, the mean,
        the standard deviation and a "mean±std" string over all seeds.
        Metrics whose name contains 'time' are reported as-is; all others are
        multiplied by 100. Means and stds are rounded to 2 decimals.

    Raises:
        KeyError: if a run's summary lacks one of the expected fields.
    """
    # Output metric name -> field name in the run summary.
    res_map = {
        'train_time': 'z_run_time',
        'inf_time_hac': 'z_inf_time_hac',
        'inf_time_cc': 'z_inf_time_cc',
        'inf_time_cc-nosdp': 'z_inf_time_cc-nosdp',
        'b3_f1_hac': 'best_test_b3_f1_hac',
        'b3_f1_cc': 'best_test_b3_f1_cc',
        'b3_f1_cc-fixed': 'best_test_b3_f1_cc-fixed',
        'b3_f1_cc-nosdp': 'best_test_b3_f1_cc-nosdp',
        'b3_f1_cc-nosdp-fixed': 'best_test_b3_f1_cc-nosdp-fixed',
        'vmeasure_hac': 'best_test_vmeasure_hac',
        'vmeasure_cc': 'best_test_vmeasure_cc',
        'vmeasure_cc-fixed': 'best_test_vmeasure_cc-fixed',
        'vmeasure_cc-nosdp': 'best_test_vmeasure_cc-nosdp',
        'vmeasure_cc-nosdp-fixed': 'best_test_vmeasure_cc-nosdp-fixed'
    }
    # Collect per-seed values, grouped by the key with its seed removed.
    grouped = {}
    for run_id, run in best_runs.items():
        # Drop everything after the last '_' so multi-digit seeds end up in
        # the same group (slicing off two characters only worked for 0-9).
        group_key = run_id.rsplit('_', 1)[0]
        summary = dict(run.summary)
        metrics = grouped.setdefault(group_key, defaultdict(list))
        for out_key, in_key in res_map.items():
            metrics[out_key].append(float(summary[in_key]))

    means, stds, comb = {}, {}, {}
    for group_key, metrics in grouped.items():
        means[group_key], stds[group_key], comb[group_key] = {}, {}, {}
        for name, values in metrics.items():
            # Timings stay in their raw unit; scores are shown as percentages.
            scale = 1 if 'time' in name else 100
            means[group_key][name] = round(np.mean(values) * scale, 2)
            stds[group_key][name] = round(np.std(values) * scale, 2)
            comb[group_key][name] = f"{means[group_key][name]}±{stds[group_key][name]}"
    return means, stds, comb



def get_df_by_dataset(res, dataset, noise, to_latex=False):
    new_res = {}
    for _r in res:
        if dataset in _r and f'noise{noise}' in _r:
            _new_r = _r.replace(f"{dataset}_", '').replace(f"_{dataset}", '')
            _new_r = _new_r.replace(f"_noise{noise}", '').replace(f"noise{noise}_", '')
            new_res[_new_r] = res[_r]
    if to_latex:
        print(pd.DataFrame(new_res).T.style.to_latex())
    outdf = pd.DataFrame(new_res).T.sort_index()
    def highlight_max(s):
        if s.dtype == object:
            is_max = [False for _ in range(s.shape[0])]
            if '±' in s[0]:
                nums = np.array(list(map(lambda x: float(x.split('±')[0]), s)))
                is_max = nums == nums.max()
        else:
            is_max = s == s.max()
        return ['color: green' if cell else '' for cell in is_max]
    
    if outdf[outdf.keys()[0]].dtype == object:
        return outdf.style.apply(highlight_max), outdf
    return outdf.style.format('{:.2f}').apply(highlight_max), outdf

In [None]:
# Aggregate best-run metrics across seeds into mean, std and "mean±std" dicts.
means, stds, comb = get_result_dfs(best_runs)

In [None]:
# Results table for pubmed at noise level 1: `outdf` is the styled view shown as the
# cell output, `_outdf` the underlying plain DataFrame.
outdf, _outdf = get_df_by_dataset(res=comb, dataset='pubmed', noise=1)
outdf