In [209]:
import wandb
from collections import defaultdict
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from IPython import embed

In [3]:
# Client for the W&B public API; the run queries below go through it.
api = wandb.Api()

In [340]:
# Tags identifying the set of sweeps covered by this analysis.
# The run queries use an `$in` filter, so a run matches if it carries ANY of these tags.

TAGS = ["feats8"]

In [344]:
def _query_finished_runs(pairwise_mode, order):
    """Return the finished runs tagged with any of TAGS for one pairwise mode.

    pairwise_mode: value matched against `config.pairwise_mode`.
    order: sort key handed to `api.runs` unchanged.
    """
    run_filters = {
        "tags": {"$in": TAGS},
        "state": {"$eq": "finished"},
        "config.pairwise_mode": {"$eq": pairwise_mode},
    }
    return api.runs("dhdhagar/prob-ent-resolution", filters=run_filters, order=order)


# Non-pairwise runs are ordered by dev B3 F1, pairwise runs by dev AUROC.
# NOTE(review): the leading '-' is expected to mean descending order — confirm against the W&B API docs.
runs = _query_finished_runs(False, "-summary_metrics.best_dev_b3_f1")
runs_mlp = _query_finished_runs(True, "-summary_metrics.best_dev_auroc")

In [345]:
# Number of finished runs with pairwise_mode == False matching TAGS.
len(runs)

2670

In [346]:
# Number of finished runs with pairwise_mode == True matching TAGS.
len(runs_mlp)

760

In [229]:
# Method tags; each run is expected to carry exactly one of them.
methods = {'e2e', 'e2e-nosdp', 'frac', 'frac-nosdp', 'mlp'}
def run_key(run):
    """Build the sweep key "<method>_<dataset>_<dataset_seed>" for a run.

    The dataset and seed are read from the run's config; the method is the
    run tag that also appears in `methods`.

    Raises:
        KeyError: if the config lacks `dataset` / `dataset_random_seed`, or
            if the run carries no tag from `methods`.
    """
    dataset = run._attrs['config']['dataset']
    dataset_seed = run._attrs['config']['dataset_random_seed']
    # Sort so that a run carrying several method tags always resolves to the
    # same one (lexicographically first) instead of an arbitrary set element.
    matched = sorted(set(run.tags).intersection(methods))
    if not matched:
        raise KeyError(
            f"run has no method tag: expected one of {sorted(methods)}, "
            f"got tags {list(run.tags)}"
        )
    method = matched[0]
    return f"{method}_{dataset}_{dataset_seed}"

In [255]:
# best_runs:     sweep key -> first run seen for that key
# finished_runs: sweep key -> number of usable finished runs
# all_sweeps:    sweep key -> sweep name of the kept run
best_runs = {}
finished_runs = defaultdict(int)
all_sweeps = {}
for _runs in (runs, runs_mlp):
    for run in tqdm(_runs):
        # Only runs with at least 10 summary entries are counted.
        if len(dict(run.summary)) >= 10:
            key = run_key(run)
            finished_runs[key] += 1
            # Runs arrive in the order requested by the query, so the first
            # run seen for a key is the one kept as its best.
            if key not in best_runs:
                best_runs[key] = run
                all_sweeps[key] = run.sweepName
print(f"Best runs found for {len(best_runs)} sweeps")
print(f"Total finished runs across sweeps = {sum(finished_runs.values())}")

  0%|          | 0/2670 [00:00<?, ?it/s]
  0%|          | 0/760 [00:00<?, ?it/s]


In [234]:
# Maps the column name used in the result tables (key) to the run-summary
# field it is read from (value). Columns whose name contains 'time' are
# reported as-is by the aggregation cell; all others are scaled by 100.
res_map = {
    'train_time': 'z_run_time',
    'inf_time_hac': 'z_inf_time_hac',
    'inf_time_cc': 'z_inf_time_cc',
    'inf_time_cc-nosdp': 'z_inf_time_cc-nosdp',
    'b3_f1_hac': 'best_test_b3_f1_hac',
    'b3_f1_cc': 'best_test_b3_f1_cc',
    'b3_f1_cc-fixed': 'best_test_b3_f1_cc-fixed',
    'b3_f1_cc-nosdp': 'best_test_b3_f1_cc-nosdp',
    'b3_f1_cc-nosdp-fixed': 'best_test_b3_f1_cc-nosdp-fixed',
    'vmeasure_hac': 'best_test_vmeasure_hac',
    'vmeasure_cc': 'best_test_vmeasure_cc',
    'vmeasure_cc-fixed': 'best_test_vmeasure_cc-fixed',
    'vmeasure_cc-nosdp': 'best_test_vmeasure_cc-nosdp',
    'vmeasure_cc-nosdp-fixed': 'best_test_vmeasure_cc-nosdp-fixed'
}

In [235]:
# Collect, per "<method>_<dataset>" group, the metric values of the best run
# of every seed, then summarise each metric as mean, std and "mean±std".
final = {}
for run_id, run in best_runs.items():
    # Drop the trailing "_<seed>"; rsplit also handles multi-digit seeds,
    # which a fixed two-character slice would not.
    _key = run_id.rsplit('_', 1)[0]
    if _key not in final:
        final[_key] = defaultdict(list)
    res = dict(run.summary)
    for out_key, in_key in res_map.items():
        final[_key][out_key].append(float(res[in_key]))
means, stds, comb = {}, {}, {}
for k in final:
    # Membership test (the earlier identity test `k is not means` was always true).
    if k not in means:
        means[k] = {}
        stds[k] = {}
        comb[k] = {}
    for _k in final[k]:
        # Timing metrics are kept as-is; every other metric is scaled by 100.
        scale = 1 if 'time' in _k else 100
        means[k][_k] = round(np.mean(final[k][_k])*scale, 2)
        stds[k][_k] = round(np.std(final[k][_k])*scale, 2)
        comb[k][_k] = f"{means[k][_k]}±{stds[k][_k]}"

In [236]:
def get_df_by_dataset(res, dataset, to_latex=False):
    new_res = {}
    for _r in res:
        if dataset in _r:
            new_res[_r.replace(f"{dataset}_", '').replace(f"_{dataset}", '')] = res[_r]
    if to_latex:
        print(pd.DataFrame(new_res).T.style.to_latex())
    outdf = pd.DataFrame(new_res).T.sort_index()
    
    def highlight_max(s):
        if s.dtype == object:
            is_max = [False for _ in range(s.shape[0])]
            if '±' in s[0]:
                nums = np.array(list(map(lambda x: float(x.split('±')[0]), s)))
                is_max = nums == nums.max()
        else:
            is_max = s == s.max()
        return ['color: green' if cell else '' for cell in is_max]
    
    if outdf[outdf.keys()[0]].dtype == object:
        return outdf.style.apply(highlight_max), outdf
    return outdf.style.format('{:.2f}').apply(highlight_max), outdf

In [237]:
# mean±std table for 'pubmed': `outdf` is the styled view, `_outdf` the plain DataFrame.
outdf, _outdf = get_df_by_dataset(comb, 'pubmed')
outdf

Unnamed: 0,train_time,inf_time_hac,inf_time_cc,inf_time_cc-nosdp,b3_f1_hac,b3_f1_cc,b3_f1_cc-fixed,b3_f1_cc-nosdp,b3_f1_cc-nosdp-fixed,vmeasure_hac,vmeasure_cc,vmeasure_cc-fixed,vmeasure_cc-nosdp,vmeasure_cc-nosdp-fixed
e2e,1257.4±483.02,184.2±136.26,28.4±5.99,12.6±2.42,69.78±14.58,78.86±5.5,64.86±22.14,78.76±5.48,64.82±21.0,83.08±8.94,88.67±3.64,80.95±13.39,88.67±3.88,80.62±13.31
e2e-nosdp,234.0±21.57,31.6±13.48,21.4±10.59,11.4±2.06,85.92±3.6,82.7±3.92,81.54±3.99,85.3±3.01,84.06±4.05,93.14±1.91,90.9±2.86,90.1±3.13,92.84±1.72,91.88±2.71
frac,1090.0±209.84,32.2±15.64,25.0±10.39,21.0±19.58,83.4±3.76,86.02±2.98,83.08±3.49,84.08±3.94,83.58±3.57,92.05±1.8,93.25±2.15,91.57±3.47,92.14±2.88,92.03±2.89
frac-nosdp,362.2±85.95,63.8±43.5,31.2±9.54,20.8±5.71,63.2±9.3,77.48±2.54,60.82±20.39,80.2±4.01,60.54±20.27,80.0±5.57,88.09±1.86,75.97±16.18,89.86±1.97,75.72±16.02
mlp,282.4±77.6,34.2±11.77,23.0±9.94,14.4±6.25,86.6±2.79,84.6±4.36,79.3±8.75,85.98±4.31,81.34±10.13,93.56±1.58,92.42±2.31,88.46±6.16,93.23±1.83,89.25±7.22


In [238]:
# mean±std table for 'qian': `outdf` is the styled view, `_outdf` the plain DataFrame.
outdf, _outdf = get_df_by_dataset(comb, 'qian')
outdf

Unnamed: 0,train_time,inf_time_hac,inf_time_cc,inf_time_cc-nosdp,b3_f1_hac,b3_f1_cc,b3_f1_cc-fixed,b3_f1_cc-nosdp,b3_f1_cc-nosdp-fixed,vmeasure_hac,vmeasure_cc,vmeasure_cc-fixed,vmeasure_cc-nosdp,vmeasure_cc-nosdp-fixed
e2e,1234.6±192.11,221.0±294.91,43.8±18.06,36.0±11.85,69.62±7.01,69.4±7.45,67.78±8.25,69.4±7.45,67.78±8.25,84.5±4.63,84.49±4.78,83.62±5.72,84.49±4.78,83.62±5.72
e2e-nosdp,587.0±270.52,77.4±20.88,44.6±32.21,29.0±13.37,71.02±5.2,70.98±5.17,66.5±4.59,72.14±5.95,67.62±4.96,85.12±3.89,84.97±4.26,83.57±3.44,85.52±4.42,84.27±3.37
frac,1299.2±485.59,232.4±163.01,21.8±8.89,13.8±3.06,70.54±6.69,69.12±6.07,59.22±6.66,69.58±6.45,58.46±7.14,84.84±4.35,84.13±3.97,80.89±4.39,84.44±4.17,80.89±4.38
frac-nosdp,347.8±113.89,81.2±18.73,29.0±12.46,21.2±14.85,68.98±5.87,70.98±4.93,56.9±6.42,71.2±5.16,56.48±6.83,83.85±4.09,84.83±3.99,80.56±4.22,85.11±3.86,81.07±3.56
mlp,556.0±87.24,80.2±20.5,42.8±16.27,30.8±5.74,72.3±5.46,54.18±12.54,44.8±15.62,54.22±12.98,44.9±15.85,85.3±4.29,81.56±5.06,79.36±5.65,81.62±5.01,79.56±5.84


In [239]:
# mean±std table for 'arnetminer': `outdf` is the styled view, `_outdf` the plain DataFrame.
outdf, _outdf = get_df_by_dataset(comb, 'arnetminer')
outdf

Unnamed: 0,train_time,inf_time_hac,inf_time_cc,inf_time_cc-nosdp,b3_f1_hac,b3_f1_cc,b3_f1_cc-fixed,b3_f1_cc-nosdp,b3_f1_cc-nosdp-fixed,vmeasure_hac,vmeasure_cc,vmeasure_cc-fixed,vmeasure_cc-nosdp,vmeasure_cc-nosdp-fixed
e2e,1072.6±746.42,104.6±66.95,75.6±72.71,16.8±4.17,64.4±10.62,64.54±10.74,63.58±10.2,64.82±11.07,64.02±10.42,76.71±7.04,76.64±6.9,76.07±6.63,76.73±6.92,76.44±6.99
e2e-nosdp,143.2±13.41,73.4±23.47,69.8±67.15,13.6±2.15,67.34±11.12,65.36±9.74,63.74±8.72,66.64±10.6,63.36±8.6,78.91±7.6,78.09±7.11,77.64±6.9,78.76±7.36,77.83±7.16
frac,1450.2±975.29,103.6±64.36,34.2±21.89,12.0±1.55,63.64±12.64,57.44±14.89,54.1±14.5,57.54±15.3,53.34±13.97,76.95±7.68,76.05±7.1,74.5±6.76,76.06±7.12,74.4±6.56
frac-nosdp,138.4±17.32,201.2±277.46,67.4±75.15,16.8±3.54,64.68±11.87,59.86±11.57,51.68±6.78,60.5±12.08,50.74±6.83,77.77±6.91,77.85±5.42,75.19±4.62,78.25±5.64,74.86±4.81
mlp,310.2±18.89,67.0±23.35,53.4±44.56,11.4±1.02,67.84±9.65,59.54±8.29,58.4±9.72,60.04±8.14,57.88±9.5,80.52±5.95,77.52±5.79,76.06±4.76,78.07±5.71,76.01±4.9


In [327]:
# Add agents to sweeps with few completed runs

def add_agents(max_run_count_filter, already_added, sweep_finished_count, sweep_ids, n_agents=2, gpu="gypsum-1080ti"):
    """Submit agents for every sweep that still has few finished runs.

    Args:
        max_run_count_filter: sweeps with strictly fewer finished runs than
            this are given new agents.
        already_added: set of sweep keys that already received agents; these
            are skipped, and every key submitted here is added to it.
        sweep_finished_count: mapping of sweep key -> finished-run count.
        sweep_ids: mapping of sweep key -> sweep id.
        n_agents: forwarded to `add_agents_by_list`.
        gpu: forwarded to `add_agents_by_list`.
    """
    keys_to_add = [
        key
        for key, finished in sweep_finished_count.items()
        if key not in already_added and finished < max_run_count_filter
    ]
    # Submission itself lives in add_agents_by_list so the command line is
    # built in a single place.
    add_agents_by_list(keys_to_add=keys_to_add, already_added=already_added,
                       sweep_ids=sweep_ids, n_agents=n_agents, gpu=gpu)

In [328]:
def add_agents_by_list(keys_to_add, already_added, sweep_ids, n_agents=2, gpu="gypsum-1080ti"):
    for k in keys_to_add:
        model, dataset, seed = k.split('_')
        sweep_id = sweep_ids[k]
        n_agents = n_agents
        gpu_name = gpu
        print(f"./add_agent.sh {dataset} {seed} {model} dhdhagar/prob-ent-resolution/{sweep_id} {n_agents} {gpu_name}")
        !./add_agent.sh $dataset $seed $model dhdhagar/prob-ent-resolution/$sweep_id $n_agents $gpu_name
        already_added.add(k)

In [347]:
def resubmit_pending_agents(already_added, sweep_ids, n_agents=2, gpu="gypsum-1080ti"):
    pending = !sacct --format="JobID,JobName%50,Partition,State" | grep PENDING
    _pending = set()
    _pending_job_ids = []
    for pen in pending:
        pen_split = pen.split()
        _pending_job_ids.append(pen_split[0])
        model, dataset, seed, _ = pen_split[1].split('_')
        seed = seed.split('-')[0][-1]
        _pending.add(f"{model}_{dataset}_{seed}")
    print(f"PENDING sweeps: {_pending}")
    if len(_pending) > 0:
        _pending_job_ids = ' '.join(_pending_job_ids)

        print(f"!scancel {_pending_job_ids}")
        !scancel $_pending_job_ids

        add_agents_by_list(keys_to_add=_pending, already_added=already_added, 
                           sweep_ids=sweep_ids, n_agents=n_agents, gpu=gpu)

In [282]:
# Sweep keys that have already been given agents; the add_agents calls below skip these.
agents_added = set()

In [284]:
# Sweeps with fewer than 20 finished runs: 2 agents each, gpu="gypsum-1080ti".
add_agents(max_run_count_filter=20, already_added=agents_added, 
           sweep_finished_count=finished_runs, sweep_ids=all_sweeps, 
           n_agents=2, gpu="gypsum-1080ti")

In [None]:
# Sweeps with fewer than 30 finished runs that were not handled above: gpu="gypsum-2080ti".
add_agents(max_run_count_filter=30, already_added=agents_added, 
           sweep_finished_count=finished_runs, sweep_ids=all_sweeps, 
           n_agents=2, gpu="gypsum-2080ti")

In [None]:
# Sweeps with fewer than 40 finished runs that were not handled above: gpu="gypsum-titanx".
add_agents(max_run_count_filter=40, already_added=agents_added, 
           sweep_finished_count=finished_runs, sweep_ids=all_sweeps, 
           n_agents=2, gpu="gypsum-titanx")

In [None]:
# Sweeps with fewer than 50 finished runs that were not handled above: gpu="gpu".
add_agents(max_run_count_filter=50, already_added=agents_added, 
           sweep_finished_count=finished_runs, sweep_ids=all_sweeps, 
           n_agents=2, gpu="gpu")

In [331]:
# Cancel jobs still PENDING and resubmit their sweeps with gpu="gypsum-titanx".
resubmit_pending_agents(already_added=agents_added, sweep_ids=all_sweeps,
                 n_agents=2, gpu="gypsum-titanx")

PENDING sweeps: {'mlp_qian_2', 'mlp_qian_3'}
!scancel 6725814 6725815 6725816 6725817
./add_agent.sh qian 2 mlp dhdhagar/prob-ent-resolution/gcteakms 2 gypsum-titanx
Submitted batch job 6725996
    Logs: jobs/mlp_qian_sweep2-1_1680906954.err
Submitted batch job 6725997
    Logs: jobs/mlp_qian_sweep2-2_1680906954.err
./add_agent.sh qian 3 mlp dhdhagar/prob-ent-resolution/6j25xh4w 2 gypsum-titanx
Submitted batch job 6725998
    Logs: jobs/mlp_qian_sweep3-1_1680906954.err
Submitted batch job 6725999
    Logs: jobs/mlp_qian_sweep3-2_1680906954.err


In [348]:
# Repeat the check for PENDING jobs; nothing is resubmitted when none are found.
resubmit_pending_agents(already_added=agents_added, sweep_ids=all_sweeps,
                 n_agents=2, gpu="gypsum-titanx")

PENDING sweeps: set()
