In [2]:
%matplotlib inline

from hydra import initialize, compose
from omegaconf import OmegaConf 

import dr_gen.utils.run as ru
import dr_gen.utils.display as dsp

from dr_gen.analyze.run_group import RunGroup
import dr_gen.analyze.result_plotting as rplt

%load_ext autoreload
%autoreload 2

### Setup Config and Generator

In [3]:
# Compose the Hydra config named "config.yaml" (looked up under the
# "../configs/" config path) with the single override "paths=mac".
# The resulting `cfg` is read by the cells that follow.
with initialize(version_base=None, config_path="../configs/"):
    cfg = compose(config_name="config.yaml", overrides=["paths=mac"])

In [4]:
# Pass the configured seed (cfg.seed) to the project's set_deterministic helper
# and keep whatever it returns as `generator`.
# NOTE(review): presumably this seeds the RNGs and returns a random generator;
# confirm in dr_gen.utils.run. `generator` is not used by the cells shown here.
generator = ru.set_deterministic(cfg.seed)

In [5]:
# Show the `paths` section of the config as YAML, with interpolations resolved
# (resolve=True) so the printed values are concrete paths.
print(OmegaConf.to_yaml(OmegaConf.to_container(cfg.paths, resolve=True)))

root: /Users/daniellerothermel/drotherm
proj_dir_name: dr_gen
data: /Users/daniellerothermel/drotherm/data
logs: /Users/daniellerothermel/drotherm/logs
my_data: /Users/daniellerothermel/drotherm/data/dr_gen
my_logs: /Users/daniellerothermel/drotherm/logs/dr_gen
run_dir: /Users/daniellerothermel/drotherm/logs/dr_gen/bs500/lr0.1/wd0.0001/s0/2025-03-25/17-33-1742938432
dataset_cache_root: /Users/daniellerothermel/drotherm/data/cifar10/
agg_results: /Users/daniellerothermel/drotherm/data/dr_gen/cifar10/cluster_runs/lr_wd_init_v0



### Load, Dissect and Filter Sweep

In [8]:
# Create an empty RunGroup, then load the runs found under the aggregated
# results directory from the config (cfg.paths.agg_results). The recorded
# output reports how many files failed parsing.
rg = RunGroup()
rg.load_runs_from_base_dir(cfg.paths.agg_results)

>> 0 / 1288 files failed parsing
>> Updated hpm sweep info


In [10]:
# Tell the run group to ignore runs whose `epochs` hyperparameter is 180; the
# recorded output shows the sweep info being updated afterwards.
# NOTE(review): this appears to change `rg` in place, so later cells see the
# filtered group — confirm, and re-run the load cell to start from all runs.
rg.ignore_runs_by_hpms(epochs=180)

>> Updated hpm sweep info


In [12]:
# Print a table built from rg.get_swept_table_data(); in the recorded output it
# lists each swept key (Init, WD, LR) with the values it takes.
print(dsp.make_table(*rg.get_swept_table_data()))

+------+------------+
| Key  |   Values   |
+------+------------+
| Init |   random   |
|      | pretrained |
+------+------------+
|  WD  |   4e-05    |
|      |   0.0001   |
|      |  0.00016   |
|      |   1e-05    |
|      |  6.3e-05   |
|      |  0.00025   |
+------+------------+
|  LR  |    0.04    |
|      |    0.1     |
|      |    0.2     |
|      |    0.06    |
|      |    0.01    |
|      |    0.25    |
|      |    0.16    |
+------+------------+


In [13]:
# Build the table from rg.get_hpms_sweep_table() and print it sorted by
# Init, LR, WD. In the recorded output each row is one hyperparameter
# combination with a Count column.
table = dsp.make_table(*rg.get_hpms_sweep_table())
print(">> Current Sweep, Ready to Analyze:")
dsp.print_table(
    table,
    drop_cols=[],
    sort_cols=['Init', 'LR', 'WD'],
    # NOTE(review): presumably restricts the printed rows to these LR values
    # (the recorded output only shows listed values) — confirm in print_table.
    lr=[0.04, 0.06, 0.1, 0.16, 0.25],
)

>> Current Sweep, Ready to Analyze:
+------------+------+---------+-------+
|    Init    |  LR  |    WD   | Count |
+------------+------+---------+-------+
| pretrained | 0.04 |  0.0001 |   20  |
| pretrained | 0.04 | 0.00016 |   20  |
| pretrained | 0.04 | 0.00025 |   20  |
| pretrained | 0.04 |  4e-05  |   20  |
| pretrained | 0.04 | 6.3e-05 |   20  |
| pretrained | 0.06 |  0.0001 |   20  |
| pretrained | 0.06 | 0.00016 |   20  |
| pretrained | 0.06 | 0.00025 |   20  |
| pretrained | 0.06 |  4e-05  |   20  |
| pretrained | 0.06 | 6.3e-05 |   20  |
| pretrained | 0.1  |  0.0001 |  103  |
| pretrained | 0.1  | 0.00016 |   20  |
| pretrained | 0.1  | 0.00025 |   20  |
| pretrained | 0.1  |  4e-05  |   20  |
| pretrained | 0.1  | 6.3e-05 |   20  |
| pretrained | 0.16 |  0.0001 |   20  |
| pretrained | 0.16 | 0.00016 |   20  |
| pretrained | 0.16 | 0.00025 |   20  |
| pretrained | 0.16 |  4e-05  |   20  |
| pretrained | 0.16 | 6.3e-05 |   20  |
| pretrained | 0.25 |  0.0001 |   20  |
| pr

In [16]:
# Pick the pretrained-init runs at lr=0.1, wd=1e-4 and print one line per
# hyperparameter combination with the number of run IDs it contains.
runs_pre = rg.select_run_data_by_hpms(
    lr=0.1,
    wd=1e-4,
    init="pretrained",
)
for hpm_key, run_list in runs_pre.items():
    n_runs = len(run_list)
    print(f" - {str(hpm_key):70} | {n_runs:,} RIDS")

 - model.weights=DEFAULT optim.lr=0.1 optim.weight_decay=0.0001           | 103 RIDS


In [17]:
# Pick the random-init runs at optim.lr=0.1, optim.weight_decay=1e-4 and print
# one line per hyperparameter combination with the number of run IDs it has.
# NOTE(review): this call spells the keys as "optim.lr" / "optim.weight_decay"
# while the runs_pre cell uses lr= / wd= — confirm select_run_data_by_hpms
# accepts both spellings.
runs_rand = rg.select_run_data_by_hpms(
    **{"optim.lr": 0.1, "optim.weight_decay": 1e-4, "init": "random"}
)
# Iterate over runs_rand. The loop previously read `runs`, a name that no cell
# shown in this notebook assigns, so it only worked with leftover kernel state
# and would raise NameError after Restart & Run All.
for hpm, rlist in runs_rand.items():
    print(f" - {str(hpm):70} | {len(rlist):,} RIDS")

 - model.weights=None optim.lr=0.1 optim.weight_decay=0.0001              | 99 RIDS


In [None]:
## 

### [Out of Date] Using Result Plotting Utils

In [None]:
# [Out of date] Call rplt.plot_run_splits for run_ind=0 with ylim=(75, 100),
# ignoring the 'optim.step_size' and 'epochs' keys.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook, so this cell presumably fails on a fresh
# kernel — confirm before re-enabling.
rplt.plot_run_splits(
    good_runs,
    all_runs_metrics,
    sweep_info,
    run_ind=0,
    ignore_keys=['optim.step_size', 'epochs'],
    ylim=(75, 100),
)

In [None]:
# [Out of date] Call rplt.plot_split_summaries on the train and val splits for
# the hyperparameter selection in kv_select, with num_seeds=None.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.plot_split_summaries(
    good_runs,
    all_runs_metrics,
    sweep_info,
    # Selection values are passed as strings, not numbers.
    kv_select={
        'epochs': '270',
        'model.weights': 'None',
        'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    splits=['train', 'val'],
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=None,  # presumably "use every available seed" — TODO confirm
    ylim=(82, 86),
)
    

In [None]:
# [Out of date] Same rplt.plot_split_summaries call as the num_seeds=None cell,
# except that num_seeds=20 here.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.plot_split_summaries(
    good_runs,
    all_runs_metrics,
    sweep_info,
    # Selection values are passed as strings, not numbers.
    kv_select={
        'epochs': '270',
        'model.weights': 'None',
        'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    splits=['train', 'val'],
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=20,  # presumably limits the plot to 20 seeds — TODO confirm
    ylim=(82, 86),
)
    

In [None]:
# [Out of date] Call rplt.plot_combo_histogram for metric 'acc1' on the 'val'
# split at epoch=110, for the hyperparameter selection in kv_select.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.plot_combo_histogram(
    good_runs,
    all_runs_metrics,
    sweep_info,
    # Selection values are passed as strings, not numbers.
    kv_select={
        'epochs': '270',
        'model.weights': 'None',
        'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split='val',
    epoch=110,
    metric='acc1',
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=None,
    # nbins is left at the helper's default; only the range is fixed here.
    #nbins=10,
    hist_range=(82,86),
    density=True,
)

In [None]:
# [Out of date] Call rplt.plot_combo_histogram_compare for metric "acc1" on the
# "val" split at epoch=100, with vary_key="model.weights".
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.plot_combo_histogram_compare(
    good_runs,
    all_runs_metrics,
    sweep_info,
    kv_select={
        'epochs': '270',
        # 'model.weights' is not fixed here; it is the vary_key below.
        #'model.weights': 'None',
        'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split="val",
    epoch=100,
    metric="acc1",
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=20,
    nbins=10,
    vary_key="model.weights",
)

In [None]:
# [Out of date] Call rplt.ks_stats_plot_cdfs for metric "acc1" on the "val"
# split at epoch=100, with vary_key="model.weights".
# NOTE(review): presumably compares the distributions with a KS statistic and
# plots their CDFs — inferred from the helper's name; confirm in result_plotting.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.ks_stats_plot_cdfs(
    good_runs,
    all_runs_metrics,
    sweep_info,
    kv_select={
        'epochs': '270',
        # 'model.weights' is not fixed here; it is the vary_key below.
        #'model.weights': 'None',
        'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split="val",
    epoch=100,
    metric="acc1",
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=None,
    vary_key="model.weights",
)

In [None]:
# [Out of date] Call rplt.ks_stats_plot_cdfs for metric "acc1" on the "val"
# split at epoch=100, with vary_key="optim.lr" and the values "0.1" and "0.01".
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.ks_stats_plot_cdfs(
    good_runs,
    all_runs_metrics,
    sweep_info,
    kv_select={
        'epochs': '270',
        'model.weights': 'None',
        # 'optim.lr' is not fixed here; it is the vary_key below.
        #'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split="val",
    epoch=100,
    metric="acc1",
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=None,
    vary_key="optim.lr",
    # Values are converted to strings, matching the string values in kv_select.
    vary_vals=[str(v) for v in [0.1, 0.01]],
)

In [None]:
# [Out of date] Call rplt.ks_stat_plot_cdfs_histograms for metric "acc1" on the
# "val" split at epoch=100, with vary_key="model.weights" and nbins=40.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.ks_stat_plot_cdfs_histograms(
    good_runs,
    all_runs_metrics,
    sweep_info,
    kv_select={
        'epochs': '270',
        # 'model.weights' is not fixed here; it is the vary_key below.
        #'model.weights': 'None',
        'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split="val",
    epoch=100,
    metric="acc1",
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=None,
    vary_key="model.weights",
    nbins=40,
)

In [None]:
# [Out of date] Call rplt.ks_stat_plot_cdfs_histograms for metric "acc1" on the
# "val" split at epoch=100, with vary_key="optim.lr" over the values "0.1" and
# "0.01", num_seeds=20 and nbins=40.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.ks_stat_plot_cdfs_histograms(
    good_runs,
    all_runs_metrics,
    sweep_info,
    kv_select={
        'epochs': '270',
        'model.weights': 'None',
        # 'optim.lr' is not fixed here; it is the vary_key below.
        #'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split="val",
    epoch=100,
    metric="acc1",
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=20,
    vary_key="optim.lr",
    # Values are converted to strings, matching the string values in kv_select.
    vary_vals=[str(lr) for lr in [0.1, 0.01]],
    nbins=40,
)

In [None]:
# [Out of date] Call rplt.ks_stat_plot_cdfs_histograms for metric "acc1" on the
# "val" split at epoch=100, with vary_key="optim.lr" over the values "0.1" and
# "0.04", num_seeds=20 and nbins=40.
# NOTE(review): good_runs, all_runs_metrics and sweep_info are not assigned in
# any cell shown in this notebook — confirm before re-enabling.
rplt.ks_stat_plot_cdfs_histograms(
    good_runs,
    all_runs_metrics,
    sweep_info,
    kv_select={
        'epochs': '270',
        'model.weights': 'None',
        # 'optim.lr' is not fixed here; it is the vary_key below.
        #'optim.lr': '0.1',
        'optim.weight_decay': '0.0001',
    }, 
    split="val",
    epoch=100,
    metric="acc1",
    ignore_keys=["optim.step_size", "epochs"],
    num_seeds=20,
    vary_key="optim.lr",
    # Values are converted to strings, matching the string values in kv_select.
    vary_vals=[str(lr) for lr in [0.1, 0.04]],
    nbins=40,
)