In [1]:
import pandas as pd
import torch
from modelV2.tune import HyperParamOptimizer
from modelV2.train import tuning_wrapper
# from modelV2.arch.rn_2 import ResNet18
from torchvision.transforms import v2
import os
import re
# Restrict this process to the GPU with index 6. CUDA_VISIBLE_DEVICES only
# takes effect if it is set before CUDA is initialised in this process.
# NOTE(review): assumes none of the imports above initialise CUDA at import
# time -- TODO confirm for the modelV2 modules.
# NOTE(review): the index "6" is specific to one machine; adjust before
# running elsewhere.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def extract_masked_path(target_mask_factor: str, mask_factors_str: str, masked_paths_str: str):
    """Return the masked image path that corresponds to one mask factor.

    `mask_factors_str` and `masked_paths_str` are parallel, string-encoded
    lists: the path for a factor sits at the same position as that factor.

    Args:
        target_mask_factor: factor to look up, exactly as it appears in the
            parsed factor list (e.g. "0.0").
        mask_factors_str: factor list as a string, parsed by
            `parse_mask_factors`.
        masked_paths_str: path list as a string, parsed by
            `parse_masked_paths`.

    Returns:
        The matching path, or the sentinel string 'CAUGHT_ERROR' if anything
        goes wrong (factor not present, lists of different length, non-string
        input, ...). The exception is printed, never raised.
    """
    try:
        factors = parse_mask_factors(mask_factors_str)
        paths = parse_masked_paths(masked_paths_str)
        # position of the requested factor selects the path at the same position
        return paths[factors.index(target_mask_factor)]
    except Exception as err:
        # best-effort lookup: report the problem and fall through to the sentinel
        print(err)
    return 'CAUGHT_ERROR'

def parse_mask_factors(mask_factors_str: str):
    """Split a stringified list of mask factors into its string items.

    Example: "[0.0, 0.5]" -> ['0.0', '0.5'].

    Args:
        mask_factors_str: comma-separated factors, optionally wrapped in
            square brackets and padded with spaces.

    Returns:
        List of factor strings (items are not converted to numbers).
    """
    # delete every '[', ']' and space, then split what is left on commas
    strip_table = str.maketrans('', '', '][ ')
    return mask_factors_str.translate(strip_table).split(',')

def parse_masked_paths(masked_paths_str: str):
    """Split a '$'-delimited string of paths into a list of paths.

    Args:
        masked_paths_str: paths joined with '$'.

    Returns:
        List of path strings, in their original order.
    """
    # '$' is the separator, so a path that itself contains '$' would be cut in two
    delimiter = '$'
    path_list = masked_paths_str.split(delimiter)
    return path_list

In [3]:
# ---- run configuration -------------------------------------------------------
# Seed passed to HyperParamOptimizer(seed=...).
SEED = 49713
# Directory the positive / negative exam CSVs are read from.
BASE_DIR = "/mnt/NAS3/DataBalance/balance-study/data/images/run_e1_a_1200_800/"
# Mask factor to train on, written with '_' in place of '.' so it can be used
# in MODEL_NAME; it is converted back with .replace('_', '.') when looking up
# each row's masked image path.
MASK_STR = "0_0"
# Passed to HyperParamOptimizer(balance=...).
BALANCE_DISTRIBUTIONS = False

# Passed to HyperParamOptimizer as n_bootstraps / epochs_per_run / monitor_metric.
N_BOOTSTRAPS = 5
EPOCHS_PER_RUN = 20
MONITOR_METRIC = 'loss'
# Hyper-parameter name -> pair of bounds, passed as hparam_range_dict.
# presumably (lower, upper) -- confirm against HyperParamOptimizer.
HPARAM_RANGE_DICT = {
    'learning_rate': (0.00001, 0.01),
}


# Batch size passed to hparam_optimizer.load_data(batch_size=...).
BATCH_SIZE = 48
# Passed to hparam_optimizer.optimize as n_random / n_guided.
# presumably the number of random vs. guided search iterations -- TODO confirm.
N_RANDOM = 5
N_GUIDED = 10
# Passed to optimize(model_name=...); the recorded run logs to
# './logs/masked_0_0_rn18_r3_a_opt.json', i.e. this name plus '_opt.json'.
MODEL_NAME = f"masked_{MASK_STR}_rn18_r3_a"

In [4]:
# Columns that must be populated for a row to be usable: they hold the
# string-encoded mask factors and the matching masked image paths.
MASK_COLUMNS = ['masked_factors', 'masked_png_paths']

# Positive exams: read the table and drop rows missing mask information.
# Built as one non-inplace chain so re-running the cell always starts from the
# CSV, and ending in .copy() so later column assignments act on an independent
# frame instead of a slice (avoids SettingWithCopyWarning).
pos_df = (
    pd.read_csv(os.path.join(BASE_DIR, "pos_out_df.csv"))
    .dropna(subset=MASK_COLUMNS)
    .copy()
)

# Negative exams: only include exams we've assigned to the tuning set.
# This is done so evaluation class balance is consistent
# between train/val/test sets;
# 10/90 balance testing can be done after tuning is completed.
neg_df = (
    pd.read_csv(os.path.join(BASE_DIR, "neg_proc_df.csv"))
    .loc[lambda df: df["include_in_tuning"] == 1]
    .dropna(subset=MASK_COLUMNS)
    .copy()
)

# report how many exams fall in each split
print(pos_df[['assigned_split']].value_counts())
print(neg_df[['assigned_split']].value_counts())

def correct_paths_to_kraken(hiti_path):
    """Re-root an image path under /mnt/NAS3/DataBalance/balance-study.

    Everything from the start of the string up to the first '/data/images'
    is replaced by the new root; '/data/images' and the rest of the path are
    kept. A string that does not contain '/data/images' is returned unchanged.

    Args:
        hiti_path: original path string.

    Returns:
        The path with its leading portion swapped for the new root.
    """
    # non-greedy prefix match, capturing '/data/images' so it can be re-emitted
    prefix_pattern = re.compile(r'^.*?(/data/images)')
    return prefix_pattern.sub(r"/mnt/NAS3/DataBalance/balance-study\1", hiti_path)

def _with_target_path(frame, mask_factor, label):
    """Return a copy of `frame` with a '__target_path' column added.

    For every row, the masked image path matching `mask_factor` is picked out
    of the row's `masked_factors` / `masked_png_paths` strings and re-rooted
    with `correct_paths_to_kraken`.

    Args:
        frame: exam table with `masked_factors` and `masked_png_paths` columns.
        mask_factor: factor to select, e.g. "0.0".
        label: short name used only in the warning message.

    Returns:
        A new DataFrame; `frame` itself is not modified.
    """
    target_paths = frame.apply(
        lambda row: correct_paths_to_kraken(extract_masked_path(
            mask_factor,
            row.masked_factors,
            row.masked_png_paths
        )),
        axis=1
    )
    # assign() builds a new frame, so no column is ever set on a slice
    out = frame.assign(**{'__target_path': target_paths})

    # extract_masked_path returns the sentinel 'CAUGHT_ERROR' on failure and
    # the path rewrite leaves it untouched, so such rows would otherwise be
    # passed on as if 'CAUGHT_ERROR' were an image path. Surface the count.
    n_failed = int((out['__target_path'] == 'CAUGHT_ERROR').sum())
    if n_failed:
        print(f"warning: {n_failed} {label} rows have no path for mask factor '{mask_factor}'")
    return out


def _split_frames(frame):
    """Return {'train': ..., 'val': ..., 'test': ...} sub-frames of `frame`.

    Each sub-frame is an independent copy, so code that later adds columns to
    it does not operate on a slice of the parent frame.
    """
    return {
        split: frame[frame.assigned_split == split].copy()
        for split in ("train", "val", "test")
    }


# MASK_STR uses '_' where the factor strings in the tables use '.'
TARGET_MASK_FACTOR = MASK_STR.replace('_', '.')

pos_df = _with_target_path(pos_df, TARGET_MASK_FACTOR, "pos")
neg_df = _with_target_path(neg_df, TARGET_MASK_FACTOR, "neg")

# class -> split -> DataFrame, in the layout expected by load_data(df_pool_dict=...)
DF_POOL_DICT = {
    "pos": _split_frames(pos_df),
    "neg": _split_frames(neg_df)
}

assigned_split
train             4485
test               986
val                955
Name: count, dtype: int64
assigned_split
train             45232
val                9743
test                991
Name: count, dtype: int64


In [5]:
# Augmentation configuration handed to hparam_optimizer.load_data(augment_dict=...).
# Each entry maps an augmentation name to:
#   'enabled' - on/off switch for the entry
#   'func'    - the torchvision.transforms.v2 transform class to use
#   'prob'    - presumably the probability of applying the transform -- TODO confirm
#   'params'  - presumably keyword arguments for 'func' -- TODO confirm
# How these fields are consumed is decided inside modelV2, not in this notebook.
AUGMENT_DICT = {
    'crop': {
        'enabled': True, 
        'func': v2.RandomResizedCrop,
        'prob': 1.0,
        'params': {
            # NOTE(review): looks like this matches the "1200_800" in BASE_DIR's run name -- confirm
            'size': (1200, 800), 
            'scale': (0.7, 1.0),
        }
    },
    'rotation': { 
        'enabled': True, 
        'func': v2.RandomRotation,
        'prob': 0.4,
        'params': {
            'degrees': 5
        }
    },
    'color_jitter': {
        'enabled': True,
        'func': v2.ColorJitter,
        'prob': 0.2,
        'params': {
            'brightness': 0.4, 
            'contrast': 0.4,
            'saturation': 0.4, 
            'hue': 0.2
        }
    },
    'gaussian_blur': {
        'enabled': True,
        'func': v2.GaussianBlur,
        'prob': 0.2,
        'params': {
            'kernel_size': 3
        }
    }
}

In [6]:
# Choose the training device: prefer CUDA, fall back to Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"using {device} device...")

using cuda device...


In [7]:
# Build the hyper-parameter optimizer from the configuration constants.
# The four calls below act on the same object in sequence.
hparam_optimizer = HyperParamOptimizer(
    hparam_range_dict=HPARAM_RANGE_DICT, 
    balance=BALANCE_DISTRIBUTIONS,
    monitor_metric=MONITOR_METRIC,
    n_bootstraps=N_BOOTSTRAPS,
    epochs_per_run=EPOCHS_PER_RUN,
    seed=SEED
)

# set device and model
hparam_optimizer.set_model(
    device=device, 
    model_type='resnet18'
)

# load data into optimizer
hparam_optimizer.load_data(
    df_pool_dict=DF_POOL_DICT,
    batch_size=BATCH_SIZE,
    augment_dict=AUGMENT_DICT
)

# start optimizer
# NOTE(review): in the recorded run this call aborts with KeyError: 'test_loss'
# right after the first 20-epoch run reloads its best checkpoint and
# re-evaluates; the metrics reported at that point are all val_*. The failing
# lookup is not in this notebook -- presumably inside tuning_wrapper or
# HyperParamOptimizer; confirm which side is expected to produce 'test_loss'.
hparam_optimizer.optimize(
    objective=tuning_wrapper,
    n_random=N_RANDOM, 
    n_guided=N_GUIDED, 
    model_name=MODEL_NAME
)


Data loaded...

Optimizing model ------------------------------------------------------------
Logging results to './logs/masked_0_0_rn18_r3_a_opt.json'
|   iter    |  target   | learni... |
-------------------------------------
running 5 bootstraps...
with seeds: [4266198 9960322 3226871 5341026  569431]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pos_df.loc[:, '__class'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  neg_df.loc[:, '__class'] = 0



Epoch 0 ----------------------------------------


100%|██████████| 131/131 [01:20<00:00,  1.63batch/s, train_loss=0.692, train_acc=0.523, train_auc=0.555, train_f1=0.656, train_prec=0.513, train_rec=0.911]
100%|██████████| 28/28 [00:18<00:00,  1.51batch/s, val_loss=0.691, val_acc=0.532, val_auc=0.56, val_f1=0.659, val_prec=0.518, val_rec=0.906]


saving model with best val_loss '0.6912'...

Epoch 1 ----------------------------------------


100%|██████████| 131/131 [01:24<00:00,  1.56batch/s, train_loss=0.691, train_acc=0.54, train_auc=0.558, train_f1=0.643, train_prec=0.526, train_rec=0.829] 
100%|██████████| 28/28 [00:18<00:00,  1.50batch/s, val_loss=0.692, val_acc=0.519, val_auc=0.526, val_f1=0.629, val_prec=0.512, val_rec=0.816]



Epoch 2 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.57batch/s, train_loss=0.691, train_acc=0.546, train_auc=0.558, train_f1=0.627, train_prec=0.532, train_rec=0.761]
100%|██████████| 28/28 [00:19<00:00,  1.43batch/s, val_loss=0.691, val_acc=0.523, val_auc=0.544, val_f1=0.48, val_prec=0.528, val_rec=0.44]


saving model with best val_loss '0.6910'...

Epoch 3 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.57batch/s, train_loss=0.69, train_acc=0.546, train_auc=0.564, train_f1=0.595, train_prec=0.537, train_rec=0.666]
100%|██████████| 28/28 [00:18<00:00,  1.49batch/s, val_loss=0.69, val_acc=0.548, val_auc=0.568, val_f1=0.614, val_prec=0.536, val_rec=0.72]


saving model with best val_loss '0.6904'...

Epoch 4 ----------------------------------------


100%|██████████| 131/131 [01:25<00:00,  1.53batch/s, train_loss=0.69, train_acc=0.549, train_auc=0.567, train_f1=0.573, train_prec=0.544, train_rec=0.605]
100%|██████████| 28/28 [00:20<00:00,  1.39batch/s, val_loss=0.69, val_acc=0.564, val_auc=0.569, val_f1=0.564, val_prec=0.564, val_rec=0.564]


saving model with best val_loss '0.6903'...

Epoch 5 ----------------------------------------


100%|██████████| 131/131 [01:22<00:00,  1.59batch/s, train_loss=0.69, train_acc=0.552, train_auc=0.571, train_f1=0.552, train_prec=0.553, train_rec=0.551]
100%|██████████| 28/28 [00:19<00:00,  1.43batch/s, val_loss=0.69, val_acc=0.532, val_auc=0.558, val_f1=0.427, val_prec=0.551, val_rec=0.349]


saving model with best val_loss '0.6897'...

Epoch 6 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.57batch/s, train_loss=0.69, train_acc=0.544, train_auc=0.564, train_f1=0.58, train_prec=0.537, train_rec=0.631] 
100%|██████████| 28/28 [00:19<00:00,  1.40batch/s, val_loss=0.689, val_acc=0.549, val_auc=0.574, val_f1=0.538, val_prec=0.551, val_rec=0.525]


saving model with best val_loss '0.6890'...

Epoch 7 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.56batch/s, train_loss=0.689, train_acc=0.549, train_auc=0.574, train_f1=0.543, train_prec=0.55, train_rec=0.535] 
100%|██████████| 28/28 [00:19<00:00,  1.40batch/s, val_loss=0.69, val_acc=0.546, val_auc=0.575, val_f1=0.638, val_prec=0.53, val_rec=0.802]



Epoch 8 ----------------------------------------


100%|██████████| 131/131 [01:24<00:00,  1.54batch/s, train_loss=0.69, train_acc=0.54, train_auc=0.565, train_f1=0.551, train_prec=0.538, train_rec=0.564]  
100%|██████████| 28/28 [00:19<00:00,  1.44batch/s, val_loss=0.689, val_acc=0.534, val_auc=0.566, val_f1=0.59, val_prec=0.526, val_rec=0.672]



Epoch 9 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.58batch/s, train_loss=0.689, train_acc=0.549, train_auc=0.577, train_f1=0.56, train_prec=0.546, train_rec=0.573] 
100%|██████████| 28/28 [00:18<00:00,  1.49batch/s, val_loss=0.688, val_acc=0.576, val_auc=0.599, val_f1=0.55, val_prec=0.585, val_rec=0.518]


saving model with best val_loss '0.6880'...

Epoch 10 ----------------------------------------


100%|██████████| 131/131 [01:24<00:00,  1.55batch/s, train_loss=0.689, train_acc=0.542, train_auc=0.57, train_f1=0.524, train_prec=0.546, train_rec=0.505] 
100%|██████████| 28/28 [00:19<00:00,  1.44batch/s, val_loss=0.689, val_acc=0.541, val_auc=0.573, val_f1=0.538, val_prec=0.542, val_rec=0.534]



Epoch 11 ----------------------------------------


100%|██████████| 131/131 [01:28<00:00,  1.47batch/s, train_loss=0.689, train_acc=0.547, train_auc=0.571, train_f1=0.556, train_prec=0.545, train_rec=0.568]
100%|██████████| 28/28 [00:20<00:00,  1.39batch/s, val_loss=0.69, val_acc=0.542, val_auc=0.56, val_f1=0.547, val_prec=0.541, val_rec=0.554]



Epoch 12 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.58batch/s, train_loss=0.688, train_acc=0.554, train_auc=0.579, train_f1=0.56, train_prec=0.552, train_rec=0.568] 
100%|██████████| 28/28 [00:19<00:00,  1.41batch/s, val_loss=0.689, val_acc=0.546, val_auc=0.568, val_f1=0.557, val_prec=0.544, val_rec=0.57]



Epoch 13 ----------------------------------------


100%|██████████| 131/131 [01:21<00:00,  1.60batch/s, train_loss=0.688, train_acc=0.557, train_auc=0.582, train_f1=0.585, train_prec=0.551, train_rec=0.624]
100%|██████████| 28/28 [00:20<00:00,  1.37batch/s, val_loss=0.687, val_acc=0.551, val_auc=0.586, val_f1=0.508, val_prec=0.562, val_rec=0.464]


saving model with best val_loss '0.6873'...

Epoch 14 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.57batch/s, train_loss=0.688, train_acc=0.547, train_auc=0.578, train_f1=0.55, train_prec=0.547, train_rec=0.554] 
100%|██████████| 28/28 [00:19<00:00,  1.45batch/s, val_loss=0.689, val_acc=0.54, val_auc=0.564, val_f1=0.575, val_prec=0.535, val_rec=0.621]



Epoch 15 ----------------------------------------


100%|██████████| 131/131 [01:29<00:00,  1.46batch/s, train_loss=0.687, train_acc=0.551, train_auc=0.582, train_f1=0.561, train_prec=0.549, train_rec=0.572]
100%|██████████| 28/28 [00:19<00:00,  1.41batch/s, val_loss=0.689, val_acc=0.537, val_auc=0.55, val_f1=0.535, val_prec=0.538, val_rec=0.531]



Epoch 16 ----------------------------------------


100%|██████████| 131/131 [01:23<00:00,  1.57batch/s, train_loss=0.688, train_acc=0.554, train_auc=0.576, train_f1=0.562, train_prec=0.552, train_rec=0.572]
100%|██████████| 28/28 [00:19<00:00,  1.44batch/s, val_loss=0.688, val_acc=0.537, val_auc=0.565, val_f1=0.592, val_prec=0.529, val_rec=0.672]



Epoch 17 ----------------------------------------


100%|██████████| 131/131 [01:24<00:00,  1.55batch/s, train_loss=0.687, train_acc=0.553, train_auc=0.577, train_f1=0.56, train_prec=0.551, train_rec=0.57]  
100%|██████████| 28/28 [00:20<00:00,  1.36batch/s, val_loss=0.688, val_acc=0.54, val_auc=0.574, val_f1=0.581, val_prec=0.533, val_rec=0.639]



Epoch 18 ----------------------------------------


100%|██████████| 131/131 [01:22<00:00,  1.59batch/s, train_loss=0.687, train_acc=0.555, train_auc=0.576, train_f1=0.57, train_prec=0.551, train_rec=0.59]  
100%|██████████| 28/28 [00:19<00:00,  1.46batch/s, val_loss=0.689, val_acc=0.533, val_auc=0.561, val_f1=0.549, val_prec=0.531, val_rec=0.569]



Epoch 19 ----------------------------------------


100%|██████████| 131/131 [01:25<00:00,  1.54batch/s, train_loss=0.688, train_acc=0.542, train_auc=0.568, train_f1=0.545, train_prec=0.541, train_rec=0.55] 
100%|██████████| 28/28 [00:20<00:00,  1.40batch/s, val_loss=0.687, val_acc=0.551, val_auc=0.58, val_f1=0.598, val_prec=0.541, val_rec=0.669]


saving model with best val_loss '0.6866'...
best model loaded...


100%|██████████| 28/28 [00:19<00:00,  1.45batch/s, val_loss=0.687, val_acc=0.554, val_auc=0.577, val_f1=0.596, val_prec=0.545, val_rec=0.657]


KeyError: 'test_loss'