# This notebook trains a model with cross-validation on the entire dataset
- Gets `pred_probs` on holdout and saves the results as NumPy files
- Make sure you run `evaluate_benchmarks` locally and push/pull the newest `cifar10_test_consensus_dataset` first

In [None]:
# %load_ext autoreload
# %autoreload 2

import sys

sys.path.insert(0, "../")

from autogluon.vision import ImagePredictor, ImageDataset
import numpy as np
import pandas as pd
import pickle
import datetime
from pathlib import Path
import cleanlab
from utils.cross_validation_autogluon import cross_val_predict_autogluon_image_dataset

# Lift pandas' display limits (rows, columns, column width) so frames are never truncated.
for _display_option in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(_display_option, None)

## Load data

In [3]:
# Read the consensus test CSV into a DataFrame.
data_filepath = "./data/benchmark_data/cifar10_test_consensus_dataset_worst_25_coin20.csv"
df = pd.read_csv(data_filepath)

# Build a small subset for quick test runs: the first
# `num_from_each_group` rows of every "label" group.
num_from_each_group = 15
mini_df = df.groupby("label").head(num_from_each_group)

# Display the number of "image" entries kept per label.
mini_df.groupby("label", as_index=False)["image"].count()

Unnamed: 0,label,image
0,0,15
1,1,15
2,2,15
3,3,15
4,4,15
5,5,15
6,6,15
7,7,15
8,8,15
9,9,15


**Model and data saving params**

In [4]:
# --- Save/load location ---
# Folder prefix for results; the model name is appended to it ("<model_folder>_<model>").
model_folder = "./data/cifar10_consensus_worst_25_coin20"

# --- Models ---
# Architectures to generate cross-validated predicted probabilities for.
models = ["resnet18", "swin_base_patch4_window7_224"]

# --- Cross-validation parameters ---
num_cv_folds = 5  # K in stratified K-fold cross-validation
verbose = 1  # verbosity of data loading when converting cross-validation results to NumPy files

# --- Parameters shared by every model ---
random_state = 123
holdout_frac = 0.2
epochs = 100
time_limit = 6 * 60 * 60  # = 21600; presumably seconds (6 hours) -- TODO confirm against the helper

## Run cross validation on `models`

In [None]:
%%time
# Cross-validate every architecture listed in `models`, one after another.
for model in models:

    print("----")
    print(f"Running cross-validation for model: {model}")

    # Per-model training settings handed to the cross-validation helper.
    MODEL_PARAMS = dict(model=model, epochs=epochs, holdout_frac=holdout_frac)

    # The helper saves the cross-validation results to pickle files for each
    # model/fold under `out_folder`; its return value is not needed here.
    # NOTE(review): this runs on `mini_df` (the per-label subset), not the full
    # `df` -- confirm that is intended before a real benchmark run.
    _ = cross_val_predict_autogluon_image_dataset(
        dataset=mini_df,
        out_folder=f"{model_folder}_{model}/",
        n_splits=num_cv_folds,
        model_params=MODEL_PARAMS,
        time_limit=time_limit,
        random_state=random_state,
    )

modified configs(<old> != <new>): {
root.img_cls.model   resnet101 != resnet18
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
root.train.early_stop_baseline 0.0 != -inf
root.train.batch_size 32 != 16
root.train.early_stop_max_value 1.0 != inf
root.misc.seed       42 != 141
root.misc.num_workers 4 != 64
}
Saved config to /datasets/uly/multiannotator_benchmarks/74424aa5/.trial_0/config.yaml


----
Running cross-validation for model: resnet18
----
Running Cross-Validation on Split: 0


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/uly/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth
Model resnet18 created, param count:                                         11181642
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
[Epoch 0] training: accuracy=0.104167
[Epoch 0] speed: 25 samples/sec	time cost: 3.108615
[Epoch 0] validation: top1=0.083333 top5=0.500000
[Epoch 0] Current best top-1: 0.083333 vs previous -inf, saved to /datasets/uly/multiannotator_benchmarks/74424aa5/.trial_0/best_checkpoint.pkl
[Epoch 1] training: accuracy=0.072917
[Epoch 1] speed: 452 samples/sec	time cost: 0.177527
[Epoch 1] validation: top1=0.083333 top5=0.416667
[Epoch 2] training: accuracy=0.239583
[Epoch 2] speed: 480 samples/sec	time cost: 0.167176
[Epoch 2] validation: top1=0.083333 top5=0.500000
[Epoch 3] training: accuracy=0.333333
[Epoch 3] speed: 497 samples/sec	time cost: 0.161430
[Epoch 3

Saving ./data/cifar10_consensus_worst_25_coin20_resnet18/split_0/_test_pred_probs_split_0
Saving ./data/cifar10_consensus_worst_25_coin20_resnet18/split_0/_test_pred_features_split_0
Saving ./data/cifar10_consensus_worst_25_coin20_resnet18/split_0/_test_labels_split_0
Saving ./data/cifar10_consensus_worst_25_coin20_resnet18/split_0/_test_image_files_split_0
Saving ./data/cifar10_consensus_worst_25_coin20_resnet18/split_0/_test_indices_split_0
----
Running Cross-Validation on Split: 1


Model resnet18 created, param count:                                         11181642
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
[Epoch 0] training: accuracy=0.093750
[Epoch 0] speed: 31 samples/sec	time cost: 2.520188
[Epoch 0] validation: top1=0.083333 top5=0.666667
[Epoch 0] Current best top-1: 0.083333 vs previous -inf, saved to /datasets/uly/multiannotator_benchmarks/f4127b7b/.trial_0/best_checkpoint.pkl
[Epoch 1] training: accuracy=0.135417
[Epoch 1] speed: 439 samples/sec	time cost: 0.183105
[Epoch 1] validation: top1=0.250000 top5=0.750000
[Epoch 1] Current best top-1: 0.250000 vs previous 0.083333, saved to /datasets/uly/multiannotator_benchmarks/f4127b7b/.trial_0/best_checkpoint.pkl
[Epoch 2] training: accuracy=0.166667
[Epoch 2] speed: 494 samples/sec	time cost: 0.162673
[Epoch 2] validation: top1=0.333333 top5=0.583333
[Epoch 2] Current best top-1: 0.333333 vs previous 0.250000, saved to /datasets/uly/mult

## Read per-fold pickle files from cross-validation and save data as NumPy arrays

In [None]:
# load pickle file util
def load_pickle(pickle_file_name, verbose=1):
    """Read a pickle file and return the object stored in it.

    Parameters
    ----------
    pickle_file_name : str or path-like
        Location of the pickle file to read.
    verbose : int or bool, default 1
        When truthy, print the file name before loading it.

    Returns
    -------
    object
        The object unpickled from the file.
    """
    if verbose:
        print(f"Loading {pickle_file_name}")

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(pickle_file_name, mode="rb") as pickle_stream:
        return pickle.load(pickle_stream)

# Original label name -> index mapping; names are listed in index order (0..9).
label_name_to_idx_map = {
    label_name: label_idx
    for label_idx, label_name in enumerate(
        (
            "airplane",
            "automobile",
            "bird",
            "cat",
            "deer",
            "dog",
            "frog",
            "horse",
            "ship",
            "truck",
        )
    )
}


def _orig_label_idx_from_path(file_path):
    """Return the label index named by the second-to-last component of `file_path`."""
    return label_name_to_idx_map[Path(file_path).parts[-2]]


# Element-wise version: maps an array of file paths to their original
# label indices (aka "true labels" y).
get_orig_label_idx_from_file_path = np.vectorize(_orig_label_idx_from_path)

#### Combine the per-fold pickle files into a single set of files per model

In [None]:
def _split_pickle_path(model, split_num, object_name):
    """Return the path of the pickle storing `object_name` for one model and CV split."""
    return f"{model_folder}_{model}/split_{split_num}/_{object_name}_split_{split_num}"


# One dict of accuracy figures per model, appended by the loop below.
results_list = []

for model in models:

    # Per-split pieces, collected in split order and stacked afterwards.
    pred_probs = []
    labels = []
    images = []

    for split_num in range(num_cv_folds):

        # NOTE: the "test_" prefix in the pickle name corresponds to the "test" split during cross-validation.
        pred_probs.append(
            load_pickle(_split_pickle_path(model, split_num, "test_pred_probs"), verbose=verbose)
        )
        labels.append(
            load_pickle(_split_pickle_path(model, split_num, "test_labels"), verbose=verbose)
        )
        images.append(
            load_pickle(_split_pickle_path(model, split_num, "test_image_files"), verbose=verbose)
        )

    # Combine the splits into single arrays. Rows are kept in split order;
    # `images` is saved alongside so every row can be matched to its file.
    pred_probs = np.vstack(pred_probs)
    labels = np.hstack(labels)  # remember that these are the noisy labels (s)
    images = np.hstack(images)

    # Fail loudly on misaligned splits instead of saving/scoring mismatched arrays.
    assert len(pred_probs) == len(labels) == len(images), (
        f"Mismatched lengths for {model}: "
        f"pred_probs={len(pred_probs)}, labels={len(labels)}, images={len(images)}"
    )

    # Get the true labels (y) from the original file path.
    true_labels = get_orig_label_idx_from_file_path(images)

    # Save the combined arrays as NumPy files next to the per-split folders.
    numpy_out_folder = f"{model_folder}_{model}/"

    print(f"Saving to numpy files in this folder: {numpy_out_folder}")

    np.save(numpy_out_folder + "pred_probs", pred_probs)
    np.save(numpy_out_folder + "labels", labels)
    np.save(numpy_out_folder + "images", images)
    np.save(numpy_out_folder + "true_labels", true_labels)

    # Check the accuracy; the predicted class is computed once and reused.
    pred_labels = pred_probs.argmax(axis=1)
    acc_labels = (pred_labels == labels).mean()  # vs noisy labels (s)
    acc_true_labels = (pred_labels == true_labels).mean()  # vs true labels (y)
    acc_noisy_vs_true_labels = (labels == true_labels).mean()

    print(f"Model: {model}")
    print(f"  Accuracy (argmax pred vs labels): {acc_labels}")
    print(f"  Accuracy (argmax pred vs true labels) : {acc_true_labels}")
    print(f"  Accuracy (labels vs true labels)       : {acc_noisy_vs_true_labels}")

    results = {
        "model": model,
        "Accuracy (argmax pred vs noisy labels)": acc_labels,
        "Accuracy (argmax pred vs true labels)": acc_true_labels,
        "Accuracy (noisy vs true labels)": acc_noisy_vs_true_labels,
    }

    results_list.append(results)

In [None]:
# Reload the saved arrays for every model and print their shapes as a sanity check.
for model in models:
    numpy_out_folder = f"{model_folder}_{model}/"

    pred_probs, labels, true_labels = (
        np.load(f"{numpy_out_folder}{array_name}.npy")
        for array_name in ("pred_probs", "labels", "true_labels")
    )

    print(f'{model}\n pred_probs[{pred_probs.shape}],labels[{labels.shape}], true_labels[{true_labels.shape}]\n')