# This notebook runs a train-and-eval loop in which models are retrained on consensus labels that improve with each iteration.

In [None]:
import sys
import numpy as np
sys.path.insert(0, "../")

from utils.model_training import train_models
from utils.model_training import sum_xval_folds
from utils.data_loading import get_annotator_labels
from utils.data_loading import drop_and_distribute
from utils.data_loading import get_and_save_consensus_labels
from utils.data_loading import get_ground_truth_data_matched

In [1]:
# Load the CIFAR-10H annotator labels, post-process them with
# drop_and_distribute, and save the arrays plus the initial (iteration 0)
# consensus labels that the retraining loop starts from.
cifar10_infolder = './data/cifar10h/cifar10h-raw.csv'  # path to the raw CIFAR-10H CSV file
cifar10_outfolder = './benchmark_data/'  # directory the benchmark arrays are saved into

# In this cell max_annotations is only used to tag the output file names.
max_annotations = 5
# CSV file that receives the iteration-0 consensus labels.
consensus_outfolder = f'./benchmark_data/cifar10_test_consensus_dataset_range_{max_annotations}_0.csv'

# Fix: the original passed the undefined name `infolder` here, which raises
# NameError; the raw-data path is stored in `cifar10_infolder`.
c10h_labels, c10h_true_labels, c10h_true_images = get_annotator_labels(cifar10_infolder)
c10h_labels = drop_and_distribute(c10h_labels)

# Save the processed annotator labels and the true labels (np.save appends ".npy").
np.save(f"{cifar10_outfolder}/c10h_labels_range_{max_annotations}", c10h_labels)
np.save(f"{cifar10_outfolder}/c10h_true_labels_range_{max_annotations}", c10h_true_labels)

# Generate and save consensus labels (no pred_probs are available yet at this stage).
consensus_labels = get_and_save_consensus_labels(c10h_labels, consensus_outfolder)

ModuleNotFoundError: No module named 'cleanlab.multiannotator'

In [None]:
# Names of the models that the retraining loop trains on the consensus labels.
models = [
    "resnet18",
    "swin_base_patch4_window7_224",
]

In [None]:
# Repeatedly retrain every model: each round trains on the current consensus
# labels, collects the resulting pred-probs, and uses them to build the
# consensus labels for the next round.
NUM_MODEL_RETRAINS = 3

for i in range(NUM_MODEL_RETRAINS):
    for model in models:
        # Pick the consensus labels to train on.
        # Fix: the original condition was inverted. Round 0 must start from the
        # shared initial file (`consensus_outfolder`, written before this loop);
        # later rounds read the per-model file written by the previous round.
        if i == 0:
            consensus_infolder = consensus_outfolder
        else:
            consensus_infolder = f'./benchmark_data/cifar10_test_consensus_dataset_range_{max_annotations}_{i}_{model}.csv'

        # Fix: the original stored this path in a misspelled, never-read variable
        # (`consensus_oufolder`) and then saved to `consensus_outfolder`, so every
        # model and round overwrote the initial consensus file.
        next_consensus_outfolder = f'./benchmark_data/cifar10_test_consensus_dataset_range_{max_annotations}_{i+1}_{model}.csv'
        model_results_folder = f'./data/cifar10_consensus_range_{max_annotations}_{i}' # + [model_type]

        # Match label idxs to model
        # NOTE(review): this reads from the model's results folder before
        # train_models runs for this round — confirm those results already exist.
        # The pred_probs returned here is superseded by sum_xval_folds below.
        c10h_labels, c10h_true_labels, pred_probs = get_ground_truth_data_matched(f"{model_results_folder}_{model}", cifar10_outfolder)
        train_models([model], consensus_infolder, model_results_folder)
        pred_probs, labels, true_labels, images, results_list = sum_xval_folds([model], model_results_folder)

        # Report basic results
        for result in results_list:
            print(result)

        # Generate and save the consensus labels that the next round trains on
        consensus_labels = get_and_save_consensus_labels(c10h_labels, next_consensus_outfolder, pred_probs)