# This notebook runs a train-and-eval loop in which models are retrained on consensus labels that improve with each iteration.

In [1]:
# keep ground truth labels as self contained as possible! define #ground truth not actually needed
# get and save different consensus labels in a dictionary cleanly
# compute accuracy but make it clear that is not something the user will want to do

# label some data
# get improved consensus quality scores and more data
# train notebook (active learning with a twist because you can get datapoint for something you've already labeled)



In [2]:
import os
import sys
from datetime import datetime

import numpy as np
import pandas as pd

# The parent directory must be on the import path before the imports below run.
sys.path.insert(0, "../")

from cleanlab.multiannotator import get_label_quality_multiannotator # only in hui wen directory
from utils.data_loading import drop_and_distribute
from utils.data_loading import get_and_save_consensus_labels
from utils.data_loading import get_and_save_improved_consensus_label
from utils.data_loading import get_annotator_labels
from utils.data_loading import get_ground_truth_data_matched
from utils.model_training import sum_xval_folds
from utils.model_training import train_models

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Each run gets its own folder, named after the current Unix timestamp truncated to whole seconds.
now = datetime.now()
experiment_folder = f"experiment_{int(now.timestamp())}"
dirName = f"./data/experiments/{experiment_folder}"

# Create the folder only when it is missing, and report which case applied.
if os.path.exists(dirName):
    print("Directory " , dirName ,  " already exists")
else:
    os.makedirs(dirName)
    print("Directory " , dirName ,  " Created ")

print(f'Experiment saved in {dirName}')

Directory  ./data/experiments/experiment_1660859529  Created 
Experiment saved in ./data/experiments/experiment_1660859529


## Dropout data values

In [4]:
# Get cifar10h dataset and dropout information from it
cifar10_infolder = './data/cifar-10h/cifar10h-raw.csv' #c10h raw data folder
max_annotations = 5

c10h_labels, c10h_true_labels, c10h_true_images = get_annotator_labels(cifar10_infolder)
c10h_labels = drop_and_distribute(c10h_labels, c10h_true_labels, max_annotations)

# save c10h_results
cifar10_labels_folder = f"{dirName}/todelete_c10h_labels_range_{max_annotations}.npy"
cifar10_true_labels_folder = f"{dirName}/todelete_c10h_true_labels_range_{max_annotations}.npy"
np.save(cifar10_labels_folder, c10h_labels)
np.save(cifar10_true_labels_folder, c10h_true_labels)

# Generate and save original consensus labels
consensus_outfolder = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_0.csv' #output folder for consensus labels
consensus_labels = get_and_save_consensus_labels(c10h_labels, c10h_true_labels, consensus_outfolder)

# Generate label quality of each annotator
label_quality_multiannotator = None

all examples have at least 1 annotator
num_worst_annotators_selected 457
Total idxs dropped:  488003.0
Make sure 10.0 <= 5 and 1.0 > 0: 
Total idxs dropped:  8139.0
Make sure 9.0 <= 5 and 1.0 > 0: 


In [5]:
# cifar10_infolder = './data/cifar-10h/cifar10h-raw.csv' #c10h raw data folder
# max_annotations = 5

# c10h_labels, c10h_true_labels, c10h_true_images = get_annotator_labels(cifar10_infolder)
# np.save('./benchmark_data/c10h_true_labels.npy', c10h_true_labels)
# np.save('./benchmark_data/c10h_true_images.npy', c10h_true_images)

In [22]:
# Keep only the last two path components of every image path and
# re-root them under "cifar10_test/", then save the resulting list.
image_locs = [
    "cifar10_test/" + "/".join(image_path.rsplit("/", 2)[-2:])
    for image_path in consensus_labels["image"]
]
np.save('c10h_image_paths.npy', image_locs)

In [21]:
# Discarded earlier attempt, kept for reference:
# image_locs = ['/'.split(image) for image in consensus_labels["image"]]
# Preview the first three entries of image_locs.
image_locs[:3]

['cifar10_test/cat/test_batch_index_0000.png',
 'cifar10_test/ship/test_batch_index_0001.png',
 'cifar10_test/ship/test_batch_index_0002.png']

## Train models through loop

In [6]:
# Load consensus labels and train model on them
models = [
    "resnet18",
    "swin_base_patch4_window7_224"
]

train_args = {
    "num_cv_folds": 5, 
    "verbose": 1, 
    "epochs": 100, 
    "holdout_frac": 0.2, 
    "time_limit": 21600, 
    "random_state": 123
}

In [None]:
# # Loop through and retrain model on better pred-probs
# NUM_MODEL_RETRAINS = 7

# # check if new consensus labels exist in the set of prior consensus labels and stop if yes (cycle maybe)
# for i in range(NUM_MODEL_RETRAINS):
#     for model in models:
#         # Get folders
#         consensus_infolder = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_0.csv' 
# #         if i == 0:
# #             consensus_infolder = consensus_outfolder
# #         else:
# #             consensus_infolder = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_{i-1}_{model}.csv'
#         model_results_folder = f'{dirName}/todelete_cifar10_consensus_range_{max_annotations}_{i}' # + [model_type]
#         consensus_outfolder = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_{i}_{model}.csv'
        
#         df = pd.read_csv(consensus_infolder)
#         print(df.head())
#         print(f'--INFO {i}_{model}--')
#         print('Loading consensus from', consensus_infolder)
#         print('Saving consensus to', consensus_outfolder)
#         print('Saving model results to', model_results_folder)
#         print('---------------------')
        
        
# #         # Train model
# #         train_models([model], consensus_infolder, model_results_folder, **train_args)
# #         pred_probs, labels , true_labels, images = sum_xval_folds([model], model_results_folder, **train_args)
        
# #         # Get label quality multiannotator
# #         label_quality_multiannotator = get_label_quality_multiannotator(c10h_labels,pred_probs,verbose=False)

# #         # Generate and save consensus labels
# #         _ = get_and_save_improved_consensus_label(label_quality_multiannotator, c10h_true_labels, consensus_outfolder)

In [None]:
# Loop through and retrain model on better pred-probs
NUM_MODEL_RETRAINS = 7

# check if new consensus labels exist in the set of prior consensus labels and stop if yes (cycle maybe)
for i in range(NUM_MODEL_RETRAINS):
    for model in models:
        # Get folders
        if i == 0:
            consensus_infolder = consensus_outfolder
        else:
            consensus_infolder = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_{i-1}_{model}.csv'
        model_results_folder = f'{dirName}/todelete_cifar10_consensus_range_{max_annotations}_{i}' # + [model_type]
        consensus_outfolder = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_{i}_{model}.csv'
        
        print(f'--INFO {i}_{model}--')
        print('Loading consensus from', consensus_infolder)
        print('Saving consensus to', consensus_outfolder)
        print('Saving model results to', model_results_folder)
        print('---------------------')
        
        
        # Train model
        train_models([model], consensus_infolder, model_results_folder, **train_args)
        pred_probs, labels , true_labels, images = sum_xval_folds([model], model_results_folder, **train_args)
        
        # Get label quality multiannotator
        label_quality_multiannotator = get_label_quality_multiannotator(c10h_labels,pred_probs,verbose=False)

        # Generate and save consensus labels
        _ = get_and_save_improved_consensus_label(label_quality_multiannotator, c10h_true_labels, consensus_outfolder)

## Compute accuracy for each model and iteration (predictions and consensus labels vs. true labels), read from each iteration's results folder

In [None]:
# Baseline: how often the consensus labels in `consensus_labels` match the true labels.
# This needs ground-truth labels, which a real user of the method would not have.
acc_noisy_vs_true_labels = (consensus_labels['label'].values == c10h_true_labels).mean()
print(f"Accuracy ORIGINAL (consensus labels vs true labels): {acc_noisy_vs_true_labels}\n")

for model in models:
    for i in range(NUM_MODEL_RETRAINS):
        # Locate the saved arrays for this model at iteration i.
        model_results_folder = f'{dirName}/todelete_cifar10_consensus_range_{max_annotations}_{i}' # + [model_type]
        out_subfolder = f"{model_results_folder}_{model}/"

        print(f'--{model} iter{i}--')

        pred_probs = np.load(out_subfolder + "pred_probs.npy")
        labels = np.load(out_subfolder + "labels.npy") # remember that this is the noisy labels (s)
        # allow_pickle=True unpickles the file contents: only load files produced by this project.
        images = np.load(out_subfolder + "images.npy", allow_pickle=True)
        true_labels = np.load(out_subfolder + "true_labels.npy")

        # Compute the predicted class once and reuse it for both accuracies.
        predicted = pred_probs.argmax(axis=1)
        acc_labels = (predicted == labels).mean() # noisy labels (s)
        acc_true_labels = (predicted == true_labels).mean() # true labels (y)
        acc_noisy_vs_true_labels = (labels == true_labels).mean()

        print(f"Model: {model}")
        print(f"  Accuracy (argmax pred vs labels)                 : {acc_labels}")
        print(f"  Accuracy (argmax pred vs true labels)            : {acc_true_labels}")
        print(f"  Accuracy (consensus labels vs true labels)       : {acc_noisy_vs_true_labels}\n")

In [None]:
# True wherever c10h_labels holds a value, False where the entry is NaN.
annotator_mask = ~np.isnan(c10h_labels)
# Histogram of the number of non-NaN entries counted along axis 0.
counts_axis0 = annotator_mask.sum(axis=0)
pd.DataFrame(counts_axis0).hist(bins=50)

In [None]:
# Histogram of the number of non-NaN entries counted along axis 1.
counts_axis1 = annotator_mask.sum(axis=1)
pd.DataFrame(counts_axis1).hist(bins=50)

In [None]:
import pandas as pd

# Start from the original (iteration-0) consensus labels.
consensus_file = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_0.csv'
clabels = pd.read_csv(consensus_file)['label'].values
print('true: ', c10h_true_labels[:20])
print('cons: ', clabels[:20])
# Score the consensus labels loaded above. The previous code compared `labels`,
# a variable left over from another cell, which did not match the printed message.
accuracy = np.mean(clabels == c10h_true_labels)
print(f"Accuracy ORIGINAL (consensus labels vs true labels): {accuracy}\n")

model = "resnet18"

for i in range(NUM_MODEL_RETRAINS):
    model_results_folder = f'{dirName}/todelete_cifar10_consensus_range_{max_annotations}_{i}' # + [model_type]
    out_subfolder = f"{model_results_folder}_{model}/"

    # At this point `clabels` still holds the consensus labels loaded before this iteration
    # (the original file on the first pass, the previous iteration's file afterwards).
    true_labels = np.load(out_subfolder + "true_labels.npy")
    print('true: ', true_labels[:20])
    print('cons: ', clabels[:20])

    # Positions where the stored true labels differ from those consensus labels.
    mismatch_idx = np.where(true_labels != clabels.astype(int))[0]
    num_examples = len(true_labels)  # replaces the hard-coded 10000
    print(true_labels[mismatch_idx][:20])
    print((num_examples - len(mismatch_idx)) / num_examples)

    # NOTE(review): this flags ANY disagreement between true_labels and clabels -- confirm
    # that the two arrays are really expected to be identical here.
    if not (true_labels == clabels.astype(int)).all():
        print('something is wrong!')

    # Load this iteration's improved consensus labels and score them against ground truth.
    consensus_file = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_{i}_{model}.csv'
    clabels = pd.read_csv(consensus_file)['label'].values
    accuracy = np.mean(clabels == c10h_true_labels)
    print(f"{i}: Accuracy (consensus labels vs true labels): {accuracy}")
        

In [None]:
from cleanlab.multiannotator import get_majority_vote_label
# Load the saved label array (presumably one column per annotator -- TODO confirm) and
# wrap it in a DataFrame before handing it to the majority vote.
annotator_labels_df = pd.DataFrame(np.load('benchmark_data/c10h_labels_worst_20_coin20.npy'))

# Majority-vote consensus computed without model predictions (pred_probs=None).
consensus_labels_true = get_majority_vote_label(annotator_labels_df, pred_probs=None)
consensus_labels_true.shape

In [None]:
# Fraction of positions where the majority-vote labels equal the true labels.
label_matches = consensus_labels_true == c10h_true_labels
accuracy = np.mean(label_matches)
print(f"Accuracy ORIGINAL (consensus labels vs true labels): {accuracy}\n")

In [None]:
# Regenerate the original consensus labels and write them to the iteration-0 consensus file.
# The path is spelled out here instead of read from `consensus_outfolder`, whose value
# depends on which cells have already run; using it could overwrite a later iteration's file.
original_consensus_file = f'{dirName}/todelete_cifar10_test_consensus_dataset_range_{max_annotations}_0.csv'
consensus_labels = get_and_save_consensus_labels(c10h_labels, c10h_true_labels, original_consensus_file)

In [None]:
import sys
import os
import pandas as pd
sys.path.insert(0, "../")

def get_image_paths(images, image_data_folder):
    """Unfinished stub: build the class-name -> index table and return None.

    Neither ``images`` nor ``image_data_folder`` is read, and the table is
    discarded when the function returns.
    """
    class_names = (
        "airplane",
        "automobile",
        "bird",
        "cat",
        "deer",
        "dog",
        "frog",
        "horse",
        "ship",
        "truck",
    )
    # Index follows the position in class_names (airplane=0 ... truck=9).
    classes = {name: index for index, name in enumerate(class_names)}

# Locations used by this scratch cell.
image_data_folder = 'data/cifar10/test' # datafolder ending is split of test and train
consensus_data = './data/benchmark_data/cifar10_test_consensus_dataset_worst_20_coin20.csv'

# Show the working directory, followed by a fixed example file name.
path = os.getcwd()
print(path)
print('images[0]: domestic_cat_s_000907.png')

# image_locs = [f"{path}/{image_data_folder}/{get_animal(im)}/test_batch_index_{get_idx(im)}" for im in images]

# Read the benchmark consensus CSV and display its first rows.
consensus_df = pd.read_csv(consensus_data)
consensus_df.head()

# consensus_df['image'][0]

# consensus_df['image'] = \
# consensus_df['image'].apply(lambda x: path + '/' + '/'.join(x.split('/')[-5:]))
# consensus_df.to_csv('./data/' + model_folder + '/cifar10_test_consensus_dataset_worst_25_coin20.csv', index=False)

In [None]:
!pwd
! cd data/benchmark_data/cifar10_test_consensus_dataset_worst_20_coin20.csv && ls

In [None]:
# Re-read the CSV at the path stored in `consensus_data` into `consensus_df`.
consensus_df = pd.read_csv(consensus_data)

In [None]:
# Display the 'image' value of the first row of consensus_labels.
consensus_labels['image'].iloc[0]