In [None]:
# -------------------------------------------- Imports -------------------------------------------- 

import torch
import torch.utils.data as Data

import numpy as np
from numpy import genfromtxt
from numpy.matlib import repmat
from numpy.random import default_rng

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from PIL import Image
from scipy import stats
from pathlib import Path

from Data_load.transformer import *
from utils import synthetic
import utils.tools, pdb
from torch.nn import functional as F
from torchvision import datasets

import matplotlib.pyplot as plt
import os



## MNIST

### Download

In [None]:
# Fetch both MNIST splits (train first, then test) through torchvision.
# `root=""` is an empty relative path, so the files are stored relative to the
# current working directory.
training_data, test_data = [
    datasets.MNIST(root="", train=is_train, download=True, transform=transforms.ToTensor())
    for is_train in (True, False)
]

In [None]:
# Dump the raw MNIST images/labels as .npy files.
# They are written under data/mnist/ because that is the location every
# annotation-generation cell in this notebook loads them from.
mnist_dir = "data/mnist"
os.makedirs(mnist_dir, exist_ok=True)  # np.save does not create missing directories

np.save(mnist_dir + "/train_images.npy", training_data.data)
np.save(mnist_dir + "/train_labels.npy", training_data.targets)

np.save(mnist_dir + "/test_images.npy", test_data.data)
np.save(mnist_dir + "/test_labels.npy", test_data.targets)

### Generate complete annotations

In [None]:
# Generate the "complete" annotation files for MNIST: for every error rate and
# every trial, synthetic.get_instance_noisy_label is called once and its two
# results (noisy labels, true transition matrices) are saved under
# data/mnist_complete/<err_rate>_<seed>_*.npy.
random_seed = 1

err_rate = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

complete_dir = "data/mnist_complete/"
os.makedirs(complete_dir, exist_ok=True)  # np.save fails if the directory is missing

# The training arrays are the same for every (error rate, trial) pair, so they
# are read from disk once instead of once per iteration.
original_images = np.load("data/mnist/train_images.npy")
original_labels = np.load("data/mnist/train_labels.npy")

for i, rate in enumerate(err_rate):

    for t in range(5):

        seed = random_seed + t

        data = torch.from_numpy(original_images).float()
        # NOTE(review): `targets` shares memory with `original_labels`; this assumes
        # get_instance_noisy_label does not modify its label argument in place.
        targets = torch.from_numpy(original_labels)
        # zip() returns a one-shot iterator, so it must be rebuilt for every call.
        dataset = zip(data, targets)

        # 50 identical entries passed as `tau` — presumably one per simulated
        # annotator; confirm against synthetic.get_instance_noisy_label.
        avg_error_rates = [rate] * 50

        print("################### Noisy Label for Trial " + str(t+1) + f" (err_rate={rate}) ###################")
        noisy_label, transition_true = synthetic.get_instance_noisy_label(dataset, targets, num_classes=10, feature_size=28*28, tau=avg_error_rates, norm_std=0.1, seed=seed)

        print('annot:', noisy_label[:2, :])
        print('true label:', original_labels[:2])
        print(" ")

        out_prefix = complete_dir + str(rate) + "_" + str(seed)
        np.save(out_prefix + '_noisy_label.npy', noisy_label)
        np.save(out_prefix + '_transition_true.npy', transition_true)


#### Noisy labels

In [None]:
# 10 annotators
#
# For each of the 5 trials, build three annotator pools ("low", "mid", "high")
# by concatenating the leading annotator columns of the complete-annotation
# files of three error rates, pass the pool through
# synthetic.incomplete_labeling and save the results under data/mnist_10_1/.

random_seed = 1
original_labels = np.load("data/mnist/train_labels.npy")

K = 10   # not used in this cell; kept so the notebook namespace is unchanged
R = 10   # forwarded to incomplete_labeling as R
Num = 1  # forwarded to incomplete_labeling as annot_num

complete_dir = "data/mnist_complete/"
out_dir = "data/mnist_10_1/"
os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing directories

all_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# level -> ((error-rate file, number of leading annotator columns taken), ...)
level_specs = {
    "low":  (("0.1", 4), ("0.2", 4), ("0.3", 2)),
    "mid":  (("0.3", 4), ("0.4", 4), ("0.5", 2)),
    "high": (("0.5", 4), ("0.6", 4), ("0.7", 2)),
}

for t in range(5):

    seed = random_seed + t

    # Load the complete annotations / transition matrices of every error rate
    # for this trial once; the three levels below slice from them.
    complete_labels = {}
    complete_transitions = {}
    for rate in all_rates:
        prefix = complete_dir + rate + "_" + str(seed)
        complete_labels[rate] = np.load(prefix + '_noisy_label.npy')
        complete_transitions[rate] = np.load(prefix + '_transition_true.npy')

    N = complete_labels["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (10 annotators) ###################")

    for level, spec in level_specs.items():

        print("--------------------------------- " + level + " ---------------------------------")

        noisy_labels = np.concatenate([complete_labels[rate][:, :cols] for rate, cols in spec], axis=1)
        transition_true = np.concatenate([complete_transitions[rate][:, :cols, :] for rate, cols in spec], axis=1)
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = out_dir + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(seed) + '_transition_true.npy', transition_true)

    

In [None]:
# 30 annotators
#
# For each of the 5 trials, build three annotator pools ("low", "mid", "high")
# by concatenating the leading annotator columns of the complete-annotation
# files of three error rates, pass the pool through
# synthetic.incomplete_labeling and save the results under data/mnist_30_1/.

random_seed = 1
original_labels = np.load("data/mnist/train_labels.npy")

K = 10   # not used in this cell; kept so the notebook namespace is unchanged
R = 30   # forwarded to incomplete_labeling as R
Num = 1  # forwarded to incomplete_labeling as annot_num

complete_dir = "data/mnist_complete/"
out_dir = "data/mnist_30_1/"
os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing directories

all_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# level -> ((error-rate file, number of leading annotator columns taken), ...)
level_specs = {
    "low":  (("0.1", 11), ("0.2", 11), ("0.3", 8)),
    "mid":  (("0.3", 11), ("0.4", 11), ("0.5", 8)),
    "high": (("0.5", 11), ("0.6", 11), ("0.7", 8)),
}

for t in range(5):

    seed = random_seed + t

    # Load the complete annotations / transition matrices of every error rate
    # for this trial once; the three levels below slice from them.
    complete_labels = {}
    complete_transitions = {}
    for rate in all_rates:
        prefix = complete_dir + rate + "_" + str(seed)
        complete_labels[rate] = np.load(prefix + '_noisy_label.npy')
        complete_transitions[rate] = np.load(prefix + '_transition_true.npy')

    N = complete_labels["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (30 annotators) ###################")

    for level, spec in level_specs.items():

        print("--------------------------------- " + level + " ---------------------------------")

        noisy_labels = np.concatenate([complete_labels[rate][:, :cols] for rate, cols in spec], axis=1)
        transition_true = np.concatenate([complete_transitions[rate][:, :cols, :] for rate, cols in spec], axis=1)
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = out_dir + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(seed) + '_transition_true.npy', transition_true)

    

In [None]:
# 50 annotators
#
# For each of the 5 trials, build three annotator pools ("low", "mid", "high")
# by concatenating the leading annotator columns of the complete-annotation
# files of three error rates, pass the pool through
# synthetic.incomplete_labeling and save the results under data/mnist_50_1/.

random_seed = 1
original_labels = np.load("data/mnist/train_labels.npy")

K = 10   # not used in this cell; kept so the notebook namespace is unchanged
R = 50   # forwarded to incomplete_labeling as R
Num = 1  # forwarded to incomplete_labeling as annot_num

complete_dir = "data/mnist_complete/"
out_dir = "data/mnist_50_1/"
os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing directories

all_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# level -> ((error-rate file, number of leading annotator columns taken), ...)
level_specs = {
    "low":  (("0.1", 18), ("0.2", 18), ("0.3", 14)),
    "mid":  (("0.3", 18), ("0.4", 18), ("0.5", 14)),
    "high": (("0.5", 18), ("0.6", 18), ("0.7", 14)),
}

for t in range(5):

    seed = random_seed + t

    # Load the complete annotations / transition matrices of every error rate
    # for this trial once; the three levels below slice from them.
    complete_labels = {}
    complete_transitions = {}
    for rate in all_rates:
        prefix = complete_dir + rate + "_" + str(seed)
        complete_labels[rate] = np.load(prefix + '_noisy_label.npy')
        complete_transitions[rate] = np.load(prefix + '_transition_true.npy')

    N = complete_labels["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (50 annotators) ###################")

    for level, spec in level_specs.items():

        print("--------------------------------- " + level + " ---------------------------------")

        noisy_labels = np.concatenate([complete_labels[rate][:, :cols] for rate, cols in spec], axis=1)
        transition_true = np.concatenate([complete_transitions[rate][:, :cols, :] for rate, cols in spec], axis=1)
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = out_dir + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(seed) + '_transition_true.npy', transition_true)

    

    

In [None]:
# 100 annotators
#
# For each of the 5 trials, build three annotator pools ("low", "mid", "high")
# by concatenating the leading annotator columns of the complete-annotation
# files of three error rates, pass the pool through
# synthetic.incomplete_labeling and save the results under data/mnist_100_1/.

random_seed = 1
original_labels = np.load("data/mnist/train_labels.npy")

K = 10   # not used in this cell; kept so the notebook namespace is unchanged
R = 100  # forwarded to incomplete_labeling as R
Num = 1  # forwarded to incomplete_labeling as annot_num

complete_dir = "data/mnist_complete/"
out_dir = "data/mnist_100_1/"
os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing directories

all_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# level -> ((error-rate file, number of leading annotator columns taken), ...)
level_specs = {
    "low":  (("0.1", 35), ("0.2", 35), ("0.3", 30)),
    "mid":  (("0.3", 35), ("0.4", 35), ("0.5", 30)),
    "high": (("0.5", 35), ("0.6", 35), ("0.7", 30)),
}

for t in range(5):

    seed = random_seed + t

    # Load the complete annotations / transition matrices of every error rate
    # for this trial once; the three levels below slice from them.
    complete_labels = {}
    complete_transitions = {}
    for rate in all_rates:
        prefix = complete_dir + rate + "_" + str(seed)
        complete_labels[rate] = np.load(prefix + '_noisy_label.npy')
        complete_transitions[rate] = np.load(prefix + '_transition_true.npy')

    N = complete_labels["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (100 annotators) ###################")

    for level, spec in level_specs.items():

        print("--------------------------------- " + level + " ---------------------------------")

        noisy_labels = np.concatenate([complete_labels[rate][:, :cols] for rate, cols in spec], axis=1)
        transition_true = np.concatenate([complete_transitions[rate][:, :cols, :] for rate, cols in spec], axis=1)
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = out_dir + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(seed) + '_transition_true.npy', transition_true)

    

    

## CIFAR-10

### Download

In [None]:
# Fetch both CIFAR-10 splits (train first, then test) through torchvision,
# using "cifar10" as the dataset root directory.
training_data, test_data = [
    datasets.CIFAR10(root="cifar10", train=is_train, download=True, transform=transforms.ToTensor())
    for is_train in (True, False)
]

In [None]:
# Dump the raw CIFAR-10 images/labels as .npy files.
# They are written under data/cifar10/ because that is the location every
# annotation-generation cell in this notebook loads them from.
cifar10_dir = "data/cifar10"
os.makedirs(cifar10_dir, exist_ok=True)  # np.save does not create missing directories

np.save(cifar10_dir + "/train_images.npy", training_data.data)
np.save(cifar10_dir + "/train_labels.npy", training_data.targets)

np.save(cifar10_dir + "/test_images.npy", test_data.data)
np.save(cifar10_dir + "/test_labels.npy", test_data.targets)

### Generate complete annotations

In [None]:
# Generate the "complete" annotation files for CIFAR-10: for every error rate
# and every trial, synthetic.get_instance_noisy_label is called once and its
# two results (noisy labels, true transition matrices) are saved under
# data/cifar10_complete/<err_rate>_<seed>_*.npy.
random_seed = 1

err_rate = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

complete_dir = "data/cifar10_complete/"
os.makedirs(complete_dir, exist_ok=True)  # np.save fails if the directory is missing

# The training arrays are the same for every (error rate, trial) pair, so they
# are read from disk once instead of once per iteration.
original_images = np.load("data/cifar10/train_images.npy")
original_labels = np.load("data/cifar10/train_labels.npy")

for i, rate in enumerate(err_rate):

    for t in range(5):

        seed = random_seed + t

        data = torch.from_numpy(original_images).float()
        # NOTE(review): `targets` shares memory with `original_labels`; this assumes
        # get_instance_noisy_label does not modify its label argument in place.
        targets = torch.from_numpy(original_labels)
        # zip() returns a one-shot iterator, so it must be rebuilt for every call.
        dataset = zip(data, targets)

        # 50 identical entries passed as `tau` — presumably one per simulated
        # annotator; confirm against synthetic.get_instance_noisy_label.
        avg_error_rates = [rate] * 50

        print("################### Noisy Label for Trial " + str(t+1) + f" (err_rate={rate}) ###################")
        noisy_label, transition_true = synthetic.get_instance_noisy_label(dataset, targets, num_classes=10, feature_size=3*32*32, tau=avg_error_rates, norm_std=0.1, seed=seed)

        print('annot:', noisy_label[:2, :])
        print('true label:', original_labels[:2])
        print(" ")

        out_prefix = complete_dir + str(rate) + "_" + str(seed)
        np.save(out_prefix + '_noisy_label.npy', noisy_label)
        np.save(out_prefix + '_transition_true.npy', transition_true)




#### Num=1, 3, 5, 7, 9

In [None]:
# 10 annotators
#
# For each of the 5 trials, build three annotator pools ("low", "mid", "high")
# by concatenating the leading annotator columns of the complete-annotation
# files of three error rates, pass the pool through
# synthetic.incomplete_labeling and save the results under data/cifar10_10_1/.

random_seed = 1
original_labels = np.load("data/cifar10/train_labels.npy")

K = 10   # not used in this cell; kept so the notebook namespace is unchanged
R = 10   # forwarded to incomplete_labeling as R
Num = 1  # forwarded to incomplete_labeling as annot_num

complete_dir = "data/cifar10_complete/"
out_dir = "data/cifar10_10_1/"
os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing directories

all_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# level -> ((error-rate file, number of leading annotator columns taken), ...)
level_specs = {
    "low":  (("0.1", 4), ("0.2", 4), ("0.3", 2)),
    "mid":  (("0.3", 4), ("0.4", 4), ("0.5", 2)),
    "high": (("0.5", 4), ("0.6", 4), ("0.7", 2)),
}

for t in range(5):

    seed = random_seed + t

    # Load the complete annotations / transition matrices of every error rate
    # for this trial once; the three levels below slice from them.
    complete_labels = {}
    complete_transitions = {}
    for rate in all_rates:
        prefix = complete_dir + rate + "_" + str(seed)
        complete_labels[rate] = np.load(prefix + '_noisy_label.npy')
        complete_transitions[rate] = np.load(prefix + '_transition_true.npy')

    N = complete_labels["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (10 annotators) ###################")

    for level, spec in level_specs.items():

        print("--------------------------------- " + level + " ---------------------------------")

        noisy_labels = np.concatenate([complete_labels[rate][:, :cols] for rate, cols in spec], axis=1)
        transition_true = np.concatenate([complete_transitions[rate][:, :cols, :] for rate, cols in spec], axis=1)
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = out_dir + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(seed) + '_transition_true.npy', transition_true)

    

In [None]:
# 30 annotators
#
# For each of the 5 trials, mix columns of the pre-generated "complete"
# CIFAR-10 annotation arrays (one array per error rate) into three noise
# regimes (low / mid / high), pass each mix through
# synthetic.incomplete_labeling, and save the results under data/cifar10_30_1/.

random_seed = 1
original_labels = np.load("data/cifar10/train_labels.npy")

complete_dir = "data/cifar10_complete/"
output_dir = "data/cifar10_30_1"

# Error rates for which "complete" annotation files are loaded.
error_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Each regime concatenates the leading columns of three error-rate arrays.
noise_levels = (
    ("low", ("0.1", "0.2", "0.3")),
    ("mid", ("0.3", "0.4", "0.5")),
    ("high", ("0.5", "0.6", "0.7")),
)
# Columns taken from the 1st / 2nd / 3rd error rate of a regime (11 + 11 + 8 = 30 = R).
columns_per_rate = (11, 11, 8)

K = 10    # presumably the number of classes; not referenced in this cell
R = 30    # forwarded to incomplete_labeling as R
Num = 1   # forwarded to incomplete_labeling as annot_num

# np.save does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for t in range(5):

    trial_seed = random_seed + t

    # Load every error rate up front so a missing input fails before anything is written.
    complete_noisy = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_noisy_label.npy')
        for rate in error_rates
    }
    complete_transition = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_transition_true.npy')
        for rate in error_rates
    }

    N = complete_noisy["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (30 annotators) ###################")

    for level, level_rates in noise_levels:
        print(f"--------------------------------- {level} ---------------------------------")

        # Label arrays are sliced on axis 1; transition arrays on axis 1 of a 3-D array.
        noisy_labels = np.concatenate(
            [complete_noisy[rate][:, :count] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        transition_true = np.concatenate(
            [complete_transition[rate][:, :count, :] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = output_dir + "/" + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=trial_seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(trial_seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(trial_seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(trial_seed) + '_transition_true.npy', transition_true)

    
    

In [None]:
# 50 annotators
#
# For each of the 5 trials, mix columns of the pre-generated "complete"
# CIFAR-10 annotation arrays (one array per error rate) into three noise
# regimes (low / mid / high), pass each mix through
# synthetic.incomplete_labeling, and save the results under data/cifar10_50_1/.

random_seed = 1
original_labels = np.load("data/cifar10/train_labels.npy")

complete_dir = "data/cifar10_complete/"
output_dir = "data/cifar10_50_1"

# Error rates for which "complete" annotation files are loaded.
error_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Each regime concatenates the leading columns of three error-rate arrays.
noise_levels = (
    ("low", ("0.1", "0.2", "0.3")),
    ("mid", ("0.3", "0.4", "0.5")),
    ("high", ("0.5", "0.6", "0.7")),
)
# Columns taken from the 1st / 2nd / 3rd error rate of a regime (18 + 18 + 14 = 50 = R).
columns_per_rate = (18, 18, 14)

K = 10    # presumably the number of classes; not referenced in this cell
R = 50    # forwarded to incomplete_labeling as R
Num = 1   # forwarded to incomplete_labeling as annot_num

# np.save does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for t in range(5):

    trial_seed = random_seed + t

    # Load every error rate up front so a missing input fails before anything is written.
    complete_noisy = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_noisy_label.npy')
        for rate in error_rates
    }
    complete_transition = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_transition_true.npy')
        for rate in error_rates
    }

    N = complete_noisy["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (50 annotators) ###################")

    for level, level_rates in noise_levels:
        print(f"--------------------------------- {level} ---------------------------------")

        # Label arrays are sliced on axis 1; transition arrays on axis 1 of a 3-D array.
        noisy_labels = np.concatenate(
            [complete_noisy[rate][:, :count] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        transition_true = np.concatenate(
            [complete_transition[rate][:, :count, :] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = output_dir + "/" + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=trial_seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(trial_seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(trial_seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(trial_seed) + '_transition_true.npy', transition_true)



In [None]:
# 100 annotators
#
# For each of the 5 trials, mix columns of the pre-generated "complete"
# CIFAR-10 annotation arrays (one array per error rate) into three noise
# regimes (low / mid / high), pass each mix through
# synthetic.incomplete_labeling, and save the results under data/cifar10_100_1/.

random_seed = 1
original_labels = np.load("data/cifar10/train_labels.npy")

complete_dir = "data/cifar10_complete/"
output_dir = "data/cifar10_100_1"

# Error rates for which "complete" annotation files are loaded.
error_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Each regime concatenates the leading columns of three error-rate arrays.
noise_levels = (
    ("low", ("0.1", "0.2", "0.3")),
    ("mid", ("0.3", "0.4", "0.5")),
    ("high", ("0.5", "0.6", "0.7")),
)
# Columns taken from the 1st / 2nd / 3rd error rate of a regime (35 + 35 + 30 = 100 = R).
columns_per_rate = (35, 35, 30)

K = 10    # presumably the number of classes; not referenced in this cell
R = 100   # forwarded to incomplete_labeling as R
Num = 1   # forwarded to incomplete_labeling as annot_num

# np.save does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for t in range(5):

    trial_seed = random_seed + t

    # Load every error rate up front so a missing input fails before anything is written.
    complete_noisy = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_noisy_label.npy')
        for rate in error_rates
    }
    complete_transition = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_transition_true.npy')
        for rate in error_rates
    }

    N = complete_noisy["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (100 annotators) ###################")

    for level, level_rates in noise_levels:
        print(f"--------------------------------- {level} ---------------------------------")

        # Label arrays are sliced on axis 1; transition arrays on axis 1 of a 3-D array.
        noisy_labels = np.concatenate(
            [complete_noisy[rate][:, :count] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        transition_true = np.concatenate(
            [complete_transition[rate][:, :count, :] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = output_dir + "/" + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=trial_seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(trial_seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(trial_seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(trial_seed) + '_transition_true.npy', transition_true)

    

    

## Cifar 100

### Download

In [None]:
# Fetch the CIFAR-100 train split first, then the test split, into the local
# "cifar100" folder (downloaded on demand); each split gets its own ToTensor
# transform instance.
training_data, test_data = (
    datasets.CIFAR100(
        root="cifar100",
        train=is_train,
        download=True,
        transform=transforms.ToTensor(),
    )
    for is_train in (True, False)
)

In [None]:
# Dump the raw CIFAR-100 arrays as .npy files.
# They are written to data/cifar100/ because that is where the
# annotation-generation cells of this notebook load them from
# ("data/cifar100/train_images.npy", "data/cifar100/train_labels.npy").
cifar100_npy_dir = "data/cifar100"

# np.save does not create missing parent directories.
os.makedirs(cifar100_npy_dir, exist_ok=True)

np.save(cifar100_npy_dir + "/train_images.npy", training_data.data)
np.save(cifar100_npy_dir + "/train_labels.npy", training_data.targets)

np.save(cifar100_npy_dir + "/test_images.npy", test_data.data)
np.save(cifar100_npy_dir + "/test_labels.npy", test_data.targets)

### Generate complete annotations

In [None]:
# Generate the "complete" CIFAR-100 annotation sets: for every error rate and
# every trial seed, synthetic.get_instance_noisy_label is called with 50
# identical tau entries, and the returned labels and transition array are
# saved under data/cifar100_complete/.

random_seed = 1

err_rate = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

complete_dir = "data/cifar100_complete/"

# np.save does not create missing parent directories.
os.makedirs(complete_dir, exist_ok=True)

# The inputs on disk do not change between runs, so read them once.
original_images = np.load("data/cifar100/train_images.npy")
original_labels = np.load("data/cifar100/train_labels.npy")

for rate in err_rate:

    # One tau entry per simulated annotator (50), all with the same rate.
    avg_error_rates = [rate]*50

    for t in range(5):

        # Tensors are rebuilt for every run, and zip() yields a single-use
        # iterator, so the dataset must be recreated before each call.
        data = torch.from_numpy(original_images).float()
        targets = torch.from_numpy(original_labels)
        dataset = zip(data, targets)

        print("################### Noisy Label for Trial " + str(t+1) + f" (err_rate={rate}) ###################")
        noisy_label, transition_true = synthetic.get_instance_noisy_label(dataset, targets, num_classes=100, feature_size=3*32*32, tau=avg_error_rates, norm_std=0.1, seed=random_seed+t)

        print('annot:', noisy_label[:2, :])
        print('true label:', original_labels[:2])
        print(" ")

        output_prefix = complete_dir + str(rate) + "_" + str(random_seed + t)
        np.save(output_prefix + '_noisy_label.npy', noisy_label)
        np.save(output_prefix + '_transition_true.npy', transition_true)

#### Noisy labels

In [None]:
# 10 annotators
#
# For each of the 5 trials, mix columns of the pre-generated "complete"
# CIFAR-100 annotation arrays (one array per error rate) into three noise
# regimes (low / mid / high), pass each mix through
# synthetic.incomplete_labeling, and save the results under data/cifar100_10_1/.

random_seed = 1
original_labels = np.load("data/cifar100/train_labels.npy")

complete_dir = "data/cifar100_complete/"
output_dir = "data/cifar100_10_1"

# Error rates for which "complete" annotation files are loaded.
error_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Each regime concatenates the leading columns of three error-rate arrays.
noise_levels = (
    ("low", ("0.1", "0.2", "0.3")),
    ("mid", ("0.3", "0.4", "0.5")),
    ("high", ("0.5", "0.6", "0.7")),
)
# Columns taken from the 1st / 2nd / 3rd error rate of a regime (4 + 4 + 2 = 10 = R).
columns_per_rate = (4, 4, 2)

K = 100   # presumably the number of classes; not referenced in this cell
R = 10    # forwarded to incomplete_labeling as R
Num = 1   # forwarded to incomplete_labeling as annot_num

# np.save does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for t in range(5):

    trial_seed = random_seed + t

    # Load every error rate up front so a missing input fails before anything is written.
    complete_noisy = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_noisy_label.npy')
        for rate in error_rates
    }
    complete_transition = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_transition_true.npy')
        for rate in error_rates
    }

    N = complete_noisy["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (10 annotators) ###################")

    for level, level_rates in noise_levels:
        print(f"--------------------------------- {level} ---------------------------------")

        # Label arrays are sliced on axis 1; transition arrays on axis 1 of a 3-D array.
        noisy_labels = np.concatenate(
            [complete_noisy[rate][:, :count] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        transition_true = np.concatenate(
            [complete_transition[rate][:, :count, :] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = output_dir + "/" + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=trial_seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(trial_seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(trial_seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(trial_seed) + '_transition_true.npy', transition_true)

    

In [None]:
# 30 annotators
#
# For each of the 5 trials, mix columns of the pre-generated "complete"
# CIFAR-100 annotation arrays (one array per error rate) into three noise
# regimes (low / mid / high), pass each mix through
# synthetic.incomplete_labeling, and save the results under data/cifar100_30_1/.

random_seed = 1
original_labels = np.load("data/cifar100/train_labels.npy")

complete_dir = "data/cifar100_complete/"
output_dir = "data/cifar100_30_1"

# Error rates for which "complete" annotation files are loaded.
error_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Each regime concatenates the leading columns of three error-rate arrays.
noise_levels = (
    ("low", ("0.1", "0.2", "0.3")),
    ("mid", ("0.3", "0.4", "0.5")),
    ("high", ("0.5", "0.6", "0.7")),
)
# Columns taken from the 1st / 2nd / 3rd error rate of a regime (11 + 11 + 8 = 30 = R).
columns_per_rate = (11, 11, 8)

K = 100   # presumably the number of classes; not referenced in this cell
R = 30    # forwarded to incomplete_labeling as R
Num = 1   # forwarded to incomplete_labeling as annot_num

# np.save does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for t in range(5):

    trial_seed = random_seed + t

    # Load every error rate up front so a missing input fails before anything is written.
    complete_noisy = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_noisy_label.npy')
        for rate in error_rates
    }
    complete_transition = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_transition_true.npy')
        for rate in error_rates
    }

    N = complete_noisy["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (30 annotators) ###################")

    for level, level_rates in noise_levels:
        print(f"--------------------------------- {level} ---------------------------------")

        # Label arrays are sliced on axis 1; transition arrays on axis 1 of a 3-D array.
        noisy_labels = np.concatenate(
            [complete_noisy[rate][:, :count] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        transition_true = np.concatenate(
            [complete_transition[rate][:, :count, :] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = output_dir + "/" + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=trial_seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(trial_seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(trial_seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(trial_seed) + '_transition_true.npy', transition_true)

    

In [None]:
# 50 annotators
#
# For each of the 5 trials, mix columns of the pre-generated "complete"
# CIFAR-100 annotation arrays (one array per error rate) into three noise
# regimes (low / mid / high), pass each mix through
# synthetic.incomplete_labeling, and save the results under data/cifar100_50_1/.

random_seed = 1
original_labels = np.load("data/cifar100/train_labels.npy")

complete_dir = "data/cifar100_complete/"
output_dir = "data/cifar100_50_1"

# Error rates for which "complete" annotation files are loaded.
error_rates = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Each regime concatenates the leading columns of three error-rate arrays.
noise_levels = (
    ("low", ("0.1", "0.2", "0.3")),
    ("mid", ("0.3", "0.4", "0.5")),
    ("high", ("0.5", "0.6", "0.7")),
)
# Columns taken from the 1st / 2nd / 3rd error rate of a regime (18 + 18 + 14 = 50 = R).
columns_per_rate = (18, 18, 14)

K = 100   # presumably the number of classes; not referenced in this cell
R = 50    # forwarded to incomplete_labeling as R
Num = 1   # forwarded to incomplete_labeling as annot_num

# np.save does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

for t in range(5):

    trial_seed = random_seed + t

    # Load every error rate up front so a missing input fails before anything is written.
    complete_noisy = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_noisy_label.npy')
        for rate in error_rates
    }
    complete_transition = {
        rate: np.load(complete_dir + rate + "_" + str(trial_seed) + '_transition_true.npy')
        for rate in error_rates
    }

    N = complete_noisy["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (50 annotators) ###################")

    for level, level_rates in noise_levels:
        print(f"--------------------------------- {level} ---------------------------------")

        # Label arrays are sliced on axis 1; transition arrays on axis 1 of a 3-D array.
        noisy_labels = np.concatenate(
            [complete_noisy[rate][:, :count] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        transition_true = np.concatenate(
            [complete_transition[rate][:, :count, :] for rate, count in zip(level_rates, columns_per_rate)],
            axis=1,
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        folder_name = output_dir + "/" + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=trial_seed, annot_num=Num)
        print('annot:', annotations[:2, :])
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + str(trial_seed) + '_annotations.npy', annotations)
        np.save(folder_name + str(trial_seed) + '_noisy_label.npy', noisy_label)
        np.save(folder_name + str(trial_seed) + '_transition_true.npy', transition_true)

    

    

In [None]:
# 100 annotators
#
# For each of 5 trials, load the complete CIFAR-100 annotations generated at
# error rates 0.1 ... 0.7, splice columns from three of those rates together
# to form a "low", "mid" and "high" noise setting with R = 100 columns in
# total, run synthetic.incomplete_labeling on the result, and save the
# annotations, the derived noisy label and the matching transition matrices.

random_seed = 1
original_labels = np.load("data/cifar100/train_labels.npy")

# Error rates whose complete-annotation files are read for every trial.
# Kept as strings because they are used verbatim in the file names.
err_rate_tags = ("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7")

# Noise level -> ((error-rate tag, number of leading axis-1 columns taken), ...).
# The counts of each level add up to R (35 + 35 + 30 = 100).
# NOTE(review): axis 1 presumably indexes annotators -- confirm against
# synthetic.get_instance_noisy_label.
level_mix = {
    "low": (("0.1", 35), ("0.2", 35), ("0.3", 30)),
    "mid": (("0.3", 35), ("0.4", 35), ("0.5", 30)),
    "high": (("0.5", 35), ("0.6", 35), ("0.7", 30)),
}

K = 100  # not used in this cell; kept because it stays in the kernel namespace
R = 100  # passed to synthetic.incomplete_labeling as R
Num = 1  # passed to synthetic.incomplete_labeling as annot_num

complete_dir = "data/cifar100_complete/"
output_dir = "data/cifar100_100_1/"

# np.save does not create missing parent directories, so make sure the
# destination exists before the first write.
os.makedirs(output_dir, exist_ok=True)


for t in range(5):

    trial_tag = str(random_seed + t)

    # Complete annotations and their transition matrices, keyed by error rate.
    noisy_by_rate = {
        tag: np.load(complete_dir + tag + "_" + trial_tag + '_noisy_label.npy')
        for tag in err_rate_tags
    }
    transition_by_rate = {
        tag: np.load(complete_dir + tag + "_" + trial_tag + '_transition_true.npy')
        for tag in err_rate_tags
    }

    # Number of samples; used as the denominator of the printed noise rate.
    N = noisy_by_rate["0.1"].shape[0]

    print(" ")
    print("################### Noisy Label for Trial " + str(t+1) + " (100 annotators) ###################")

    for level, mix in level_mix.items():

        print("--------------------------------- " + level + " ---------------------------------")

        # Take the first `count` columns (axis 1) from each selected error
        # rate and place them side by side.
        noisy_labels = np.concatenate(
            [noisy_by_rate[tag][:, :count] for tag, count in mix], axis=1
        )
        transition_true = np.concatenate(
            [transition_by_rate[tag][:, :count, :] for tag, count in mix], axis=1
        )
        print('Shape of noisy annotations:', noisy_labels.shape)
        print('Shape of transition matrix:', transition_true.shape)

        # Output prefix, e.g. "data/cifar100_100_1/low_".
        folder_name = output_dir + level + "_"

        annotations, noisy_label = synthetic.incomplete_labeling(noisy_labels, R=R, seed=random_seed+t, annot_num=Num)
        print('annot:', annotations[:2, :])
        # Label text (including its spelling) is kept as-is so the log output
        # stays comparable with the other generation cells.
        print('noist label:', noisy_label[:20])
        print('true label:', original_labels[:20])
        print('noise rate:', (noisy_label != original_labels).sum() / N)
        print(" ")

        np.save(folder_name + trial_tag + '_annotations.npy', annotations)
        np.save(folder_name + trial_tag + '_noisy_label.npy', noisy_label)
        np.save(folder_name + trial_tag + '_transition_true.npy', transition_true)

    

    

## LabelMe