In [1]:
import os

import numpy as np
import pandas as pd
from scipy.io import arff

from sklearn.model_selection import train_test_split
from cleanlab.benchmarking.noise_generation import (
    generate_noise_matrix_from_trace,
    generate_noisy_labels,
)
from cleanlab.multiannotator import get_majority_vote_label

In [2]:
# Shared seed: passed as `seed=` to generate_noise_matrix_from_trace and as
# `random_state=` to every train_test_split call in this notebook.
SEED = 1234

In [3]:
def get_synthetic_multiannotator_labels(
    true_labels,
    noise_rate=0.85,
    num_annotators=30,
):
    """Simulate several annotators by corrupting ``true_labels`` with one shared noise matrix.

    Args:
        true_labels: integer class labels (they are fed to ``np.bincount``).
        noise_rate: scaled by the number of classes and passed as ``trace`` to
            ``generate_noise_matrix_from_trace``. In this notebook's recorded
            output, ``noise_rate=1.0`` yields fully accurate labels, so larger
            values mean less noise.
        num_annotators: how many independent noisy label sets to draw.

    Returns:
        The per-annotator label sets stacked and transposed, i.e. one column
        per annotator (presumably shape ``(len(true_labels), num_annotators)``
        -- depends on ``generate_noisy_labels`` returning a 1-D array; confirm).
    """
    num_examples = len(true_labels)
    num_classes = len(np.unique(true_labels))
    # Empirical class prior, handed to the noise-matrix generator as `py`.
    class_prior = np.bincount(true_labels) / float(num_examples)

    noise_matrix = generate_noise_matrix_from_trace(
        num_classes,
        trace=noise_rate * num_classes,
        py=class_prior,
        valid_noise_matrix=True,
        seed=SEED,
    )

    # Every annotator is drawn from the same noise matrix.
    per_annotator = []
    for _ in range(num_annotators):
        per_annotator.append(generate_noisy_labels(true_labels, noise_matrix))

    return np.vstack(per_annotator).transpose()

In [4]:
def get_synthetic_labels(
    true_labels,
    noise_rate=0.85,
):
    """Draw a single set of noisy labels for ``true_labels``.

    Args:
        true_labels: integer class labels (they are fed to ``np.bincount``).
        noise_rate: scaled by the number of classes and passed as ``trace`` to
            ``generate_noise_matrix_from_trace``.

    Returns:
        Whatever ``generate_noisy_labels`` returns for ``true_labels`` and the
        generated noise matrix (presumably one noisy label per example --
        confirm against cleanlab).
    """
    num_examples = len(true_labels)
    num_classes = len(np.unique(true_labels))
    # Empirical class prior, handed to the noise-matrix generator as `py`.
    class_prior = np.bincount(true_labels) / float(num_examples)

    return generate_noisy_labels(
        true_labels,
        generate_noise_matrix_from_trace(
            num_classes,
            trace=noise_rate * num_classes,
            py=class_prior,
            valid_noise_matrix=True,
            seed=SEED,
        ),
    )

In [5]:
def get_sample_labels(x_drop, y_drop, labels, annotator_mask):
    """Blank out the annotations at the given (row, column) positions.

    Args:
        x_drop: row indices of the annotations to remove.
        y_drop: column indices of the annotations to remove, paired with ``x_drop``.
        labels: label array; not modified.
        annotator_mask: array whose zero entries mark missing annotations; not modified.

    Returns:
        ``(s_labels, s_annotator_mask)``: a float64 copy of ``labels`` holding
        NaN wherever the updated mask is 0, and a copy of ``annotator_mask``
        with the dropped positions set to 0.
    """
    kept_mask = annotator_mask.copy()
    kept_mask[x_drop, y_drop] = 0

    # Cast to float so that NaN can mark a missing annotation.
    sampled = labels.copy().astype("float64")
    is_missing = kept_mask == 0
    np.copyto(sampled, np.nan, where=is_missing)

    return sampled, kept_mask


def get_random_drop_per_row_min_annotators(annotator_mask, max_annotations=5):
    """Pick annotations to drop so that each row keeps at most ``max_annotations``.

    For every row of ``annotator_mask``, ``max_annotations`` of the annotators
    that actually labelled that row (mask value 1) are chosen uniformly at
    random to be kept; the row's remaining annotations are returned as drops.
    Rows that already have ``max_annotations`` or fewer annotations are left
    untouched.

    The previous implementation sampled the kept columns from the column
    indices of the *whole* mask instead of the current row, so it could "keep"
    annotators who never labelled the row (and the same one twice), and it
    raised when the mask was empty or had fewer than ``max_annotations``
    entries overall.

    Args:
        annotator_mask: 2-D array; entries equal to 1 mark existing annotations.
        max_annotations: maximum number of annotations to keep per row.

    Returns:
        ``(x_drop, y_drop)``: two equally long lists with the row and column
        indices of the annotations to drop. Uses the global ``np.random`` state.
    """
    mask = np.asarray(annotator_mask)
    x_drop, y_drop = [], []

    for x_idx in range(mask.shape[0]):
        # Only annotators that labelled this particular row are candidates.
        row_annotators = np.where(mask[x_idx] == 1)[0]
        if len(row_annotators) <= max_annotations:
            continue  # nothing to drop for this row

        y_keep = np.random.choice(row_annotators, max_annotations, replace=False)
        row_drop = np.setdiff1d(row_annotators, y_keep)

        x_drop.extend([x_idx] * len(row_drop))
        y_drop.extend(row_drop.tolist())

    return x_drop, y_drop


def get_random_drop_per_row(annotator_mask):
    """Randomly choose, per row, which annotations to drop while keeping at least one.

    All positions where ``annotator_mask == 1`` start out as drop candidates.
    For each row a random number of entries (between 1 and the row's count) is
    removed from the candidate table, i.e. those entries are *kept*; whatever
    remains is returned as the positions to drop.

    Args:
        annotator_mask: 2-D array; entries equal to 1 mark existing annotations.

    Returns:
        ``(x_drop, y_drop)``: arrays of row and column indices to drop. Uses
        the global ``np.random`` state (directly and through ``DataFrame.sample``).
    """
    rows, cols = np.where(annotator_mask == 1)
    candidates = pd.DataFrame(zip(rows, cols), columns=["x", "y"])

    last_row = candidates["x"].max()
    for row in range(last_row + 1):
        row_entries = candidates[candidates["x"] == row]
        # Number of this row's annotations that survive (at least one).
        num_keep = np.random.randint(1, len(row_entries) + 1)
        kept_index = row_entries.sample(num_keep).index
        candidates = candidates.drop(kept_index)

    return candidates["x"].values, candidates["y"].values


def get_least_annotations(annotator_mask):
    """Randomly thin out a mask so that most rows keep only a few annotations.

    For each row with at least two annotations, all but one of its annotators
    are selected at random as drop candidates. A candidate is actually dropped
    when a uniform draw exceeds 0.03 and at least one *other* row labelled by
    the same annotator currently has more than two annotations.

    If the annotator has no other labelled row, the candidate is kept. The
    previous code called ``.max()`` on an empty array in that situation and
    raised ``ValueError`` (e.g. for any single-row mask).

    Args:
        annotator_mask: 2-D numpy array; entries equal to 1 mark existing
            annotations. Not modified.

    Returns:
        ``(x_drop, y_drop)``: row and column indices of every zero entry of
        the thinned mask (this includes entries that were already 0 on input).
        Uses the global ``np.random`` state.
    """
    annotations_per_example = annotator_mask.sum(axis=1)

    temp_mask = annotator_mask.copy()

    for ex in range(temp_mask.shape[0]):
        # Leave rows with fewer than two annotations alone.
        if annotations_per_example[ex] < 2:
            continue

        annotators = np.where(temp_mask[ex] == 1)[0]  # annotators for example
        # All but one annotator are candidates, so a row never becomes empty.
        drop_y = np.random.choice(annotators, len(annotators) - 1, replace=False)

        for y in drop_y:
            if np.random.uniform() > 0.03:
                # Other examples that annotator y has (still) labelled.
                x = np.where(temp_mask[:, y] == 1)[0]
                x = np.setdiff1d(x, np.array([ex]))
                # Drop only if one of those other examples currently has more
                # than two annotations; with no other example there is nothing
                # to compare against, so the annotation is kept.
                if x.size > 0 and annotations_per_example[x].max() > 2:
                    temp_mask[ex][y] = 0
                    annotations_per_example[ex] -= 1

    x_drop, y_drop = np.where(temp_mask == 0)

    return x_drop, y_drop

In [6]:
def create_data(noise_rate):
    """Build one noisy multi-annotator dataset from the wall-robot CSV and print label accuracies.

    Steps, in order:
      1. Read ``data/wall_robot_subset.csv``; the ``class`` column is the
         target, all other columns are features.
      2. Hold out one third of the rows as a test split.
      3. Generate synthetic labels for the training rows from 30 simulated
         annotators, plus one single noisy label set.
      4. Split the training rows into 25% "labeled" / 75% "unlabeled".
      5. Thin out the labeled annotations with ``get_least_annotations`` and
         take the majority vote of what is left as the consensus label.
      6. Print a warning for annotators that share no example with any other
         annotator, and print several accuracy figures against the true labels.

    Args:
        noise_rate: forwarded to the synthetic-label helpers.

    Returns:
        A 10-tuple ``(X_labeled, X_unlabeled, X_test, y_labeled, y_unlabeled,
        y_test, multiannotator_labels, single_labeled, extra_labels_labeled,
        extra_labels_unlabeled)`` where ``multiannotator_labels`` is the thinned
        labeled-split annotation matrix (NaN for dropped entries),
        ``single_labeled`` is its majority-vote consensus, and the two
        ``extra_labels_*`` arrays are the full, un-thinned annotator labels for
        the labeled and unlabeled splits.
    """
    wallrobot = pd.read_csv("data/wall_robot_subset.csv", index_col=0)
    num_annotators = 30

    X = wallrobot.loc[:, wallrobot.columns != "class"].to_numpy()
    y = wallrobot["class"].to_numpy()

    # Hold out one third of the rows as the test split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=1 / 3, random_state=SEED
    )
    multiannotator_labels = get_synthetic_multiannotator_labels(
        y_train, noise_rate=noise_rate, num_annotators=num_annotators
    )
    # NOTE(review): extra_multiannotator_labels is never read below. The call may
    # still advance/reseed the global RNG, so confirm before removing it.
    extra_multiannotator_labels = get_synthetic_multiannotator_labels(
        y_train, noise_rate=noise_rate, num_annotators=num_annotators
    )

    # NOTE(review): this consensus_label is overwritten before it is used.
    consensus_label = get_majority_vote_label(multiannotator_labels)

    single_labels = get_synthetic_labels(y_train, noise_rate=noise_rate)
    # NOTE(review): extra_single_labels is never read below (same RNG caveat as above).
    extra_single_labels = get_synthetic_labels(y_train, noise_rate=noise_rate)

    # Split every per-example array the same way: 25% labeled, 75% unlabeled.
    (
        X_labeled,
        X_unlabeled,
        y_labeled,
        y_unlabeled,
        multiannotator_labeled,
        multiannotator_unlabeled,
        single_labeled,
        single_unlabeled,
    ) = train_test_split(
        X_train,
        y_train,
        multiannotator_labels,
        single_labels,
        test_size=0.75,
        random_state=SEED,
    )

    # Start from a fully annotated labeled split (every annotator labels every example).
    annotator_mask = np.full(multiannotator_labeled.shape, 1)

    # Thin the annotations; from here on `multiannotator_labels` refers to the
    # thinned labeled-split matrix, not the full training-set labels above.
    x_drop, y_drop = get_least_annotations(annotator_mask)
    multiannotator_labels, annotator_mask = get_sample_labels(
        x_drop, y_drop, multiannotator_labeled, annotator_mask
    )

    # The synthetic single labels from the split are replaced by the consensus,
    # so "base single label accuracy" below is computed from consensus labels.
    consensus_label = get_majority_vote_label(multiannotator_labels)
    single_labeled = consensus_label

    # print(pd.DataFrame(multiannotator_labels).count(axis=1).value_counts())

    # Check if dataset creation conditions are met
    # (every annotator should share at least one example with some other annotator;
    # violations are only reported, not fixed).
    for col in range(annotator_mask.shape[1]):
        annotator = np.where(annotator_mask[:, col] == 1)[0]
        intersects = False
        for j in range(annotator_mask.shape[1]):
            if j == col:
                continue
            annotator2 = np.where(annotator_mask[:, j] == 1)[0]
            if len(np.intersect1d(annotator, annotator2)) > 0:
                intersects = True
        if not intersects:
            print(f"annotator {col} does not intersect with any other annotator")

    # Per-annotator accuracy, computed only over the examples that annotator labelled.
    anno_acc = pd.DataFrame(multiannotator_labels).apply(
        lambda s: np.mean(s[pd.notna(s)] == y_labeled[pd.notna(s)])
    )
    print(f"min accuracy = {np.min(anno_acc)}")
    print(f"max accuracy = {np.max(anno_acc)}")
    print(f"avg accuracy = {np.mean(anno_acc)}")

    consensus_label = get_majority_vote_label(multiannotator_labels)
    accuracy = np.mean(consensus_label == y_labeled)
    print(f"base consensus accuracy = {accuracy}")

    # Mean over annotators of their accuracy on the (fully annotated) unlabeled split.
    accuracy = np.mean(
        pd.DataFrame(multiannotator_unlabeled).apply(
            lambda s: np.mean(s == y_unlabeled)
        )
    )
    print(f"base extra label accuracy = {accuracy}")

    accuracy = np.mean(single_labeled == y_labeled)
    print(f"base single label accuracy = {accuracy}")

    # accuracy = np.mean(single_unlabeled == y_unlabeled)
    # print(f"base extra single label accuracy = {accuracy}")

    # The "extra" labels are the full, un-thinned annotator labels for each split.
    extra_labels_labeled = multiannotator_labeled
    extra_labels_unlabeled = multiannotator_unlabeled

    return (
        X_labeled,
        X_unlabeled,
        X_test,
        y_labeled,
        y_unlabeled,
        y_test,
        multiannotator_labels,
        single_labeled,
        extra_labels_labeled,
        extra_labels_unlabeled,
    )

In [7]:
# Build one dataset per noise rate and save every array under data/<noise_rate>/.
for noise_rate in [0.6, 0.7, 0.8, 0.9, 1.0]:

    dirname = f"data/{noise_rate}"
    # Pure-Python equivalent of `mkdir -p`: portable and not dependent on a shell.
    os.makedirs(dirname, exist_ok=True)

    (
        X_labeled,
        X_unlabeled,
        X_test,
        y_labeled,
        y_unlabeled,
        y_test,
        multiannotator_labels,
        single_labeled,
        extra_labels_labeled,
        extra_labels_unlabeled,
    ) = create_data(noise_rate)

    # File stem -> array; each entry is written to <dirname>/<stem>.npy.
    arrays_to_save = {
        "X_labeled": X_labeled,
        "X_unlabeled": X_unlabeled,
        "X_test": X_test,
        "true_labels_labeled": y_labeled,
        "true_labels_unlabeled": y_unlabeled,
        "true_labels_test": y_test,
        "multiannotator_labels_labeled": multiannotator_labels,
        "single_labels_labeled": single_labeled,
        "extra_labels_labeled": extra_labels_labeled,
        "extra_labels_unlabeled": extra_labels_unlabeled,
    }
    for file_stem, array in arrays_to_save.items():
        np.save(f"{dirname}/{file_stem}.npy", array)

    # Blank line between the accuracy reports of consecutive noise rates.
    print()

min accuracy = 0.5384615384615384
max accuracy = 0.8
avg accuracy = 0.6739903977817376
base consensus accuracy = 0.71
base extra label accuracy = 0.6675555555555556
base single label accuracy = 0.71

min accuracy = 0.5769230769230769
max accuracy = 0.9
avg accuracy = 0.7648264686582015
base consensus accuracy = 0.818
base extra label accuracy = 0.7662666666666667
base single label accuracy = 0.818

min accuracy = 0.6923076923076923
max accuracy = 0.9655172413793104
avg accuracy = 0.8665210294828466
base consensus accuracy = 0.896
base extra label accuracy = 0.8635333333333332
base single label accuracy = 0.896

min accuracy = 0.7586206896551724
max accuracy = 1.0
avg accuracy = 0.9373028852681975
base consensus accuracy = 0.95
base extra label accuracy = 0.9301333333333335
base single label accuracy = 0.95

min accuracy = 1.0
max accuracy = 1.0
avg accuracy = 1.0
base consensus accuracy = 1.0
base extra label accuracy = 1.0
base single label accuracy = 1.0

