In [None]:
import sys

sys.path.append("../src")
import logging

logging.getLogger().setLevel(logging.INFO)
import math
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import classification_report
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from transformers import CLIPModel, CLIPProcessor

print(torch.cuda.is_available())
import datetime
import os
import pickle
import sys

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
from datasets_hai.bdd import *
from datasets_hai.coco import *
from datasets_hai.dynasent import *
from datasets_hai.gaussians import *
from datasets_hai.mmlu import *

# local imports
from teacher_methods.teacher_domino import *
from teacher_methods.teacher_gen import *
from teacher_methods.teacher_kmeans import *
from teacher_methods.teacher_selection import *
from utils.metrics_hai import *
from utils.utils import *

In [None]:
# Experiment configuration shared by every dataset section of this notebook.

# Fixed seed so that a Restart & Run All reproduces the same splits and simulated
# human / prior-rejector predictions.
SEED = 0
# Global NumPy RNG: drives np.random.choice / np.random.rand in the experiment
# cells, and train_test_split when no random_state is passed.
np.random.seed(SEED)
# presumably the teacher models train with torch -- TODO confirm
torch.manual_seed(SEED)

# Hyper-parameters passed to TeacherGenerative and TeacherSelective.
alpha = 0.0
beta_high = 0.5
beta_low = 0.00
delta = 2
# Passed to TeacherSelective only.
randomized_sampling = 1
parallel_processes = 1
kernel = rbf_kernel  # K(.,.)
# Loss handed to every teacher and to compute_metrics.
metric_y = loss_01
# Assigned onto TeacherGenerative before fit().
initialization_epochs = 200
initialization_restarts = 20
# Number of independent train/test splits per dataset.
TOTAL_TRIALS = 5
# Teaching-set sizes that are evaluated (x-axis of the plots).
DATA_SIZES = [5, 7, 10, 12, 15, 17, 20]

# BDD

In [None]:
# Timestamp string (YYYY-MM-DD_HHMMSS) used in the name of the results file
# written by the experiment cell below.
date_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")

In [None]:
# Load the pickled BDD dataset object and the pickle holding results for the
# blurred BDD data. Context managers make sure both file handles are closed.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open("../data/cleaned_pkl/bdd_dataset.pkl", "rb") as dataset_file:
    dataset = pickle.load(dataset_file)
with open('../data/cleaned_pkl/data_blur_bdd.pkl', 'rb') as blurry_file:
    data_blurry = pickle.load(blurry_file)
# Replace the dataset's AI predictions with the 'preds' entry of the blur pickle.
dataset.ai_preds = data_blurry['preds']

In [None]:
# BDD experiment. Every trial draws a fresh train/test split, simulates human
# predictions, fits the four teaching methods on the train split and stores the
# compute_metrics result of each method on both splits for every size in DATA_SIZES.
data_size_n = len(dataset.data_y)
# PARAMETERS FOR ALGORITHMS
epochs = 2000
lr = 0.001
# Mixture-component count Domino is retried with when fitting it with
# `data_size` components raises.
DOMINO_FALLBACK_COMPONENTS = 50

dataset.metric_y = loss_01

# Result containers. Outer index: trial; inner index: position in DATA_SIZES.
metrics_gen_train = []
metrics_domino_train = []
metrics_kmeans_train = []
metrics_selec_train = []

metrics_gen_test = []
metrics_domino_test = []
metrics_kmeans_test = []
metrics_selec_test = []

# Baselines (defer to nobody / defer to the AI always); train split only.
metrics_human_alone = []
metrics_ai_alone = []


max_teaching_points = max(DATA_SIZES)

for trial in range(TOTAL_TRIALS):
    data_ids = list(range(0, len(dataset.data_y)))
    # test_size=0.3 keeps 70% of the ids for training and 30% for testing.
    data_train_ids, data_test_ids = train_test_split(data_ids, test_size=0.3)
    hum_preds = np.zeros(data_size_n)
    # Simulated human: label 1 is predicted as 1 with probability 0.8, any other
    # label is predicted as 0 with probability 0.8.
    for i in range(data_size_n):
        if dataset.data_y[i] == 1:
            hum_preds[i] = np.random.choice([0, 1], p=[0.2, 0.8])
        else:
            hum_preds[i] = np.random.choice([0, 1], p=[0.8, 0.2])

    dataset.hum_preds = hum_preds
    # Prior rejector: an independent fair coin flip per example.
    prior_rejector_preds = np.array(
        [np.random.choice([0, 1], p=[0.5, 0.5]) for i in range(len(dataset.data_y))]
    )

    # GEN and SELECT are fitted once per trial with the largest teaching budget;
    # smaller budgets reuse a prefix of the resulting teaching set (see below).
    teacher_gen = TeacherGenerative(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        delta,
    )
    teacher_gen.epochs = epochs
    teacher_gen.lr = lr
    teacher_gen.initialization_restarts = initialization_restarts
    teacher_gen.initialization_epochs = initialization_epochs
    teacher_gen.fit()

    teacher_selection = TeacherSelective(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        kernel,
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        randomized_sampling,
        delta,
        parallel_processes,
    )
    print("SELECTION")
    teacher_selection.randomized_sampling = randomized_sampling
    teacher_selection.parallel_processes = parallel_processes
    teacher_selection.fit()

    # Keep the full teaching sets so each data_size can take its own prefix.
    saved_selec_points = copy.deepcopy(teacher_selection.teaching_set)
    saved_gen_points = copy.deepcopy(teacher_gen.teaching_set)

    # Per-trial results, one entry per element of DATA_SIZES.
    metric_gen_train_trial = []
    metric_selec_train_trial = []
    metric_gen_test_trial = []
    metric_selec_test_trial = []
    metric_domino_train_trial = []
    metric_domino_test_trial = []
    metric_kmeans_train_trial = []
    metric_kmeans_test_trial = []
    metrics_human_alone_trial = []
    metrics_ai_alone_trial = []

    for data_size in DATA_SIZES:
        # Domino and k-means are refitted from scratch for every teaching budget.
        try:
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                dataset.data_y[data_train_ids],
                dataset.hum_preds[data_train_ids],
                dataset.ai_preds[data_train_ids],
                dataset.ai_scores[data_train_ids],
                metric_y,
                n_pca_components=None,
                n_mixture_components=data_size,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        except Exception as exc:
            # Best-effort fallback: retry with a fixed number of mixture
            # components. Only Exception is caught so Ctrl-C still interrupts,
            # and the cause is reported instead of being swallowed.
            print("Domino failed:", repr(exc))
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                dataset.data_y[data_train_ids],
                dataset.hum_preds[data_train_ids],
                dataset.ai_preds[data_train_ids],
                dataset.ai_scores[data_train_ids],
                metric_y,
                n_pca_components=None,
                n_mixture_components=DOMINO_FALLBACK_COMPONENTS,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        teacher_kmeans = TeacherKmeans(
            dataset.data_x[data_train_ids],
            dataset.data_y[data_train_ids],
            dataset.hum_preds[data_train_ids],
            dataset.ai_preds[data_train_ids],
            metric_y,
            data_size,
        )
        # teacher_domino is already fitted above (in the try or in the fallback);
        # fitting it again here would run outside the guard.
        teacher_kmeans.fit()
        # Trim the selection and gen teaching sets to the current budget.
        teacher_selection.teaching_set = saved_selec_points[:data_size]
        teacher_gen.teaching_set = saved_gen_points[:data_size]

        # Evaluate on the train split first, then on the test split, with the
        # same sequence of calls for both.
        for split_ids, is_train in ((data_train_ids, True), (data_test_ids, False)):
            domino_defer_preds = teacher_domino.get_defer_preds(
                dataset.data_x[split_ids]
            )
            domino_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                domino_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            kmeans_defer_preds = teacher_kmeans.get_defer_preds(
                dataset.data_x[split_ids]
            )
            kmeans_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                kmeans_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            selection_defer_preds = teacher_selection.get_defer_preds(
                dataset.data_x[split_ids], prior_rejector_preds[split_ids]
            )
            selection_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                selection_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            gen_defer_preds = teacher_gen.get_defer_preds(
                dataset.data_x[split_ids], prior_rejector_preds[split_ids]
            )
            gen_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                gen_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            # Baselines: an all-zeros defer vector (labelled human-only) and an
            # all-ones defer vector (labelled AI-only).
            human_only = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                np.zeros(len(split_ids)),
                dataset.data_y[split_ids],
                metric_y,
            )
            ai_only = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                np.ones(len(split_ids)),
                dataset.data_y[split_ids],
                metric_y,
            )
            if is_train:
                metric_gen_train_trial.append(gen_loss)
                metric_selec_train_trial.append(selection_loss)
                metric_domino_train_trial.append(domino_loss)
                metric_kmeans_train_trial.append(kmeans_loss)
                metrics_human_alone_trial.append(human_only)
                metrics_ai_alone_trial.append(ai_only)
            else:
                # NOTE(review): the test-split human_only / ai_only values are
                # computed but not stored anywhere -- confirm whether they
                # should be saved as well.
                metric_gen_test_trial.append(gen_loss)
                metric_selec_test_trial.append(selection_loss)
                metric_domino_test_trial.append(domino_loss)
                metric_kmeans_test_trial.append(kmeans_loss)

    metrics_human_alone.append(metrics_human_alone_trial)
    metrics_ai_alone.append(metrics_ai_alone_trial)
    metrics_gen_train.append(metric_gen_train_trial)
    metrics_selec_train.append(metric_selec_train_trial)
    metrics_domino_train.append(metric_domino_train_trial)
    metrics_kmeans_train.append(metric_kmeans_train_trial)
    metrics_gen_test.append(metric_gen_test_trial)
    metrics_selec_test.append(metric_selec_test_trial)
    metrics_domino_test.append(metric_domino_test_trial)
    metrics_kmeans_test.append(metric_kmeans_test_trial)

    # Rewrite the results file after every trial (same file name each time), so
    # an interrupted run still leaves the completed trials on disk.
    data_save = {
        "metrics_human_alone": metrics_human_alone,
        "metrics_ai_alone": metrics_ai_alone,
        "metrics_gen_train": metrics_gen_train,
        "metrics_selec_train": metrics_selec_train,
        "metrics_domino_train": metrics_domino_train,
        "metrics_kmeans_train": metrics_kmeans_train,
        "metrics_gen_test": metrics_gen_test,
        "metrics_selec_test": metrics_selec_test,
        "metrics_domino_test": metrics_domino_test,
        "metrics_kmeans_test": metrics_kmeans_test,
        "date": datetime.datetime.now(),
        "TOTAL_TRIALS": TOTAL_TRIALS,
        "DATA_SIZES": DATA_SIZES,
    }
    with open("../exp_data/results/bdd_real_" + date_now + ".pkl", "wb") as f:
        pickle.dump(data_save, f)
    print("trial", trial, "done")

In [None]:

# Kept from the original cell; nothing in this cell imports through this path.
sys.path.append("../")

# Plot, for every teaching method, the mean train-split score across trials
# (error bars: one standard deviation) against the teaching-set size. The
# values come from the in-memory metrics_*_train lists filled by the experiment
# cell above.
alpha_overlap = 0.5

max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# Set the default figure size BEFORE drawing: rcParams only apply to figures
# created afterwards, so setting it after plotting would miss this figure.
plt.rcParams["figure.figsize"] = [6, 4.2]
fig_size = plt.rcParams["figure.figsize"]

# (per-trial metrics, marker, legend label, colour), in drawing order.
plot_series = [
    (metrics_gen_train, "d", "GEN (ours)", "black"),
    (metrics_domino_train, "o", "DOMINO", "red"),
    (metrics_kmeans_train, "x", "KMEANS", "blue"),
    (metrics_selec_train, "<", "SELECT", "green"),
]
for series_metrics, series_marker, series_label, series_color in plot_series:
    # scores_by_size[i] collects, over all trials, the 'score' entry found in
    # element [1] of the stored compute_metrics result for size ns[i].
    scores_by_size = [
        [series_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(
        ns,
        avgs_rand,
        yerr=stds_rand,
        marker=series_marker,
        label=series_label,
        alpha=alpha_overlap,
        color=series_color,
    )

# Hide the top/right frame and keep ticks on the bottom/left axes only.
ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: a second plt.legend() replaces the first, so the
# earlier fontsize='xx-large' call never had any visible effect.
plt.legend()
plt.ylabel('Train Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')

In [None]:

# Plot, for every teaching method, the mean test-split score across trials
# (error bars: one standard deviation) against the teaching-set size. The
# values come from the in-memory metrics_*_test lists filled by the experiment
# cell above.
alpha_overlap = 0.5

max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# Set the default figure size BEFORE drawing: rcParams only apply to figures
# created afterwards, so setting it after plotting would miss this figure.
plt.rcParams["figure.figsize"] = [6, 4.2]
fig_size = plt.rcParams["figure.figsize"]

# (per-trial metrics, marker, legend label, colour), in drawing order.
plot_series = [
    (metrics_gen_test, "d", "GEN (ours)", "black"),
    (metrics_domino_test, "o", "DOMINO", "red"),
    (metrics_kmeans_test, "x", "KMEANS", "blue"),
    (metrics_selec_test, "<", "SELECT", "green"),
]
for series_metrics, series_marker, series_label, series_color in plot_series:
    # scores_by_size[i] collects, over all trials, the 'score' entry found in
    # element [1] of the stored compute_metrics result for size ns[i].
    scores_by_size = [
        [series_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(
        ns,
        avgs_rand,
        yerr=stds_rand,
        marker=series_marker,
        label=series_label,
        alpha=alpha_overlap,
        color=series_color,
    )

# Hide the top/right frame and keep ticks on the bottom/left axes only.
ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: a second plt.legend() replaces the first, so the
# earlier fontsize='xx-large' call never had any visible effect.
plt.legend()
plt.ylabel('Test Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')



# MMLU

In [None]:
# Timestamp string (YYYY-MM-DD_HHMMSS) used in the name of the results file
# written by the experiment cell below.
date_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")

In [None]:
# Load the pickled MMLU dataset object; the context manager closes the file.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('../data/cleaned_pkl/mmlu_dataset.pkl', "rb") as dataset_file:
    dataset = pickle.load(dataset_file)


In [None]:
# MMLU experiment. Every trial draws a fresh train/test split, simulates human
# predictions, fits the four teaching methods on the train split and stores the
# compute_metrics result of each method on both splits for every size in DATA_SIZES.
data_size_n = len(dataset.data_y)
# PARAMETERS FOR ALGORITHMS
lr = 0.001
epochs = 2000
# Number of answer options; used for the simulated human and the one-hot encodings.
N_CLASSES = 4
# Mixture-component count Domino is retried with when fitting it with
# `data_size` components raises.
DOMINO_FALLBACK_COMPONENTS = 50

# Result containers. Outer index: trial; inner index: position in DATA_SIZES.
metrics_gen_train = []
metrics_domino_train = []
metrics_kmeans_train = []
metrics_selec_train = []

metrics_gen_test = []
metrics_domino_test = []
metrics_kmeans_test = []
metrics_selec_test = []

# Baselines (defer to nobody / defer to the AI always); train split only.
metrics_human_alone = []
metrics_ai_alone = []


max_teaching_points = max(DATA_SIZES)

for trial in range(TOTAL_TRIALS):
    data_ids = list(range(0, len(dataset.data_y)))
    # test_size=0.3 keeps 70% of the ids for training and 30% for testing.
    data_train_ids, data_test_ids = train_test_split(data_ids, test_size=0.3)
    # Simulated human: correct with probability 0.5, otherwise one of the other
    # N_CLASSES - 1 options chosen uniformly at random.
    hum_preds = np.empty(data_size_n, dtype=int)

    for i in range(len(dataset.data_y)):
        current_value = dataset.data_y[i]
        # 50% chance to match the current_value
        if np.random.rand() < 0.5:
            hum_preds[i] = current_value
        else:
            # Choose a random value excluding the current_value
            possible_values = [v for v in range(N_CLASSES) if v != current_value]
            hum_preds[i] = np.random.choice(possible_values)

    dataset.hum_preds = hum_preds
    # Prior rejector: an independent fair coin flip per example.
    prior_rejector_preds = np.array(
        [np.random.choice([0, 1], p=[0.5, 0.5]) for i in range(len(dataset.data_y))]
    )

    # GEN and SELECT are fitted once per trial with the largest teaching budget;
    # smaller budgets reuse a prefix of the resulting teaching set (see below).
    teacher_gen = TeacherGenerative(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        delta,
    )
    teacher_gen.epochs = epochs
    teacher_gen.lr = lr
    teacher_gen.initialization_restarts = initialization_restarts
    teacher_gen.initialization_epochs = initialization_epochs
    teacher_gen.fit()

    teacher_selection = TeacherSelective(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        kernel,
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        randomized_sampling,
        delta,
        parallel_processes,
    )
    print("SELECTION")
    teacher_selection.randomized_sampling = randomized_sampling
    teacher_selection.parallel_processes = parallel_processes
    teacher_selection.fit()

    # Keep the full teaching sets so each data_size can take its own prefix.
    saved_selec_points = copy.deepcopy(teacher_selection.teaching_set)
    saved_gen_points = copy.deepcopy(teacher_gen.teaching_set)

    # Per-trial results, one entry per element of DATA_SIZES.
    metric_gen_train_trial = []
    metric_selec_train_trial = []
    metric_gen_test_trial = []
    metric_selec_test_trial = []
    metric_domino_train_trial = []
    metric_domino_test_trial = []
    metric_kmeans_train_trial = []
    metric_kmeans_test_trial = []
    metrics_human_alone_trial = []
    metrics_ai_alone_trial = []

    # One-hot encodings of the train-split labels and predictions, handed to
    # Domino. They depend only on the trial's split, so they are built once
    # here rather than once per data_size.
    train_rows = np.arange(len(data_train_ids))
    data_y_one_hot = np.zeros((len(data_train_ids), N_CLASSES))
    data_y_one_hot[train_rows, dataset.data_y[data_train_ids]] = 1
    ai_preds_one_hot = np.zeros((len(data_train_ids), N_CLASSES))
    ai_preds_one_hot[train_rows, dataset.ai_preds[data_train_ids]] = 1
    hum_preds_one_hot = np.zeros((len(data_train_ids), N_CLASSES))
    hum_preds_one_hot[train_rows, dataset.hum_preds[data_train_ids]] = 1

    for data_size in DATA_SIZES:
        # Domino and k-means are refitted from scratch for every teaching budget.
        # The one-hot AI predictions are passed twice: once in the AI-prediction
        # position and once in the position that receives dataset.ai_scores in
        # the BDD experiment.
        try:
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                data_y_one_hot,
                hum_preds_one_hot,
                ai_preds_one_hot,
                ai_preds_one_hot,
                metric_y,
                n_pca_components=None,
                n_mixture_components=data_size,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        except Exception as exc:
            # Best-effort fallback: retry with a fixed number of mixture
            # components. Only Exception is caught so Ctrl-C still interrupts,
            # and the cause is reported instead of being swallowed.
            print("Domino failed:", repr(exc))
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                data_y_one_hot,
                hum_preds_one_hot,
                ai_preds_one_hot,
                ai_preds_one_hot,
                metric_y,
                n_pca_components=None,
                n_mixture_components=DOMINO_FALLBACK_COMPONENTS,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        teacher_kmeans = TeacherKmeans(
            dataset.data_x[data_train_ids],
            dataset.data_y[data_train_ids],
            dataset.hum_preds[data_train_ids],
            dataset.ai_preds[data_train_ids],
            metric_y,
            data_size,
        )
        # teacher_domino is already fitted above (in the try or in the fallback);
        # fitting it again here would run outside the guard.
        teacher_kmeans.fit()
        # Trim the selection and gen teaching sets to the current budget.
        teacher_selection.teaching_set = saved_selec_points[:data_size]
        teacher_gen.teaching_set = saved_gen_points[:data_size]

        # Evaluate on the train split first, then on the test split, with the
        # same sequence of calls for both.
        for split_ids, is_train in ((data_train_ids, True), (data_test_ids, False)):
            domino_defer_preds = teacher_domino.get_defer_preds(
                dataset.data_x[split_ids]
            )
            domino_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                domino_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            kmeans_defer_preds = teacher_kmeans.get_defer_preds(
                dataset.data_x[split_ids]
            )
            kmeans_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                kmeans_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            selection_defer_preds = teacher_selection.get_defer_preds(
                dataset.data_x[split_ids], prior_rejector_preds[split_ids]
            )
            selection_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                selection_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            gen_defer_preds = teacher_gen.get_defer_preds(
                dataset.data_x[split_ids], prior_rejector_preds[split_ids]
            )
            gen_loss = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                gen_defer_preds,
                dataset.data_y[split_ids],
                metric_y,
            )
            # Baselines: an all-zeros defer vector (labelled human-only) and an
            # all-ones defer vector (labelled AI-only).
            human_only = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                np.zeros(len(split_ids)),
                dataset.data_y[split_ids],
                metric_y,
            )
            ai_only = compute_metrics(
                dataset.hum_preds[split_ids],
                dataset.ai_preds[split_ids],
                np.ones(len(split_ids)),
                dataset.data_y[split_ids],
                metric_y,
            )
            if is_train:
                metric_gen_train_trial.append(gen_loss)
                metric_selec_train_trial.append(selection_loss)
                metric_domino_train_trial.append(domino_loss)
                metric_kmeans_train_trial.append(kmeans_loss)
                metrics_human_alone_trial.append(human_only)
                metrics_ai_alone_trial.append(ai_only)
            else:
                # NOTE(review): the test-split human_only / ai_only values are
                # computed but not stored anywhere -- confirm whether they
                # should be saved as well.
                metric_gen_test_trial.append(gen_loss)
                metric_selec_test_trial.append(selection_loss)
                metric_domino_test_trial.append(domino_loss)
                metric_kmeans_test_trial.append(kmeans_loss)

    metrics_human_alone.append(metrics_human_alone_trial)
    metrics_ai_alone.append(metrics_ai_alone_trial)
    metrics_gen_train.append(metric_gen_train_trial)
    metrics_selec_train.append(metric_selec_train_trial)
    metrics_domino_train.append(metric_domino_train_trial)
    metrics_kmeans_train.append(metric_kmeans_train_trial)
    metrics_gen_test.append(metric_gen_test_trial)
    metrics_selec_test.append(metric_selec_test_trial)
    metrics_domino_test.append(metric_domino_test_trial)
    metrics_kmeans_test.append(metric_kmeans_test_trial)

    # Rewrite the results file after every trial (same file name each time), so
    # an interrupted run still leaves the completed trials on disk.
    data_save = {
        "metrics_human_alone": metrics_human_alone,
        "metrics_ai_alone": metrics_ai_alone,
        "metrics_gen_train": metrics_gen_train,
        "metrics_selec_train": metrics_selec_train,
        "metrics_domino_train": metrics_domino_train,
        "metrics_kmeans_train": metrics_kmeans_train,
        "metrics_gen_test": metrics_gen_test,
        "metrics_selec_test": metrics_selec_test,
        "metrics_domino_test": metrics_domino_test,
        "metrics_kmeans_test": metrics_kmeans_test,
        "date": datetime.datetime.now(),
        "TOTAL_TRIALS": TOTAL_TRIALS,
        "DATA_SIZES": DATA_SIZES,
    }
    with open("../exp_data/results/mmlu_real_" + date_now + ".pkl", "wb") as f:
        pickle.dump(data_save, f)
    print("trial", trial, "done")
         








In [None]:

# Kept from the original cell; nothing in this cell imports through this path.
sys.path.append("../")

# Plot, for every teaching method, the mean train-split score across trials
# (error bars: one standard deviation) against the teaching-set size. The
# values come from the in-memory metrics_*_train lists filled by the experiment
# cell above.
alpha_overlap = 0.5

max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# Set the default figure size BEFORE drawing: rcParams only apply to figures
# created afterwards, so setting it after plotting would miss this figure.
plt.rcParams["figure.figsize"] = [6, 4.2]
fig_size = plt.rcParams["figure.figsize"]

# (per-trial metrics, marker, legend label, colour), in drawing order.
plot_series = [
    (metrics_gen_train, "d", "GEN (ours)", "black"),
    (metrics_domino_train, "o", "DOMINO", "red"),
    (metrics_kmeans_train, "x", "KMEANS", "blue"),
    (metrics_selec_train, "<", "SELECT", "green"),
]
for series_metrics, series_marker, series_label, series_color in plot_series:
    # scores_by_size[i] collects, over all trials, the 'score' entry found in
    # element [1] of the stored compute_metrics result for size ns[i].
    scores_by_size = [
        [series_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(
        ns,
        avgs_rand,
        yerr=stds_rand,
        marker=series_marker,
        label=series_label,
        alpha=alpha_overlap,
        color=series_color,
    )

# Hide the top/right frame and keep ticks on the bottom/left axes only.
ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: a second plt.legend() replaces the first, so the
# earlier fontsize='xx-large' call never had any visible effect.
plt.legend()
plt.ylabel('Train Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')

In [None]:

# Plot, for every teaching method, the mean test-split score across trials
# (error bars: one standard deviation) against the teaching-set size. The
# values come from the in-memory metrics_*_test lists filled by the experiment
# cell above.
alpha_overlap = 0.5

max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# Set the default figure size BEFORE drawing: rcParams only apply to figures
# created afterwards, so setting it after plotting would miss this figure.
plt.rcParams["figure.figsize"] = [6, 4.2]
fig_size = plt.rcParams["figure.figsize"]

# (per-trial metrics, marker, legend label, colour), in drawing order.
plot_series = [
    (metrics_gen_test, "d", "GEN (ours)", "black"),
    (metrics_domino_test, "o", "DOMINO", "red"),
    (metrics_kmeans_test, "x", "KMEANS", "blue"),
    (metrics_selec_test, "<", "SELECT", "green"),
]
for series_metrics, series_marker, series_label, series_color in plot_series:
    # scores_by_size[i] collects, over all trials, the 'score' entry found in
    # element [1] of the stored compute_metrics result for size ns[i].
    scores_by_size = [
        [series_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(
        ns,
        avgs_rand,
        yerr=stds_rand,
        marker=series_marker,
        label=series_label,
        alpha=alpha_overlap,
        color=series_color,
    )

# Hide the top/right frame and keep ticks on the bottom/left axes only.
ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: a second plt.legend() replaces the first, so the
# earlier fontsize='xx-large' call never had any visible effect.
plt.legend()
plt.ylabel('Test Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')

# MS-COCO

In [None]:
# Timestamp (YYYY-MM-DD_HHMMSS) used to tag this run's MS-COCO result files.
date_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")

In [None]:
# Load the pre-processed MS-COCO dataset object.
# The context manager closes the file handle as soon as unpickling finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../data/cleaned_pkl/coco_dataset.pkl', "rb") as dataset_file:
    dataset = pickle.load(dataset_file)


In [None]:
# MS-COCO experiment. For each of TOTAL_TRIALS random 70/30 train/test splits:
#   1. simulate noisy human predictions and a random prior rejector,
#   2. fit the GEN and SELECT teachers once with max(DATA_SIZES) teaching points,
#   3. for every size in DATA_SIZES, fit DOMINO and KMEANS, truncate the GEN and
#      SELECT teaching sets to that size, and score every method on both splits,
#   4. checkpoint all results accumulated so far to a pickle file.
data_size_n = len(dataset.data_y)
# PARAMETERS FOR ALGORITHMS
lr = 0.001
epochs = 2000

# Accumulated results, indexed as metrics_*[trial][data_size_index]; each entry
# is the value returned by compute_metrics.
metrics_gen_train = []
metrics_domino_train = []
metrics_kmeans_train = []
metrics_selec_train = []

metrics_gen_test = []
metrics_domino_test = []
metrics_kmeans_test = []
metrics_selec_test = []

metrics_human_alone = []
metrics_ai_alone = []


max_teaching_points = max(DATA_SIZES)

for trial in range(TOTAL_TRIALS):
    data_ids = list(range(0, len(dataset.data_y)))
    data_train_ids, data_test_ids = train_test_split(data_ids, test_size=0.3)

    # Simulated human: predicts 1 with probability 0.7 when the label is 1 and
    # 0 with probability 0.7 otherwise, i.e. 70% accurate on binary labels.
    hum_preds = np.zeros(data_size_n)
    for i in range(data_size_n):
        if dataset.data_y[i] == 1:
            hum_preds[i] = np.random.choice([0, 1], p=[0.3, 0.7])
        else:
            hum_preds[i] = np.random.choice([0, 1], p=[0.7, 0.3])

    dataset.hum_preds = hum_preds
    # Prior rejector: an independent fair coin flip per example, drawn in one call.
    prior_rejector_preds = np.random.choice([0, 1], size=len(dataset.data_y), p=[0.5, 0.5])

    teacher_gen = TeacherGenerative(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        delta,
    )
    teacher_gen.epochs = epochs
    teacher_gen.lr = lr
    teacher_gen.initialization_restarts = initialization_restarts
    teacher_gen.initialization_epochs = initialization_epochs
    teacher_gen.fit()

    teacher_selection = TeacherSelective(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        kernel,
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        randomized_sampling,
        delta,
        parallel_processes,
    )
    print("SELECTION")
    teacher_selection.randomized_sampling = randomized_sampling
    teacher_selection.parallel_processes = parallel_processes
    teacher_selection.fit()

    # Keep the full teaching sets; they are truncated to each data size below.
    saved_selec_points = copy.deepcopy(teacher_selection.teaching_set)
    saved_gen_points = copy.deepcopy(teacher_gen.teaching_set)

    # Per-trial results, one entry per element of DATA_SIZES.
    metric_gen_train_trial = []
    metric_selec_train_trial = []
    metric_gen_test_trial = []
    metric_selec_test_trial = []
    metric_domino_train_trial = []
    metric_domino_test_trial = []
    metric_kmeans_train_trial = []
    metric_kmeans_test_trial = []
    metrics_human_alone_trial = []
    metrics_ai_alone_trial = []

    for data_size in DATA_SIZES:
        try:
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                dataset.data_y[data_train_ids],
                dataset.hum_preds[data_train_ids],
                dataset.ai_preds[data_train_ids],
                dataset.ai_scores[data_train_ids],
                metric_y,
                n_pca_components=None,
                n_mixture_components=data_size,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        except Exception:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
            # stop the run; retry with a fixed number of mixture components.
            print("Domino failed")
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                dataset.data_y[data_train_ids],
                dataset.hum_preds[data_train_ids],
                dataset.ai_preds[data_train_ids],
                dataset.ai_scores[data_train_ids],
                metric_y,
                n_pca_components=None,
                n_mixture_components=50,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        teacher_kmeans = TeacherKmeans(
            dataset.data_x[data_train_ids],
            dataset.data_y[data_train_ids],
            dataset.hum_preds[data_train_ids],
            dataset.ai_preds[data_train_ids],
            metric_y,
            data_size,
        )
        print("Domino")
        # teacher_domino is already fitted inside the guarded block above, so it
        # is not refitted here: an unguarded second fit would double the work
        # and could raise outside the fallback.
        teacher_kmeans.fit()
        # Trim the GEN and SELECT teaching sets to the current size.
        teacher_selection.teaching_set = saved_selec_points[:data_size]
        teacher_gen.teaching_set = saved_gen_points[:data_size]

        # Evaluate every method on the train split first, then on the test split.
        for split_ids, is_train_split in ((data_train_ids, True), (data_test_ids, False)):
            split_x = dataset.data_x[split_ids]
            split_y = dataset.data_y[split_ids]
            split_hum = dataset.hum_preds[split_ids]
            split_ai = dataset.ai_preds[split_ids]
            split_prior = prior_rejector_preds[split_ids]

            domino_defer_preds = teacher_domino.get_defer_preds(split_x)
            domino_loss = compute_metrics(split_hum, split_ai, domino_defer_preds, split_y, metric_y)
            kmeans_defer_preds = teacher_kmeans.get_defer_preds(split_x)
            kmeans_loss = compute_metrics(split_hum, split_ai, kmeans_defer_preds, split_y, metric_y)
            selection_defer_preds = teacher_selection.get_defer_preds(split_x, split_prior)
            selection_loss = compute_metrics(split_hum, split_ai, selection_defer_preds, split_y, metric_y)
            gen_defer_preds = teacher_gen.get_defer_preds(split_x, split_prior)
            gen_loss = compute_metrics(split_hum, split_ai, gen_defer_preds, split_y, metric_y)
            # Baselines: an all-zeros and an all-ones deferral vector.
            human_only = compute_metrics(split_hum, split_ai, np.zeros(len(split_ids)), split_y, metric_y)
            ai_only = compute_metrics(split_hum, split_ai, np.ones(len(split_ids)), split_y, metric_y)

            if is_train_split:
                metric_gen_train_trial.append(gen_loss)
                metric_selec_train_trial.append(selection_loss)
                metric_domino_train_trial.append(domino_loss)
                metric_kmeans_train_trial.append(kmeans_loss)
                # Only the train-split baselines are stored in
                # metrics_human_alone / metrics_ai_alone.
                metrics_human_alone_trial.append(human_only)
                metrics_ai_alone_trial.append(ai_only)
            else:
                metric_gen_test_trial.append(gen_loss)
                metric_selec_test_trial.append(selection_loss)
                metric_domino_test_trial.append(domino_loss)
                metric_kmeans_test_trial.append(kmeans_loss)

    metrics_human_alone.append(metrics_human_alone_trial)
    metrics_ai_alone.append(metrics_ai_alone_trial)
    metrics_gen_train.append(metric_gen_train_trial)
    metrics_selec_train.append(metric_selec_train_trial)
    metrics_domino_train.append(metric_domino_train_trial)
    metrics_kmeans_train.append(metric_kmeans_train_trial)
    metrics_gen_test.append(metric_gen_test_trial)
    metrics_selec_test.append(metric_selec_test_trial)
    metrics_domino_test.append(metric_domino_test_trial)
    metrics_kmeans_test.append(metric_kmeans_test_trial)

    # Checkpoint after every trial so a crash loses at most one trial of work;
    # the same file is overwritten each time.
    data_save = {
        "metrics_human_alone": metrics_human_alone,
        "metrics_ai_alone": metrics_ai_alone,
        "metrics_gen_train": metrics_gen_train,
        "metrics_selec_train": metrics_selec_train,
        "metrics_domino_train": metrics_domino_train,
        "metrics_kmeans_train": metrics_kmeans_train,
        "metrics_gen_test": metrics_gen_test,
        "metrics_selec_test": metrics_selec_test,
        "metrics_domino_test": metrics_domino_test,
        "metrics_kmeans_test": metrics_kmeans_test,
        'date': datetime.datetime.now(),
        'TOTAL_TRIALS': TOTAL_TRIALS,
        'DATA_SIZES': DATA_SIZES,
    }
    with open("../exp_data/results/coco_real_" + date_now + ".pkl", "wb") as f:
        pickle.dump(data_save, f)
    print("trial", trial, "done")
         








In [None]:

# MS-COCO train-loss curves: for each teaching method, plot the mean score over
# trials (error bars = standard deviation) against the number of teaching points.
sys.path.append("../")
# Optional font / LaTeX settings, currently disabled:
#matplotlib.rcParams['pdf.fonttype'] = 42
#matplotlib.rcParams['ps.fonttype'] = 42
#plt.rc('text', usetex=True)
#plt.rc('font', family='serif')

alpha_overlap = 0.5

# Set the default figure size before the first plotting call so that the figure
# created implicitly below actually uses it.
plt.rcParams["figure.figsize"] = (6, 4.2)
fig_size = plt.rcParams["figure.figsize"]

# Results are read from the in-memory metrics_* lists filled by the experiment cell.
max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# (per-trial metrics, marker, legend label, colour) for each teaching method.
plot_series = [
    (metrics_gen_train, "d", 'GEN (ours)', "black"),
    (metrics_domino_train, "o", 'DOMINO', "red"),
    (metrics_kmeans_train, "x", 'KMEANS', "blue"),
    (metrics_selec_train, "<", 'SELECT', "green"),
]
for method_metrics, marker, label, color in plot_series:
    # scores_by_size[i] holds one score per trial for the i-th entry of ns.
    scores_by_size = [
        [method_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(ns, avgs_rand, yerr=stds_rand, marker=marker, label=label, alpha=alpha_overlap, color=color)


ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: calling plt.legend() a second time would rebuild the
# legend and drop any fontsize passed to the first call.
plt.legend()
plt.ylabel('Train Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')


In [None]:

# MS-COCO test-loss curves: for each teaching method, plot the mean score over
# trials (error bars = standard deviation) against the number of teaching points.
alpha_overlap = 0.5

# Set the default figure size before the first plotting call so that the figure
# created implicitly below actually uses it.
plt.rcParams["figure.figsize"] = (6, 4.2)
fig_size = plt.rcParams["figure.figsize"]

# Results are read from the in-memory metrics_* lists filled by the experiment cell.
max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# (per-trial metrics, marker, legend label, colour) for each teaching method.
plot_series = [
    (metrics_gen_test, "d", 'GEN (ours)', "black"),
    (metrics_domino_test, "o", 'DOMINO', "red"),
    (metrics_kmeans_test, "x", 'KMEANS', "blue"),
    (metrics_selec_test, "<", 'SELECT', "green"),
]
for method_metrics, marker, label, color in plot_series:
    # scores_by_size[i] holds one score per trial for the i-th entry of ns.
    scores_by_size = [
        [method_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(ns, avgs_rand, yerr=stds_rand, marker=marker, label=label, alpha=alpha_overlap, color=color)


ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: calling plt.legend() a second time would rebuild the
# legend and drop any fontsize passed to the first call.
plt.legend()
plt.ylabel('Test Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')

#plt.savefig("../exp_data/plots/plot_synth_data_realizable_"+ date_now+".pdf", dpi = 1000, bbox_inches='tight')
#plt.show()
# 

# DYNASENT

In [None]:
# Timestamp (YYYY-MM-DD_HHMMSS) used to tag this run's DynaSent result files.
date_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")

In [None]:
# Load the pre-processed DynaSent dataset object.
# The context manager closes the file handle as soon as unpickling finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../data/cleaned_pkl/dynasent_dataset.pkl', "rb") as dataset_file:
    dataset = pickle.load(dataset_file)


In [None]:
# DynaSent experiment. For each of TOTAL_TRIALS random 70/30 train/test splits:
#   1. fit the GEN and SELECT teachers once with max(DATA_SIZES) teaching points,
#   2. for every size in DATA_SIZES, fit DOMINO (on one-hot inputs) and KMEANS,
#      truncate the GEN and SELECT teaching sets to that size, and score every
#      method on both splits,
#   3. checkpoint all results accumulated so far to a pickle file.
data_size_n = len(dataset.data_y)
# PARAMETERS FOR ALGORITHMS
lr = 0.001
epochs = 2000

# Width of the one-hot arrays passed to DOMINO; labels and predictions are used
# as column indices, so they must lie in [0, DYNASENT_N_CLASSES).
DYNASENT_N_CLASSES = 4

# Accumulated results, indexed as metrics_*[trial][data_size_index]; each entry
# is the value returned by compute_metrics.
metrics_gen_train = []
metrics_domino_train = []
metrics_kmeans_train = []
metrics_selec_train = []

metrics_gen_test = []
metrics_domino_test = []
metrics_kmeans_test = []
metrics_selec_test = []

metrics_human_alone = []
metrics_ai_alone = []


max_teaching_points = max(DATA_SIZES)

for trial in range(TOTAL_TRIALS):
    data_ids = list(range(0, len(dataset.data_y)))
    data_train_ids, data_test_ids = train_test_split(data_ids, test_size=0.3)

    # Prior rejector is the constant 0 for every example (previously drawn with
    # probability 1), so build it directly instead of sampling per element.
    prior_rejector_preds = np.zeros(len(dataset.data_y), dtype=int)

    teacher_gen = TeacherGenerative(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        delta,
    )
    teacher_gen.epochs = epochs
    teacher_gen.lr = lr
    teacher_gen.initialization_restarts = initialization_restarts
    teacher_gen.initialization_epochs = initialization_epochs
    teacher_gen.fit()

    teacher_selection = TeacherSelective(
        dataset.data_x[data_train_ids],
        dataset.data_y[data_train_ids],
        dataset.hum_preds[data_train_ids],
        dataset.ai_preds[data_train_ids],
        prior_rejector_preds[data_train_ids],
        kernel,
        metric_y,
        max_teaching_points,
        alpha,
        beta_high,
        beta_low,
        randomized_sampling,
        delta,
        parallel_processes,
    )
    print("SELECTION")
    teacher_selection.randomized_sampling = randomized_sampling
    teacher_selection.parallel_processes = parallel_processes
    teacher_selection.fit()

    # Keep the full teaching sets; they are truncated to each data size below.
    saved_selec_points = copy.deepcopy(teacher_selection.teaching_set)
    saved_gen_points = copy.deepcopy(teacher_gen.teaching_set)

    # Per-trial results, one entry per element of DATA_SIZES.
    metric_gen_train_trial = []
    metric_selec_train_trial = []
    metric_gen_test_trial = []
    metric_selec_test_trial = []
    metric_domino_train_trial = []
    metric_domino_test_trial = []
    metric_kmeans_train_trial = []
    metric_kmeans_test_trial = []
    metrics_human_alone_trial = []
    metrics_ai_alone_trial = []

    # One-hot encodings of the training labels / predictions for DOMINO. They do
    # not depend on data_size, so they are built once per trial.
    # NOTE(review): this cast mutates dataset.hum_preds in place, so GEN/SELECT
    # see the original dtype in the first trial and the int dtype afterwards;
    # confirm whether the cast should happen once before the trial loop.
    dataset.hum_preds = dataset.hum_preds.astype(int)
    train_rows = np.arange(len(data_train_ids))
    data_y_one_hot = np.zeros((len(data_train_ids), DYNASENT_N_CLASSES))
    data_y_one_hot[train_rows, dataset.data_y[data_train_ids]] = 1
    ai_preds_one_hot = np.zeros((len(data_train_ids), DYNASENT_N_CLASSES))
    ai_preds_one_hot[train_rows, dataset.ai_preds[data_train_ids]] = 1
    hum_preds_one_hot = np.zeros((len(data_train_ids), DYNASENT_N_CLASSES))
    hum_preds_one_hot[train_rows, dataset.hum_preds[data_train_ids]] = 1

    for data_size in DATA_SIZES:
        # The one-hot AI predictions are passed twice: once as the predictions
        # and once in the position where the COCO cell passes dataset.ai_scores.
        try:
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                data_y_one_hot,
                hum_preds_one_hot,
                ai_preds_one_hot,
                ai_preds_one_hot,
                metric_y,
                n_pca_components=None,
                n_mixture_components=data_size,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        except Exception:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
            # stop the run; retry with a fixed number of mixture components.
            print("Domino failed")
            teacher_domino = TeacherDomino(
                dataset.data_x[data_train_ids],
                data_y_one_hot,
                hum_preds_one_hot,
                ai_preds_one_hot,
                ai_preds_one_hot,
                metric_y,
                n_pca_components=None,
                n_mixture_components=50,
                teaching_points=data_size,
            )
            teacher_domino.fit()
        teacher_kmeans = TeacherKmeans(
            dataset.data_x[data_train_ids],
            dataset.data_y[data_train_ids],
            dataset.hum_preds[data_train_ids],
            dataset.ai_preds[data_train_ids],
            metric_y,
            data_size,
        )
        print("Domino")
        # teacher_domino is already fitted inside the guarded block above, so it
        # is not refitted here: an unguarded second fit would double the work
        # and could raise outside the fallback.
        teacher_kmeans.fit()
        # Trim the GEN and SELECT teaching sets to the current size.
        teacher_selection.teaching_set = saved_selec_points[:data_size]
        teacher_gen.teaching_set = saved_gen_points[:data_size]

        # Evaluate every method on the train split first, then on the test split.
        for split_ids, is_train_split in ((data_train_ids, True), (data_test_ids, False)):
            split_x = dataset.data_x[split_ids]
            split_y = dataset.data_y[split_ids]
            split_hum = dataset.hum_preds[split_ids]
            split_ai = dataset.ai_preds[split_ids]
            split_prior = prior_rejector_preds[split_ids]

            domino_defer_preds = teacher_domino.get_defer_preds(split_x)
            domino_loss = compute_metrics(split_hum, split_ai, domino_defer_preds, split_y, metric_y)
            kmeans_defer_preds = teacher_kmeans.get_defer_preds(split_x)
            kmeans_loss = compute_metrics(split_hum, split_ai, kmeans_defer_preds, split_y, metric_y)
            selection_defer_preds = teacher_selection.get_defer_preds(split_x, split_prior)
            selection_loss = compute_metrics(split_hum, split_ai, selection_defer_preds, split_y, metric_y)
            gen_defer_preds = teacher_gen.get_defer_preds(split_x, split_prior)
            gen_loss = compute_metrics(split_hum, split_ai, gen_defer_preds, split_y, metric_y)
            # Baselines: an all-zeros and an all-ones deferral vector.
            human_only = compute_metrics(split_hum, split_ai, np.zeros(len(split_ids)), split_y, metric_y)
            ai_only = compute_metrics(split_hum, split_ai, np.ones(len(split_ids)), split_y, metric_y)

            if is_train_split:
                metric_gen_train_trial.append(gen_loss)
                metric_selec_train_trial.append(selection_loss)
                metric_domino_train_trial.append(domino_loss)
                metric_kmeans_train_trial.append(kmeans_loss)
                # Only the train-split baselines are stored in
                # metrics_human_alone / metrics_ai_alone.
                metrics_human_alone_trial.append(human_only)
                metrics_ai_alone_trial.append(ai_only)
            else:
                metric_gen_test_trial.append(gen_loss)
                metric_selec_test_trial.append(selection_loss)
                metric_domino_test_trial.append(domino_loss)
                metric_kmeans_test_trial.append(kmeans_loss)

    metrics_human_alone.append(metrics_human_alone_trial)
    metrics_ai_alone.append(metrics_ai_alone_trial)
    metrics_gen_train.append(metric_gen_train_trial)
    metrics_selec_train.append(metric_selec_train_trial)
    metrics_domino_train.append(metric_domino_train_trial)
    metrics_kmeans_train.append(metric_kmeans_train_trial)
    metrics_gen_test.append(metric_gen_test_trial)
    metrics_selec_test.append(metric_selec_test_trial)
    metrics_domino_test.append(metric_domino_test_trial)
    metrics_kmeans_test.append(metric_kmeans_test_trial)

    # Checkpoint after every trial so a crash loses at most one trial of work;
    # the same file is overwritten each time.
    data_save = {
        "metrics_human_alone": metrics_human_alone,
        "metrics_ai_alone": metrics_ai_alone,
        "metrics_gen_train": metrics_gen_train,
        "metrics_selec_train": metrics_selec_train,
        "metrics_domino_train": metrics_domino_train,
        "metrics_kmeans_train": metrics_kmeans_train,
        "metrics_gen_test": metrics_gen_test,
        "metrics_selec_test": metrics_selec_test,
        "metrics_domino_test": metrics_domino_test,
        "metrics_kmeans_test": metrics_kmeans_test,
        'date': datetime.datetime.now(),
        'TOTAL_TRIALS': TOTAL_TRIALS,
        'DATA_SIZES': DATA_SIZES,
    }
    with open("../exp_data/results/dynasent_real_" + date_now + ".pkl", "wb") as f:
        pickle.dump(data_save, f)
    print("trial", trial, "done")
         








In [None]:

# DynaSent train-loss curves: for each teaching method, plot the mean score over
# trials (error bars = standard deviation) against the number of teaching points.
sys.path.append("../")
# Optional font / LaTeX settings, currently disabled:
#matplotlib.rcParams['pdf.fonttype'] = 42
#matplotlib.rcParams['ps.fonttype'] = 42
#plt.rc('text', usetex=True)
#plt.rc('font', family='serif')

alpha_overlap = 0.5

# Set the default figure size before the first plotting call so that the figure
# created implicitly below actually uses it.
plt.rcParams["figure.figsize"] = (6, 4.2)
fig_size = plt.rcParams["figure.figsize"]

# Results are read from the in-memory metrics_* lists filled by the experiment cell.
max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# (per-trial metrics, marker, legend label, colour) for each teaching method.
plot_series = [
    (metrics_gen_train, "d", 'GEN (ours)', "black"),
    (metrics_domino_train, "o", 'DOMINO', "red"),
    (metrics_kmeans_train, "x", 'KMEANS', "blue"),
    (metrics_selec_train, "<", 'SELECT', "green"),
]
for method_metrics, marker, label, color in plot_series:
    # scores_by_size[i] holds one score per trial for the i-th entry of ns.
    scores_by_size = [
        [method_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(ns, avgs_rand, yerr=stds_rand, marker=marker, label=label, alpha=alpha_overlap, color=color)


ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: calling plt.legend() a second time would rebuild the
# legend and drop any fontsize passed to the first call.
plt.legend()
plt.ylabel('Train Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')


In [None]:

# DynaSent test-loss curves: for each teaching method, plot the mean score over
# trials (error bars = standard deviation) against the number of teaching points.
alpha_overlap = 0.5

# Set the default figure size before the first plotting call so that the figure
# created implicitly below actually uses it.
plt.rcParams["figure.figsize"] = (6, 4.2)
fig_size = plt.rcParams["figure.figsize"]

# Results are read from the in-memory metrics_* lists filled by the experiment cell.
max_trials = TOTAL_TRIALS
ns = DATA_SIZES

# (per-trial metrics, marker, legend label, colour) for each teaching method.
plot_series = [
    (metrics_gen_test, "d", 'GEN (ours)', "black"),
    (metrics_domino_test, "o", 'DOMINO', "red"),
    (metrics_kmeans_test, "x", 'KMEANS', "blue"),
    (metrics_selec_test, "<", 'SELECT', "green"),
]
for method_metrics, marker, label, color in plot_series:
    # scores_by_size[i] holds one score per trial for the i-th entry of ns.
    scores_by_size = [
        [method_metrics[triall][i][1]['score'] for triall in range(max_trials)]
        for i in range(len(ns))
    ]
    avgs_rand = [np.average(trial_scores) for trial_scores in scores_by_size]
    stds_rand = [np.std(trial_scores) for trial_scores in scores_by_size]
    plt.errorbar(ns, avgs_rand, yerr=stds_rand, marker=marker, label=label, alpha=alpha_overlap, color=color)


ax = plt.gca()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
plt.grid()
# One legend call only: calling plt.legend() a second time would rebuild the
# legend and drop any fontsize passed to the first call.
plt.legend()
plt.ylabel('Test Loss ', fontsize='xx-large')
plt.xlabel('Training data size', fontsize='xx-large')
