In [1]:
import numpy as np
import tensorflow as tf
from curriculum_learning.models.classifier_model import ClassifierModel
from curriculum_learning import utils
import yaml
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
with open("models_hyperparameters.yaml", "r") as stream:
    models_hyperparameters = yaml.safe_load(stream)
    
with open("config_tests.yaml", "r") as stream:
    config_tests = yaml.safe_load(stream)
    
N_EPOCHS = 50
N_TRIALS = 30
BATCH_SIZE = 512

CONFIG = config_tests["proba_best"]

loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [3]:
ds_1 = tfds.load("stl10", split="train", as_supervised=True, shuffle_files=False)

x = []
y = []
for x_, y_ in ds_1.as_numpy_iterator():
    x.append(x_)
    y.append(y_)

2024-05-01 12:37:18.322027: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [4]:
ds_2 = tfds.load("stl10", split="test", as_supervised=True, shuffle_files=False)
for x_, y_ in ds_2.as_numpy_iterator():
    x.append(x_)
    y.append(y_)

2024-05-01 12:37:19.309628: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [5]:
x = np.array(x, dtype=np.float32) / 255
y = np.array(y, dtype=np.float32)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42, stratify=y)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42, stratify=y_test)

n_classes = len(np.unique(y))
train_size = x_train.shape[0]
train_size, len(x_val), len(x_test)

(9100, 1950, 1950)

In [6]:
x_train_sorted = x_train[np.argsort(y_train)]
y_train_sorted = y_train[np.argsort(y_train)]
_, counts = np.unique(y_train_sorted, return_counts=True)

In [7]:
model = ClassifierModel(output_shape=n_classes, **models_hyperparameters["test_model_1"])
model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
model(x_train[0:1])
# model.save_weights("../models/default_model.weights.h5")
model.load_weights("../models/default_model.weights.h5")
model_weights = model.get_weights()

In [None]:
model_scores = []
verbose = 0
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=0)

for _ in tqdm(range(N_TRIALS)):
    model.set_weights(model_weights)

    for i in range(N_EPOCHS):
        n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS) * train_size)

        samples_proba = utils.calculate_proba(
            model, x_train_sorted, y_train_sorted, counts, batch_size=BATCH_SIZE
        )

        samples_ids = utils.chose_samples(n_samples, samples_proba, CONFIG["order_type"])

        model.fit(
            x_train_sorted[samples_ids],
            y_train_sorted[samples_ids],
            # validation_data=(x_val, y_val),
            epochs=1,
            batch_size=BATCH_SIZE,
            verbose=verbose,
        )

    model.fit(
        x_train, y_train, validation_data=(x_val, y_val), epochs=500, batch_size=BATCH_SIZE, 
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=10)], 
        verbose=verbose
    )
            
    _, accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE, verbose=verbose)
    model_scores.append(accuracy)
    print("Mean:", np.mean(model_scores), " Median: ", np.median(model_scores))

  0%|          | 0/30 [00:00<?, ?it/s]

In [22]:
print("Mean:", np.mean(model_scores), " Median: ", np.median(model_scores))

Mean: 0.5750961601734161  Median:  0.577115386724472


In [18]:
print("Mean:", np.mean(model_scores), " Median: ", np.median(model_scores))

Mean: 0.5817307829856873  Median:  0.5800000131130219


In [13]:
model_scores_random = []
verbose = 0

for _ in tqdm(range(N_TRIALS)):
    model.set_weights(model_weights)

    model.fit(
        x_train, y_train, validation_data=(x_val, y_val), epochs=500, batch_size=BATCH_SIZE, verbose=verbose, shuffle=True, 
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=35)]
    )

    _, accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE, verbose=verbose)
    model_scores_random.append(accuracy)
    print("Mean:", np.mean(model_scores_random), " Median: ", np.median(model_scores_random))

  3%|▎         | 1/30 [00:44<21:16, 44.01s/it]

Mean: 0.6328204870223999  Median:  0.6328204870223999


  7%|▋         | 2/30 [01:22<18:56, 40.59s/it]

Mean: 0.6187179386615753  Median:  0.6187179386615753


 10%|█         | 3/30 [02:06<19:05, 42.41s/it]

Mean: 0.6140170892079672  Median:  0.6046153903007507


 13%|█▎        | 4/30 [02:54<19:14, 44.41s/it]

Mean: 0.6175640970468521  Median:  0.6164102554321289


 17%|█▋        | 5/30 [03:41<18:59, 45.58s/it]

Mean: 0.6159999966621399  Median:  0.609743595123291


 20%|██        | 6/30 [04:26<18:05, 45.23s/it]

Mean: 0.6132478614648184  Median:  0.6071794927120209


 23%|██▎       | 7/30 [05:06<16:39, 43.44s/it]

Mean: 0.6117216093199593  Median:  0.6046153903007507


 27%|██▋       | 8/30 [05:44<15:21, 41.87s/it]

Mean: 0.6106410250067711  Median:  0.6046153903007507


 30%|███       | 9/30 [06:22<14:10, 40.50s/it]

Mean: 0.6056410272916158  Median:  0.6046153903007507


 33%|███▎      | 10/30 [07:04<13:39, 40.99s/it]

Mean: 0.6050769269466401  Median:  0.6038461625576019


 37%|███▋      | 11/30 [07:42<12:41, 40.06s/it]

Mean: 0.6057342670180581  Median:  0.6046153903007507


 40%|████      | 12/30 [08:26<12:23, 41.31s/it]

Mean: 0.607264960805575  Median:  0.6046153903007507


 43%|████▎     | 13/30 [09:05<11:29, 40.56s/it]

Mean: 0.6069033558552082  Median:  0.6046153903007507


 47%|████▋     | 14/30 [09:43<10:38, 39.93s/it]

Mean: 0.6057875497000558  Median:  0.6038461625576019


 50%|█████     | 15/30 [10:20<09:45, 39.02s/it]

Mean: 0.6052991469701131  Median:  0.6030769348144531


 53%|█████▎    | 16/30 [10:56<08:54, 38.21s/it]

Mean: 0.6035897433757782  Median:  0.6028205156326294


 57%|█████▋    | 17/30 [11:37<08:26, 38.98s/it]

Mean: 0.6036500755478355  Median:  0.6030769348144531


 60%|██████    | 18/30 [12:19<07:56, 39.71s/it]

Mean: 0.6037891738944583  Median:  0.6038461625576019


 63%|██████▎   | 19/30 [12:58<07:15, 39.64s/it]

Mean: 0.6037516876270896  Median:  0.6030769348144531


 67%|██████▋   | 20/30 [13:42<06:50, 41.01s/it]

Mean: 0.6035384625196457  Median:  0.6030769348144531


 70%|███████   | 21/30 [14:31<06:28, 43.19s/it]

Mean: 0.6045665457135155  Median:  0.6030769348144531


 73%|███████▎  | 22/30 [15:10<05:36, 42.12s/it]

Mean: 0.6044988361271945  Median:  0.6030769348144531


 77%|███████▋  | 23/30 [15:51<04:52, 41.83s/it]

Mean: 0.6033667792444644  Median:  0.6030769348144531


 80%|████████  | 24/30 [16:30<04:05, 40.85s/it]

Mean: 0.6026495744784673  Median:  0.6030769348144531


 83%|████████▎ | 25/30 [17:12<03:25, 41.11s/it]

Mean: 0.6024000024795533  Median:  0.6030769348144531


 87%|████████▋ | 26/30 [17:56<02:48, 42.13s/it]

Mean: 0.602899411549935  Median:  0.6030769348144531


 90%|█████████ | 27/30 [18:35<02:02, 41.00s/it]

Mean: 0.6022032322706999  Median:  0.6030769348144531


 93%|█████████▎| 28/30 [19:17<01:22, 41.33s/it]

Mean: 0.6023443256105695  Median:  0.6030769348144531


 97%|█████████▋| 29/30 [19:57<00:40, 40.98s/it]

Mean: 0.6019451844281164  Median:  0.6030769348144531


100%|██████████| 30/30 [20:35<00:00, 41.18s/it]

Mean: 0.6009230792522431  Median:  0.6028205156326294





In [14]:
model_scores_random

[0.6328204870223999,
 0.6046153903007507,
 0.6046153903007507,
 0.6282051205635071,
 0.609743595123291,
 0.5994871854782104,
 0.6025640964508057,
 0.6030769348144531,
 0.5656410455703735,
 0.6000000238418579,
 0.6123076677322388,
 0.6241025924682617,
 0.6025640964508057,
 0.591282069683075,
 0.5984615087509155,
 0.5779486894607544,
 0.6046153903007507,
 0.6061538457870483,
 0.6030769348144531,
 0.5994871854782104,
 0.6251282095909119,
 0.6030769348144531,
 0.5784615278244019,
 0.5861538648605347,
 0.5964102745056152,
 0.6153846383094788,
 0.5841025710105896,
 0.6061538457870483,
 0.5907692313194275,
 0.5712820291519165]

In [17]:
np.mean(model_scores), np.mean(model_scores_random)

(0.5817307829856873, 0.5667923080921173)

In [16]:
import scipy.stats
scipy.stats.ttest_ind(model_scores, model_scores_random)

TtestResult(statistic=2.654683049658429, pvalue=0.010509567980683561, df=52.0)