In [13]:
import numpy as np
import tensorflow as tf
from curriculum_learning.models.classifier_model import ClassifierModel
from curriculum_learning import utils
import yaml
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import seaborn as sns

In [14]:
with open("models_hyperparameters.yaml", "r") as stream:
    models_hyperparameters = yaml.safe_load(stream)
    
with open("config_tests.yaml", "r") as stream:
    config_tests = yaml.safe_load(stream)
    
N_EPOCHS = 50
N_TRIALS = 50
BATCH_SIZE = 512

CONFIG = config_tests["assessment_proba_best"]

loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [15]:
ds_1 = tfds.load("stl10", split="train", as_supervised=True, shuffle_files=False)
ds_2 = tfds.load("stl10", split="test", as_supervised=True, shuffle_files=False)

x = []
y = []
for x_, y_ in ds_1.as_numpy_iterator():
    x.append(x_)
    y.append(y_)
for x_, y_ in ds_2.as_numpy_iterator():
    x.append(x_)
    y.append(y_)
    
x = np.array(x, dtype=np.float32) / 255
y = np.array(y, dtype=np.float32)

2024-03-29 10:46:42.302003: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-03-29 10:46:43.047442: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [16]:
# x, y = utils.load_cifar_data("../data/cifar-10-batches-py/")
# x /= 255

In [18]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42, stratify=y)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42, stratify=y_test)

n_classes = len(np.unique(y))
train_size = x_train.shape[0]
train_size, len(x_val), len(x_test)

(9100, 1950, 1950)

In [19]:
assessment_model = ClassifierModel(output_shape=n_classes, **models_hyperparameters["assessment_model"])

assessment_model.compile(loss=loss, metrics=["accuracy"])
assessment_model(x_train[:1])
assessment_model.load_weights("../models/assessment_model.weights.h5")

  trackable.load_own_variables(weights_store.get(inner_path))


In [20]:
assessment_model.evaluate(x_test, y_test)

2024-03-29 10:47:07.610497: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5573 - loss: 1.2289


[1.1985386610031128, 0.5651282072067261]

In [21]:
x_train_sorted = x_train[np.argsort(y_train)]
y_train_sorted = y_train[np.argsort(y_train)]
_, counts = np.unique(y_train_sorted, return_counts=True)

samples_proba = utils.calculate_proba2(
    assessment_model, x_train_sorted, y_train_sorted, counts
)

In [23]:
model = ClassifierModel(output_shape=n_classes, **models_hyperparameters["test_model"])
model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
model(x_train[0:1])
# model.save_weights("../models/default_model.weights.h5")
model.load_weights("../models/default_model.weights.h5")
model_weights = model.get_weights()

In [24]:
print(CONFIG)
model_scores = []

for _ in tqdm(range(N_TRIALS)):
    model.set_weights(model_weights)
        
    for i in range(N_EPOCHS):
        n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS) * train_size)
        
        samples_ids = utils.chose_samples(n_samples, samples_proba, CONFIG["order_type"])
        model.fit(
            x_train_sorted[samples_ids],
            y_train_sorted[samples_ids],
            # validation_data=(x_val, y_val),
            epochs=1,
            batch_size=BATCH_SIZE,
            verbose=0,
        )
        
        if CONFIG["progressive"]:
            samples_proba = utils.calculate_proba2(
                model, x_train_sorted, y_train_sorted, counts # , CONFIG["negative_loss"]
            )
            
    _, accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE, verbose=1)
    model_scores.append(accuracy)
    print("Mean:", np.mean(model_scores), " Median: ", np.median(model_scores))

{'negative_loss': True, 'progressive': False, 'order_type': 'proba'}


  0%|          | 0/50 [00:00<?, ?it/s]

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 109ms/step - accuracy: 0.5510 - loss: 1.6780


  2%|▏         | 1/50 [02:12<1:47:49, 132.02s/it]

Mean: 0.5594871640205383  Median:  0.5594871640205383
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.5657 - loss: 1.6814


  4%|▍         | 2/50 [03:39<1:24:27, 105.58s/it]

Mean: 0.5666666626930237  Median:  0.5666666626930237
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.5346 - loss: 1.8259


  6%|▌         | 3/50 [05:06<1:16:14, 97.33s/it] 

Mean: 0.5577777822812399  Median:  0.5594871640205383
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.5498 - loss: 1.6681


  8%|▊         | 4/50 [06:28<1:09:50, 91.09s/it]

Mean: 0.5589743554592133  Median:  0.5610256195068359
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.5431 - loss: 1.7311


 10%|█         | 5/50 [07:47<1:05:15, 87.02s/it]

Mean: 0.5588717937469483  Median:  0.5594871640205383
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5707 - loss: 1.6181


 12%|█▏        | 6/50 [09:07<1:01:52, 84.37s/it]

Mean: 0.5625641047954559  Median:  0.5610256195068359
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.5584 - loss: 1.6131


 14%|█▍        | 7/50 [10:25<59:04, 82.42s/it]  

Mean: 0.5643223438944135  Median:  0.5625640749931335
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5619 - loss: 1.6469


 16%|█▌        | 8/50 [11:44<57:00, 81.44s/it]

Mean: 0.5657692328095436  Median:  0.5682051181793213
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5385 - loss: 1.8269


 18%|█▊        | 9/50 [13:04<55:12, 80.78s/it]

Mean: 0.5634757876396179  Median:  0.5625640749931335
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.5324 - loss: 1.6863


 20%|██        | 10/50 [14:23<53:27, 80.19s/it]

Mean: 0.5616923093795776  Median:  0.5610256195068359
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.5621 - loss: 1.7351


 22%|██▏       | 11/50 [15:42<51:54, 79.87s/it]

Mean: 0.5620979016477411  Median:  0.5625640749931335
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.5674 - loss: 1.6254


 24%|██▍       | 12/50 [17:02<50:38, 79.95s/it]

Mean: 0.5629059821367264  Median:  0.5643589496612549
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.5550 - loss: 1.6363


 26%|██▌       | 13/50 [18:22<49:18, 79.95s/it]

Mean: 0.5635897425504831  Median:  0.5661538243293762
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5654 - loss: 1.6365


 28%|██▊       | 14/50 [19:44<48:19, 80.55s/it]

Mean: 0.5641391915934426  Median:  0.5687179267406464
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.5457 - loss: 1.6847


 30%|███       | 15/50 [21:04<46:51, 80.33s/it]

Mean: 0.5638290564219157  Median:  0.5661538243293762
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5454 - loss: 1.6579


 32%|███▏      | 16/50 [22:23<45:26, 80.19s/it]

Mean: 0.5633012801408768  Median:  0.5643589496612549
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.5670 - loss: 1.5736


 34%|███▍      | 17/50 [23:44<44:12, 80.39s/it]

Mean: 0.5640723950722638  Median:  0.5661538243293762
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.5593 - loss: 1.7345


 36%|███▌      | 18/50 [47:56<4:22:34, 492.31s/it]

Mean: 0.5643589728408389  Median:  0.5676923096179962
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.5578 - loss: 1.6808


 38%|███▊      | 19/50 [49:15<3:10:14, 368.20s/it]

Mean: 0.564264504533065  Median:  0.5661538243293762
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.5626 - loss: 1.6526


 40%|████      | 20/50 [50:29<2:19:58, 279.93s/it]

Mean: 0.5645897418260575  Median:  0.5676923096179962
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.5484 - loss: 1.7846


 42%|████▏     | 21/50 [51:46<1:45:49, 218.95s/it]

Mean: 0.564371182805016  Median:  0.5661538243293762
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.5568 - loss: 1.7273


 44%|████▍     | 22/50 [53:05<1:22:42, 177.22s/it]

Mean: 0.5646853121844205  Median:  0.5676923096179962


 44%|████▍     | 22/50 [53:24<1:07:58, 145.66s/it]


KeyboardInterrupt: 

In [None]:
np.mean(model_scores)

In [None]:
np.median(model_scores)

In [93]:
np.mean(model_scores)

0.5641453319787979

In [94]:
np.median(model_scores)

0.5772333145141602

In [95]:
model_scores

[0.6331333518028259,
 0.4480000138282776,
 0.574999988079071,
 0.6006666421890259,
 0.6215333342552185,
 0.541533350944519,
 0.6043333411216736,
 0.6056666374206543,
 0.5491999983787537,
 0.4354666769504547,
 0.5721333622932434,
 0.567466676235199,
 0.5573999881744385,
 0.5572666525840759,
 0.5580000281333923,
 0.6100000143051147,
 0.6237333416938782,
 0.6277999877929688,
 0.5673333406448364,
 0.5690666437149048,
 0.5794666409492493,
 0.5798666477203369,
 0.6197999715805054,
 0.5888000130653381,
 0.5439333319664001,
 0.590666651725769,
 0.5379999876022339,
 0.48526665568351746,
 0.6028666496276855,
 0.4907333254814148,
 0.5888000130653381,
 0.5167999863624573,
 0.5967333316802979,
 0.541266679763794,
 0.4643999934196472,
 0.5345333218574524,
 0.5027333498001099,
 0.6042666435241699,
 0.5983333587646484,
 0.5888000130653381,
 0.44973334670066833,
 0.6233333349227905,
 0.6090666651725769,
 0.48840001225471497,
 0.5842666625976562,
 0.46959999203681946,
 0.6227999925613403,
 0.58740001916

In [51]:
abc = [0.6394000053405762,
 0.6604999899864197,
 0.6775000095367432,
 0.6521999835968018,
 0.6334999799728394,
 0.6639999747276306,
 0.675000011920929,
 0.661899983882904,
 0.663100004196167,
 0.6538000106811523,
 0.6675000190734863,
 0.666700005531311,
 0.6355000138282776,
 0.6825000047683716,
 0.6384000182151794,
 0.661300003528595,
 0.6888999938964844,
 0.6801999807357788,
 0.6797999739646912,
 0.6589999794960022,
 0.6743999719619751,
 0.6650999784469604,
 0.6970000267028809,
 0.6700000166893005,
 0.6575999855995178,
 0.6556000113487244,
 0.6568999886512756,
 0.6488000154495239,
 0.6245999932289124,
 0.6732000112533569,
 0.6847000122070312,
 0.5996000170707703,
 0.652899980545044,
 0.6632000207901001,
 0.6603000164031982,
 0.48539999127388,
 0.6866000294685364,
 0.6546000242233276,
 0.6355999708175659,
 0.6445000171661377,
 0.6488999724388123,
 0.6886000037193298,
 0.6840999722480774,
 0.6948000192642212,
 0.6952999830245972,
 0.671999990940094,
 0.6729000210762024,
 0.6859999895095825,
 0.6485000252723694,
 0.6401000022888184]

In [16]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(10, 6))
# plt.subplot(131)
# plt.boxplot(results["test_model_1"])
# plt.ylim([0.63, 0.65])
# plt.subplot(132)
# plt.boxplot(ass_m["test_model_1"])
# plt.ylim([0.63, 0.65])
# plt.subplot(133)
# plt.boxplot(random_m["test_model_1"])
# plt.ylim([0.63, 0.65])
# plt.show()

In [17]:
import scipy.stats
scipy.stats.ttest_ind(abc * 2, model_scores * 2)

TtestResult(statistic=-12.31960666634431, pvalue=2.9282921617593514e-26, df=198.0)

In [55]:
np.mean(sorted(model_scores)[5:]), np.mean(sorted(abc)[5:])

(0.671435554822286, 0.6663088891241286)

In [58]:
(np.mean(sorted(model_scores)[10:]) - np.mean(sorted(abc)[10:]) ) * 100

0.7122500240802743