In [1]:
import numpy as np
import tensorflow as tf
from curriculum_learning.models.classifier_model import ClassifierModel
from curriculum_learning import utils
import yaml
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
with open("models_hyperparameters.yaml", "r") as stream:
    models_hyperparameters = yaml.safe_load(stream)
    
with open("config_tests.yaml", "r") as stream:
    config_tests = yaml.safe_load(stream)

In [23]:
N_EPOCHS = 30
N_TRIALS = 50
BATCH_SIZE = 512

CONFIG = config_tests["progressive_proba_best"]

loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [24]:
x, y = utils.load_cifar_data("../data/cifar-10-batches-py/")
x /= 255

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=42)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42)

n_classes = len(np.unique(y))
train_size = x_train.shape[0]

In [25]:
assessment_model = ClassifierModel(output_shape=n_classes, **models_hyperparameters["assessment_model"])

assessment_model.compile(loss=loss, metrics=["accuracy"])
assessment_model(x_train[:1])
assessment_model.load_weights("../models/assessment_model.weights.h5")

In [26]:
assessment_model.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.6241 - loss: 1.0372


[1.029136061668396, 0.6295999884605408]

In [27]:
x_train_sorted = x_train[np.argsort(y_train)]
y_train_sorted = y_train[np.argsort(y_train)]
_, counts = np.unique(y_train_sorted, return_counts=True)

samples_proba = utils.calculate_proba(
    assessment_model, x_train_sorted, y_train_sorted, counts, CONFIG["negative_loss"]
)

In [28]:
model = ClassifierModel(output_shape=n_classes, **models_hyperparameters["test_model_1"])
model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
model(x_train[0:1])
### model.save_weights("../models/default_model.weights.h5")
model.load_weights("../models/default_model.weights.h5")
model_weights = model.get_weights()

In [None]:
results = {}
model_scores = []

for _ in tqdm(range(N_TRIALS)):
    model.set_weights(model_weights)
        
    for i in range(N_EPOCHS):
        n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS) * train_size)
        
        samples_ids = utils.chose_samples(n_samples, samples_proba, CONFIG["order_type"])
        model.fit(
            x_train_sorted[samples_ids],
            y_train_sorted[samples_ids],
            # validation_data=(x_val, y_val),
            epochs=1,
            batch_size=BATCH_SIZE,
            verbose=0,
        )
        
        if CONFIG["progressive"]:
            samples_proba = utils.calculate_proba(
                model, x_train_sorted, y_train_sorted, counts, CONFIG["negative_loss"]
            )
            
    _, accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE, verbose=1)
    model_scores.append(accuracy)

    results["test_model_1"] = model_scores

  0%|          | 0/50 [00:00<?, ?it/s]

In [20]:
results

{'test_model_1': [0.6618000268936157,
  0.6466000080108643,
  0.625,
  0.6243000030517578,
  0.6646000146865845,
  0.6240000128746033,
  0.6625000238418579,
  0.6847000122070312,
  0.6686999797821045,
  0.6251000165939331,
  0.6370000243186951,
  0.6363999843597412,
  0.6685000061988831,
  0.5289000272750854,
  0.6320000290870667,
  0.650600016117096,
  0.6658999919891357,
  0.6388999819755554,
  0.6471999883651733,
  0.6643999814987183,
  0.5927000045776367,
  0.6323000192642212,
  0.6427000164985657,
  0.6633999943733215,
  0.6328999996185303,
  0.6157000064849854,
  0.6482999920845032,
  0.671999990940094,
  0.6280999779701233,
  0.6459000110626221,
  0.6155999898910522,
  0.680899977684021,
  0.6488000154495239,
  0.5845999717712402,
  0.6744999885559082,
  0.5877000093460083,
  0.6427000164985657,
  0.6291000247001648,
  0.6870999932289124,
  0.5861999988555908,
  0.5940999984741211,
  0.6732000112533569,
  0.5968000292778015,
  0.646399974822998,
  0.6489999890327454,
  0.6701999

In [21]:
sum(results["test_model_1"]) / N_TRIALS

0.6370920014381408

In [11]:
sum(results["test_model_1"]) / N_TRIALS

0.6341319984197616

In [12]:
# import scipy.stats
# scipy.stats.ttest_ind(abc, results["test_model_1"])

In [22]:
abc = [0.6812000274658203,
  0.6647999882698059,
  0.6518999934196472,
  0.6837000250816345,
  0.6197999715805054,
  0.6370000243186951,
  0.6348999738693237,
  0.5795999765396118,
  0.597100019454956,
  0.6399000287055969,
  0.6011999845504761,
  0.6022999882698059,
  0.6406999826431274,
  0.645799994468689,
  0.6207000017166138,
  0.6618000268936157,
  0.5845999717712402,
  0.5770999789237976,
  0.6766999959945679,
  0.6568999886512756,
  0.6427000164985657,
  0.609499990940094,
  0.6531000137329102,
  0.6708999872207642,
  0.6642000079154968,
  0.6219000220298767,
  0.6248999834060669,
  0.6446999907493591,
  0.6068999767303467,
  0.6470999717712402,
  0.6611999869346619,
  0.6348000168800354,
  0.6656000018119812,
  0.6819999814033508,
  0.6370000243186951,
  0.6762999892234802,
  0.600600004196167,
  0.6376000046730042,
  0.4887000024318695,
  0.5899999737739563,
  0.663100004196167,
  0.65420001745224,
  0.6169000267982483,
  0.599399983882904,
  0.6161999702453613,
  0.6198999881744385,
  0.6546000242233276,
  0.6581000089645386,
  0.656499981880188,
  0.6503000259399414]

sum(abc) / 50

0.6341319984197616