In [3]:
import numpy as np
import tensorflow as tf
from curriculum_learning.models.classifier_model import ClassifierModel
from curriculum_learning import utils
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds

In [ ]:
# TensorFlow Datasets name of the dataset to load; also embedded in the
# weight-file and result-file names used further down.
DATASET_NAME = "eurosat"

In [53]:
# Experiment configuration.
N_EPOCHS_CL = 50   # number of one-epoch curriculum fits per trial
N_TRIALS = 1       # independent repetitions of each experiment
BATCH_SIZE = 512

# Keyword arguments shared by every `model.fit` call; the two variants below
# differ only in the number of epochs.
model_fit_base_params = dict(batch_size=BATCH_SIZE, shuffle=True, verbose=0)
model_fit_1_params = {**model_fit_base_params, "epochs": 1}
model_fit_500_params = {**model_fit_base_params, "epochs": 500}

loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [78]:
# Materialise the whole "train" split in memory as (image, label) pairs.
ds_1 = tfds.load(DATASET_NAME, split="train", as_supervised=True, shuffle_files=False)
samples = list(ds_1.as_numpy_iterator())

# ds_2 = tfds.load(DATASET_NAME, split="test", as_supervised=True, shuffle_files=False)
# samples += list(ds_2.as_numpy_iterator())

# Pixel values are divided by 255; labels are stored as float32 as well.
x = np.array([image for image, _ in samples], dtype=np.float32) / 255
y = np.array([label for _, label in samples], dtype=np.float32)

2024-07-16 19:15:19.105334: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [79]:
# Stratified 70 / 15 / 15 split: hold out 30 % first, then halve the hold-out
# into test and validation sets.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=42, stratify=y_holdout
)

n_classes = np.unique(y).size
train_size = len(x_train)

# Reorder the training set so samples are grouped by label; `counts` holds the
# number of training samples per label, in ascending label order.
label_order = np.argsort(y_train)
x_train_sorted = x_train[label_order]
y_train_sorted = y_train[label_order]
counts = np.unique(y_train_sorted, return_counts=True)[1]

train_size, len(x_val), len(x_test), train_size + len(x_val) + len(x_test)

(18900, 4050, 4050, 27000)

In [80]:
# Build the classifier with one output per class and the shared architecture
# settings from `utils.MODEL_ARCHITECTURE`.
model = ClassifierModel(output_shape=n_classes, **utils.MODEL_ARCHITECTURE)
model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
# Call the model once on a single training sample before loading weights
# (presumably so its variables are created first -- TODO confirm).
model(x_train[0:1])
model.load_weights(f"../models/default_model_weights_{DATASET_NAME}.weights.h5")
# Snapshot of the loaded weights; every trial starts again from this state.
model_weights = model.get_weights()

In [81]:
def run_experiments(value_type: str, order_type: utils.OrderType, name: str):
    """Run ``N_TRIALS`` curriculum-learning trials and persist scores and weights.

    Each trial resets the global ``model`` to ``model_weights``, performs
    ``N_EPOCHS_CL`` one-epoch fits on a subset of the label-sorted training set
    whose size grows as ``tanh(4 * (i + 1) / N_EPOCHS_CL) * train_size``, then
    trains on the full training set with early stopping on ``val_loss`` and
    evaluates on the test split.

    Args:
        value_type: ``"edges"`` computes the per-sample values once, up front,
            with ``utils.calculate_values_edges``; ``"losses"`` recomputes them
            before every curriculum epoch with ``utils.calculate_values_losses``.
        order_type: Passed through to ``utils.chose_samples``.
        name: File stem for the outputs: scores go to
            ``../data/results/{name}.csv`` and the weights of the most accurate
            trial to ``../models/best/{name}.weights.h5``.

    Raises:
        ValueError: If ``value_type`` is neither ``"edges"`` nor ``"losses"``.
    """
    # Fail fast with a clear message; previously an unknown value left
    # `samples_proba` undefined and surfaced as a NameError inside the trial.
    if value_type not in ("edges", "losses"):
        raise ValueError(
            f"Unknown value_type {value_type!r}; expected 'edges' or 'losses'"
        )

    acc = []
    re_mi = []
    re_ma = []
    pr_mi = []
    pr_ma = []
    f1_mi = []
    f1_ma = []
    best_model_weights = model.get_weights()

    if value_type == "edges":
        # Edge-based values depend only on the images, so compute them once.
        samples_values = utils.calculate_values_edges(x_train_sorted, blur=True)
        samples_proba = utils.normalize_values_per_group(samples_values, counts)

    for _ in tqdm(range(N_TRIALS)):
        # Every trial starts from the same initial weights.
        model.set_weights(model_weights)

        for i in range(N_EPOCHS_CL):
            n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS_CL) * train_size)

            if value_type == "losses":
                # Loss-based values depend on the current model, so refresh them.
                samples_values = utils.calculate_values_losses(
                    model, x_train_sorted, y_train_sorted, batch_size=BATCH_SIZE
                )
                samples_proba = utils.normalize_values_per_group(samples_values, counts)

            samples_ids = utils.chose_samples(n_samples, samples_proba, order_type)

            model.fit(x_train_sorted[samples_ids], y_train_sorted[samples_ids], **model_fit_1_params)

        # Final phase: full training set, stopped early on validation loss.
        model.fit(
            x_train_sorted, y_train_sorted, validation_data=(x_val, y_val), **model_fit_500_params,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=10)]
        )

        y_pred = np.argmax(model.predict(x_test, batch_size=BATCH_SIZE, verbose=0), axis=1)
        # Appends this trial's scores to the metric lists in place.
        utils.calculate_metrics(y_test, y_pred, acc, re_mi, re_ma, pr_mi, pr_ma, f1_mi, f1_ma)
        # `acc[-1]` is already in `acc`, so this is true exactly when the
        # current trial ties or beats every previous one.
        if acc[-1] >= max(acc):
            best_model_weights = model.get_weights()

        print(f"Mean accuracy: {np.mean(acc):.4f}, Current accuracy: {acc[-1]:.4f}")

    df_scores = utils.create_df_scores(acc, re_mi, re_ma, pr_mi, pr_ma, f1_mi, f1_ma)
    df_scores.to_csv(f"../data/results/{name}.csv")

    model.set_weights(best_model_weights)
    model.save_weights(f"../models/best/{name}.weights.h5")

# Edges

In [None]:
# NOTE(review): consider moving this import into the imports cell at the top.
from sklearn import metrics
# Predicted class per test sample, using whatever weights `model` currently holds.
y_pred = np.argmax(model.predict(x_test, batch_size=BATCH_SIZE, verbose=0), axis=1)

In [75]:
# Per-class precision / recall / F1 on the test split.
# NOTE(review): the stored output lists a total support of 1950, while the split
# cell reports 4050 test samples -- the saved output looks stale; re-run it.
print(metrics.classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

         0.0    0.84831   0.77436   0.80965       195
         1.0    0.56983   0.52308   0.54545       195
         2.0    0.73460   0.79487   0.76355       195
         3.0    0.37809   0.54872   0.44770       195
         4.0    0.58511   0.56410   0.57441       195
         5.0    0.38953   0.34359   0.36512       195
         6.0    0.73826   0.56410   0.63953       195
         7.0    0.50909   0.57436   0.53976       195
         8.0    0.74299   0.81538   0.77751       195
         9.0    0.70513   0.56410   0.62678       195

    accuracy                        0.60667      1950
   macro avg    0.62009   0.60667   0.60895      1950
weighted avg    0.62009   0.60667   0.60895      1950


In [ ]:
# Recall for each class separately (average=None).
metrics.recall_score(y_test, y_pred, average=None)

In [ ]:
# Support-weighted mean of the per-class recalls.
metrics.recall_score(y_test, y_pred, average="weighted")

In [76]:
# Unweighted mean of the per-class recalls.
metrics.recall_score(y_test, y_pred, average="macro")

0.6066666666666667

In [77]:
# Unweighted mean of the per-class precisions.
metrics.precision_score(y_test, y_pred, average="macro")

0.6200942106847257

In [None]:
# Curriculum driven by edge-based sample values, with the PROBA order type.
vals = "edges"
ot = utils.OrderType.PROBA
run_experiments(vals, ot, name=f"{vals}_{ot.value}_{DATASET_NAME}")

  0%|          | 0/1 [00:00<?, ?it/s]

In [ ]:
# Curriculum driven by edge-based sample values, with the FIXED order type.
vals = "edges"
ot = utils.OrderType.FIXED
# Tag the outputs with the dataset actually loaded; the name was hard-coded to
# "stl10", which would file these results under the wrong dataset.
run_experiments(vals, ot, name=f"{vals}_{ot.value}_{DATASET_NAME}")

# Losses

In [ ]:
# Curriculum driven by loss-based sample values, with the PROBA order type.
vals = "losses"
ot = utils.OrderType.PROBA
# Tag the outputs with the dataset actually loaded; the name was hard-coded to
# "stl10", which would file these results under the wrong dataset.
run_experiments(vals, ot, name=f"{vals}_{ot.value}_{DATASET_NAME}")

In [ ]:
# Curriculum driven by loss-based sample values, with the FIXED order type.
vals = "losses"
ot = utils.OrderType.FIXED
# Tag the outputs with the dataset actually loaded; the name was hard-coded to
# "stl10", which would file these results under the wrong dataset.
run_experiments(vals, ot, name=f"{vals}_{ot.value}_{DATASET_NAME}")

# Random

In [ ]:
def run_experiments_random(name: str):
    """Run ``N_TRIALS`` baseline trials without a curriculum phase.

    Each trial resets the global ``model`` to ``model_weights``, trains on the
    full training set with early stopping on ``val_loss`` and evaluates on the
    test split.

    Args:
        name: File stem for the outputs: scores go to
            ``../data/results/{name}.csv`` and the weights of the most accurate
            trial to ``../models/best/{name}.weights.h5``.
    """
    acc = []
    re_mi = []
    re_ma = []
    pr_mi = []
    pr_ma = []
    f1_mi = []
    f1_ma = []
    best_model_weights = model.get_weights()

    for _ in tqdm(range(N_TRIALS)):
        # Every trial starts from the same initial weights.
        model.set_weights(model_weights)

        model.fit(
            x_train_sorted, y_train_sorted, validation_data=(x_val, y_val), **model_fit_500_params,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=35)]
        )

        y_pred = np.argmax(model.predict(x_test, batch_size=BATCH_SIZE, verbose=0), axis=1)
        # Appends this trial's scores to the metric lists in place.
        utils.calculate_metrics(y_test, y_pred, acc, re_mi, re_ma, pr_mi, pr_ma, f1_mi, f1_ma)
        # `acc[-1]` is itself an element of `acc`, so a strict `>` against
        # `max(acc)` could never hold and the untrained weights were always
        # saved as "best". `>=` is true when this trial ties or beats the rest.
        if acc[-1] >= max(acc):
            best_model_weights = model.get_weights()

        print(f"Mean accuracy: {np.mean(acc):.4f}, Current accuracy: {acc[-1]:.4f}")

    df_scores = utils.create_df_scores(acc, re_mi, re_ma, pr_mi, pr_ma, f1_mi, f1_ma)
    df_scores.to_csv(f"../data/results/{name}.csv")

    model.set_weights(best_model_weights)
    model.save_weights(f"../models/best/{name}.weights.h5")

In [None]:
# with open('RANDOM.txt', 'w') as f:
#     for score in model_scores_random:
#         f.write(f"{score}\n")

In [None]:
# model_scores_random

In [None]:
# my_dict = {
#     'model_scores_random': model_scores_random,
#     'model_scores_proba_edges': model_scores_proba_edges,
#     'model_scores_fixed_edges': model_scores_fixed_edges,
#     'model_scores_proba_loss': model_scores_proba_loss,
#     'model_scores_fixed_loss': model_scores_fixed_loss,
# }
# 
# fig, ax = plt.subplots(figsize=(12, 4))
# ax.boxplot(my_dict.values())
# ax.set_xticklabels(my_dict.keys())
# plt.ylim([0.725, 0.865])