In [None]:
import os

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from curriculum_learning import utils
from curriculum_learning.models.classifier_model import ClassifierModel

In [None]:
# --- Experiment configuration -------------------------------------------------
# Dataset identifier passed to tfds.load; also embedded in weight/result file names.
DATASET_NAME = "eurosat"

N_EPOCHS_CL = 50  # single-epoch curriculum steps per trial
N_TRIALS = 50     # independent repetitions of every experiment
BATCH_SIZE = 512

# Keyword arguments shared by every model.fit call; the two variants below
# differ only in the number of epochs.
model_fit_base_params = dict(batch_size=BATCH_SIZE, shuffle=True, verbose=0)
model_fit_1_params = {**model_fit_base_params, "epochs": 1}
model_fit_500_params = {**model_fit_base_params, "epochs": 500}

# Sparse categorical cross-entropy built with Keras' default arguments;
# this object is the one handed to model.compile(loss=loss).
loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [None]:
# Materialise the whole TFDS "train" split in memory as (image, label) pairs.
# Only this split is read here; the train/val/test partition is created
# afterwards with train_test_split.
ds_1 = tfds.load(DATASET_NAME, split="train", as_supervised=True, shuffle_files=False)
samples = list(ds_1.as_numpy_iterator())

# Images are scaled by 1/255; labels are stored as float32 as well.
x = np.array([image for image, _ in samples], dtype=np.float32) / 255
y = np.array([label for _, label in samples], dtype=np.float32)

# The raw pairs are no longer needed once the arrays exist.
del samples

In [None]:
# Stratified split: 70% train, then the remaining 30% is halved into test and
# validation. Both splits use a fixed seed so the partition is reproducible.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=42, stratify=y_holdout
)
# Release the intermediate 30% split; only its two halves are used from here on.
del x_holdout, y_holdout

n_classes = np.unique(y).size
train_size = len(x_train)

# Training data ordered by label, so samples of one class are contiguous;
# `counts` holds the number of training samples for each class in that order.
class_order = np.argsort(y_train)
x_train_sorted = x_train[class_order]
y_train_sorted = y_train[class_order]
counts = np.unique(y_train_sorted, return_counts=True)[1]

# Sanity display: sizes of the three partitions and their total.
train_size, len(x_val), len(x_test), train_size + len(x_val) + len(x_test)

In [None]:
# Build and compile the classifier with one output per class.
model = ClassifierModel(output_shape=n_classes, **utils.MODEL_ARCHITECTURE)
model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])

# Run one forward pass on a single training sample before loading weights
# (presumably so the model's variables exist — confirm against ClassifierModel).
model(x_train[:1])

# Every trial restarts from these stored default weights.
default_weights_path = f"../models/default_model_weights_{DATASET_NAME}.weights.h5"
model.load_weights(default_weights_path)
model_weights = model.get_weights()

In [None]:
def run_experiments(value_type: str, order_type: utils.OrderType, name: str):
    """Run ``N_TRIALS`` curriculum-learning trials and persist scores and best weights.

    Every trial resets the global ``model`` to ``model_weights``, performs
    ``N_EPOCHS_CL`` single-epoch fits on a growing subset of the class-sorted
    training data, then trains on the full training set with early stopping on
    the validation loss and evaluates on the test split.

    Args:
        value_type: ``"edges"`` (sample values computed once from the images
            before the trials start) or ``"losses"`` (sample values recomputed
            from the current model before every curriculum epoch).
        order_type: Selection strategy forwarded to ``utils.chose_samples``.
        name: Base name of the outputs ``../data/results/{name}.csv`` and
            ``../models/best/{name}.weights.h5``.

    Raises:
        ValueError: If ``value_type`` is neither ``"edges"`` nor ``"losses"``.
    """
    # Fail fast: without this check an unknown value_type would only surface as
    # an unbound `samples_proba` inside the training loop.
    if value_type not in ("edges", "losses"):
        raise ValueError(
            f"Unsupported value_type {value_type!r}; expected 'edges' or 'losses'."
        )

    results_path = f"../data/results/{name}.csv"
    weights_path = f"../models/best/{name}.weights.h5"
    # Create the output folders up front so a missing directory cannot discard
    # the results after all trials have finished.
    for out_path in (results_path, weights_path):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    best_model_weights = model.get_weights()
    best_acc = float("-inf")

    # Per-trial metric histories; utils.calculate_metrics is expected to append
    # one entry to each list per call.
    acc = []
    re_ma, re_wg = [], []
    pr_ma, pr_wg = [], []
    f1_ma, f1_wg = [], []

    samples_proba = None
    if value_type == "edges":
        # Edge-based values depend only on the images, so compute them once.
        samples_values = utils.calculate_values_edges(x_train_sorted, blur=True)
        samples_proba = utils.normalize_values_per_group(samples_values, counts)

    early_stopping_params = dict(
        monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=10
    )

    for _ in tqdm(range(N_TRIALS)):
        model.set_weights(model_weights)

        for i in range(N_EPOCHS_CL):
            # tanh pacing: the subset grows quickly at first and saturates just
            # below the full training set (tanh(4) ~ 0.9993) at the last epoch.
            n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS_CL) * train_size)

            if value_type == "losses":
                # Loss-based values follow the model, so refresh them each epoch.
                samples_values = utils.calculate_values_losses(
                    model, x_train_sorted, y_train_sorted, batch_size=BATCH_SIZE
                )
                samples_proba = utils.normalize_values_per_group(samples_values, counts)

            samples_ids = utils.chose_samples(n_samples, samples_proba, order_type)

            model.fit(x_train_sorted[samples_ids], y_train_sorted[samples_ids], **model_fit_1_params)

        # Final phase on the full training set; a fresh callback per trial.
        model.fit(
            x_train_sorted, y_train_sorted, validation_data=(x_val, y_val),
            callbacks=[tf.keras.callbacks.EarlyStopping(**early_stopping_params)],
            **model_fit_500_params,
        )

        y_pred = np.argmax(model.predict(x_test, batch_size=BATCH_SIZE, verbose=0), axis=1)
        utils.calculate_metrics(y_test, y_pred, acc, re_ma, re_wg, pr_ma, pr_wg, f1_ma, f1_wg)

        # Keep the weights of the best trial so far (ties go to the latest one).
        if acc[-1] >= best_acc:
            best_acc = acc[-1]
            best_model_weights = model.get_weights()

        print(f"Mean accuracy: {np.mean(acc):.4f}, Current accuracy: {acc[-1]:.4f}")

    df_scores = utils.create_df_scores(acc, re_ma, re_wg, pr_ma, pr_wg, f1_ma, f1_wg)
    df_scores.to_csv(results_path)

    model.set_weights(best_model_weights)
    model.save_weights(weights_path)

# Edges

In [None]:
# Curriculum run: "edges" sample values with OrderType.PROBA selection.
vals, ot = "edges", utils.OrderType.PROBA
experiment_name = f"{vals}_{ot.value}_{DATASET_NAME}"
run_experiments(vals, ot, name=experiment_name)

In [None]:
# Curriculum run: "edges" sample values with OrderType.FIXED selection.
vals, ot = "edges", utils.OrderType.FIXED
experiment_name = f"{vals}_{ot.value}_{DATASET_NAME}"
run_experiments(vals, ot, name=experiment_name)

# Losses

In [None]:
# Reload the best weights saved by the edges/fixed experiment. The dataset part
# of the file name follows DATASET_NAME so this stays in sync with the name
# used when the weights were saved.
model.load_weights(f"../models/best/edges_fixed_{DATASET_NAME}.weights.h5")

In [None]:
# Evaluate the current model on the test split; as the cell's last expression
# the returned result is displayed as the cell output.
model.evaluate(x_test, y_test)

In [ ]:
# Curriculum run: "losses" sample values with OrderType.PROBA selection.
vals, ot = "losses", utils.OrderType.PROBA
experiment_name = f"{vals}_{ot.value}_{DATASET_NAME}"
run_experiments(vals, ot, name=experiment_name)

In [ ]:
# Curriculum run: "losses" sample values with OrderType.FIXED selection.
vals, ot = "losses", utils.OrderType.FIXED
experiment_name = f"{vals}_{ot.value}_{DATASET_NAME}"
run_experiments(vals, ot, name=experiment_name)

# Random

In [None]:
def run_experiments_random(name: str):
    """Run ``N_TRIALS`` baseline trials without a curriculum phase.

    Every trial resets the global ``model`` to ``model_weights``, trains on the
    full training set (``shuffle=True`` comes from the shared fit parameters)
    with early stopping on the validation loss, and evaluates on the test split.
    The per-trial scores and the weights of the most accurate trial are saved.

    Args:
        name: Base name of the outputs ``../data/results/{name}.csv`` and
            ``../models/best/{name}.weights.h5``.
    """
    results_path = f"../data/results/{name}.csv"
    weights_path = f"../models/best/{name}.weights.h5"
    # Create the output folders up front so a missing directory cannot discard
    # the results after all trials have finished.
    for out_path in (results_path, weights_path):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    best_model_weights = model.get_weights()
    best_acc = float("-inf")

    # Per-trial metric histories; utils.calculate_metrics is expected to append
    # one entry to each list per call.
    acc = []
    re_ma, re_wg = [], []
    pr_ma, pr_wg = [], []
    f1_ma, f1_wg = [], []

    early_stopping_params = dict(
        monitor='val_loss', restore_best_weights=True, patience=5, start_from_epoch=35
    )

    for _ in tqdm(range(N_TRIALS)):
        model.set_weights(model_weights)

        model.fit(
            x_train_sorted, y_train_sorted, validation_data=(x_val, y_val),
            callbacks=[tf.keras.callbacks.EarlyStopping(**early_stopping_params)],
            **model_fit_500_params,
        )

        y_pred = np.argmax(model.predict(x_test, batch_size=BATCH_SIZE, verbose=0), axis=1)
        utils.calculate_metrics(y_test, y_pred, acc, re_ma, re_wg, pr_ma, pr_wg, f1_ma, f1_wg)

        # Compare against the best score seen so far. The newest score is
        # already part of `acc`, so a strict `acc[-1] > max(acc)` test could
        # never succeed and the best weights would never be captured.
        # Ties go to the latest trial, matching run_experiments.
        if acc[-1] >= best_acc:
            best_acc = acc[-1]
            best_model_weights = model.get_weights()

        print(f"Mean accuracy: {np.mean(acc):.4f}, Current accuracy: {acc[-1]:.4f}")

    df_scores = utils.create_df_scores(acc, re_ma, re_wg, pr_ma, pr_wg, f1_ma, f1_wg)
    df_scores.to_csv(results_path)

    model.set_weights(best_model_weights)
    model.save_weights(weights_path)

In [None]:
# Baseline run without curriculum; outputs are named after the dataset.
baseline_name = f"random_{DATASET_NAME}"
run_experiments_random(name=baseline_name)