In [1]:
import numpy as np
import tensorflow as tf
from curriculum_learning.models.classifier_model import ClassifierModel
from curriculum_learning import utils
import yaml
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats

In [2]:
def _load_yaml(path):
    """Parse the YAML file at ``path`` with the safe loader and return the result.

    The file is decoded as UTF-8 explicitly so that parsing does not depend on
    the platform's default locale encoding.
    """
    with open(path, "r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)


# Keyword arguments for ClassifierModel, keyed by model name
# (this notebook reads "assessment_model" and "test_model_1").
models_hyperparameters = _load_yaml("models_hyperparameters.yaml")

# Experiment settings keyed by test name; the entry selected by this notebook
# is read for "negative_loss", "order_type" and "progressive".
config_tests = _load_yaml("config_tests.yaml")

In [3]:
N_EPOCHS = 25  # curriculum epochs run for every trial
N_TRIALS = 50  # independent trainings per test model
BATCH_SIZE = 128
SEED = 42

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, batch shuffling and np.random.choice sampling are repeatable
# after Restart & Run All. (Bitwise-identical results may still require
# deterministic ops on some hardware.)
tf.keras.utils.set_random_seed(SEED)

# Settings of the experiment variant executed by this notebook.
CONFIG = config_tests["random"]

# Shared loss object; labels are passed as integer class ids.
loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [5]:
# Directory holding the CIFAR batch files, relative to this notebook.
CIFAR_DIR = "../data/cifar-10-batches-py/"

# `x` is used below as the model input and `y` as the integer class labels.
x, y = utils.load_cifar_data(CIFAR_DIR)

In [6]:
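# NOTE: disabled alternative data source, kept for reference only.
# `load_class_data` is neither imported nor defined in this notebook, so it
# has to be provided before these lines can be re-enabled.
# filepath_prefix = "../data/data2/seg_train/seg_train/"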
#
# buildings = load_class_data(f"{filepath_prefix}buildings")
# street = load_class_data(f"{filepath_prefix}street")
# forest = load_class_data(f"{filepath_prefix}forest")
# glacier = load_class_data(f"{filepath_prefix}glacier")
# sea = load_class_data(f"{filepath_prefix}sea")
# mountain = load_class_data(f"{filepath_prefix}mountain")

In [7]:
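# Disabled companion to the loader cell above: stacks the six per-class arrays
# into `x` and labels them 0-5 in the order they are listed.
# x = np.concatenate([buildings, street, forest, glacier, sea, mountain])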
# y = np.concatenate(
#     [
#         [0] * buildings.shape[0],
#         [1] * street.shape[0],
#         [2] * forest.shape[0],
#         [3] * glacier.shape[0],
#         [4] * sea.shape[0],
#         [5] * mountain.shape[0],
#     ]
# )

In [8]:
# Scale into a new array instead of dividing `x` in place: re-running this cell
# then no longer divides the already-scaled data by 255 a second time.
# (Assumes raw pixel values are in 0-255, giving inputs in [0, 1].)
x_scaled = x / 255

# 80 % of the data is used for training; the remaining 20 % is halved into
# test and validation sets. Fixed random_state keeps the partition repeatable.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    x_scaled, y, test_size=0.2, random_state=42
)
x_test, x_val, y_test, y_val = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=42
)

n_classes = len(np.unique(y))   # number of distinct labels in the whole dataset
train_size = x_train.shape[0]   # number of training samples

In [9]:
# Build the assessment classifier from its YAML hyperparameters. Its
# predictions are used afterwards to derive the initial sampling
# probabilities of the training samples.
assessment_hparams = models_hyperparameters["assessment_model"]
assessment_model = ClassifierModel(output_shape=n_classes, **assessment_hparams)

assessment_model.compile(
    optimizer="adam",
    loss=loss,
    metrics=["accuracy"],
)

# Train on the complete training split; the returned History object is the
# cell's displayed value.
assessment_model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=25,
)

Epoch 1/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.1913 - loss: 2.2716
Epoch 2/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.3746 - loss: 1.6959
Epoch 3/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - accuracy: 0.4243 - loss: 1.5567
Epoch 4/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.4642 - loss: 1.4694
Epoch 5/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.4825 - loss: 1.4190
Epoch 6/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.5085 - loss: 1.3556
Epoch 7/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.5216 - loss: 1.3243
Epoch 8/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.5346 - loss: 1.2947
Epoch 9/25
[1m313/313[0m [32m

<keras.src.callbacks.history.History at 0x315c61130>

In [10]:
# Order the training split by class label. A single index array drives both
# selections so that images and labels stay aligned.
label_order = np.argsort(y_train)
x_train_sorted = x_train[label_order]
y_train_sorted = y_train[label_order]

# Number of training samples per class, in ascending label order.
counts = np.unique(y_train_sorted, return_counts=True)[1]

# Per-sample values derived from the assessment model; consumed by the
# sample-selection step of the training loop.
samples_proba = utils.calculate_proba(
    assessment_model,
    x_train_sorted,
    y_train_sorted,
    counts,
    CONFIG["negative_loss"],
)

In [None]:
# Maps each test-model name to the list of test accuracies of its trials.
results = {}
# test_models = ["test_model_1", "test_model_2", "test_model_3"]
test_models = ["test_model_1"]


for test_model in test_models:
    print(test_model)
    # Register the score list up front so that the accuracies of finished
    # trials remain available in `results` if the run is interrupted.
    model_scores = results[test_model] = []

    for _ in tqdm(range(N_TRIALS)):
        model = ClassifierModel(output_shape=n_classes, **models_hyperparameters[test_model])
        model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])

        # Every trial starts from the assessment model's probabilities. A
        # per-trial name is used so progressive updates neither leak into the
        # next trial nor overwrite the global `samples_proba` (which would make
        # re-running this cell start from a different state).
        trial_proba = samples_proba

        for i in range(N_EPOCHS):
            # Training-subset size grows along a tanh schedule:
            # tanh(4 * epoch / N_EPOCHS) of the training set, epoch = 1..N_EPOCHS.
            n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS) * train_size)

            # NOTE(review): the return value of chose_samples is discarded and
            # the indices actually used are drawn uniformly on the next line.
            # Kept as-is to preserve current behaviour; confirm whether
            # samples_ids should come from chose_samples instead.
            utils.chose_samples(n_samples, trial_proba, CONFIG["order_type"])
            samples_ids = np.random.choice(train_size, size=n_samples, replace=False)

            # One pass over the selected subset per curriculum epoch.
            model.fit(
                x_train_sorted[samples_ids],
                y_train_sorted[samples_ids],
                validation_data=(x_val, y_val),
                epochs=1,
                batch_size=BATCH_SIZE,
                verbose=0,
            )

            if CONFIG["progressive"]:
                # Refresh the probabilities from the model being trained.
                trial_proba = utils.calculate_proba(
                    model, x_train_sorted, y_train_sorted, counts, CONFIG["negative_loss"]
                )

        _, accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE, verbose=1)

        model_scores.append(accuracy)

test_model_1


  0%|          | 0/50 [00:00<?, ?it/s]

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6405 - loss: 1.0358


  2%|▏         | 1/50 [04:19<3:31:44, 259.27s/it]

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6368 - loss: 1.0522


  4%|▍         | 2/50 [08:23<3:20:21, 250.46s/it]

In [None]:
# Display the collected test accuracies: one list per test model, one entry per trial.
results

In [None]:
# Mean test accuracy of test_model_1. Divide by the number of scores actually
# recorded rather than by N_TRIALS, so the average stays correct when the two
# differ (e.g. N_TRIALS was edited after the trials ran).
test_model_1_scores = results["test_model_1"]
sum(test_model_1_scores) / len(test_model_1_scores)