In [115]:
import numpy as np
import tensorflow as tf
from curriculum_learning.models.classifier_model import ClassifierModel
from curriculum_learning import utils
import yaml
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import pathlib

In [116]:
def load_class_data(filepath):
    class_data = []
    
    for img_path in pathlib.Path(filepath).iterdir():
        img = np.array(Image.open(img_path))
        if img.shape == (150, 150, 3):
            class_data.append(img)
     
    return np.array(class_data)

In [117]:
buildings = load_class_data("../data/data2/seg_train/seg_train/buildings")
street = load_class_data("../data/data2/seg_train/seg_train/street")
forest = load_class_data("../data/data2/seg_train/seg_train/forest")
glacier = load_class_data("../data/data2/seg_train/seg_train/glacier")
sea = load_class_data("../data/data2/seg_train/seg_train/sea")
mountain = load_class_data("../data/data2/seg_train/seg_train/mountain")

In [118]:
x = np.concatenate([buildings, street, forest, glacier, sea, mountain])
y = np.concatenate(
    [
        [0] * buildings.shape[0],
        [1] * street.shape[0],
        [2] * forest.shape[0],
        [3] * glacier.shape[0],
        [4] * sea.shape[0],
        [5] * mountain.shape[0],
    ]
)

In [119]:
N_EPOCHS = 10
N_TRIALS = 3
BATCH_SIZE = 128

loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [120]:
with open("models_hyperparameters.yaml", "r") as stream:
    models_hyperparameters = yaml.safe_load(stream)
    
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42)

train_size = x_train.shape[0]

x_train = x_train / 255
x_val = x_val / 255
x_test = x_test / 255

In [122]:
assessment_model = ClassifierModel(output_shape=10, **models_hyperparameters["assessment_model"])

assessment_model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
        
assessment_model.fit(x_train, y_train, epochs=10, batch_size=BATCH_SIZE)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x29c8c4c40>

In [108]:
y_pred_assessment = assessment_model.predict(x_train, verbose=0)

losses_assessment = np.array(utils.calculate_loss_per_sample(y_train, y_pred_assessment, loss))
losses_proba = np.exp(losses_assessment) / sum(np.exp(losses_assessment))

In [109]:
results = {}

# test_models = ["test_model_1", "test_model_2", "test_model_3"]
test_models = ["test_model_1"]

In [110]:
for test_model in test_models:
    print(test_model)
    model_scores = []

    for _ in tqdm(range(N_TRIALS)):
        model = ClassifierModel(output_shape=10, **models_hyperparameters[test_model])
        
        model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

        for i in range(N_EPOCHS):
            n_samples = int(np.tanh(4 * (i + 1) / N_EPOCHS) * train_size)
            samples_ids = np.random.choice(range(train_size), p=losses_proba, size=n_samples, replace=False)
            
            model.fit(
                x_train[samples_ids], y_train[samples_ids], 
                validation_data=(x_val, y_val),
                epochs=1, batch_size=BATCH_SIZE, verbose=1
            )
        
        _, accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE, verbose=1)
        
        model_scores.append(accuracy)
    
    results[test_model] = model_scores

test_model_1


  0%|          | 0/3 [00:00<?, ?it/s]



 33%|███▎      | 1/3 [02:43<05:26, 163.14s/it]



 67%|██████▋   | 2/3 [05:24<02:42, 162.20s/it]



100%|██████████| 3/3 [08:16<00:00, 165.34s/it]


In [113]:
results

{'test_model_1': [0.704081654548645, 0.5102040767669678, 0.5072886347770691]}

In [114]:
sum(results["test_model_1"]) / 3

0.5738581220308939

In [None]:
{'test_model_1': [0.5553935766220093, 0.5058308839797974, 0.508746325969696]}
0.5233235955238342