In [None]:
from abc import ABC, abstractmethod, abstractstaticmethod

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import (
    Add,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    Input,
    Layer,
    LayerNormalization,
    MaxPool2D,
    MultiHeadAttention,
    Normalization,
    RandomContrast,
    RandomFlip,
    RandomRotation, # zamula
    RandomTranslation,
    RandomZoom,
    Resizing
)
import keras_tuner as kt
import tensorflow_addons as tfa

In [None]:
# Report whether TensorFlow can see at least one GPU device.
bool(tf.config.list_physical_devices('GPU'))

In [None]:
# project:

# np.random.seed(5)  # uncomment for reproducible results

In [None]:
# Load CIFAR-10, divide the pixel values by 255 and hold out 20% of the
# training images as a validation set.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train, x_validation, y_train, y_validation = train_test_split(
    x_train, y_train, train_size=0.8
)

In [None]:
def plot_accuracy(history, model_name):
    """Plot training and validation accuracy per epoch.

    Args:
        history: object whose ``history`` mapping holds the ``'accuracy'``
            and ``'val_accuracy'`` series (e.g. the value returned by ``fit``).
        model_name: label shown in the plot title.
    """
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    # One title call only: a second plt.title() replaces the first, which is
    # how the metric name used to get lost.
    plt.title(f'{model_name} - model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

def plot_loss(history, model_name):
    """Plot training and validation loss per epoch.

    Args:
        history: object whose ``history`` mapping holds the ``'loss'`` and
            ``'val_loss'`` series (e.g. the value returned by ``fit``).
        model_name: label shown in the plot title.
    """
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    # One title call only: a second plt.title() replaces the first, which is
    # how the metric name used to get lost.
    plt.title(f'{model_name} - model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()
def plot_history(history, model_name):
    """Draw the accuracy figure, then the loss figure, for one training run."""
    for draw_curve in (plot_accuracy, plot_loss):
        draw_curve(history, model_name)

In [None]:
# CIFAR-10 class names listed in label order: position in the tuple == class id.
cifar_id_to_names = dict(enumerate((
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)))

# Reverse lookup (class name -> class id), derived from the mapping above.
cifar_names_to_id = {
    class_name: class_id for class_id, class_name in cifar_id_to_names.items()
}

In [None]:
class HPConfiguration(ABC):
    """Interface for models that can be hyperparameter-tuned.

    Every hook is a static method, so it can be called on the class itself
    (e.g. ``ModelClass.get_tuner()``) without building a model first.
    """

    # ``abc.abstractstaticmethod`` is deprecated; stacking ``@staticmethod`` on
    # ``@abstractmethod`` is the documented replacement.
    @staticmethod
    @abstractmethod
    def build_hp_model(hp):
        """Build and return a model from the hyperparameter container ``hp``."""

    @staticmethod
    @abstractmethod
    def get_tuner():
        """Return the tuner used to search this model's hyperparameters."""

    @staticmethod
    @abstractmethod
    def get_callbacks():
        """Return the callbacks used when training the final model."""

    @staticmethod
    @abstractmethod
    def get_tuner_callbacks():
        """Return the callbacks used during the hyperparameter search."""


In [None]:
# neural networks

class SuperSimpleConvModel(keras.Model, HPConfiguration):
    """Smallest baseline: conv -> max-pool -> dense -> 10-way softmax.

    Args:
        n_filters: number of filters in the convolution.
        dense_units: number of units in the hidden dense layer.
        **kwargs: forwarded to ``keras.Model`` (for example ``name``).
    """

    def __init__(self,
                 n_filters,
                 dense_units,
                 **kwargs):
        # Unpack kwargs: passing the dict positionally meant options such as
        # ``name`` never reached the Keras base class.
        super().__init__(**kwargs)

        self.conv2D_1 = Conv2D(filters=n_filters,kernel_size=(4,4),input_shape=(32,32,3),activation='relu')
        self.max_pool2D_1 = MaxPool2D(pool_size=(2,2))
        self.flatten = Flatten()
        self.dense_1 = Dense(dense_units,activation='relu')
        self.dense_2 = Dense(10,activation='softmax')

    def call(self, inputs, training=True):
        """Return the class probabilities for a batch of images.

        ``training`` is accepted for API compatibility; none of the layers
        used here take it.
        """
        x = self.conv2D_1(inputs)
        x = self.max_pool2D_1(x)

        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        return x

    # The hooks below are static so they work both on the class (as the
    # notebook calls them) and on an instance.
    @staticmethod
    def build_hp_model(hp):
        """Build and compile a model from the hyperparameters sampled in ``hp``."""
        n_filters = hp.Int("n_filters", min_value=4, max_value=32, step=2, sampling="log")
        dense_units = hp.Int("dense_units", min_value=16, max_value=256, step=2, sampling="log")
        model = SuperSimpleConvModel(
            n_filters=n_filters, dense_units=dense_units
        )
        model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
        return model

    @staticmethod
    def get_tuner():
        """Return the Hyperband tuner stored under ``tuner/SuperSimpleConvModel``."""
        return kt.Hyperband(SuperSimpleConvModel.build_hp_model,
                            objective='val_accuracy',
                            #overwrite=True,
                            max_epochs=50,
                            factor=3,
                            directory='tuner/SuperSimpleConvModel',
                            project_name='model'
                           )

    @staticmethod
    def get_callbacks():
        """Return the callbacks for the final training run (early stopping only)."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
            #tf.keras.callbacks.ModelCheckpoint(filepath="models/SuperSimpleConvModel", save_best_only=True)
        ]

    @staticmethod
    def get_tuner_callbacks():
        """Return the callbacks used for every trial of the search."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
        ]

    @staticmethod
    def get_best_model():
        """Build a compiled model from the best hyperparameters the tuner has stored."""
        tuner = SuperSimpleConvModel.get_tuner()
        best_parameters = tuner.get_best_hyperparameters(num_trials=1)[0]
        return SuperSimpleConvModel.build_hp_model(best_parameters)
    
    
####################
class SimpleConvModel(keras.Model, HPConfiguration):
    """Two conv/max-pool stages followed by a dense layer and a 10-way softmax.

    Args:
        n_filters_1: number of filters in the first convolution.
        n_filters_2: number of filters in the second convolution.
        dense_units_1: number of units in the hidden dense layer.
        **kwargs: forwarded to ``keras.Model`` (for example ``name``).
    """

    def __init__(self,
                 n_filters_1,
                 n_filters_2,
                 dense_units_1,
                 **kwargs):
        # Unpack kwargs: passing the dict positionally meant options such as
        # ``name`` never reached the Keras base class.
        super().__init__(**kwargs)

        self.conv2D_1 = Conv2D(filters=n_filters_1,kernel_size=(4,4),input_shape=(32,32,3),activation='relu')
        self.max_pool2D_1 = MaxPool2D(pool_size=(2,2))
        # No input_shape here: this layer receives the pooled feature map,
        # not a 32x32x3 image.
        self.conv2D_2 = Conv2D(filters=n_filters_2,kernel_size=(4,4),activation='relu')
        self.max_pool2D_2 = MaxPool2D(pool_size=(2,2))
        self.flatten = Flatten()
        self.dense_1 = Dense(dense_units_1,activation='relu')
        self.dense_2 = Dense(10,activation='softmax')

    def call(self, inputs, training=True):
        """Return the class probabilities for a batch of images.

        ``training`` is accepted for API compatibility; none of the layers
        used here take it.
        """
        x = self.conv2D_1(inputs)
        x = self.max_pool2D_1(x)
        x = self.conv2D_2(x)
        x = self.max_pool2D_2(x)

        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        return x

    # The hooks below are static so they work both on the class (as the
    # notebook calls them) and on an instance.
    @staticmethod
    def build_hp_model(hp):
        """Build and compile a model from the hyperparameters sampled in ``hp``."""
        n_filters_1 = hp.Int("n_filters_1", min_value=4, max_value=32, step=2, sampling="log")
        n_filters_2 = hp.Int("n_filters_2", min_value=2, max_value=32, step=2, sampling="log")
        dense_units_1 = hp.Int("dense_units_1", min_value=16, max_value=256, step=2, sampling="log")
        model = SimpleConvModel(
            n_filters_1=n_filters_1, n_filters_2=n_filters_2, dense_units_1=dense_units_1
        )
        model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
        return model

    @staticmethod
    def get_tuner():
        """Return the Hyperband tuner stored under ``tuner/SimpleConvModel``."""
        return kt.Hyperband(SimpleConvModel.build_hp_model,
                            objective='val_accuracy',
                            #overwrite=True,
                            max_epochs=50,
                            factor=3,
                            directory='tuner/SimpleConvModel',
                            project_name='model'
                           )

    @staticmethod
    def get_callbacks():
        """Return the callbacks for the final training run (early stopping only)."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
            #tf.keras.callbacks.ModelCheckpoint(filepath="models/SimpleConvModel", save_best_only=True)
        ]

    @staticmethod
    def get_tuner_callbacks():
        """Return the callbacks used for every trial of the search."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
        ]
###########################
class SimpleConvDropoutModel(keras.Model, HPConfiguration):
    """``SimpleConvModel`` layout with dropout (rate 0.25) after the second pooling stage.

    Args:
        n_filters_1: number of filters in the first convolution.
        n_filters_2: number of filters in the second convolution.
        dense_units_1: number of units in the hidden dense layer.
        **kwargs: forwarded to ``keras.Model`` (for example ``name``).
    """

    def __init__(self,
                 n_filters_1,
                 n_filters_2,
                 dense_units_1,
                 **kwargs):
        # Unpack kwargs: passing the dict positionally meant options such as
        # ``name`` never reached the Keras base class.
        super().__init__(**kwargs)

        self.conv2D_1 = Conv2D(filters=n_filters_1,kernel_size=(4,4),input_shape=(32,32,3),activation='relu')
        self.max_pool2D_1 = MaxPool2D(pool_size=(2,2))
        # No input_shape here: this layer receives the pooled feature map,
        # not a 32x32x3 image.
        self.conv2D_2 = Conv2D(filters=n_filters_2,kernel_size=(4,4),activation='relu')
        self.max_pool2D_2 = MaxPool2D(pool_size=(2,2))
        self.flatten = Flatten()
        self.dense_1 = Dense(dense_units_1,activation='relu')
        self.dense_2 = Dense(10,activation='softmax')

        self.dropout = Dropout(0.25)

    def call(self, inputs, training=True):
        """Return the class probabilities for a batch of images.

        Args:
            inputs: batch of input images.
            training: forwarded to the dropout layer, which is the identity
                when this is ``False``.
        """
        x = self.conv2D_1(inputs)
        x = self.max_pool2D_1(x)
        x = self.conv2D_2(x)
        x = self.max_pool2D_2(x)
        # Let the layer decide from the flag instead of branching in Python.
        x = self.dropout(x, training=training)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        return x

    # The hooks below are static so they work both on the class (as the
    # notebook calls them) and on an instance.
    @staticmethod
    def build_hp_model(hp):
        """Build and compile a model from the hyperparameters sampled in ``hp``."""
        n_filters_1 = hp.Int("n_filters_1", min_value=4, max_value=32, step=2, sampling="log")
        n_filters_2 = hp.Int("n_filters_2", min_value=2, max_value=32, step=2, sampling="log")
        dense_units_1 = hp.Int("dense_units_1", min_value=16, max_value=256, step=2, sampling="log")
        model = SimpleConvDropoutModel(
            n_filters_1=n_filters_1, n_filters_2=n_filters_2, dense_units_1=dense_units_1
        )
        model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
        return model

    @staticmethod
    def get_tuner():
        """Return the Hyperband tuner stored under ``tuner/SimpleConvDropoutModel``."""
        return kt.Hyperband(SimpleConvDropoutModel.build_hp_model,
                            objective='val_accuracy',
                            #overwrite=True,
                            max_epochs=50,
                            factor=3,
                            directory='tuner/SimpleConvDropoutModel',
                            project_name='model'
                           )

    @staticmethod
    def get_callbacks():
        """Return early stopping plus a best-only checkpoint under ``models/SimpleConvDropoutModel``."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
            tf.keras.callbacks.ModelCheckpoint(filepath="models/SimpleConvDropoutModel", save_best_only=True)
        ]

    @staticmethod
    def get_tuner_callbacks():
        """Return the callbacks used for every trial of the search."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
        ]
    
#######################
class SimpleConvDropoutAndBatchnormModel(keras.Model, HPConfiguration):
    """Two conv -> batch-norm -> max-pool stages, dropout (0.25), dense, 10-way softmax.

    Args:
        n_filters_1: number of filters in the first convolution.
        n_filters_2: number of filters in the second convolution.
        dense_units_1: number of units in the hidden dense layer.
        **kwargs: forwarded to ``keras.Model`` (for example ``name``).
    """

    def __init__(self,
                 n_filters_1,
                 n_filters_2,
                 dense_units_1,
                 **kwargs):
        # Unpack kwargs: passing the dict positionally meant options such as
        # ``name`` never reached the Keras base class.
        super().__init__(**kwargs)

        self.conv2D_1 = Conv2D(filters=n_filters_1,kernel_size=(4,4),input_shape=(32,32,3),activation='relu')
        self.max_pool2D_1 = MaxPool2D(pool_size=(2,2))
        # No input_shape here: this layer receives the pooled feature map,
        # not a 32x32x3 image.
        self.conv2D_2 = Conv2D(filters=n_filters_2,kernel_size=(4,4),activation='relu')
        self.max_pool2D_2 = MaxPool2D(pool_size=(2,2))
        self.flatten = Flatten()
        self.dense_1 = Dense(dense_units_1,activation='relu')
        self.dense_2 = Dense(10,activation='softmax')

        self.dropout = Dropout(0.25)
        self.batch_norm_1 = BatchNormalization()
        self.batch_norm_2 = BatchNormalization()

    def call(self, inputs, training=True):
        """Return the class probabilities for a batch of images.

        Args:
            inputs: batch of input images.
            training: forwarded to the batch-normalization and dropout
                layers so each one picks its training or inference behaviour.
        """
        x = self.conv2D_1(inputs)
        # Batch norm always runs; the flag only selects its mode.
        x = self.batch_norm_1(x, training=training)
        x = self.max_pool2D_1(x)
        x = self.conv2D_2(x)
        x = self.batch_norm_2(x, training=training)
        x = self.max_pool2D_2(x)
        # Dropout is the identity when training is False.
        x = self.dropout(x, training=training)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        return x

    # The hooks below are static so they work both on the class (as the
    # notebook calls them) and on an instance.
    @staticmethod
    def build_hp_model(hp):
        """Build and compile a model from the hyperparameters sampled in ``hp``."""
        n_filters_1 = hp.Int("n_filters_1", min_value=4, max_value=32, step=2, sampling="log")
        n_filters_2 = hp.Int("n_filters_2", min_value=2, max_value=32, step=2, sampling="log")
        dense_units_1 = hp.Int("dense_units_1", min_value=16, max_value=256, step=2, sampling="log")
        model = SimpleConvDropoutAndBatchnormModel(
            n_filters_1=n_filters_1, n_filters_2=n_filters_2, dense_units_1=dense_units_1
        )
        model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
        return model

    @staticmethod
    def get_tuner():
        """Return the Hyperband tuner stored under ``tuner/SimpleConvDropoutAndBatchnormModel``."""
        return kt.Hyperband(SimpleConvDropoutAndBatchnormModel.build_hp_model,
                            objective='val_accuracy',
                            #overwrite=True,
                            max_epochs=50,
                            factor=3,
                            directory='tuner/SimpleConvDropoutAndBatchnormModel',
                            project_name='model'
                           )

    @staticmethod
    def get_callbacks():
        """Return early stopping plus a best-only checkpoint under ``models/SimpleConvDropoutAndBatchnormModel``."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
            tf.keras.callbacks.ModelCheckpoint(filepath="models/SimpleConvDropoutAndBatchnormModel", save_best_only=True)
        ]

    @staticmethod
    def get_tuner_callbacks():
        """Return the callbacks used for every trial of the search."""
        return [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
        ]
    


In [None]:
# Models to tune; add the third model to this list to tune it as well.
model_classes = [SuperSimpleConvModel, SimpleConvModel]

# HYPERPARAMETER TUNING
for model_class in model_classes:
    print(model_class)
    tuner = model_class.get_tuner()
    search_options = dict(
        batch_size=256,
        epochs=50,
        validation_data=(x_validation, y_validation),
        callbacks=model_class.get_tuner_callbacks(),
    )
    tuner.search(x_train, y_train, **search_options)
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
#model_classes = [SimpleConvDropoutAndBatchnormModel]
model_classes = [SimpleConvModel]

# accuracy for every model (for training, validation and test set)
accuracy_results = {
    model_class.__name__ + "_" + str_type: [] for model_class in model_classes for str_type in ["train", "validation", "test"]
}

print(accuracy_results)

for model_class in model_classes:
    # Re-open the tuner project to read the best hyperparameters found by the search.
    tuner = model_class.get_tuner()
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(best_hps.values)
    checkpoint_path = f"models/{model_class.__name__}"

    for replication in range(1):
        print(f'Starting {replication} iteration')
        best_model = model_class.build_hp_model(best_hps)
        print(best_model)

        # The loop owns the checkpoint it reloads below. Some model classes
        # leave ModelCheckpoint out of get_callbacks(), which used to make
        # load_model fail or pick up a stale model. Any checkpoint callback
        # supplied by the class is dropped so only one writer targets the
        # path, and the best epoch is chosen by validation accuracy.
        callbacks = [
            callback for callback in model_class.get_callbacks()
            if not isinstance(callback, keras.callbacks.ModelCheckpoint)
        ]
        callbacks.append(
            keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_path, monitor='val_accuracy', save_best_only=True
            )
        )
        history = best_model.fit(x_train, y_train, epochs=200, validation_data=(x_validation, y_validation), batch_size=256, callbacks=callbacks, verbose=1)

        # Evaluate the best checkpoint rather than the last-epoch weights.
        best_model = keras.models.load_model(checkpoint_path)
        print(f"model name: {model_class.__name__}")
        print(f"iteration: {replication}")
        train_loss, train_accuracy = best_model.evaluate(x=x_train, y=y_train)
        accuracy_results[model_class.__name__ + "_train"].append(train_accuracy)
        validation_loss, validation_accuracy = best_model.evaluate(x=x_validation, y=y_validation)
        accuracy_results[model_class.__name__ + "_validation"].append(validation_accuracy)
        test_loss, test_accuracy = best_model.evaluate(x=x_test, y=y_test)
        accuracy_results[model_class.__name__ + "_test"].append(test_accuracy)
        print(f"train accuracy: {train_accuracy}")
        print(f"validation accuracy: {validation_accuracy}")
        print(f"test accuracy: {test_accuracy}")
        plot_history(history, model_class.__name__)

        print('============ NEW ITERATION ============')

print(accuracy_results)

In [None]:
# Summary statistics of the collected accuracies, one line per result list.
for model_type, accuracy_list in accuracy_results.items():
    summary = ", ".join(
        f"{label}: {statistic(accuracy_list)}"
        for label, statistic in (
            ("avg", np.average),
            ("std", np.std),
            ("min", np.min),
            ("max", np.max),
        )
    )
    print(f"mode: {model_type}, {summary}")
    print('==============')

In [None]:
# committee models
labels = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
def drawConfMatrix(model, x_test, y_test, labels):
    """Print the accuracy of ``model`` on a data set and plot its row-normalized confusion matrix.

    Args:
        model: object with a ``predict`` method returning per-class scores.
        x_test: inputs passed to ``model.predict``.
        y_test: true class ids.
        labels: display names for the matrix axes.
    """
    predicted_ids = np.argmax(model.predict(x_test, verbose=True), axis=1)
    normalized_matrix = confusion_matrix(y_test, predicted_ids, normalize='true')
    matrix_display = ConfusionMatrixDisplay(normalized_matrix, display_labels=labels)
    print('accuracy_score', accuracy_score(y_test, predicted_ids))
    _, axes = plt.subplots(figsize=(8, 8))
    matrix_display.plot(ax=axes, xticks_rotation='vertical')
    

In [None]:
# Reload the checkpoint saved for the last model class trained above.
# NOTE: this relies on the loop variable `model_class` still being defined by
# the training cell.
model = keras.models.load_model(f"models/{model_class.__name__}")

drawConfMatrix(model, x_test, y_test, labels)

In [None]:
# Bird, Cat, Deer, Frog: the CIFAR-10 class ids handled by the specialist model.
bird_idx = 2
cat_idx = 3
deer_idx = 4
frog_idx = 6
animal_class_ids = (bird_idx, cat_idx, deer_idx, frog_idx)

# Boolean masks selecting the samples whose label is one of the four classes.
# [..., 0] drops the trailing label axis so each mask indexes the sample axis.
animal_filter_train = np.isin(y_train, animal_class_ids)[..., 0]
animal_filter_validation = np.isin(y_validation, animal_class_ids)[..., 0]
animal_filter_test = np.isin(y_test, animal_class_ids)[..., 0]

x_train_animal = x_train[animal_filter_train]
y_train_animal = y_train[animal_filter_train]
x_validation_animal = x_validation[animal_filter_validation]
y_validation_animal = y_validation[animal_filter_validation]
x_test_animal = x_test[animal_filter_test]
y_test_animal = y_test[animal_filter_test]
# Was print(animal_filter.shape): that name is never defined (NameError).
print(animal_filter_train.shape)
print(x_train.shape)
print(x_train_animal.shape)

In [None]:
def map_from_animal_to_model(a):
    """Map a CIFAR-10 class id (2, 3, 4 or 6) to the specialist label 0..3.

    Returns None for any other value.
    """
    for cifar_label, model_label in ((2, 0), (3, 1), (4, 2), (6, 3)):
        if a == cifar_label:
            return model_label
def map_from_model_to_animal(a):
    """Map a specialist label 0..3 back to its CIFAR-10 class id (2, 3, 4 or 6).

    Returns None for any other value.
    """
    for model_label, cifar_label in ((0, 2), (1, 3), (2, 4), (3, 6)):
        if a == model_label:
            return cifar_label

# Element-wise versions that accept whole label arrays.
vectorized_map_from_animal_to_model = np.vectorize(map_from_animal_to_model)
vectorized_map_from_model_to_animal = np.vectorize(map_from_model_to_animal)

In [None]:
# Re-label the animal subsets from CIFAR-10 class ids to the specialist labels.
# NOTE: the arrays are overwritten in place, so run this cell exactly once
# after the filtering cell; labels that are already remapped are not valid
# inputs for the mapping.
y_train_animal = vectorized_map_from_animal_to_model(y_train_animal)
y_validation_animal = vectorized_map_from_animal_to_model(y_validation_animal)
y_test_animal = vectorized_map_from_animal_to_model(y_test_animal)

In [None]:
class SimpleConvAnimalModel(keras.Model):
    """Two conv/max-pool stages, a dense layer and a 4-way softmax output.

    Args:
        n_filters_1: number of filters in the first convolution.
        n_filters_2: number of filters in the second convolution.
        dense_units_1: number of units in the hidden dense layer.
        **kwargs: forwarded to ``keras.Model`` (for example ``name``).
    """

    def __init__(self,
                 n_filters_1 =32,
                 n_filters_2 = 32,
                 dense_units_1 = 64,
                 **kwargs):
        # Unpack kwargs: passing the dict positionally meant options such as
        # ``name`` never reached the Keras base class.
        super().__init__(**kwargs)

        self.conv2D_1 = Conv2D(filters=n_filters_1,kernel_size=(4,4),input_shape=(32,32,3),activation='relu')
        self.max_pool2D_1 = MaxPool2D(pool_size=(2,2))
        # No input_shape here: this layer receives the pooled feature map,
        # not a 32x32x3 image.
        self.conv2D_2 = Conv2D(filters=n_filters_2,kernel_size=(4,4),activation='relu')
        self.max_pool2D_2 = MaxPool2D(pool_size=(2,2))
        self.flatten = Flatten()
        self.dense_1 = Dense(dense_units_1,activation='relu')
        self.dense_2 = Dense(4,activation='softmax')

    def call(self, inputs, training=True):
        """Return the 4-class probabilities for a batch of images.

        ``training`` is accepted for API compatibility; none of the layers
        used here take it.
        """
        x = self.conv2D_1(inputs)
        x = self.max_pool2D_1(x)
        x = self.conv2D_2(x)
        x = self.max_pool2D_2(x)

        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        return x
# Train the 4-class specialist on the animal subset. The best epoch (by
# validation accuracy) is checkpointed to models/Animal, and training stops
# after 20 epochs without improvement.
animal_model = SimpleConvAnimalModel()
animal_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    "models/Animal", monitor="val_accuracy", save_best_only=True
)
early_stopping_callback = keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=20)
history = animal_model.fit(
    x_train_animal,
    y_train_animal,
    epochs=200,
    validation_data=(x_validation_animal, y_validation_animal),
    batch_size=256,
    callbacks=[checkpoint_callback, early_stopping_callback],
    verbose=1,
)

In [None]:
# Learning curves of the specialist (animal) model trained in the previous cell.
plot_history(history, "animal")

In [None]:
from copy import deepcopy
# Stage 1: the 10-class model predicts every test image.
predictions = np.argmax(model.predict(x_test, verbose=True), axis=1)
final_results = predictions.copy()

# Stage 2: every image predicted as bird/cat/deer/frog is re-classified by the
# specialist model. All such images go through predict() as one batch instead
# of one call per image, which made this cell very slow.
# Position i holds the CIFAR-10 class id of specialist label i, the same
# mapping as map_from_model_to_animal.
animal_class_lookup = np.array([bird_idx, cat_idx, deer_idx, frog_idx])
needs_animal_model = np.isin(predictions, animal_class_lookup)
if needs_animal_model.any():  # nothing to re-classify otherwise
    animal_predictions = np.argmax(
        animal_model.predict(x_test[needs_animal_model], verbose=True), axis=1
    )
    final_results[needs_animal_model] = animal_class_lookup[animal_predictions]
comitee_y_results = np.array(final_results)

# Row-normalized confusion matrix of the combined (committee) predictions.
confMat = ConfusionMatrixDisplay(confusion_matrix(y_test, comitee_y_results, normalize='true'), display_labels=labels)
fig, ax = plt.subplots(figsize=(8,8))
confMat.plot(ax = ax,  xticks_rotation = 'vertical')

In [None]:


# Test accuracy of the committee (10-class model plus animal specialist).
accuracy_score(y_test, comitee_y_results)

In [None]:
#for model_type, accuracy_list in accuracy_results.items():
#    plt.plot(accuracy_list, label=model_type)
#    #plt.violinplot(accuracy_list)
#plt.legend()
#y_min = min(min(accuracy_list) for accuracy_list in accuracy_results.values())
#y_max = max(max(accuracy_list) for accuracy_list in accuracy_results.values()) 
#plt.ylim(y_min, y_max*1.2)
##plt.show()

In [None]:
# TODO: add repeatability of the results
# (i.e. for every model with its tuned hyperparameters, run the training 5 times
# and report the mean and the standard deviation)

# TODO: check which classes are confused most often and prepare a model
# that recognizes only those confusable classes. Then combine everything and check the results.

In [None]:
# testing augmentation impact (on any single class, to check how the different variants affect the result)
# i.e. for example:
# 1) 

In [None]:
# TODO: prepare pretrained models and check the results (data augmentation is probably not needed here).
# 

In [None]:
# Augmentation variants, used to check which augmentation has the largest impact.
# Each pipeline is a named Sequential so its results can be keyed by name.

augmentation_random_flip = keras.Sequential(
    layers=[RandomFlip("horizontal")],
    name='augmentation_random_flip',
)

augmentation_random_zoom = keras.Sequential(
    layers=[RandomZoom(0.2)],
    name='augmentation_random_zoom',
)

augmentation_random_rotation = keras.Sequential(
    layers=[RandomRotation(0.2)],
    name='augmentation_random_rotation',
)

augmentation_random_translation = keras.Sequential(
    layers=[RandomTranslation(0.1, 0.1)],
    name='augmentation_random_translation',
)

augmentation_random_contrast = keras.Sequential(
    layers=[RandomContrast(0.1)],
    name='augmentation_random_contrast',
)

# All five transformations chained together.
augmentation_combined = keras.Sequential(
    layers=[
        RandomFlip("horizontal"),
        RandomZoom(0.2),
        RandomRotation(0.2),
        RandomTranslation(0.1, 0.1),
        RandomContrast(0.1),
    ],
    name='augmentation_combined',
)

augmentation_random_flip_translation = keras.Sequential(
    layers=[
        RandomFlip("horizontal"),
        RandomTranslation(0.1, 0.1),
    ],
    name='augmentation_random_flip_translation',
)

augmentation_random_flip_translation_zoom = keras.Sequential(
    layers=[
        RandomFlip("horizontal"),
        RandomTranslation(0.1, 0.1),
        RandomZoom(0.2),
    ],
    name='augmentation_random_flip_translation_zoom',
)

In [None]:
# Augmentation pipelines to evaluate; uncomment entries to include them.
augmentations = [
    #augmentation_random_flip,
    #augmentation_random_zoom,
    #augmentation_random_rotation,
    #augmentation_random_translation,
    #augmentation_random_contrast,
    augmentation_combined
    #augmentation_random_flip_translation,
    #augmentation_random_flip_translation_zoom
]

# One accuracy list per (augmentation, data split) pair.
augmentation_results = {
    f"{augmentation.name}_{str_type}": []
    for augmentation in augmentations
    for str_type in ("train", "validation", "test")
}
print(augmentation_results)
for augmentation in augmentations:
    result_prefix = augmentation.name
    # Five independent trainings per augmentation variant.
    for replication in range(5):
        classifier = SimpleConvModel(n_filters_1=32, n_filters_2=32, dense_units_1=64)
        model = keras.Sequential([augmentation, classifier])
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        history = model.fit(
            x_train,
            y_train,
            epochs=400,
            validation_data=(x_validation, y_validation),
            batch_size=256,
            callbacks=SimpleConvModel.get_callbacks(),
        )
        print(f"augmentation name: {result_prefix}")
        print(f"iteration: {replication}")
        train_loss, train_accuracy = model.evaluate(x=x_train, y=y_train)
        augmentation_results[f"{result_prefix}_train"].append(train_accuracy)
        validation_loss, validation_accuracy = model.evaluate(x=x_validation, y=y_validation)
        augmentation_results[f"{result_prefix}_validation"].append(validation_accuracy)
        test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
        augmentation_results[f"{result_prefix}_test"].append(test_accuracy)
        for split_label, split_accuracy in (
            ("train", train_accuracy),
            ("validation", validation_accuracy),
            ("test", test_accuracy),
        ):
            print(f"{split_label} accuracy: {split_accuracy}")
        plot_history(history, result_prefix)

        print('============ NEW ITERATION ============')

In [None]:
# Display the raw accuracy lists collected by the augmentation experiment.
augmentation_results

In [None]:
# Summary statistics of the augmentation accuracies, one line per result list.
for augmentation_name, accuracy_list in augmentation_results.items():
    summary = ", ".join(
        f"{label}: {statistic(accuracy_list)}"
        for label, statistic in (
            ("avg", np.average),
            ("std", np.std),
            ("min", np.min),
            ("max", np.max),
        )
    )

    print(f"mode: {augmentation_name}, {summary}")
    print('===')

In [None]:
# Display the raw accuracy lists once more (same content as the earlier display cell).
augmentation_results

In [None]:
# VISION transformer:
# Task definition
num_classes = 10
input_shape = (32, 32, 3)

# Optimisation settings
learning_rate = 1e-3
weight_decay = 1e-4
batch_size = 126
num_epochs = 100

# Patch geometry
image_size = 72  # input images are resized to image_size x image_size
patch_size = 6  # side length of the square patches cut from each resized image
num_patches = (image_size // patch_size) ** 2

# Transformer encoder
projection_dim = 64
num_heads = 4
transformer_units = [2 * projection_dim, projection_dim]  # widths of the MLP inside each transformer block
transformer_layers = 8

# Classification head
mlp_head_units = [2048, 1024]  # widths of the dense layers of the final classifier

In [None]:
# Reload the raw (unscaled) CIFAR-10 images and draw a fresh 80/20
# train/validation split. This overwrites the arrays used by the earlier cells.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train, x_validation, y_train, y_validation = train_test_split(
    x_train, y_train, train_size=0.8
)

# Preprocessing/augmentation pipeline placed in front of the ViT.
data_augmentation = keras.Sequential(
    layers=[
        Normalization(),
        Resizing(image_size, image_size),
        RandomFlip("horizontal"),
        RandomRotation(factor=0.02),
        RandomZoom(height_factor=0.2, width_factor=0.2),
    ],
    name="data_augmentation",
)
# Fit the Normalization layer (first in the pipeline) on the training images:
# it computes their mean and variance.
data_augmentation.get_layer(index=0).adapt(x_train)

In [None]:
# Implement multilayer perceptron (MLP)
def mlp(x, hidden_units, dropout_rate):
    """Apply a stack of GELU dense layers, each followed by dropout.

    Args:
        x: input tensor.
        hidden_units: iterable with the width of each dense layer, in order.
        dropout_rate: rate of the dropout applied after every dense layer.

    Returns:
        The output of the last dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    features = x
    for width in hidden_units:
        hidden = Dense(width, activation=tf.nn.gelu)(features)
        features = Dropout(dropout_rate)(hidden)
    return features


# Implement patch creation as a layer
class Patches(Layer):
    """Layer that cuts images into non-overlapping square patches.

    Args:
        patch_size: side length, in pixels, of each square patch.
        **kwargs: standard layer options (``name``, ``dtype``, ...) forwarded
            to the base ``Layer``; also needed so the layer can be rebuilt
            from its config.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the flattened patches, reshaped to ``[batch, -1, patch_dims]``."""
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # The last axis holds the flattened pixels of one patch.
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config

In [None]:
# Let's display patches for a sample image

import matplotlib.pyplot as plt

# Show one randomly chosen training image ...
plt.figure(figsize=(4, 4))
image = x_train[np.random.choice(x_train.shape[0])]
plt.imshow(image.astype("uint8"))
plt.axis("off")

# ... then resize it and split it into patches.
resized_image = tf.image.resize(
    tf.convert_to_tensor(image[np.newaxis, ...]), size=(image_size, image_size)
)
patches = Patches(patch_size)(resized_image)
for summary_line in (
    f"Image size: {image_size} X {image_size}",
    f"Patch size: {patch_size} X {patch_size}",
    f"Patches per image: {patches.shape[1]}",
    f"Elements per patch: {patches.shape[-1]}",
):
    print(summary_line)

# Draw the patches on an n x n grid, in extraction order.
n = int(np.sqrt(patches.shape[1]))
plt.figure(figsize=(4, 4))
for i, patch in enumerate(patches[0]):
    ax = plt.subplot(n, n, i + 1)
    patch_pixels = tf.reshape(patch, (patch_size, patch_size, 3)).numpy()
    plt.imshow(patch_pixels.astype("uint8"))
    plt.axis("off")

In [None]:
# Implement the patch encoding layer
class PatchEncoder(Layer):
    """Project each patch linearly and add a learned position embedding.

    Args:
        num_patches: number of patches per image (size of the position table).
        projection_dim: output dimension of the projection and the embedding.
        **kwargs: standard layer options (``name``, ``dtype``, ...) forwarded
            to the base ``Layer``; also needed so the layer can be rebuilt
            from its config.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Stored only so get_config() can report it.
        self.projection_dim = projection_dim
        self.projection = Dense(units=projection_dim)
        self.position_embedding = Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return the projected patches plus the embedding of positions 0..num_patches-1."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {"num_patches": self.num_patches, "projection_dim": self.projection_dim}
        )
        return config

In [None]:
def create_vit_classifier():
    """Build the (uncompiled) Vision Transformer classifier.

    The architecture is read from the module-level hyperparameters
    (``input_shape``, ``patch_size``, ``num_patches``, ``projection_dim``,
    ``num_heads``, ``transformer_units``, ``transformer_layers``,
    ``mlp_head_units``, ``num_classes``) and from ``data_augmentation``.

    Returns:
        A ``keras.Model`` mapping images to ``num_classes`` raw logits
        (the final dense layer has no activation).
    """
    inputs = Input(shape=input_shape)
    # Augment, cut into patches, then embed the patches as tokens.
    augmented = data_augmentation(inputs)
    patch_sequence = Patches(patch_size)(augmented)
    tokens = PatchEncoder(num_patches, projection_dim)(patch_sequence)

    # Stack of pre-norm Transformer blocks.
    for _ in range(transformer_layers):
        normed_tokens = LayerNormalization(epsilon=1e-6)(tokens)
        # Self-attention: the same tensor is passed as query and value.
        attended = MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(normed_tokens, normed_tokens)
        # First residual connection.
        attention_residual = Add()([attended, tokens])
        normed_residual = LayerNormalization(epsilon=1e-6)(attention_residual)
        block_mlp = mlp(normed_residual, hidden_units=transformer_units, dropout_rate=0.1)
        # Second residual connection; the result feeds the next block.
        tokens = Add()([block_mlp, attention_residual])

    # Flatten all patch tokens into one feature vector per image.
    representation = LayerNormalization(epsilon=1e-6)(tokens)
    representation = Flatten()(representation)
    representation = Dropout(0.5)(representation)
    # Classification head.
    head_features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    logits = Dense(num_classes)(head_features)
    return keras.Model(inputs=inputs, outputs=logits)

In [None]:
# Accuracy of every ViT run, per data split.
vit_results = {"train": [], "validation": [], "test": []}
def run_experiment(model):
    """Compile, train and evaluate one ViT classifier.

    Trains with AdamW, checkpoints the best epoch (by validation accuracy) to
    ``models/vision_transformer``, reloads that checkpoint, evaluates it on the
    train/validation/test splits, and appends each accuracy to ``vit_results``.

    Args:
        model: uncompiled Keras model producing logits.

    Returns:
        The history object returned by ``model.fit``.
    """
    model.compile(
        optimizer=tfa.optimizers.AdamW(
            learning_rate=learning_rate, weight_decay=weight_decay
        ),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
            keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
        ],
    )

    checkpoint_filepath = "models/vision_transformer"
    training_callbacks = [
        keras.callbacks.ModelCheckpoint(
            checkpoint_filepath, monitor="val_accuracy", save_best_only=True
        ),
        keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=20),
    ]

    history = model.fit(
        x=x_train,
        y=y_train,
        validation_data=(x_validation, y_validation),
        batch_size=batch_size,
        epochs=num_epochs,
        callbacks=training_callbacks,
    )

    # Evaluate the best checkpoint, not the weights of the last epoch.
    model = keras.models.load_model(checkpoint_filepath)

    evaluation_splits = (
        ("train", "Train", x_train, y_train),
        ("validation", "Validation", x_validation, y_validation),
        ("test", "Test", x_test, y_test),
    )
    for result_key, title, features, targets in evaluation_splits:
        _, accuracy, top_5_accuracy = model.evaluate(features, targets)
        vit_results[result_key].append(accuracy)
        print(f"{title} accuracy: {round(accuracy * 100, 2)}%")
        print(f"{title} top 5 accuracy: {round(top_5_accuracy * 100, 2)}%")

    return history

# Train and evaluate five freshly built ViT classifiers; each run appends its
# accuracies to vit_results and overwrites the same checkpoint path.
for i in range(5):
    vit_classifier = create_vit_classifier()
    history = run_experiment(vit_classifier)
    plot_history(history, "vision transformer")

In [None]:
# Learning curves of the most recent ViT run (`history` is left over from the loop above).
plot_history(history, "vision transformer")

In [None]:
# wyniki accuracy results
for dataset_type, accuracy_list in vit_results.items():
    # avg / std / min / max of the accuracies collected for this split
    summary = ", ".join(
        f"{label}: {statistic(accuracy_list)}"
        for label, statistic in (
            ("avg", np.average),
            ("std", np.std),
            ("min", np.min),
            ("max", np.max),
        )
    )
    print(f"dataset_type: {dataset_type}, {summary}")
    print('==============')

In [None]:
# Reload the ViT checkpoint from disk (the path written by run_experiment).
vit_model = keras.models.load_model('models/vision_transformer')

In [None]:
    # Evaluate the reloaded ViT on each split and print the raw metric values.
    for split_title, features, targets in (
        ("Train", x_train, y_train),
        ("Validation", x_validation, y_validation),
        ("Test", x_test, y_test),
    ):
        _, accuracy, top_5_accuracy = vit_model.evaluate(features, targets)
        print(f"{split_title} accuracy: {accuracy}")
        print(f"{split_title} top 5 accuracy: {top_5_accuracy}")

In [None]:
# Per-class outputs of the ViT for every test image.
predictions = vit_model.predict(x_test)

In [None]:
# Sanity check: shape of the prediction array.
predictions.shape

In [None]:
# Sanity check: shape of the label array, for comparison with the predictions.
y_test.shape

In [None]:
# Reduce the per-class outputs to predicted class ids and shape them as a
# column so they can be compared element-wise with y_test.
predicted_classes = np.argmax(predictions, axis=1).reshape(-1, 1)
predicted_classes.shape

In [None]:
# Element-wise comparison of the predicted and true class ids.
(predicted_classes == y_test)

In [None]:
# Confusion matrix (raw counts) of the ViT on the test set.
confusion_matrix(y_test,predicted_classes)