In [2]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import importlib

import kaggle_functions as kaggle

### Load images into datasets

In [None]:
# Load the original images: the complete set plus the training / validation
# datasets and the validation labels
(
    complete_dataset,
    train_dataset,
    valid_dataset,
    valid_labels,
) = kaggle.load_train_as_dataset(return_complete_set=True)

# Build the augmented training dataset, then display sample images from it
# as a visual check that the augmentation looks right
train_dataset_augmented, epoch_length = kaggle.augment_dataset(train_dataset)
kaggle.show_images(train_dataset_augmented, 3)

### Training and fine-tuning
The first round of training uses augmented training data to train all architectures used in the final model.

The second round, fine-tuning, exposes each model to the entire non-augmented dataset (training and validation data) to boost performance.

In [4]:
architecture_paths = {
    'model1_vgg' : (220, 20, 0.80),
    'model2_deepervgg': (220, 25, 0.84),
    'model3_deepervgg': (220, 25, 0.84),
    'model4_vggres' : (180, 25, 0.80),
    'model5_wideresnet' : (150, 15, 0.84),
    'model6_wideresnet' : (180, 15, 0.84),
    'model7_simplenet' : (200, 25, 0.87),
    'model8_simplenet' : (200, 25, 0.89),
    'model9_simplenet' : (200, 25, 0.88),
}

In [None]:
# Train, fine-tune and save every architecture listed in architecture_paths.
for architecture_path, (epochs, valid_patience, _) in architecture_paths.items():
    print(f'Training {architecture_path}')
    # Several models are built one after another in the same kernel: release
    # the Keras global state left by the previous iteration so memory use
    # does not keep growing across architectures
    keras.backend.clear_session()
    # The dictionary key doubles as the name of the module defining the architecture
    architecture = importlib.import_module(architecture_path)
    # Main training on the augmented training data
    model, _ = kaggle.train_model(
        architecture.Model().build(None), train_dataset_augmented, valid_dataset,
        epochs=epochs, epoch_length=epoch_length, valid_patience=valid_patience)
    # Fine-tuning on the complete dataset with a small learning rate
    fine_model, _ = kaggle.fine_tune_model(
        model, complete_dataset, valid_dataset,
        epochs=4, learning_rate=1E-5)
    # Saving for later (reloaded from the same path in the stacking step)
    fine_model.save(f'models/{architecture_path}')

### Stacking
Gets predictions from all models and combines them with a weighted sum to produce the final result.

"Accuracy" for each model was obtained by fine-tuning models on only training data, then evaluating performance on the validation set. This performance was used as "accuracy".

This method was also used to obtain the "power weights" for probability and accuracy. Different values were tried for all models, and the power weights chosen were those with the best performance on the validation set. For this final model, the models were fine-tuned on the validation data as well, so the weights had to be set in advance.

In [4]:
# Exponents applied to the probabilities and to the per-model accuracies when
# combining the models (the "power weights" described in the notes for this section)
PROBABILITY_POWER = 2
ACCURACY_POWER = 20

x_test_real = kaggle.load_test_set()

# Loads and predicts probabilities for each image, for each model
per_model_predictions = []
for architecture_path in architecture_paths:
    print(f'Loading and predicting {architecture_path}')
    model = keras.models.load_model(f'models/{architecture_path}')
    # NOTE(review): softmax is applied to the raw model output, so this assumes
    # the saved models return logits - confirm against the architecture modules
    softmax_test_pred = np.array(tf.nn.softmax(model.predict(x_test_real)))
    per_model_predictions.append(softmax_test_pred)

# Stack along a new last axis (one slice per model). The shape comes from the
# predictions themselves instead of hard-coded image/class counts; float64
# matches the buffer that np.zeros previously allocated.
test_predictions = np.stack(per_model_predictions, axis=2).astype(np.float64)

# Raises probabilities to PROBABILITY_POWER to give higher weight to more
# confident predictions
powered_predictions = np.power(test_predictions, PROBABILITY_POWER)
# Weights each probability by the accuracy of its model, to the power
# ACCURACY_POWER (broadcast over the last axis, one weight per model)
model_weights = np.array(
    [accuracy ** ACCURACY_POWER for _, _, accuracy in architecture_paths.values()])
weighted_predictions = powered_predictions * model_weights
# Sums weighted probabilities across all models, then picks the best class per image
summed_predictions = np.sum(weighted_predictions, axis=2)
modified_test_predictions = np.argmax(summed_predictions, axis=1)

# Saves final predictions
kaggle.save_test_pred('final_predictions.csv', modified_test_predictions)

Loading and predicting model6/VGG_6_79
