In [1]:
#####################
# IMPORT LIBS
#####################

from PIL import Image
from sklearn.model_selection import train_test_split
from skimage import io, transform
from skimage.transform import AffineTransform, warp
from sklearn.metrics import roc_auc_score, mean_absolute_error
import tensorflow as tf

from tensorflow.keras.models import load_model
from tensorflow.compat.v1.keras import backend as K

import pandas as pd
from pathlib import Path
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
import glob
import albumentations as A
# from efficientnet_pytorch import EfficientNet


#####################
# SET CONSTANTS
#####################

INPUT_PATH = Path('../input')
OUTPUT_PATH = Path('../output')
TRAIN_PATH = INPUT_PATH / 'idao_dataset' / 'train'
PRIVATE_PATH = INPUT_PATH / 'idao_dataset' / 'private_test'
PUBLIC_PATH = INPUT_PATH / 'idao_dataset' / 'public_test'

RANDOM_SEED = 4444

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(RANDOM_SEED)

In [2]:
class Config:
    BATCH_SIZE = 32
    TRAINING_EPOCHS = 60
    VALIDATION_STEPS_PER_EPOCH = 5
    VALIDATION_EPOCHS = 10
    STEPS_PER_EPOCH = 30
    EARLY_STOP_PATIENCE = 5
    
    
    # Declare an augmentation pipeline
    train_transform = A.Compose([
        #A.HorizontalFlip(p=0.5),
        A.Cutout(num_holes=4, max_h_size=8, max_w_size=8, p=0.3),
        A.OneOf([A.RandomContrast(),
             A.RandomGamma(),
             A.RandomBrightness()],p=0.2),
        A.OneOf([A.Blur(p = 0.3),
             A.GaussNoise(p=0.3)
                ],p=0.5),
        A.CLAHE(clip_limit=4, tile_grid_size=(8,8), always_apply=False, p=0.3),
        A.Normalize(p=1)
    ],)
    
    validation_transform = A.Compose([
        A.Normalize(p=1)
    ],)

In [3]:
def getFeatures(img_path):
    if len(img_path.split('_')) == 18:
        particle_class = 0 # ER
        particle_energy = int(img_path.split('_')[7])
    else:
        particle_class = 1 # HE
        particle_energy = int(img_path.split('_')[8])
    return [img_path, particle_class, particle_energy]

images = glob.glob(str(TRAIN_PATH / '**/*.png'), recursive=True)
images = pd.DataFrame(map(getFeatures, images))
images.columns = ['path', 'class', 'energy']

In [4]:
#################
# EXTRACT TEST
#################

# HE - 0, ER - 1

he_test_idx = list(images[(images['class'] == 0) & (images['energy'].apply(lambda x: x in [1, 6, 20]))].index)
er_test_idx = list(images[(images['class'] == 1) & (images['energy'].apply(lambda x: x in [3, 10, 30]))].index)

In [5]:
test_idx = he_test_idx + er_test_idx
test_images = images.iloc[test_idx]
images = images.drop(index = test_idx)

train_images, valid_images = train_test_split(images, shuffle = True, random_state = RANDOM_SEED)
train_images = train_images.reset_index(drop = True)
valid_images = valid_images.reset_index(drop = True)

In [6]:
def calc_metric(y_binary_true, y_binary_pred, y_reg_true, y_reg_pred):
    '''
    Competition metric
    '''
    
    roc = roc_auc_score(y_binary_true, y_binary_pred)
    mae = mean_absolute_error(y_reg_true, y_reg_pred)
    return 1000 * (roc - mae), roc, mae

In [7]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, transform=None, batch_size=32,  shuffle=True, is_classification = True, LABEL_MAPPER = {1:1,3:3,6:6,10:10,20:20,30:30}):
        self.images = images
        self.indices = np.arange(len(images))
        self.transform = transform
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_classification = is_classification
        self.LABEL_MAPPER = LABEL_MAPPER
        
        self.on_epoch_end()

    def __len__(self):
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
    
        X, y = self.__get_data(batch)
        return X, y

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle == True:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = np.zeros((self.batch_size, 150, 150, 3))
        y_class = np.zeros((self.batch_size,))
        y_energy = np.zeros((self.batch_size,3))
        for i, idx in enumerate(batch):
            image=cv2.imread(self.images.iloc[idx, 0])[225:375, 225:375, :]
            X[i,] = image
            particle_class = self.images.iloc[idx, 1]
            particle_energy = self.images.iloc[idx, 2]
            y_class[i] = particle_class
            y_energy[i] = self.LABEL_MAPPER[particle_energy]
        if self.is_classification:
            return X / 255.0, y_class

        return X / 255.0, y_energy


In [8]:
train_datagen = DataGenerator(train_images, batch_size = Config.BATCH_SIZE, is_classification = True)
valid_datagen = DataGenerator(valid_images, batch_size = Config.BATCH_SIZE, is_classification = True)
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = True)

In [9]:
# Example data extract

X, y_class = train_datagen.__getitem__(0)

## Class model

In [154]:
from keras.applications.vgg16 import VGG16
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

mobilenet_model = MobileNetV2(weights='imagenet', include_top=False)

In [158]:
from keras import layers as L
from keras.models import Model
import tensorflow as tf
import keras

earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    
def create_classification_model(base_model):
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = L.Dense(128, activation='relu')(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = L.Dense(1, activation='sigmoid')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = True

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics = ['AUC'])
    return model

In [159]:
model = create_classification_model(mobilenet_model)

In [160]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = valid_datagen, 
    validation_steps = Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60


<tensorflow.python.keras.callbacks.History at 0x7f1b7032e910>

In [163]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[0.3105323016643524, 0.968234658241272]

In [164]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[0.9347938895225525, 0.7222222089767456]

In [165]:
model.save(OUTPUT_PATH / 'models' / 'cnn_classification')

## Regression model

In [10]:
from keras.applications.vgg16 import VGG16
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.applications.densenet import DenseNet121
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

mobilenet_model = MobileNetV2(weights='imagenet', include_top=False)

In [11]:
ENERGY2CLASS_HE = {
    1: [1, 0, 0],
    6: [0, 1, 0],
    20: [0, 0, 1],
    
}

ENERGY2CLASS_ER = {
    3: [1, 0, 0],
    10: [0, 1, 0],
    30: [0, 0, 1],
    
}

ENERGY2CLASS_HE_TEST = {
    3: [1, 0, 0],
    10: [0, 1, 0],
    30: [0, 0, 1],
    
}

ENERGY2CLASS_ER_TEST = {
    1: [1, 0, 0],
    6: [0, 1, 0],
    20: [0, 0, 1],
}

In [12]:
CLASS2ENERGY_HE = {
    0:1,
    1:6,
    2:20
}

CLASS2ENERGY_ER = {
    0:3,
    1:10,
    2:30,
    
}

CLASS2ENERGY_HE_TEST = {
    0:3,
    1:10,
    2:30,
    
}

CLASS2ENERGY_ER_TEST = {
    0:1,
    1:6,
    2:20,
}

### ER model

In [26]:
from keras import layers as L
from keras.models import Model
import tensorflow as tf
import keras

earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    
def create_classification_model(base_model):
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = L.Dense(512, activation='relu')(x)
    x = L.Dense(64, activation='relu')(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = L.Dense(3, activation='sigmoid')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics = ['categorical_accuracy'])
    return model

In [27]:
model = create_classification_model(mobilenet_model)

In [28]:
train_datagen = DataGenerator(train_images[train_images['class'] == 0].reset_index(drop=True), batch_size = Config.BATCH_SIZE, is_classification = False, LABEL_MAPPER=ENERGY2CLASS_ER)
valid_datagen = DataGenerator(valid_images[valid_images['class'] == 0].reset_index(drop=True), batch_size = Config.BATCH_SIZE, is_classification = False, LABEL_MAPPER=ENERGY2CLASS_ER)
test_datagen = DataGenerator(test_images[test_images['class']==0].reset_index(drop=True), batch_size = 1, is_classification = False, LABEL_MAPPER=ENERGY2CLASS_ER_TEST)

In [29]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = valid_datagen, 
    validation_steps = Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60


<tensorflow.python.keras.callbacks.History at 0x7f1dc43b5dd0>

In [30]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[0.03242973983287811, 0.9897836446762085]

In [31]:
valid_datagen = DataGenerator(valid_images[valid_images['class'] == 0], batch_size = 1, is_classification = False, shuffle = False)
validation_preds = model.predict_generator(valid_datagen, verbose = 1)





In [32]:
from sklearn.metrics import mean_absolute_error
Y_pred = np.vectorize(CLASS2ENERGY_ER.get)(np.argmax(validation_preds, axis =1 ))
Y_true = valid_images[valid_images['class'] == 0]['energy']
mean_absolute_error(Y_pred, Y_true)

0.16814683244523387

In [33]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[0.5942790508270264, 0.8333333134651184]

In [34]:
test_datagen = DataGenerator(test_images[test_images['class'] == 0], batch_size = 1, is_classification = False, shuffle = False)
test_preds = model.predict_generator(test_datagen, verbose = 1)





In [35]:
Y_pred = np.vectorize(CLASS2ENERGY_ER_TEST.get)(np.argmax(test_preds, axis =1 ))
Y_true = test_images[test_images['class'] == 0]['energy']
mean_absolute_error(Y_pred, Y_true)

0.8333333333333334

In [36]:
model.save(OUTPUT_PATH / 'models' / 'cnn_energy_er')

### HE model

In [37]:
mobilenet_model = MobileNetV2(weights='imagenet', include_top=False)
model = create_classification_model(mobilenet_model)

In [38]:
train_datagen = DataGenerator(train_images[train_images['class'] == 1].reset_index(drop=True), batch_size = Config.BATCH_SIZE, is_classification = False, LABEL_MAPPER=ENERGY2CLASS_HE)
valid_datagen = DataGenerator(valid_images[valid_images['class'] == 1].reset_index(drop=True), batch_size = Config.BATCH_SIZE, is_classification = False, LABEL_MAPPER=ENERGY2CLASS_HE)
test_datagen = DataGenerator(test_images[test_images['class']==1].reset_index(drop=True), batch_size = 1, is_classification = False, LABEL_MAPPER=ENERGY2CLASS_HE_TEST)

In [39]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = valid_datagen, 
    validation_steps = Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60


<tensorflow.python.keras.callbacks.History at 0x7f19c6b74910>

In [40]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[0.05928764119744301, 0.9773284196853638]

In [41]:
valid_datagen = DataGenerator(valid_images[valid_images['class'] == 1], batch_size = 1, is_classification = False, shuffle = False)
validation_preds = model.predict_generator(valid_datagen, verbose = 1)





In [42]:
from sklearn.metrics import mean_absolute_error
Y_pred = np.vectorize(CLASS2ENERGY_HE.get)(np.argmax(validation_preds, axis =1 ))
Y_true = valid_images[valid_images['class'] == 1]['energy']
mean_absolute_error(Y_pred, Y_true)

0.11151295961422544

In [43]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[0.37743833661079407, 0.8333333134651184]

In [44]:
test_datagen = DataGenerator(test_images[test_images['class'] == 1], batch_size = 1, is_classification = False, shuffle = False)
test_preds = model.predict_generator(test_datagen, verbose = 1)





In [45]:
Y_pred = np.vectorize(CLASS2ENERGY_HE_TEST.get)(np.argmax(test_preds, axis =1 ))
Y_true = test_images[test_images['class'] == 1]['energy']
mean_absolute_error(Y_pred, Y_true)

1.1666666666666667

In [46]:
model.save(OUTPUT_PATH / 'models' / 'cnn_energy_he')

## Predict

In [58]:
class PredictDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, transform=None, batch_size=32,  shuffle=True):
        self.images = images
        self.indices = np.arange(len(images))
        self.transform = transform
        self.batch_size = batch_size
        self.shuffle = shuffle
        
        self.on_epoch_end()

    def __len__(self):
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
    
        X = self.__get_data(batch)
        return X

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = np.zeros((self.batch_size, 150, 150, 3))
        for i, idx in enumerate(batch):
            image=cv2.imread(str(self.images.iloc[idx, 3]))[225:375, 225:375, :]
            X[i,] = image

        return X / 255.0


In [50]:
import glob
private_test = glob.glob(str(PRIVATE_PATH / '**/*.png'), recursive=True)
public_test = glob.glob(str(PUBLIC_PATH / '**/*.png'), recursive=True)

In [51]:
sample_submission = pd.read_csv(INPUT_PATH / 'track1_predictions_example.csv')
private_ids = [t.split('/')[-1].split('.')[0] for t in private_test]
public_ids = [t.split('/')[-1].split('.')[0] for t in public_test]

sample_submission['path'] = sample_submission['id'].apply(lambda x: PRIVATE_PATH / f'{x}.png' if x in private_ids else PUBLIC_PATH / f'{x}.png')

In [59]:
prediction_datagen = PredictDataGenerator(sample_submission, shuffle = False, batch_size = 41)

In [60]:
####################################
# PREDICT WITH CLASSIFICATION MODEL
####################################

cnn_classification = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_classification')
y_pred_class = cnn_classification.predict_generator(prediction_datagen, verbose = 1)



In [None]:
cnn_regression_er = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_energy_er')
cnn_regression_he = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_energy_he')

In [69]:
####################################
# PREDICT WITH REGRESSION MODEL
####################################


CONDITION_PUBLIC_ER = (y_pred_class.reshape(-1) < 0.5) & (sample_submission['path'].apply(lambda x: 'public_test' in str(x)))
CONDITION_PUBLIC_HE = (y_pred_class.reshape(-1) >= 0.5) & (sample_submission['path'].apply(lambda x: 'public_test' in str(x)))

CONDITION_PRIVATE_ER = (y_pred_class.reshape(-1) < 0.5) & (sample_submission['path'].apply(lambda x: 'private_test' in str(x)))
CONDITION_PRIVATE_HE = (y_pred_class.reshape(-1) >= 0.5) & (sample_submission['path'].apply(lambda x: 'private_test' in str(x)))

public_er_datagen = PredictDataGenerator(sample_submission[CONDITION_PUBLIC_ER], shuffle = False, batch_size = 1)
public_he_datagen = PredictDataGenerator(sample_submission[CONDITION_PUBLIC_HE], shuffle = False, batch_size = 1)
private_er_datagen = PredictDataGenerator(sample_submission[CONDITION_PRIVATE_ER], shuffle = False, batch_size = 1)
private_he_datagen = PredictDataGenerator(sample_submission[CONDITION_PRIVATE_HE], shuffle = False, batch_size = 1)

y_pred_energy = cnn_regression_er.predict_generator(public_er_datagen, verbose = 1)
sample_submission.iloc[CONDITION_PUBLIC_ER, 2] = np.vectorize(CLASS2ENERGY_ER.get)(np.argmax(y_pred_energy, axis = 1))

y_pred_energy = cnn_regression_he.predict_generator(public_he_datagen, verbose = 1)
sample_submission.iloc[CONDITION_PUBLIC_HE, 2] = np.vectorize(CLASS2ENERGY_HE.get)(np.argmax(y_pred_energy, axis = 1))

y_pred_energy = cnn_regression_er.predict_generator(private_er_datagen, verbose = 1)
sample_submission.iloc[CONDITION_PRIVATE_ER, 2] = np.vectorize(CLASS2ENERGY_ER_TEST.get)(np.argmax(y_pred_energy, axis = 1))

y_pred_energy = cnn_regression_he.predict_generator(private_he_datagen, verbose = 1)
sample_submission.iloc[CONDITION_PRIVATE_HE, 2] = np.vectorize(CLASS2ENERGY_HE_TEST.get)(np.argmax(y_pred_energy, axis = 1))



In [73]:
sample_submission['classification_predictions'] = y_pred_class.reshape(-1)

In [74]:
sample_submission.drop(columns = ['path']).to_csv(OUTPUT_PATH / 'predictions' / 'prediction.csv', index = None)