In [1]:
#####################
# IMPORT LIBS
#####################

from PIL import Image
from sklearn.model_selection import train_test_split
from skimage import io, transform
from skimage.transform import AffineTransform, warp
from sklearn.metrics import roc_auc_score, mean_absolute_error
import tensorflow as tf

from tensorflow.keras.models import load_model
from tensorflow.compat.v1.keras import backend as K

import pandas as pd
from pathlib import Path
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
import glob
import albumentations as A
# from efficientnet_pytorch import EfficientNet


#####################
# SET CONSTANTS
#####################

INPUT_PATH = Path('../input')
OUTPUT_PATH = Path('../output')
TRAIN_PATH = INPUT_PATH / 'idao_dataset' / 'train'
PRIVATE_PATH = INPUT_PATH / 'idao_dataset' / 'private_test'
PUBLIC_PATH = INPUT_PATH / 'idao_dataset' / 'public_test'

RANDOM_SEED = 4444

ENERGY2CLASS = {
    1: [1, 0, 0, 0, 0, 0],
    3: [0, 1, 0, 0, 0, 0],
    6: [0, 0, 1, 0, 0, 0],
    10: [0, 0, 0, 1, 0, 0],
    20: [0, 0, 0, 0, 1, 0],
    30: [0, 0, 0, 0, 0, 1]
    
}

CLASS2ENERGY = {
    0: 1,
    1: 3,
    2: 6,
    3: 10,
    4: 20,
    5: 30
}

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(RANDOM_SEED)

In [99]:
class Config:
    BATCH_SIZE = 32
    TRAINING_EPOCHS = 150
    VALIDATION_STEPS_PER_EPOCH = 5
    VALIDATION_EPOCHS = 10
    STEPS_PER_EPOCH = 30
    EARLY_STOP_PATIENCE = 5
    
    
    # Declare an augmentation pipeline
    train_transform = A.Compose([
        #A.HorizontalFlip(p=0.5),
        A.Cutout(num_holes=4, max_h_size=8, max_w_size=8, p=0.3),
        A.OneOf([A.RandomContrast(),
             A.RandomGamma(),
             A.RandomBrightness()],p=0.2),
        A.OneOf([A.Blur(p = 0.3),
             A.GaussNoise(p=0.3)
                ],p=0.5),
        A.CLAHE(clip_limit=4, tile_grid_size=(8,8), always_apply=False, p=0.3),
    ],)
    
    validation_transform = A.Compose([
    ],)

In [3]:
def getFeatures(img_path):
    if len(img_path.split('_')) == 18:
        particle_class = 0 # ER
        particle_energy = int(img_path.split('_')[7])
    else:
        particle_class = 1 # HE
        particle_energy = int(img_path.split('_')[8])
    return [img_path, particle_class, particle_energy]

images = glob.glob(str(TRAIN_PATH / '**/*.png'), recursive=True)
images = pd.DataFrame(map(getFeatures, images))
images.columns = ['path', 'class', 'energy']

In [4]:
#################
# EXTRACT TEST
#################

# HE - 0, ER - 1

he_test_idx = list(images[(images['class'] == 0) & (images['energy'].apply(lambda x: x in [1, 6, 20]))].index)
er_test_idx = list(images[(images['class'] == 1) & (images['energy'].apply(lambda x: x in [3, 10, 30]))].index)

In [5]:
test_idx = he_test_idx + er_test_idx
test_images = images.iloc[test_idx]
images = images.drop(index = test_idx)

train_images, valid_images = train_test_split(images, shuffle = True, random_state = RANDOM_SEED)
train_images = train_images.reset_index(drop = True)
valid_images = valid_images.reset_index(drop = True)

In [6]:
def calc_metric(y_binary_true, y_binary_pred, y_reg_true, y_reg_pred):
    '''
    Competition metric
    '''
    
    roc = roc_auc_score(y_binary_true, y_binary_pred)
    mae = mean_absolute_error(y_reg_true, y_reg_pred)
    return 1000 * (roc - mae), roc, mae

In [100]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, transform=None, batch_size=32,  shuffle=True, is_classification = True, augment = False):
        self.images = images
        self.indices = np.arange(len(images))
        self.transform = transform
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augment = augment
        self.is_classification = is_classification
        
        self.on_epoch_end()

    def __len__(self):
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
    
        X, y = self.__get_data(batch)
        return X, y

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = np.zeros((self.batch_size, 150, 150, 3))
        y_class = np.zeros((self.batch_size,))
        y_energy = np.zeros((self.batch_size))
        for i, idx in enumerate(batch):
            image=cv2.imread(self.images.iloc[idx, 0])[225:375, 225:375, :]
            X[i,] = image
            
            if self.augment:
                X[i, ] = Config.train_transform(image=X[i,].astype(np.uint8))['image']
            particle_class = self.images.iloc[idx, 1]
            particle_energy = self.images.iloc[idx, 2]
            y_class[i] = particle_class
            y_energy[i] = particle_energy
        if self.is_classification:
            return X / 255.0, y_class
        return X / 255.0, y_energy


In [122]:
train_datagen = DataGenerator(train_images, batch_size = Config.BATCH_SIZE, is_classification = True, augment = False)
valid_datagen = DataGenerator(valid_images, batch_size = Config.BATCH_SIZE, is_classification = True)
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = True, shuffle = False)

In [123]:
# Example data extract

X, y_class = train_datagen.__getitem__(0)

## Class model

In [129]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.densenet import DenseNet121

from tensorflow.keras.applications import EfficientNetB0
import numpy as np

base_model = DenseNet121(weights='imagenet', include_top=False)

In [133]:
from keras import layers as L
from keras.models import Model
import tensorflow as tf
import keras

earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    
def create_classification_model(base_model):
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    #x = L.Dense(512, activation='relu')(x)
    x = L.Dense(128, activation='relu')(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = L.Dense(1, activation='sigmoid')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer=tensorflow.keras.optimizers.RMSprop(learning_rate=1e-3), loss='binary_crossentropy', metrics = ['AUC'])
    return model

In [131]:
model = create_classification_model(base_model)

In [134]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = test_datagen, 
    validation_steps = test_images.shape[0],#Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150

KeyboardInterrupt: 

In [49]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[0.17558863759040833, 0.981806218624115]

In [107]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[1.1403473615646362, 0.5972222685813904]

In [84]:
from sklearn.metrics import accuracy_score, roc_auc_score
roc_auc_score(test_images['class'].values, model.predict_generator(test_datagen, steps=test_images.shape[0]).reshape(-1))

[0.]
[0.]
[0.]
[0.]
[0.]
[0.]
[0.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]




0.7222222222222222

In [51]:
model.save(OUTPUT_PATH / 'models' / 'cnn_classification')

## Regression model

In [50]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np

base_model = MobileNetV2(weights='imagenet', include_top=False)

In [51]:
from keras import layers as L
from keras.models import Model
import tensorflow as tf
import keras

earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    
def create_classification_model(base_model):
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = L.Dense(512, activation='relu')(x)
    x = L.Dense(64, activation='relu')(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = L.Dense(1)(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='rmsprop', loss='mean_absolute_error', metrics = ['mean_absolute_error'])
    return model

In [52]:
model = create_classification_model(base_model)

In [53]:
train_datagen = DataGenerator(train_images, batch_size = Config.BATCH_SIZE, is_classification = False)
valid_datagen = DataGenerator(valid_images, batch_size = Config.BATCH_SIZE, is_classification = False)
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = False)

In [54]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = valid_datagen, 
    validation_steps = Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60


<tensorflow.python.keras.callbacks.History at 0x7fccce4b4210>

In [279]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[2.1931848526000977, 2.1931848526000977]

In [298]:
valid_datagen = DataGenerator(valid_images, batch_size = 1, is_classification = False, shuffle = False)
validation_preds = model.predict_generator(valid_datagen, verbose = 1)

  13/3348 [..............................] - ETA: 29s





In [299]:
from sklearn.metrics import mean_absolute_error
Y_pred = validation_preds.reshape(-1)
Y_true = valid_images['energy']
mean_absolute_error(Y_pred, Y_true)

2.2237342863545053

In [300]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[7.8741021156311035, 7.8741021156311035]

In [301]:
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = False, shuffle = False)
test_preds = model.predict_generator(test_datagen, verbose = 1)





In [302]:
Y_pred = test_preds
Y_true = test_images['energy']
mean_absolute_error(Y_pred, Y_true)

3.7480902622143426

In [214]:
model.save(OUTPUT_PATH / 'models' / 'cnn_energy')

## Predict

In [146]:
class PredictDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, transform=None, batch_size=32,  shuffle=True):
        self.images = images
        self.indices = np.arange(len(images))
        self.transform = transform
        self.batch_size = batch_size
        self.shuffle = shuffle
        
        self.on_epoch_end()

    def __len__(self):
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
    
        X = self.__get_data(batch)
        return X

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = np.zeros((self.batch_size, 150, 150, 3))
        for i, idx in enumerate(batch):
            image=cv2.imread(str(self.images.iloc[idx, 3]))[225:375, 225:375, :]
            X[i,] = image

        return X / 255.0


In [136]:
import glob
private_test = glob.glob(str(PRIVATE_PATH / '**/*.png'), recursive=True)
public_test = glob.glob(str(PUBLIC_PATH / '**/*.png'), recursive=True)

In [139]:
sample_submission = pd.read_csv(INPUT_PATH / 'track1_leak.csv')
private_ids = [t.split('/')[-1].split('.')[0] for t in private_test]
public_ids = [t.split('/')[-1].split('.')[0] for t in public_test]

sample_submission['path'] = sample_submission['id'].apply(lambda x: PRIVATE_PATH / f'{x}.png' if x in private_ids else PUBLIC_PATH / f'{x}.png')

In [147]:
prediction_datagen = PredictDataGenerator(sample_submission, shuffle = False, batch_size = 4)

In [148]:
####################################
# PREDICT WITH CLASSIFICATION MODEL
####################################

cnn_classification = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_classification')
y_pred_class = cnn_classification.predict_generator(prediction_datagen, verbose = 1)



In [None]:
# 0 - [:750]
# 1 - [750:1502]
# 0 - [1502:7531]
# 1 - [7531:]

In [None]:
y_pred_class.reshape(-1)[1503:7532]

In [320]:
####################################
# PREDICT WITH REGRESSION MODEL
####################################

cnn_regression = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_energy')
y_pred_energy = cnn_regression.predict_generator(prediction_datagen, verbose = 1)
y_pred_energy = np.vectorize(CLASS2ENERGY.get)(np.argmax(y_pred_energy, axis =1 ))



In [326]:
sample_submission['classification_predictions'] = (y_pred_class.reshape(-1) > 0.5).astype(int)
sample_submission['regression_predictions'] = y_pred_energy
sample_submission.drop(columns = ['path']).to_csv(OUTPUT_PATH / 'predictions' / 'prediction.csv', index = None)