In [180]:
#####################
# IMPORT LIBS
#####################

from PIL import Image
from sklearn.model_selection import train_test_split
from skimage import io, transform
from skimage.transform import AffineTransform, warp
from sklearn.metrics import roc_auc_score, mean_absolute_error
import tensorflow as tf

from tensorflow.keras.models import load_model
from tensorflow.compat.v1.keras import backend as K

import pandas as pd
from pathlib import Path
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
import glob
import albumentations as A
# from efficientnet_pytorch import EfficientNet


#####################
# SET CONSTANTS
#####################

INPUT_PATH = Path('../input')
OUTPUT_PATH = Path('../output')
TRAIN_PATH = INPUT_PATH / 'idao_dataset' / 'train'
PRIVATE_PATH = INPUT_PATH / 'idao_dataset' / 'private_test'
PUBLIC_PATH = INPUT_PATH / 'idao_dataset' / 'public_test'

RANDOM_SEED = 4444

ENERGY2CLASS = {
    1: [1, 0, 0, 0, 0, 0],
    3: [0, 1, 0, 0, 0, 0],
    6: [0, 0, 1, 0, 0, 0],
    10: [0, 0, 0, 1, 0, 0],
    20: [0, 0, 0, 0, 1, 0],
    30: [0, 0, 0, 0, 0, 1]
    
}

CLASS2ENERGY = {
    0: 1,
    1: 3,
    2: 6,
    3: 10,
    4: 20,
    5: 30
}

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(RANDOM_SEED)

In [168]:
class Config:
    BATCH_SIZE = 32
    TRAINING_EPOCHS = 60
    VALIDATION_STEPS_PER_EPOCH = 5
    VALIDATION_EPOCHS = 10
    STEPS_PER_EPOCH = 30
    EARLY_STOP_PATIENCE = 5
    
    
    # Declare an augmentation pipeline
    train_transform = A.Compose([
        #A.HorizontalFlip(p=0.5),
        A.Cutout(num_holes=4, max_h_size=8, max_w_size=8, p=0.3),
        A.OneOf([A.RandomContrast(),
             A.RandomGamma(),
             A.RandomBrightness()],p=0.2),
        A.OneOf([A.Blur(p = 0.3),
             A.GaussNoise(p=0.3)
                ],p=0.5),
        A.CLAHE(clip_limit=4, tile_grid_size=(8,8), always_apply=False, p=0.3),
        A.Normalize(p=1)
    ],)
    #keypoint_params=A.KeypointParams(format='xy'))
    
    validation_transform = A.Compose([
        A.Normalize(p=1)
    ],)
    #keypoint_params=A.KeypointParams(format='xy'))

In [20]:
def getFeatures(img_path):
    if len(img_path.split('_')) == 18:
        particle_class = 0 # ER
        particle_energy = int(img_path.split('_')[7])
    else:
        particle_class = 1 # HE
        particle_energy = int(img_path.split('_')[8])
    return [img_path, particle_class, particle_energy]

images = glob.glob(str(TRAIN_PATH / '**/*.png'), recursive=True)
images = pd.DataFrame(map(getFeatures, images))
images.columns = ['path', 'class', 'energy']

In [30]:
#################
# EXTRACT TEST
#################

# HE - 0, ER - 1

he_test_idx = list(images[(images['class'] == 0) & (images['energy'].apply(lambda x: x in [1, 6, 20]))].index)
er_test_idx = list(images[(images['class'] == 1) & (images['energy'].apply(lambda x: x in [3, 10, 30]))].index)

In [31]:
test_idx = he_test_idx + er_test_idx
test_images = images.iloc[test_idx]
images = images.drop(index = test_idx)

train_images, valid_images = train_test_split(images, shuffle = True, random_state = RANDOM_SEED)
train_images = train_images.reset_index(drop = True)
valid_images = valid_images.reset_index(drop = True)

In [32]:
def calc_metric(y_binary_true, y_binary_pred, y_reg_true, y_reg_pred):
    '''
    Competition metric
    '''
    
    roc = roc_auc_score(y_binary_true, y_binary_pred)
    mae = mean_absolute_error(y_reg_true, y_reg_pred)
    return 1000 * (roc - mae), roc, mae

In [296]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, transform=None, batch_size=32,  shuffle=True, is_classification = True):
        self.images = images
        self.indices = np.arange(len(images))
        self.transform = transform
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_classification = is_classification
        
        self.on_epoch_end()

    def __len__(self):
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
    
        X, y = self.__get_data(batch)
        return X, y

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle == True:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = np.zeros((self.batch_size, 150, 150, 3))
        y_class = np.zeros((self.batch_size,))
        y_energy = np.zeros((self.batch_size,6))
        for i, idx in enumerate(batch):
            image=cv2.imread(self.images.iloc[idx, 0])[225:375, 225:375, :]
            X[i,] = image
            particle_class = self.images.iloc[idx, 1]
            particle_energy = self.images.iloc[idx, 2]
            y_class[i] = particle_class
            y_energy[i] = particle_energy
        if self.is_classification:
            return X / 255.0, y_class

        return X / 255.0, y_energy


In [297]:
train_datagen = DataGenerator(train_images, batch_size = Config.BATCH_SIZE, is_classification = True)
valid_datagen = DataGenerator(valid_images, batch_size = Config.BATCH_SIZE, is_classification = True)
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = True)

In [61]:
# Example data extract

X, y_class = train_datagen.__getitem__(0)

## Class model

In [154]:
from keras.applications.vgg16 import VGG16
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

mobilenet_model = MobileNetV2(weights='imagenet', include_top=False)

In [158]:
from keras import layers as L
from keras.models import Model
import tensorflow as tf
import keras

earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    
def create_classification_model(base_model):
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = L.Dense(128, activation='relu')(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = L.Dense(1, activation='sigmoid')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = True

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics = ['AUC'])
    return model

In [159]:
model = create_classification_model(mobilenet_model)

In [160]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = valid_datagen, 
    validation_steps = Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60


<tensorflow.python.keras.callbacks.History at 0x7f1b7032e910>

In [163]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[0.3105323016643524, 0.968234658241272]

In [164]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[0.9347938895225525, 0.7222222089767456]

In [165]:
model.save(OUTPUT_PATH / 'models' / 'cnn_classification')

## Regression model

In [274]:
from keras.applications.vgg16 import VGG16
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

mobilenet_model = MobileNetV2(weights='imagenet', include_top=False)

In [275]:
from keras import layers as L
from keras.models import Model
import tensorflow as tf
import keras

earlystop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
    
def create_classification_model(base_model):
    x = base_model.output
    x = L.GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = L.Dense(512, activation='relu')(x)
    x = L.Dense(128, activation='relu')(x)
    x = L.Dense(32, activation='relu')(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = L.Dense(1)(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='rmsprop', loss='mean_absolute_error', metrics = ['mean_absolute_error'])
    return model

In [276]:
model = create_classification_model(mobilenet_model)

In [277]:
train_datagen = DataGenerator(train_images, batch_size = Config.BATCH_SIZE, is_classification = False)
valid_datagen = DataGenerator(valid_images, batch_size = Config.BATCH_SIZE, is_classification = False)
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = False)

In [278]:
model.fit(
    train_datagen, 
    steps_per_epoch = Config.STEPS_PER_EPOCH, 
    validation_data = valid_datagen, 
    validation_steps = Config.VALIDATION_STEPS_PER_EPOCH, 
    epochs = Config.TRAINING_EPOCHS,
    verbose = 1,
    callbacks = [earlystop]
    )

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60


<tensorflow.python.keras.callbacks.History at 0x7f16c2a1d9d0>

In [279]:
model.evaluate(valid_datagen, batch_size=64, steps=valid_images.shape[0] // 64)



[2.1931848526000977, 2.1931848526000977]

In [298]:
valid_datagen = DataGenerator(valid_images, batch_size = 1, is_classification = False, shuffle = False)
validation_preds = model.predict_generator(valid_datagen, verbose = 1)

  13/3348 [..............................] - ETA: 29s





In [299]:
from sklearn.metrics import mean_absolute_error
Y_pred = validation_preds.reshape(-1)
Y_true = valid_images['energy']
mean_absolute_error(Y_pred, Y_true)

2.2237342863545053

In [300]:
model.evaluate(test_datagen, batch_size=1, steps=test_images.shape[0])



[7.8741021156311035, 7.8741021156311035]

In [301]:
test_datagen = DataGenerator(test_images, batch_size = 1, is_classification = False, shuffle = False)
test_preds = model.predict_generator(test_datagen, verbose = 1)





In [302]:
Y_pred = test_preds
Y_true = test_images['energy']
mean_absolute_error(Y_pred, Y_true)

3.7480902622143426

In [214]:
model.save(OUTPUT_PATH / 'models' / 'cnn_energy')

## Predict

In [313]:
class PredictDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, images, transform=None, batch_size=32,  shuffle=True):
        self.images = images
        self.indices = np.arange(len(images))
        self.transform = transform
        self.batch_size = batch_size
        self.shuffle = True
        
        self.on_epoch_end()

    def __len__(self):
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
    
        X = self.__get_data(batch)
        return X

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle == True:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = np.zeros((self.batch_size, 150, 150, 3))
        for i, idx in enumerate(batch):
            image=cv2.imread(str(self.images.iloc[idx, 3]))[225:375, 225:375, :]
            X[i,] = image

        return X / 255.0


In [314]:
import glob
private_test = glob.glob(str(PRIVATE_PATH / '**/*.png'), recursive=True)
public_test = glob.glob(str(PUBLIC_PATH / '**/*.png'), recursive=True)

In [315]:
sample_submission = pd.read_csv(INPUT_PATH / 'track1_predictions_example.csv')
private_ids = [t.split('/')[-1].split('.')[0] for t in private_test]
public_ids = [t.split('/')[-1].split('.')[0] for t in public_test]

sample_submission['path'] = sample_submission['id'].apply(lambda x: PRIVATE_PATH / f'{x}.png' if x in private_ids else PUBLIC_PATH / f'{x}.png')

In [316]:
prediction_datagen = PredictDataGenerator(sample_submission, shuffle = False, batch_size = 4)

In [318]:
####################################
# PREDICT WITH CLASSIFICATION MODEL
####################################

cnn_classification = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_classification')
y_pred_class = cnn_classification.predict_generator(prediction_datagen, verbose = 1)





In [320]:
####################################
# PREDICT WITH REGRESSION MODEL
####################################

cnn_regression = keras.models.load_model(OUTPUT_PATH / 'models' / 'cnn_energy')
y_pred_energy = cnn_regression.predict_generator(prediction_datagen, verbose = 1)
y_pred_energy = np.vectorize(CLASS2ENERGY.get)(np.argmax(y_pred_energy, axis =1 ))



In [322]:
sample_submission['classification_predictions'] = y_pred_class.reshape(-1)
sample_submission['regression_predictions'] = y_pred_energy
sample_submission.drop(columns = ['path']).to_csv(OUTPUT_PATH / 'predictions' / 'prediction.csv')

In [324]:
sample_submission.drop(columns = ['path'])

Unnamed: 0,id,classification_predictions,regression_predictions
0,0002894871bb30af2670648c58b2506e9801a321,0.956455,30
1,000a95ca23df016a149ff9af94b6e9d8633d6691,0.366795,3
2,000b4407dee7dad2ba46586e6ba1264f45965e51,0.101904,10
3,000c45d729066044e3dd3f885fddf013b845b6d7,0.971503,30
4,0011f80e44faa713714ead6aeab3b2f9db54aebd,0.593469,6
...,...,...,...
16559,ffdfd25b2f522ba2312dda90189ec48205578199,0.958916,10
16560,ffe29ac691148f84c7a7759e687aa6c2f0eac11a,0.595683,3
16561,ffedc12ec5c142dae2086408b778cf0db2ff944e,0.000169,1
16562,fff423086a80114984b647af69f710cf3ef844f9,0.969830,3
