In [87]:
import os
import cv2
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_addons as tfa
import seaborn as sns

from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter as Filter
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import InputLayer, Conv2D, MaxPooling2D, Input, Activation, Add, ReLU
from tensorflow.keras.layers import BatchNormalization, Dropout, AveragePooling2D, Dense, Flatten
from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

* #### config

In [91]:
# --- Dataset locations (Kaggle input mounts) ---
train_meta_data = '../input/unifesp-xray-body-part-classifier-dataset-jpeg/data/train.csv'
train_data_dir = '../input/unifesp-xray-body-part-classifier-dataset-jpeg/data/train'

# --- Reproducibility ---
# The notebook draws from `random`, `np.random` and TensorFlow's RNG (augmentation,
# weight initialisation), so all three are seeded here, before anything random runs.
seed = 48
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# --- Training hyper-parameters ---
epochs = 200
lr = 1e-3
valid_split = 0.2   # fraction of the training frame held out for validation
input_size = 224    # images are fed to the network as input_size x input_size x 1
batch_size = 32
classes = 22
initializer = tf.keras.initializers.HeUniform()
optimizer = tf.keras.optimizers.Nadam(learning_rate=lr)
loss = tf.keras.losses.categorical_crossentropy
# NOTE(review): weight_decay is defined but not referenced by any cell visible here.
weight_decay = 5e-4

## Pre-processing pipeline

In [3]:
# Load one sample radiograph and keep it both as an array with a trailing
# channel axis and as a 3-channel tensor (needed by adjust_saturation below).
sample_path = '../input/unifesp-xray-body-part-classifier-dataset-jpeg/data/train/10093718510761734264315711288650185574.jpg'
test_image = Image.open(sample_path)
test_arr = np.asarray(test_image)[:, :, np.newaxis]
test_arr_rgb = tf.image.grayscale_to_rgb(tf.convert_to_tensor(test_arr))

In [4]:
# Display the untouched sample image.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
ax.imshow(test_arr, cmap=plt.cm.gray)

#### Augmentations applied:

* edge enhance
* de-texturization
* flip left-right
* flip up-down
* random cropping

In [5]:
# Mean adaptive threshold of the sample: output values are 0 or 255, using an
# 11x11 neighbourhood mean with a constant of 2.01 subtracted from it.
ada_thold = cv2.adaptiveThreshold(test_arr, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2.01)

In [8]:
# Original (left) next to its adaptive-threshold version (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax, picture in zip(axes, (test_arr, ada_thold)):
    ax.imshow(picture, cmap=plt.cm.gray)
    ax.axis('off')

In [6]:
# Laplacian of the sample, computed at 64-bit float depth with a 5x5 aperture.
laplacian = cv2.Laplacian(test_arr,cv2.CV_64F, ksize=5)

In [7]:
# Original (left) next to its Laplacian response (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax, picture in zip(axes, (test_arr, laplacian)):
    ax.imshow(picture, cmap=plt.cm.gray)
    ax.axis('off')

In [8]:
# PIL's built-in EDGE_ENHANCE_MORE filter applied to the sample image.
edgeEnahnced = test_image.filter(Filter.EDGE_ENHANCE_MORE)

In [9]:
# Original (left) next to the PIL edge-enhanced image (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax, picture in zip(axes, (test_arr, edgeEnahnced)):
    ax.imshow(picture, cmap=plt.cm.gray)
    ax.axis('off')

In [10]:
# Bilateral filter of the sample: neighbourhood diameter 9, sigmaColor 50, sigmaSpace 50.
de_texturize = cv2.bilateralFilter(test_arr,9,50,50)

In [11]:
# Original (left) next to the bilateral-filtered image (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax, picture in zip(axes, (test_arr, de_texturize)):
    ax.imshow(picture, cmap=plt.cm.gray)
    ax.axis('off')

In [12]:
# Original, saturation x1.4 (on the RGB copy) and contrast x0.3, left to right.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12,6], dpi=200)
axes = axes.ravel()

panels = (
    test_arr,
    tf.image.adjust_saturation(test_arr_rgb, 1.4),
    tf.image.adjust_contrast(test_arr, 0.3),
)
for ax, panel in zip(axes, panels):
    ax.imshow(panel, cmap=plt.cm.gray)
    ax.axis('off')

In [13]:
# Show the four half-image crops (top, bottom, left, right) of a second sample.
image = Image.open('../input/unifesp-xray-body-part-classifier-dataset-jpeg/data/train/10166555243811009418614649510306269973.jpg')
image_arr = np.expand_dims(np.asarray(image),2)
n_rows, n_cols = image_arr.shape[:2]
h_center = n_rows//2   # split point along axis 0
v_center = n_cols//2   # split point along axis 1

fig, axes = plt.subplots(nrows=1, ncols=4, figsize=[12,6], dpi=200)
axes = axes.ravel()

half_crops = (
    image_arr[:h_center,:,:],
    image_arr[h_center:,:,:],
    image_arr[:,:v_center,:],
    image_arr[:,v_center:,:],
)
for ax, crop in zip(axes, half_crops):
    ax.imshow(crop, cmap=plt.cm.gray)
    ax.axis('off')

* ##### resize cropped images without destroying aspect ratio

In [14]:
# Take the top half of the sample, drop the channel axis and convert it to an
# 8-bit greyscale PIL image so that PIL can shrink it.
top_half = np.squeeze(image_arr[:h_center,:,:], axis=2)
img = Image.fromarray(top_half).convert('L')
# Image.thumbnail works in place and keeps the aspect ratio within 400x400.
img.thumbnail((400, 400))

plt.imshow(img, cmap=plt.cm.gray)

In [15]:
def edge_enhancing(array):
    """Apply one randomly chosen edge-emphasising transform to ``array``.

    One of three techniques is drawn uniformly with ``np.random.choice``:
    mean adaptive thresholding, a 5x5 Laplacian, or PIL's
    ``EDGE_ENHANCE_MORE`` filter. The result always gets a new axis inserted
    at position 2 before being returned.

    The PIL branch squeezes axis 2 of the input, so ``array`` needs a size-1
    third axis there; the callers visible in this notebook pass (H, W, 1)
    8-bit images. Note that the Laplacian branch is computed at
    ``cv2.CV_64F`` depth, unlike the other two branches.
    """
    technique = np.random.choice(['ada_thold', 'laplacian', 'edge_enahnced'])

    if technique == 'laplacian':
        enhanced = cv2.Laplacian(array,cv2.CV_64F, ksize=5)
    elif technique == 'ada_thold':
        enhanced = cv2.adaptiveThreshold(array, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 1)
    else:
        pil_image = Image.fromarray(np.squeeze(array, axis=2)).convert('L')
        enhanced = np.asarray(pil_image.filter(Filter.EDGE_ENHANCE_MORE))

    return np.expand_dims(enhanced, 2)

In [16]:
def de_texturization(array):
    """Smooth ``array`` with a bilateral filter using randomly drawn settings.

    The neighbourhood diameter is drawn from {5, 9, 13, 15} and a single sigma
    from {50, 65, 75}; that sigma is used for both the colour and the spatial
    term. The filtered image is returned with a new axis inserted at position 2.
    """
    diameter = np.random.choice([5, 9, 13, 15])
    sigma = np.random.choice([50, 65, 75])

    smoothed = cv2.bilateralFilter(array, diameter, sigma, sigma)
    return np.expand_dims(smoothed, 2)

In [17]:
def random_flip(array):
    """Pick a flip direction at random and hand ``array`` to TensorFlow's random flip.

    ``random.choice`` selects between the left-right and the up-down variant;
    the chosen ``tf.image.random_flip_*`` op then decides by itself whether the
    image is actually flipped. The result is returned as a NumPy array.
    """
    if random.choice([True, False]):
        flip_op = tf.image.random_flip_left_right
    else:
        flip_op = tf.image.random_flip_up_down

    return flip_op(array).numpy()

In [18]:
def tumbnail(array, shape=(512,512)):
    """Resize ``array`` to exactly ``shape`` with ``cv2.resize``.

    ``shape`` is passed straight through as OpenCV's ``dsize`` argument, so the
    output always has the requested size; the aspect ratio of the input is not
    preserved unless ``shape`` happens to match it.
    """
    resized = cv2.resize(array, shape)
    return resized

In [19]:
def random_crop(array):
    """Return a randomly chosen half of ``array``.

    One of ``'left'``, ``'right'``, ``'top'`` or ``'down'`` is drawn uniformly
    with ``np.random.choice`` and the matching half is returned as a slice of
    the input, not a copy. Axis 0 is split for top/down and axis 1 for
    left/right, each at its integer midpoint.

    Any array with at least two axes is accepted; trailing axes (such as a
    channel axis) are kept untouched.
    """
    side = np.random.choice(['left', 'right', 'top', 'down'])
    n_rows, n_cols = array.shape[:2]
    h_center = n_rows // 2
    v_center = n_cols // 2

    if side == 'left':
        return array[:, :v_center]
    if side == 'right':
        return array[:, v_center:]
    if side == 'top':
        return array[:h_center]
    # The only remaining possibility is 'down'.
    return array[h_center:]

In [20]:
# Draw four independent samples of the random edge-enhancement augmentation.
image = Image.open('../input/unifesp-xray-body-part-classifier-dataset-jpeg/data/train/10166555243811009418614649510306269973.jpg')
image_arr = np.expand_dims(np.asarray(image), axis=2)
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax in axes:
    ax.imshow(edge_enhancing(image_arr), cmap=plt.cm.gray)
    ax.axis('off')

In [21]:
# Draw four independent samples of the random bilateral-filter augmentation.
image_arr = np.expand_dims(np.asarray(image), axis=2)
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax in axes:
    ax.imshow(de_texturization(image_arr), cmap=plt.cm.gray)
    ax.axis('off')

In [22]:
# Draw four random half-crops, each resized with tumbnail's default shape.
image_arr = np.expand_dims(np.asarray(image), axis=2)
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=[12,6], dpi=200)
axes = axes.ravel()

for ax in axes:
    img = tumbnail(random_crop(image_arr))
    ax.imshow(img, cmap=plt.cm.gray)
    ax.axis('off')

In [92]:
def pre_procrssing(image):
    """Randomly augment one image; used as the generator's ``preprocessing_function``.

    With probability 0.55 the image is returned unchanged (as a NumPy array).
    Otherwise it is bilateral-filtered, cropped (either a random 128x128x1
    window or a random half of the image, chosen with equal probability),
    resized to ``input_size`` x ``input_size`` and returned with a new axis
    inserted at position 2.
    """
    pixels = np.asarray(image)

    augment = np.random.choice([True, False], p=[0.45, 0.55])
    if not augment:
        return pixels

    # other augmentations
    pixels = de_texturization(pixels)

    # crop
    use_window_crop = random.choice([True, False])
    if use_window_crop:
        pixels = tf.image.random_crop(pixels, (128,128,1)).numpy()
    else:
        pixels = random_crop(pixels)

    pixels = tumbnail(pixels, (input_size, input_size))
    return np.expand_dims(pixels, 2)

### Test Pre-processing Pipeline

In [93]:
# Run the pre-processing pipeline on a grid of distinct training images.
# `train_data_dir` (config cell) is reused instead of repeating the path, and
# random.sample draws without replacement so no image appears twice.
files = os.listdir(train_data_dir)
rand_files = random.sample(files, k=min(24, len(files)))

fig, axes = plt.subplots(nrows=4, ncols=6, figsize=[12,8], dpi=200)
axes = axes.ravel()

for ax, file_name in zip(axes, rand_files):
    img = Image.open(os.path.join(train_data_dir, file_name))
    img = pre_procrssing(img)
    ax.imshow(img, cmap=plt.cm.gray)
    ax.axis('off')

In [25]:
# Load only the two columns the rest of the notebook uses.
train_df = pd.read_csv(
    '../input/xray-body-images-in-png-unifesp-competion/train_df.csv',
    usecols=['image_path', 'Target'],
)
train_df

In [26]:
# Remove leading/trailing whitespace from the label strings.
train_df = train_df.assign(Target=train_df['Target'].str.strip())

In [27]:
# Positions of rows whose label string is longer than two characters;
# the following cells treat these rows as carrying more than one label.
multi_labels = []
for position, target in enumerate(train_df['Target']):
    if len(target) > 2:
        multi_labels.append(position)

len(multi_labels)

In [28]:
# For every multi-label row keep the image path and only the first
# space-separated label.
corrected_labels = [
    [train_df.loc[row, 'image_path'], train_df.loc[row, 'Target'].split(' ')[0]]
    for row in multi_labels
]

In [29]:
# Swap the multi-label rows for their single-label replacements.
single_label_rows = train_df.drop(train_df.loc[multi_labels].index)
first_label_rows = pd.DataFrame(corrected_labels, columns=['image_path', 'Target'])

train_df = pd.concat([single_label_rows, first_label_rows], ignore_index=True)
train_df

In [39]:
train_df

In [43]:
# Point the relative './images' prefix at the Kaggle dataset mount.
# regex=False makes this a literal replacement on every pandas version: when
# './images' is treated as a regex, '.' matches any character, so the already
# rewritten paths ('...competion/images/...') would match again on a re-run.
train_df['image_path'] = train_df['image_path'].str.replace(
    './images',
    '../input/xray-body-images-in-png-unifesp-competion/images',
    regex=False,
)

## Create more training samples

In [65]:
# For every training image write four edge-enhanced central crops to ./train
# and record (label, path) for each file written.
new_train_data = []

os.makedirs('./train', exist_ok=True)

crop_scales = [0.5, 0.65, 0.8, 0.95]
n_images = train_df.shape[0]

# Columns are addressed by name so the loop does not depend on the column
# order of train_df.
for j, (tar, file) in enumerate(zip(train_df['Target'], train_df['image_path'])):
    img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise FileNotFoundError(f'could not read image: {file}')
    img = np.expand_dims(img, 2)

    for i, scale in enumerate(crop_scales):
        new_file_path = f"./train/c-{i}-{file.split('/')[-1]}"
        temp = tf.image.central_crop(img, scale).numpy()
        temp = edge_enhancing(temp)
        cv2.imwrite(new_file_path, temp)
        new_train_data.append([tar, new_file_path])

        print(f'{j}/{n_images} -- {i+1}', end='\r')

In [70]:
# Preview 24 randomly drawn files from the generated ./train folder.
files = os.listdir('./train/')
rand_files = random.choices(files, k=24)

fig, axes = plt.subplots(nrows=4, ncols=6, figsize=[12,8], dpi=200)
axes = axes.ravel()

for i, ax in enumerate(axes):
    img = Image.open(os.path.join('./train',rand_files[i]))
    ax.imshow(img, cmap=plt.cm.gray)
    ax.axis('off')

In [67]:
# Turn the [label, path] records collected while writing the crops into a frame.
# NOTE(review): this rebinds `new_train_data` from a list to a DataFrame.
new_train_data = pd.DataFrame(new_train_data, columns=['Target', 'image_path'])
new_train_data

In [69]:
# Append the generated crops to the original rows (columns are aligned by name).
# NOTE(review): train_df is rebound in place, so re-running this cell appends
# the crops a second time.
train_df = pd.concat([train_df, new_train_data], ignore_index=True)
train_df

## Build Model

In [71]:
# Split by *source image*, not by row. train_df holds every original image plus
# the crops generated from it as './train/c-<i>-<original file name>'. A plain
# row-wise split puts crops of one radiograph on both sides, which leaks test
# images into training. Stripping the 'c-<i>-' prefix recovers the source name.
source_name = (train_df['image_path']
               .str.split('/').str[-1]
               .str.replace(r'^c-\d+-', '', regex=True))

train_sources, test_sources = train_test_split(source_name.unique(),
                                               test_size=0.1,
                                               random_state=48)

in_test = source_name.isin(test_sources)

# Rows are shuffled (with a fixed seed) as train_test_split would have done;
# this matters because Keras' validation_split slices the frame by position.
train = train_df.loc[~in_test, ['image_path', 'Target']].sample(frac=1, random_state=48)
test = train_df.loc[in_test, ['image_path', 'Target']].sample(frac=1, random_state=48)
train.shape, test.shape

### Config Data Loaders

In [95]:
# Training generator: geometric augmentation plus the custom pre-processing.
generator = ImageDataGenerator(rescale=1 / 255,
                               rotation_range=10,
                               width_shift_range=0.25,
                               height_shift_range=0.25,
                               shear_range=0.2,
                               horizontal_flip=True,
                               vertical_flip=True,
                               samplewise_center=True,
                               samplewise_std_normalization=True,
                               validation_split=valid_split,
                               preprocessing_function=pre_procrssing
                              )

# Validation generator: same normalisation and the same validation_split, but
# no augmentation. Drawing validation batches from `generator` would augment
# them as well, making val_loss (which drives early stopping and LR reduction)
# noisy. The split is positional on the dataframe, so using the same frame and
# the same fraction yields exactly the rows the training subset leaves out.
valid_generator = ImageDataGenerator(rescale=1 / 255,
                                     samplewise_center=True,
                                     samplewise_std_normalization=True,
                                     validation_split=valid_split)

test_generator = ImageDataGenerator(rescale=1 / 255,
                                    samplewise_center=True,
                                    samplewise_std_normalization=True)

train_datagen = generator.flow_from_dataframe(dataframe=train,
                                              x_col='image_path',
                                              y_col='Target',
                                              target_size=(input_size, input_size),
                                              batch_size=batch_size,
                                              color_mode='grayscale',
                                              subset='training',
                                              seed=48)

# Label order the model is trained with; reused below so that every loader
# encodes a class name to the same output index.
class_names = list(train_datagen.class_indices)

valid_datagen = valid_generator.flow_from_dataframe(dataframe=train,
                                                    x_col='image_path',
                                                    y_col='Target',
                                                    target_size=(input_size, input_size),
                                                    batch_size=batch_size,
                                                    color_mode='grayscale',
                                                    subset='validation',
                                                    shuffle=False,
                                                    seed=48)

# Without `classes`, the index map is derived from the labels present in
# `test`; if a class is missing there, the one-hot targets no longer line up
# with the model's outputs.
test_datagen = test_generator.flow_from_dataframe(dataframe=test,
                                                  x_col='image_path',
                                                  y_col='Target',
                                                  classes=class_names,
                                                  color_mode='grayscale',
                                                  batch_size=batch_size,
                                                  target_size=(input_size, input_size),
                                                  shuffle=False,
                                                  seed=48)

#### Train mini batch

In [96]:
# Preview the images of one training batch on a 4x8 grid.
fig, axes = plt.subplots(nrows=4, ncols=8, figsize=[32, 10], dpi=200)
axes = axes.ravel()

batch_images = train_datagen.next()[0]
for i, arr in enumerate(batch_images):
    axes[i].imshow(tf.keras.utils.array_to_img(arr), cmap=plt.cm.gray)

plt.show()

### Validation mini batch

In [97]:
# Preview the images of one validation batch on a 4x8 grid.
fig, axes = plt.subplots(nrows=4, ncols=8, figsize=[32, 10], dpi=200)
axes = axes.ravel()

batch_images = valid_datagen.next()[0]
for i, arr in enumerate(batch_images):
    axes[i].imshow(tf.keras.utils.array_to_img(arr), cmap=plt.cm.gray)

plt.show()

### Test mini batch

In [98]:
# Preview the images of one test batch on a 4x8 grid.
fig, axes = plt.subplots(nrows=4, ncols=8, figsize=[32, 10], dpi=200)
axes = axes.ravel()

batch_images = test_datagen.next()[0]
for i, arr in enumerate(batch_images):
    axes[i].imshow(tf.keras.utils.array_to_img(arr), cmap=plt.cm.gray)

plt.show()

### Baseline Sequential Model

In [99]:
# Stop once val_loss has gone 25 epochs without improving and roll the model
# back to the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=25,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 epochs without val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1,
)

In [100]:
def residual_block(x, filters, kernel=(3, 3), strid=(1, 1), projection=False):
    """Two-convolution residual block with an identity or projected shortcut.

    Main path: Conv -> BatchNorm -> ReLU -> Conv -> BatchNorm, where only the
    first convolution uses ``strid``. The shortcut is ``x`` itself, or, when
    ``projection`` is true, ``x`` passed through a strided Conv + BatchNorm
    with the same ``kernel`` and ``filters``. Both paths are added and passed
    through a final ReLU.

    Args:
        x: input tensor.
        filters: number of filters of every convolution in the block.
        kernel: kernel size of every convolution in the block.
        strid: strides of the first main-path convolution and of the projection.
        projection: whether to transform the shortcut before the addition.

    Returns:
        The output tensor of the block.
    """
    conv_kwargs = dict(filters=filters,
                       kernel_size=kernel,
                       kernel_initializer=initializer,
                       padding='same')

    # Main path.
    main = Conv2D(strides=strid, **conv_kwargs)(x)
    main = BatchNormalization()(main)
    main = Activation(activation=relu)(main)
    main = Conv2D(**conv_kwargs)(main)
    main = BatchNormalization()(main)

    # Shortcut path (layers are created after the main path, as before).
    shortcut = x
    if projection:
        shortcut = Conv2D(strides=strid, **conv_kwargs)(x)
        shortcut = BatchNormalization()(shortcut)

    merged = Add()([main, shortcut])
    return Activation(activation=relu)(merged)


def build(input_shape=(input_size,input_size,1), classes=classes):
    """Build and compile the residual classifier.

    Layout: a 7x7/2 convolution stem with 3x3/2 max pooling, four residual
    stages of 3, 4, 6 and 3 blocks with 64, 128, 256 and 512 filters (every
    stage after the first opens with a strided projection block), global
    average pooling, and a softmax ``Dense`` head with ``classes`` units.

    Global average pooling replaces the earlier fixed ``AveragePooling2D((5,5))``:
    for a 224x224 input the last feature map is 7x7, so a 5x5 window with the
    default stride produced a single output that ignored the last two rows and
    columns. It also removes the dependence of the head on the input size.

    The model is compiled with the module-level ``optimizer`` and ``loss`` and
    reports accuracy and a per-class F1 score (threshold 0.5).

    Args:
        input_shape: shape of one input image, without the batch axis.
        classes: number of output classes.

    Returns:
        The compiled ``Model``; its summary is printed as a side effect.
    """
    input_layer = Input(input_shape)

    # Stem.
    x = Conv2D(filters=64, kernel_size=(7, 7), kernel_initializer=initializer, strides=(2, 2), padding='valid')(input_layer)
    x = BatchNormalization()(x)
    x = Activation(activation=relu)(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

    # Stage 1: three identity blocks at the stem's width.
    for _ in range(3):
        x = residual_block(x, filters=64)

    # Stages 2-4: one down-sampling projection block, then identity blocks.
    for stage_filters, n_blocks in ((128, 4), (256, 6), (512, 3)):
        x = residual_block(x, filters=stage_filters, strid=(2, 2), projection=True)
        for _ in range(n_blocks - 1):
            x = residual_block(x, filters=stage_filters)

    # Average over the whole final feature map; the output is already flat.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    output_layer = Dense(units=classes, activation=softmax)(x)

    model = Model(input_layer, output_layer)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['accuracy', tfa.metrics.F1Score(num_classes=classes, threshold=0.5)])

    model.summary()

    return model

In [101]:
model = build()

### Training and Evaluation

In [102]:
# The generators already yield batches of `batch_size`; Keras documents that
# `batch_size` must not be passed to fit() for generator/Sequence input.
history = model.fit(train_datagen,
                    validation_data=valid_datagen,
                    epochs=epochs,
                    callbacks=[early_stop, reduce_lr])

In [105]:
model.evaluate(test_datagen)

In [127]:
# Predicted class index per test image (test_datagen is not shuffled).
test_probs = model.predict(test_datagen)
test_preds = np.argmax(test_probs, axis=1)

# Model outputs follow the training generator's index map; build its inverse
# once instead of once per prediction.
train_idx_to_label = {v: k for k, v in train_datagen.class_indices.items()}
pred_y = [train_idx_to_label[k] for k in test_preds]

# Take the true labels from the generator itself: they are aligned with the
# prediction order even if the loader dropped rows of the `test` frame.
test_idx_to_label = {v: k for k, v in test_datagen.class_indices.items()}
true_y = np.array([test_idx_to_label[c] for c in test_datagen.classes])

In [128]:
# Overall test accuracy, and a confusion matrix whose rows/columns follow the
# training generator's class order.
class_order = list(train_datagen.class_indices)
acc_score_test = accuracy_score(true_y, pred_y)
conf_metric = confusion_matrix(true_y, pred_y, labels=class_order)

In [180]:
# Row-normalise the confusion matrix so each row (one true class) sums to 1.
# keepdims=True is required: dividing by a flat vector of row sums broadcasts
# along the columns and divides column j by the total of row j.
row_totals = conf_metric.sum(axis=1, keepdims=True)
conf_norm = np.divide(conf_metric,
                      row_totals,
                      out=np.zeros(conf_metric.shape, dtype=float),
                      where=row_totals != 0)   # classes absent from the test set stay 0

# Same label order that was passed to confusion_matrix.
tick_labels = list(train_datagen.class_indices)

plt.figure(figsize=[12,12], dpi=100)
sns.heatmap(np.round(conf_norm, 2),
            cbar=False,
            annot=True,
            annot_kws={"size": 9},
            cmap=plt.cm.Blues,
            xticklabels=tick_labels,
            yticklabels=tick_labels)
# sklearn's confusion_matrix puts true labels on rows (heatmap y-axis) and
# predicted labels on columns (heatmap x-axis).
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

In [109]:
# Accuracy per epoch for the training and validation sets.
plt.figure(figsize=[12,6], dpi=300)
for key, curve_label in (('accuracy', 'train'), ('val_accuracy', 'validation')):
    values = history.history[key]
    sns.lineplot(x=list(range(len(values))), y=values, label=curve_label)
plt.show()

In [110]:
# Mean of the per-class F1 scores per epoch, training vs. validation.
plt.figure(figsize=[12,6], dpi=300)
for key, curve_label in (('f1_score', 'train'), ('val_f1_score', 'validation')):
    mean_f1 = np.mean(history.history[key], axis=1)
    sns.lineplot(x=list(range(len(mean_f1))), y=mean_f1, label=curve_label)
plt.show()

In [111]:
# Loss per epoch for the training and validation sets.
plt.figure(figsize=[12,6], dpi=300)
for key, curve_label in (('loss', 'train'), ('val_loss', 'validation')):
    values = history.history[key]
    sns.lineplot(x=list(range(len(values))), y=values, label=curve_label)
plt.show()

### Saving files

In [112]:
# Persist the per-epoch training history, one column per metric.
pd.DataFrame(history.history).to_csv('model_resnet36.csv', index=False)

In [113]:
model.save('model_resnet36.hdf5')

In [114]:
model.save_weights('model_resnet36_weight.hdf5')

### Inference

In [181]:
test_loc = '../input/xray-body-images-in-png-unifesp-competion/images/test'

# Same normalisation as the other evaluation loaders, no augmentation.
inference_generator = ImageDataGenerator(rescale=1.0/255,
                                         samplewise_center=True,
                                         samplewise_std_normalization=True)

# classes=['.'] makes the test folder itself the single "class", so its files
# are read directly; shuffle=False keeps predictions aligned with .filenames.
test_data = inference_generator.flow_from_directory(directory=test_loc,
                                                    target_size=(input_size, input_size),
                                                    batch_size=batch_size,
                                                    classes=['.'],
                                                    color_mode='grayscale',
                                                    shuffle=False)

In [182]:
preds_p = model.predict(test_data, verbose=1)

In [183]:
inverse_map = {v:k for k,v in train_datagen.class_indices.items()}
inverse_map

In [187]:
test_preds = []

for i in range(len(preds_p)):
    multi_lab = np.where(preds_p[i]>0.5)[0].tolist()
    
    if len(multi_lab) > 1:
        test_preds.append([test_data.filenames[i], ' '.join([inverse_map[m] for m in multi_lab])])
    else:
        test_preds.append([test_data.filenames[i], inverse_map[np.argmax(preds_p[i])]])

In [188]:
test_preds = pd.DataFrame(test_preds, columns=['SOPInstanceUID', 'Target'])
test_preds

In [189]:
# Reduce the loader's file names to the bare identifier by removing the
# '-c.png' and './' fragments. regex=False forces literal matching on every
# pandas version; as regular expressions the '.' in both patterns would match
# any character.
test_preds['SOPInstanceUID'] = (
    test_preds['SOPInstanceUID']
    .str.replace('-c.png', '', regex=False)
    .str.replace('./', '', regex=False)
)
test_preds

In [191]:
test_preds.to_csv('model_submission_v3.csv', index=False)

In [192]:
pd.options.display.max_rows = 150
test_preds.Target.value_counts()