https://www.kaggle.com/code/thedevastator/train-infer-coatnet-efficientnet  
https://www.kaggle.com/code/datark1/eda-images-processing-and-exploration  

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import cv2
from IPython.display import clear_output
import tifffile
from PIL import Image
from tqdm.auto import tqdm
import plotly.express as px
import binascii
import struct
import scipy
import scipy.misc
import scipy.cluster
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn import utils
from keras import backend as K
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization
from keras.losses import SparseCategoricalCrossentropy

# Root directory of the competition data; every CSV and image path is built from it.
BASE_PATH = "../input/mayo-clinic-strip-ai/"
# Raise Pillow's pixel-count limit so the very large slide images can be opened.
Image.MAX_IMAGE_PIXELS = 5_000_000_000

# Read the four metadata tables in one pass.
df_train, df_test, df_other, df_sub = (
    pd.read_csv(os.path.join(BASE_PATH, csv_name))
    for csv_name in ("train.csv", "test.csv", "other.csv", "sample_submission.csv")
)

# Quick size summary: rows are images, patients are counted by distinct patient_id.
print('Images (train): ', len(df_train))
print('Images (test): ', len(df_test))
print('Patients (train): ', len(df_train["patient_id"].unique()))
print('Patients (test): ', len(df_test["patient_id"].unique()))

In [None]:
def read_image(image_id, dset, scale=None, verbose=1):
    """Load the first page of a TIFF image and optionally downscale it.

    Args:
        image_id: File stem; the file read is ``BASE_PATH/<dset>/<image_id>.tif``.
        dset: Sub-directory of ``BASE_PATH`` that contains the image.
        scale: Optional integer divisor applied to both image dimensions.
            A falsy value (``None`` or ``0``) skips resizing and rotation.
        verbose: When truthy, print the image shape before and after resizing.

    Returns:
        tuple: ``(image, tif_tags)`` where ``image`` is the pixel array and
        ``tif_tags`` maps the first page's tag names to their values, with
        the ``TileOffsets`` / ``TileByteCounts`` entries removed.
    """
    path = os.path.join(BASE_PATH, dset, f"{image_id}.tif")
    with tifffile.TiffFile(path) as tif:
        first_page = tif.pages[0]
        tif_tags = {tag.name: tag.value for tag in first_page.tags.values()}
        # pop() with a default instead of del: a TIFF that is not stored in
        # tiles has no such tags, and del would raise KeyError for it.
        tif_tags.pop("TileOffsets", None)
        tif_tags.pop("TileByteCounts", None)
        image = first_page.asarray()
    if verbose:
        print(f"[{image_id}] Image shape: {image.shape}")
    if scale:
        # cv2.resize takes (width, height); clamp to 1 so an image smaller
        # than ``scale`` pixels does not produce a zero-sized target.
        new_size = (max(1, image.shape[1] // scale),
                    max(1, image.shape[0] // scale))
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
        # Clearly landscape images (width > 1.5 * height) are turned upright
        # by a transpose followed by a vertical flip.
        if image.shape[1] > 1.5 * image.shape[0]:
            image = cv2.flip(cv2.transpose(image), flipCode=0)
        if verbose:
            print(f"[{image_id}] Resized Image shape: {image.shape}")
    return image, tif_tags
        
def resizeAndPad(img, size, padColor=0):
    """Resize ``img`` to fit inside ``size`` keeping its aspect ratio, then pad.

    Args:
        img: Image array whose first two axes are (height, width).
        size: Target ``(height, width)`` of the returned image.
        padColor: Border colour. A scalar is expanded to three channels when
            ``img`` has three dimensions; a list/tuple/array is used as given.

    Returns:
        The scaled image, centred and padded with ``padColor`` to ``size``.
    """
    h, w = img.shape[:2]
    sh, sw = size

    # INTER_AREA when the source exceeds the target in either dimension
    # (shrinking), INTER_CUBIC otherwise (stretching).
    interp = cv2.INTER_AREA if (h > sh or w > sw) else cv2.INTER_CUBIC

    aspect = w / h
    # Compare against the target's aspect ratio, not against 1: with a
    # non-square target, comparing to 1 can pick the wrong limiting dimension
    # and produce negative padding. For square targets this is identical.
    target_aspect = sw / sh

    pad_top = pad_bot = pad_left = pad_right = 0
    if aspect > target_aspect:
        # Source is relatively wider: width is the limiting dimension.
        new_w = sw
        new_h = max(1, int(np.round(new_w / aspect)))
        pad_vert = (sh - new_h) / 2
        pad_top, pad_bot = int(np.floor(pad_vert)), int(np.ceil(pad_vert))
    elif aspect < target_aspect:
        # Source is relatively taller: height is the limiting dimension.
        new_h = sh
        new_w = max(1, int(np.round(new_h * aspect)))
        pad_horz = (sw - new_w) / 2
        pad_left, pad_right = int(np.floor(pad_horz)), int(np.ceil(pad_horz))
    else:
        new_h, new_w = sh, sw

    # Colour image but only one colour value provided: repeat it per channel.
    # (== rather than `is`: identity comparison with an int literal is
    # implementation-dependent and raises a SyntaxWarning.)
    if len(img.shape) == 3 and not isinstance(padColor, (list, tuple, np.ndarray)):
        padColor = [padColor] * 3

    scaled_img = cv2.resize(img, (new_w, new_h), interpolation=interp)
    scaled_img = cv2.copyMakeBorder(
        scaled_img, pad_top, pad_bot, pad_left, pad_right,
        borderType=cv2.BORDER_CONSTANT, value=padColor,
    )
    return scaled_img

In [None]:
# Display the training metadata table read from train.csv.
df_train

In [None]:
# Display the test metadata table read from test.csv.
df_test

In [None]:
# Display the table read from other.csv.
df_other

In [None]:
# Display the table read from sample_submission.csv.
df_sub

In [None]:
# Class balance of the training labels: absolute counts (left), shares (right).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(14, 7))
axs[0] = sn.countplot(x="label", data=df_train, ax=axs[0])
axs[0].bar_label(axs[0].containers[0])
label_counts = df_train["label"].value_counts()
# Wedge labels are taken from the counted index rather than a hard-coded list,
# so each wedge carries the right name whichever class is more frequent.
# The target axes is passed explicitly instead of relying on the current axes.
axs[1] = label_counts.plot.pie(autopct='%1.1f%%', ylabel="label",
                               labels=list(label_counts.index), ax=axs[1])

# Number of training images per center: bar chart (left) and pie chart (right).
df = df_train[["center_id"]].value_counts().reset_index(name="cnt")
lb = list(df.center_id.values)
# Bar order: centers sorted by descending image count.
od = df.sort_values("cnt", ascending=False).center_id
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(14, 7))
cl = sn.color_palette("Paired")
axs[0] = sn.barplot(x="center_id",  y="cnt", data=df, order=od, palette=cl, ax=axs[0])
axs[0].bar_label(axs[0].containers[0])
# Pass the right-hand axes explicitly, matching the bar plot, instead of
# depending on whichever axes happens to be current.
axs[1] = df["cnt"].plot.pie(autopct='%1.1f%%', ylabel="center_id", labels=lb,
                            colors=cl, ax=axs[1])

# Distribution of the image_num column: counts (left) and shares (right).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(14, 7))
# Wedge offsets for the pie charts; also reused by the next plot section.
ex = (0,0.1,0.1,0.4,0.6)
axs[0] = sn.countplot(x="image_num", data=df_train, dodge=False, ax=axs[0])
axs[0].bar_label(axs[0].containers[0])
image_num_counts = df_train["image_num"].value_counts()
# Labels come from the counted index so each wedge is named after the value
# it actually represents, whatever the frequency order turns out to be.
lb = list(image_num_counts.index)
# matplotlib requires one explode entry per wedge: repeat the last offset if
# there are more wedges than entries, truncate if there are fewer.
n_wedges = len(image_num_counts)
explode = (ex + (ex[-1],) * n_wedges)[:n_wedges]
axs[1] = image_num_counts.plot.pie(autopct='%1.1f%%', ylabel="image_num",
                                   labels=lb, explode=explode, ax=axs[1])
plt.show()

# "center_count" is the number of training rows (non-null center_id values)
# recorded for each patient.
df = df_train.groupby(["patient_id"])["center_id"].count().reset_index(name="center_count")
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(14, 7))
axs[0] = sn.countplot(x="center_count", data=df, dodge=False, ax=axs[0])
axs[0].bar_label(axs[0].containers[0])
center_count_freq = df["center_count"].value_counts()
# Labels are derived from the counted index instead of assuming the values
# 1..5 appear in descending-frequency order.
lb = list(center_count_freq.index)
# `ex` is the wedge-offset tuple defined for the image_num pie; adapt its
# length to the number of wedges, as matplotlib requires an exact match.
n_wedges = len(center_count_freq)
explode = (ex + (ex[-1],) * n_wedges)[:n_wedges]
axs[1] = center_count_freq.plot.pie(autopct='%1.1f%%', ylabel="center_count",
                                    labels=lb, explode=explode, ax=axs[1])
plt.show()

# Label counts per center as grouped bars, with the LAA-to-CE ratio drawn as a
# line on a secondary y-axis.
df = pd.crosstab(index=df_train["center_id"], columns=df_train["label"]).reset_index()
df["LAA/CE"] = df["LAA"] / df["CE"]
ax = df.plot.bar(x="center_id", y=["CE", "LAA"], width=0.8, figsize=(14, 7))
# Annotate both bar groups (CE first, then LAA) with their counts.
for bar_group in (0, 1):
    ax.bar_label(ax.containers[bar_group])
df.plot(y=["LAA/CE"], secondary_y="LAA/CE", color="lightgreen", linewidth=4, ax=ax);

# Label counts per image_num value as grouped bars; the LAA/CE ratio column is
# computed but not plotted here.
df = pd.crosstab(index=df_train["image_num"], columns=df_train["label"]).reset_index()
df["LAA/CE"] = df["LAA"] / df["CE"]
ax = df.plot.bar(x="image_num", y=["CE", "LAA"], width=0.8, figsize=(14, 7))
# Annotate both bar groups (CE first, then LAA) with their counts.
for bar_group in (0, 1):
    ax.bar_label(ax.containers[bar_group]);

# Collect the pixel dimensions of every training image and attach them to
# df_train as img_height / img_width / img_size columns.
sizes = []
for name in df_train["image_id"]:
    # The with-block closes each file as soon as its dimensions are read,
    # instead of leaving one open handle per image behind.
    with Image.open(os.path.join(BASE_PATH, "train", f"{name}.tif")) as img:
        sizes.append({"img_height": img.height,
                      "img_width": img.width,
                      # Pixel count divided by 1024**2.
                      "img_size": img.size[0]*img.size[1]/(1024**2)})
# Drop size columns left over from a previous run of this cell so that
# re-running does not create duplicate columns; align on df_train's index.
size_columns = ["img_height", "img_width", "img_size"]
df_train = pd.concat(
    [pd.DataFrame(sizes, index=df_train.index),
     df_train.drop(columns=size_columns, errors="ignore")],
    axis=1,
)
del sizes
# Histograms (with KDE) of image heights and widths, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
for i, col in enumerate(["img_height", "img_width"]):
    _= sn.histplot(df_train[[col]], ax=axs[i], bins=40, kde=True)
# Mean image height and width per center.
df = df_train.groupby(["center_id"])[["img_height", "img_width"]].mean().reset_index()
ax = df.plot(x="center_id", kind="bar", width=0.8, figsize=(14,7))

# Count images per (patient, label) pair, then show the pairs with more than
# two images, styled with a red background gradient.
df = (
    df_train
    .groupby(["patient_id", "label"])["image_num"]
    .count()
    .reset_index(name='image_count')
)
df.loc[df["image_count"] > 2].set_index("patient_id").style.background_gradient(cmap='Reds')

In [None]:
# Disabled preprocessing cell: the code below is wrapped in a string literal,
# so none of it executes. If enabled, it would
#   * read every training image downscaled by 100, pad it to 256x256 with
#     white (255), and save the stacked images / labels to X_array.npy and
#     y_array.npy;
#   * build X_sub the same way from the test images and divide it by 255.
# NOTE(review): X_sub is assigned only inside this string, yet a later cell
# calls model.predict(X_sub) -- confirm it is defined before that cell runs.
'''%%time
X, y = [], []
for i in range(len(df_train)):
    image_id = df_train.image_id[i]
    image, tif_tags = read_image(image_id, 'train', scale=100, verbose=0)
    imageNew = resizeAndPad(image, (256,256), 255)
    X.append(imageNew)
    y.append(df_train.label[i])
    print(i+1, '/', len(df_train))
    clear_output(wait=True)
X, y = np.asarray(X), np.reshape(np.asarray(y), (len(y), 1))
np.save('X_array.npy', X)
np.save('y_array.npy', y)

X_sub = []
for i in range(len(df_test)):
    image_id = df_test.image_id[i]
    image, tif_tags = read_image(image_id, 'test', scale=100, verbose=0)
    imageNew = resizeAndPad(image, (256,256), 255)
    X_sub.append(imageNew)
X_sub = np.asarray(X_sub)
X_sub = X_sub / 255.0'''

In [None]:
# Load the cached image and label arrays from disk.
X = np.load('X_array.npy')
y = np.load('y_array.npy')

# Encode the string labels as integers: 'CE' -> 0, 'LAA' -> 1.
# Doing it in a single np.where with two integer branches yields an integer
# array; mixing an int with the string array (as np.where(y == 'CE', 0, y)
# would) relies on implicit number-to-string promotion and produces the
# strings '0' / '1' instead.
unexpected_labels = set(np.unique(y).tolist()) - {'CE', 'LAA'}
if unexpected_labels:
    raise ValueError(f"Unexpected label values: {sorted(unexpected_labels)}")
y = np.where(y == 'LAA', 1, 0)

# Preview up to a 3x3 grid of the first images, titled with their encoded label.
fig = plt.figure(figsize=(20, 20))
columns = 3
rows = 3
# min(): do not index past the end when fewer than 9 images are available.
for i in range(min(columns * rows, len(X))):
    img = X[i]
    fig.add_subplot(rows, columns, i+1)
    plt.title(str(y[i][0]))
    plt.imshow(img)
plt.show()

In [None]:
# Report the dimensions of the full data set before splitting.
print('X shape:', X.shape, '\ny shape:', y.shape)

# Hold out 20% of the samples; shuffling uses a fixed seed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=5
)

# Divide the pixel values of both subsets by 255.
X_train, X_test = X_train / 255.0, X_test / 255.0

# Labels become float32 column vectors of shape (n, 1).
y_train, y_test = (
    labels.astype('float32').reshape((-1, 1)) for labels in (y_train, y_test)
)

print(
    'X_train shape:', X_train.shape, '\ny_train shape:', y_train.shape,
    '\nX_test shape:', X_test.shape, '\ny_test shape:', y_test.shape,
)

In [None]:
# Training configuration.
loss = 'sparse_categorical_crossentropy'
optimizer = 'adam'
metrics = ['accuracy']
epochs = 3

# Small CNN: three 3x3 convolutions (two followed by 2x2 max-pooling), then a
# dense head with dropout, ending in a two-unit output (one unit per class).
model = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Dropout(0.75),
    Flatten(),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(16, activation='relu'),
    # softmax, not sigmoid: sparse categorical cross-entropy expects a
    # probability distribution over mutually exclusive classes. Independent
    # sigmoid units give two scores that do not sum to 1, so they are not
    # usable as the per-class probabilities read from model.predict later.
    Dense(2, activation='softmax'),
])

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
# The held-out split is used as validation data during training.
model.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test))

In [None]:
# X_sub is otherwise only assigned inside the disabled preprocessing string,
# so on a fresh kernel it does not exist and model.predict(X_sub) would raise
# NameError. Build it here when missing, with the same preprocessing that the
# disabled cell describes: downscale by 100, pad to 256x256 with white (255),
# divide pixel values by 255.
if "X_sub" not in globals():
    X_sub = np.asarray([
        resizeAndPad(read_image(image_id, 'test', scale=100, verbose=0)[0], (256, 256), 255)
        for image_id in df_test.image_id
    ]) / 255.0

# One row of class scores per test image (column 0 = CE, column 1 = LAA).
X_pred = model.predict(X_sub)
X_pred

In [None]:
# Copy the sample submission and overwrite its CE / LAA columns with the
# predicted scores (prediction column 0 -> CE, column 1 -> LAA).
# Whole-column assignment replaces the per-cell `df.CE[i] = ...` loop, which is
# chained assignment: it triggers SettingWithCopyWarning and silently does
# nothing under pandas copy-on-write.
# NOTE(review): rows are matched by position -- this assumes the i-th
# prediction belongs to the i-th submission row; verify against the test table.
df = df_sub.copy()
n_rows = len(df)
df["CE"] = X_pred[:n_rows, 0]
df["LAA"] = X_pred[:n_rows, 1]

df

In [None]:
# Write the frame that holds the model's predictions (df), not df_sub, which
# is still the unmodified sample submission.
df.to_csv('submission.csv', index=False)