In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras
import wandb

from sklearn.model_selection import KFold
from tqdm.notebook import tqdm
from wandb.keras import WandbCallback

%run /content/src/model_functions.ipynb

In [None]:
def print_metrics(y_test, predictions):
    """Print AUC, accuracy, sensitivity and specificity on a single line.

    Args:
        y_test: Ground-truth labels, fed to every metric as ``y_true``.
        predictions: Model outputs. They are passed unchanged to the AUC and
            confusion-matrix metrics, and rounded with ``np.around`` before
            being compared to ``y_test`` for the accuracy.
    """
    auc_metric = tf.keras.metrics.AUC(multi_label=True)
    accuracy_metric = tf.keras.metrics.Accuracy()
    # Confusion-matrix counters, all built with their default arguments.
    confusion = {
        'tn': tf.keras.metrics.TrueNegatives(),
        'tp': tf.keras.metrics.TruePositives(),
        'fn': tf.keras.metrics.FalseNegatives(),
        'fp': tf.keras.metrics.FalsePositives(),
    }

    for counter in confusion.values():
        counter.update_state(y_test, predictions)
    auc_metric.update_state(y_test, predictions)
    accuracy_metric.update_state(y_test, np.around(predictions))

    counts = {key: counter.result().numpy() for key, counter in confusion.items()}
    # sensitivity = TP / (TP + FN), specificity = TN / (TN + FP)
    sensitivity = counts['tp'] / (counts['tp'] + counts['fn'])
    specificity = counts['tn'] / (counts['tn'] + counts['fp'])

    report = ', '.join([
        f'AUC: {auc_metric.result().numpy()}',
        f'accuracy: {accuracy_metric.result().numpy()}',
        f'sensitivity: {sensitivity}',
        f'specificity: {specificity}',
    ])
    print(report)

## Training the final model

In [None]:
# --- Cross-validation schedule ---
RD_SEED = 123                  # random_state given to the KFold splitter
N_SPLITS = 3
EPOCHS = 5

# Set RUN_SINGLE_FOLD to True to train only fold number FOLD (counted from 1),
# e.g. when the runtime is limited.
RUN_SINGLE_FOLD = False
FOLD = 1

# --- Training data ---
TRAIN_DF_PATH = '/content/dataframes/train_final.csv'
TRAIN_IMG_PATH = '/content/jpg_data/stage_2_train_jpg/'
IMAGE_FORMAT = 'jpg'
DIM = (224, 224)
WINDOW = (40, 80)
N_CLASSES = 6
BATCH_SIZE = 32

# --- Model ---
ARCHITECTURE = 'DenseNet121'   # or "EfficientNet121"
POOLING = 'avg'
LEARNING_RATE = 1e-5           # or 2e-5
OPTIMIZER = tf.keras.optimizers.Adam
METRICS = [tf.keras.metrics.AUC(multi_label=True)]

# --- Weights ---
LOAD_WEIGHTS_PATH = None       # path to weights to start training from, if any
SAVE_WEIGHTS = True
OUTPUT_DIR = '/content/weights/'

In [None]:
# IDs removed from the training table before splitting.
# NOTE(review): presumably unusable images - confirm the reason for excluding them.
EXCLUDED_IDS = ["ID_6431af929", "ID_00de64f80"]

# Model input shape derived from DIM so the generators and the network cannot drift apart.
INPUT_SHAPE = (*DIM, 3)

dataframe = pd.read_csv(TRAIN_DF_PATH)
dataframe = dataframe.drop(index=dataframe.index[dataframe['ID'].isin(EXCLUDED_IDS)])

# Folds are built over unique studies, not over individual rows.
studies = dataframe['Study'].unique()

# Fail fast: make sure the weights directory exists before any training starts.
if SAVE_WEIGHTS:
    os.makedirs(OUTPUT_DIR, exist_ok=True)

splitter = KFold(n_splits=N_SPLITS, shuffle=True, random_state=RD_SEED)
for i, (train, valid) in enumerate(splitter.split(studies)):
    fold_number = i + 1
    print(f'Cross-validation fold {fold_number}')
    if RUN_SINGLE_FOLD and fold_number != FOLD:
        print("Skipped...")
        continue

    # Start every fold from a freshly built model.
    tf.keras.backend.clear_session()
    model = create_model(ARCHITECTURE, INPUT_SHAPE, POOLING, OPTIMIZER, LEARNING_RATE,
                         weighted_multi_label_log_loss, METRICS, LOAD_WEIGHTS_PATH)
    weights_prefix = f"{OUTPUT_DIR}weights_{ARCHITECTURE}_fold_{fold_number}"

    for epoch in range(EPOCHS):
        print(f'Epoch {epoch+1}')

        # The train/validation tuples and their generators are rebuilt on every epoch.
        X_train, y_train, X_valid, y_valid = get_balanced_train_valid_tuples(
            dataframe, studies[train], studies[valid])
        training_generator = DataGenerator(
            X_train, y_train, TRAIN_IMG_PATH, dataframe, BATCH_SIZE, DIM, N_CLASSES,
            shuffle=True, window=WINDOW, image_format=IMAGE_FORMAT)
        validation_generator = DataGenerator(
            X_valid, y_valid, TRAIN_IMG_PATH, dataframe, BATCH_SIZE, DIM, N_CLASSES,
            shuffle=True, window=WINDOW, augment=False, image_format=IMAGE_FORMAT)

        model.fit(x=training_generator, epochs=1)
        if SAVE_WEIGHTS:
            # Per-epoch checkpoint.
            model.save_weights(f"{weights_prefix}_epoch_{epoch+1}.h5")
        model.evaluate(x=validation_generator)

    if SAVE_WEIGHTS:
        # Final weights of the fold.
        model.save_weights(f"{weights_prefix}.h5")

## Evaluate the model on the custom test set

In [None]:
# --- Test data ---
TEST_DF_PATH = '/content/dataframes/test_final.csv'
TEST_IMG_PATH = '/content/jpg_data/stage_2_train_jpg/'
BATCH_SIZE = 32

# --- Model definition (used to rebuild the network before loading weights) ---
ARCHITECTURE = 'DenseNet121'     # or "EfficientNet121"
POOLING = 'avg'
LEARNING_RATE = 1e-5             # or 2e-5
OPTIMIZER = tf.keras.optimizers.Adam
METRICS = [tf.keras.metrics.AUC(multi_label=True)]

# --- Weights to evaluate ---
WEIGHTS_DIR = '/content/weights/'
WEIGHTS = 'weights_DenseNet121_fold_1.h5'   # modify to use other weights

# --- Prediction options ---
USE_TTA = True
SAVE_PREDICTIONS = True
OUTPUT_DIR = '/content/predictions/'

In [None]:
def predict(model, batches, img_path, tta, image_format="jpg", n_samples=5, input_shape=(224, 224, 3)):
    """Run the model over batches of image IDs, optionally with test-time augmentation.

    Args:
        model: Object exposing ``predict(array)``. It is called once per batch,
            or ``n_samples`` times per batch when ``tta`` is set.
        batches: Iterable of sequences of image IDs. Empty batches are skipped.
        img_path: Prefix prepended to ``ID + "." + image_format`` to build each
            image file path.
        tta: If truthy, the predictions of each batch are averaged over
            ``n_samples`` copies produced by ``augmentation.random_augment``.
        image_format: File extension of the images, without the dot.
        n_samples: Number of augmented copies averaged per image when ``tta`` is set.
        input_shape: Shape of a single image as stored in the batch array.

    Returns:
        ``np.ndarray`` holding one row of predictions per image ID, in input order.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read an image. Without this
            check the ``None`` result would be stored as NaN pixels and the
            model would silently predict on garbage.
    """
    predictions = []
    augmentation = DataAugmentation(123, max_angle=30)

    for batch in tqdm(batches):
        if len(batch) == 0:
            continue

        samples = np.empty((len(batch), *input_shape))
        for i, ID in enumerate(batch):
            image_file = img_path + ID + "." + image_format  # TODO: add dicom possibility
            image = cv2.imread(image_file)
            if image is None:
                raise FileNotFoundError(f"Image is missing or unreadable: {image_file}")
            samples[i] = image

        if not tta:
            predictions.extend(model.predict(samples))
            continue

        # Test-time augmentation: predict on n_samples augmented copies and average them.
        sample_predictions = []
        for _ in range(n_samples):
            aug_samples = np.empty_like(samples)
            for s in range(len(batch)):
                aug_samples[s] = augmentation.random_augment(samples[s], False)
            sample_predictions.append(model.predict(aug_samples))
        # Mean over the augmentation axis gives one averaged row per image.
        predictions.extend(np.mean(sample_predictions, axis=0))
    return np.array(predictions)

In [None]:
dataframe = pd.read_csv(TEST_DF_PATH)

# The code assumes six consecutive rows per image: every sixth ID is kept and
# the labels are folded into rows of six.
X_test = dataframe['ID'].values[::6]
y_test = np.reshape(dataframe['Label'].values, (-1, 6))

# At least one section is required: np.array_split raises on 0 sections, which
# is what happens when there are fewer than BATCH_SIZE images.
X_test_batches = np.array_split(X_test, max(1, len(X_test) // BATCH_SIZE))

# Fail fast: create the output directory before the long prediction run.
if SAVE_PREDICTIONS:
    os.makedirs(OUTPUT_DIR, exist_ok=True)

tf.keras.backend.clear_session()
model = create_model(ARCHITECTURE, (224, 224, 3), POOLING, OPTIMIZER, LEARNING_RATE,
                     weighted_multi_label_log_loss, METRICS, WEIGHTS_DIR + WEIGHTS)

predictions = predict(model, X_test_batches, TEST_IMG_PATH, USE_TTA)

if SAVE_PREDICTIONS:
    # Strip the file extension, whatever its length, to build the output name.
    weights_stem = os.path.splitext(WEIGHTS)[0]
    tta_suffix = "_TTA" if USE_TTA else ""
    pd.DataFrame(predictions).to_csv(
        OUTPUT_DIR + weights_stem + tta_suffix + "_predictions.csv", index=False)

print_metrics(y_test, predictions)

### Ensembling the predictions

In [None]:
# Ground-truth labels of the custom test set.
TEST_DF_PATH = '/content/dataframes/test_final.csv'

# Prediction files to average; each is read from PREDICTIONS_DIR.
PREDICTIONS_DIR = '/content/predictions/'
PREDICTIONS_FILES = [
    'pred1.csv',
    'pred2.csv',
    'pred3.csv',
]

# Where the averaged predictions go when SAVE_PREDICTIONS is True.
SAVE_PREDICTIONS = False
OUTPUT_DIR = '/content/predictions/'
OUTPUT_FILE = 'DenseNet121_ensemble_predictions.csv'   # modify to use another file name

In [None]:
# Ground truth, folded into rows of six labels.
test_df = pd.read_csv(TEST_DF_PATH)
y_test = test_df['Label'].to_numpy().reshape(-1, 6)

# Accumulate every prediction file into a float64 buffer shaped like y_test,
# then divide by the number of files to obtain the element-wise mean.
predictions = np.zeros(y_test.shape, dtype='float64')
for predictions_file in PREDICTIONS_FILES:
    predictions += pd.read_csv(PREDICTIONS_DIR + predictions_file).to_numpy()
predictions /= len(PREDICTIONS_FILES)

if SAVE_PREDICTIONS:
    ensemble_df = pd.DataFrame(predictions)
    ensemble_df.to_csv(OUTPUT_DIR + OUTPUT_FILE, index=False)

print_metrics(y_test, predictions)

## Predict the submission dataset

In [None]:
# --- Submission data ---
MOD_SUBMISSION_DF_PATH = '/content/dataframes/stage_2_sample_submission_mod.csv'
ORIG_SUBMISSION_DF_PATH = '/content/drive/MyDrive/rsna-intracranial-hemorrhage-detection/stage_2_sample_submission.csv'
SUBMISSION_IMG_PATH = '/content/jpg_data/stage_2_test_jpg/'
BATCH_SIZE = 32

# --- Model definition (used to rebuild the network before loading weights) ---
ARCHITECTURE = 'DenseNet121'     # or "EfficientNet121"
POOLING = 'avg'
LEARNING_RATE = 1e-5             # or 2e-5
OPTIMIZER = tf.keras.optimizers.Adam
METRICS = [tf.keras.metrics.AUC(multi_label=True)]

# --- Weights used for the submission ---
WEIGHTS_DIR = '/content/weights/'
WEIGHTS = 'weights_DenseNet121_fold_1.h5'   # specify the weights you want to use

# --- Prediction options ---
USE_TTA = True
SAVE_PREDICTIONS = True
OUTPUT_DIR = '/content/predictions/'
OUTPUT_FILE = 'DenseNet121_fold_1_submission.csv'   # modify to use another file name

In [None]:
dataframe = pd.read_csv(MOD_SUBMISSION_DF_PATH)

# The code assumes six consecutive rows per image, so every sixth ID is kept.
X_test = dataframe['ID'].values[::6]

# At least one section is required: np.array_split raises on 0 sections, which
# is what happens when there are fewer than BATCH_SIZE images.
X_test_batches = np.array_split(X_test, max(1, len(X_test) // BATCH_SIZE))

# Fail fast: create the output directory before the long prediction run.
if SAVE_PREDICTIONS:
    os.makedirs(OUTPUT_DIR, exist_ok=True)

tf.keras.backend.clear_session()
model = create_model(ARCHITECTURE, (224, 224, 3), POOLING, OPTIMIZER, LEARNING_RATE,
                     weighted_multi_label_log_loss, METRICS, WEIGHTS_DIR + WEIGHTS)

predictions = predict(model, X_test_batches, SUBMISSION_IMG_PATH, USE_TTA)

# The flattened predictions are written row by row into the original sample
# submission; pandas raises if the lengths differ.
# NOTE(review): this relies on the modified and original submission files
# listing the images in the same order - confirm.
submission_df = pd.read_csv(ORIG_SUBMISSION_DF_PATH)
submission_df['Label'] = predictions.flatten()

if SAVE_PREDICTIONS:
    submission_df.to_csv(OUTPUT_DIR + OUTPUT_FILE, index=False)

### Ensembling submissions

In [None]:
# Sample submission whose 'Label' column is replaced by the averaged labels.
ORIG_SUBMISSION_DF_PATH = '/content/drive/MyDrive/rsna-intracranial-hemorrhage-detection/stage_2_sample_submission.csv'

# Submission files to average; each is read from PREDICTIONS_DIR.
PREDICTIONS_DIR = '/content/predictions/'
SUBMISSION_FILES = [
    'sub1.csv',
    'sub2.csv',
    'sub3.csv',
    'sub4.csv',
    'sub5.csv',
]

# Where the ensembled submission goes when SAVE_PREDICTIONS is True.
SAVE_PREDICTIONS = True
OUTPUT_DIR = '/content/predictions/'
OUTPUT_FILE = 'EfficientDenseNet_ensemble_submission.csv'   # modify to use another file name

In [None]:
# The original sample submission serves as the template for the output file.
sub_df = pd.read_csv(ORIG_SUBMISSION_DF_PATH)
y_sub = sub_df['Label'].values

# Sum the 'Label' column of every submission file into a float64 buffer shaped
# like the template's labels, then divide by the number of files.
submissions = np.zeros(y_sub.shape, dtype='float64')
for submission_file in SUBMISSION_FILES:
    submissions += pd.read_csv(PREDICTIONS_DIR + submission_file)['Label'].values
submissions /= len(SUBMISSION_FILES)
sub_df['Label'] = submissions

if SAVE_PREDICTIONS:
    ensemble_path = OUTPUT_DIR + OUTPUT_FILE
    sub_df.to_csv(ensemble_path, index=False)