In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow import data
from tensorflow.keras import Sequential, Input, Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, Softmax
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.applications import resnet50

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
import sys
sys.path.append("../")
from util import data_prep as prep
from util import models as m

In [4]:
# Root folder of the pre-split dataset; the generators below read its
# train/ and test/ sub-directories.
root_data_path = "D:/malaria_data/cell_images_split"
# Passed to the training/validation generators so their shuffling is repeatable
seed = 123
# Resize using "area" method
# NOTE(review): none of the flow_from_directory calls in this notebook pass
# `interpolation`, so they use the library default -- confirm where the
# "area" resize is meant to happen.
# Multiples of 32 work best with ResNet
resnet_inp_shape = (128, 128, 3)
# Fraction of the train folder held out for validation
val_split = 0.2
batch_size = 32
# Epochs for the head-only phase; the fine-tuning phase derives its count from this
n_epochs = 2
# Small learning rate used once the pretrained layers are unfrozen
fine_tune_lr = 1e-5

In [5]:
# ImageNet-pretrained ResNet50 without its classification top, used as the
# feature-extraction backbone.
model_0 = resnet50.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=resnet_inp_shape,
)

# Functions

In [None]:
def build_model(base_model: Model, ll: "int | str", img_shape: tuple) -> Model:
    """Stack a binary-classification head on a frozen, truncated backbone.

    Args:
        base_model: Pre-trained Keras model used as the feature extractor.
        ll: Where to cut the backbone. An int indexes ``base_model.layers``
            (``-1`` keeps every layer); a str is looked up by layer name,
            e.g. ``"conv5_block2_out"`` to drop the last convolutional block
            of ResNet50.
        img_shape: Shape of one input image, passed to ``Input``.

    Returns:
        An uncompiled model: input -> truncated backbone (frozen) ->
        global average pooling -> ``Dense(1, sigmoid)``.
    """
    if isinstance(ll, str):
        cut_layer = base_model.get_layer(name=ll)
    else:
        cut_layer = base_model.layers[ll]

    # The truncated wrapper reuses base_model's layer objects, so freezing it
    # also marks those shared layers as non-trainable.
    backbone = Model(inputs=base_model.input, outputs=cut_layer.output)
    backbone.trainable = False

    inputs = Input(shape=img_shape)
    # training=False runs the backbone in inference mode (per the Keras
    # fine-tuning guide this keeps batch-norm statistics fixed even after the
    # layers are later unfrozen).
    features = backbone(inputs, training=False)
    pooled = GlobalAveragePooling2D()(features)
    outputs = Dense(1, activation="sigmoid")(pooled)
    return Model(inputs, outputs)

# Data

## Data split

In [None]:
# Training-time generator: random flips for augmentation, plus a hold-out
# fraction (`val_split`) reserved for validation.
train_datagen = ImageDataGenerator(
    # Apply the same input preprocessing the pretrained ResNet50 was built with
    preprocessing_function=resnet50.preprocess_input,
    validation_split=val_split,
    vertical_flip=True,
    horizontal_flip=True,
)

In [None]:
# Test-time generator: ResNet50 preprocessing only, no augmentation and no split.
test_datagen = ImageDataGenerator(preprocessing_function=resnet50.preprocess_input)

## Generators

In [None]:
# Validation images should not be randomly flipped, so they are drawn through
# a generator that only preprocesses. It uses the same `validation_split` as
# train_datagen so that the "validation" subset stays complementary to the
# "training" subset.
# NOTE(review): this relies on the subset partition depending only on the
# split fraction and the directory listing -- confirm for the installed
# Keras version.
val_datagen = ImageDataGenerator(
    preprocessing_function=resnet50.preprocess_input,
    validation_split=val_split,
)

# Arguments shared by the training and validation flows
train_flow_kwargs = dict(
    directory=f"{root_data_path}/train",
    target_size=resnet_inp_shape[:2],
    batch_size=batch_size,
    # Want uninfected to be labeled "0"
    classes={"uninfected": 0, "parasitized": 1},
    class_mode="binary",
    seed=seed,
)

train_generator = train_datagen.flow_from_directory(
    subset="training", **train_flow_kwargs
)

val_generator = val_datagen.flow_from_directory(
    subset="validation", **train_flow_kwargs
)

In [None]:
# class_mode=None makes the generator yield image batches only; shuffle=False
# keeps the file order fixed so predictions can be compared with
# test_generator.labels later on.
test_dir = f"{root_data_path}/test"
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    classes={"uninfected": 0, "parasitized": 1},
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
    target_size=resnet_inp_shape[:2],
)

# Build model

In [None]:
# Cut the backbone at its last layer (index -1), i.e. keep all of model_0
model_1 = build_model(base_model=model_0, ll=-1, img_shape=resnet_inp_shape)
# Default-configured Adam for the head-only training phase
model_1.compile(
    loss=BinaryCrossentropy(),
    optimizer=Adam(),
    metrics=[BinaryAccuracy()],
)

In [None]:
# Print the layer stack with output shapes and parameter counts
model_1.summary()

# Train model (just new layers)

In [None]:
# Phase 1: the backbone was frozen in build_model, so only the new
# pooling + Dense head is updated here.
# Model.fit accepts the generators directly; fit_generator is deprecated.
history_head = model_1.fit(
    train_generator,
    validation_data=val_generator,
    # Floor division: the final partial batch of each pass is not counted
    steps_per_epoch=train_generator.n // batch_size,
    validation_steps=val_generator.n // batch_size,
    epochs=n_epochs,
)

# Train model (all layers)

In [None]:
# Unfreeze everything for fine-tuning. The flag is set on model_1 rather than
# model_0 because the frozen backbone inside model_1 is the wrapper Model
# created in build_model, and Keras propagates `trainable` from a model to the
# layers nested inside it.
# NOTE(review): confirm via model_1.summary() that the trainable-parameter
# count goes up after this cell.
model_1.trainable = True

In [None]:
# Re-compile so the change to `trainable` takes effect, this time with the
# small fine-tuning learning rate.
fine_tune_optimizer = Adam(learning_rate=fine_tune_lr)
model_1.compile(
    loss=BinaryCrossentropy(),
    metrics=[BinaryAccuracy()],
    optimizer=fine_tune_optimizer,
)

In [None]:
# Phase 2: fine-tune all layers for half as many epochs as phase 1.
# Model.fit accepts the generators directly; fit_generator is deprecated.
history_fine_tune = model_1.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=train_generator.n // batch_size,
    validation_steps=val_generator.n // batch_size,
    # n_epochs // 2 is 0 when n_epochs == 1, which would silently skip
    # fine-tuning, so always run at least one epoch.
    epochs=max(1, n_epochs // 2),
)

In [None]:
# Sigmoid outputs for the test images. test_generator was created with
# shuffle=False, so rows should line up with test_generator.labels.
probs = model_1.predict(test_generator)

In [None]:
# Threshold the predicted probabilities at 0.5, then drop the length-1 column axis
above_threshold = probs > 0.5
preds = above_threshold.astype(int).squeeze(axis=1)
# Class indices recorded by the test generator
true = test_generator.labels

In [None]:
# Test accuracy: share of test images whose thresholded prediction matches the label
n_correct = np.sum(preds == true)
n_correct / test_generator.n