In [None]:
# https://www.kaggle.com/code/deannahedges/mammography-training-model

import tensorflow as tf
from tensorflow.keras import datasets, layers, models, optimizers
import numpy as np
import keras.backend as K
# Report the installed TensorFlow version so runs can be reproduced.
print(f"Tensorflow version {tf.__version__}")

In [None]:
import os
from pathlib import Path
import glob

In [None]:
# Root directory handed to image_dataset_from_directory for both the training
# and validation subsets.
data_dir = Path("/kaggle/input/mammography-challenge-pngs/train_images_processed_cv2_dicomsdl_256/")

In [None]:
# Training configuration shared by the dataset loaders and the model.
epochs = 5        # passes over the training set given to model.fit
batch_size = 32   # images per batch produced by the dataset loaders
# Images are resized to a square of this size when they are loaded.
img_height = img_width = 256

In [None]:
# Training subset: the images not reserved by validation_split. The seed and
# split fraction are the same as for the validation subset.
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir,
    subset="training",
    validation_split=0.2,
    seed=123,
    batch_size=batch_size,
    image_size=(img_height, img_width),
)

In [None]:
# Validation subset: the fraction reserved by validation_split. The seed and
# split fraction are the same as for the training subset.
val_ds = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir,
    subset="validation",
    validation_split=0.2,
    seed=123,
    batch_size=batch_size,
    image_size=(img_height, img_width),
)

In [None]:
def get_f1(y_true, y_pred):
    """Return the F1 score of ``y_pred`` against ``y_true``.

    Each tensor is clipped to [0, 1] and rounded before being summed, so the
    counts below are counts of rounded values. ``K.epsilon()`` is added to
    every denominator to avoid division by zero.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted scores.

    Returns:
        Scalar tensor ``2 * precision * recall / (precision + recall)``.
    """
    eps = K.epsilon()

    def _rounded_count(values):
        # Clip into [0, 1], round, then add everything up.
        return K.sum(K.round(K.clip(values, 0, 1)))

    hits = _rounded_count(y_true * y_pred)
    actual_positives = _rounded_count(y_true)
    flagged_positives = _rounded_count(y_pred)

    precision = hits / (flagged_positives + eps)
    recall = hits / (actual_positives + eps)
    return 2*(precision*recall)/(precision+recall+eps)

In [None]:
from keras.applications import VGG19
base_learning_rate = 0.0001

# ImageNet-pretrained VGG19 convolutional base, without its classifier head.
# It is frozen so that only the new classification head is trained.
pretrained_model = VGG19(weights='imagenet',
                include_top=False,
                input_shape=(img_height, img_width, 3))
pretrained_model.trainable = False

# NOTE(review): the ImageNet VGG19 weights expect inputs passed through
# keras.applications.vgg19.preprocess_input, while the datasets here feed the
# images as loaded. Consider adding that preprocessing step - TODO confirm.

# Binary classifier: pooled VGG19 features -> one sigmoid unit.
model = tf.keras.Sequential([
    pretrained_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=optimizers.Adam(base_learning_rate),
    # The last layer already applies a sigmoid, so the model outputs
    # probabilities, not logits. from_logits=True would apply a second
    # sigmoid inside the loss and distort both the loss and its gradients.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['binary_accuracy']
)

# Train the head; the returned History object is used for plotting later.
historical = model.fit(train_ds,
                      epochs=epochs,
                      validation_data=val_ds)

In [None]:
from matplotlib import pyplot as plt

def display_training_curves(training, validation, title, subplot):
    """Plot a training curve and a validation curve in one subplot panel.

    Args:
        training: sequence of training values, one per epoch.
        validation: sequence of validation values, one per epoch.
        title: metric name, used in the panel title and as the y-axis label.
        subplot: three-digit Matplotlib subplot code (e.g. 211). A code whose
            last digit is 1 starts a new figure; other codes add a panel to
            the current figure.
    """
    if subplot%10==1: # first panel: start a fresh figure
        # plt.figure is used instead of plt.subplots because subplots() also
        # creates a full-figure Axes, which recent Matplotlib versions keep
        # underneath the panels added by plt.subplot below.
        plt.figure(figsize=(10,10), facecolor='#F0F0F0')
    ax = plt.subplot(subplot)
    ax.set_facecolor('#F8F8F8')
    ax.plot(training)
    ax.plot(validation)
    ax.set_title('model '+ title)
    ax.set_ylabel(title)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'valid.'])
    # Adjust the layout only after the panel has its title and labels, so
    # they are taken into account and neighbouring panels do not overlap.
    plt.tight_layout()

In [None]:
# Top panel of a 2x1 grid: training vs. validation loss.
display_training_curves(
    training=historical.history['loss'],
    validation=historical.history['val_loss'],
    title='loss',
    subplot=211,
)
# Bottom panel: training vs. validation binary accuracy.
display_training_curves(
    training=historical.history['binary_accuracy'],
    validation=historical.history['val_binary_accuracy'],
    title='accuracy',
    subplot=212,
)


In [None]:
# Persist the trained model to the working directory.
model.save(filepath='model.h5')