In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import numpy as np
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.models import * 
from tensorflow.keras.preprocessing import image

train_path= '/Users/wwang/Desktop/work/ultrasound breast classification/train'
test_path='/Users/wwang/Desktop/work/ultrasound breast classification/val'

In [12]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
import time
from sklearn.metrics import classification_report

class TimerAfterFirstIteration(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        # Initialize a flag to check if it's the first epoch
        if epoch == 0 and self.model.history.epoch:  # check for first iteration
            self.start_time = time.time()  # Start timer after first iteration
            print(f"Timer started after the first iteration of epoch {epoch + 1}")

    def on_epoch_end(self, epoch, logs=None):
        # Stop the timer after the epoch ends and calculate time taken after the first iteration
        if hasattr(self, 'start_time'):
            self.end_time = time.time()
            elapsed_time = self.end_time - self.start_time
            print(f"Time taken after the first iteration for epoch {epoch + 1}: {elapsed_time:.2f} seconds")



def train_and_evaluate(target_size, model_name):
    start_time = time.time()  # Start timer
    
    # Create image generators for the given target_size
    train_datagen = image.ImageDataGenerator(
        rotation_range=15,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        width_shift_range=0.1,
        height_shift_range=0.1
    )
    
    val_datagen = image.ImageDataGenerator(
        rotation_range=15,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        width_shift_range=0.1,
        height_shift_range=0.1
    )
    
    # Train and validation generators with the current target size
    train_generator = train_datagen.flow_from_directory(
        train_path,
        target_size=target_size,
        batch_size=4,
        class_mode='binary'
    )
    
    validation_generator = val_datagen.flow_from_directory(
        test_path,
        target_size=target_size,
        batch_size=4,
        shuffle=True,
        class_mode='binary'
    )
    
    # Load and build the model
    base_model = tf.keras.applications.EfficientNetB3(weights='imagenet', input_shape=(target_size[0], target_size[1], 3), include_top=False)
    for layer in base_model.layers:
        layer.trainable = False

    model = Sequential()
    model.add(base_model)
    model.add(GaussianNoise(0.25))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(GaussianNoise(0.25))
    model.add(Dropout(0.25))
    model.add(Dense(1, activation='sigmoid'))
    
    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', 'AUC', 'Precision', 'Recall'])
    
    # Set up callbacks
    es = EarlyStopping(patience=3, monitor='val_loss')
    filepath = f'{model_name}_best_model.keras'
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

    # Add the Timer callback here
    timer_callback = TimerAfterFirstIteration()

    # Train the model
    history = model.fit(
        train_generator,
        epochs=10,
        validation_data=validation_generator,
        steps_per_epoch=24,
        callbacks=[checkpoint, es, timer_callback]  # Include the Timer callback
    )
    
    # End overall timer and calculate time taken for complete process
    end_time = time.time()  # Stop overall timer after training
    elapsed_time = end_time - start_time
    print(f"Total time taken for model {model_name}: {elapsed_time:.2f} seconds")
    
    # Evaluate the model on the validation set
    evaluation = model.evaluate(validation_generator)
    print(f"Evaluation results for {model_name}:")
    print(f"Loss: {evaluation[0]}, Accuracy: {evaluation[1]}, AUC: {evaluation[2]}, Precision: {evaluation[3]}, Recall: {evaluation[4]}")
    
    return evaluation, elapsed_time


results_224, time_224 = train_and_evaluate((224, 224), "model_224")
results_128, time_128 = train_and_evaluate((128, 128), "model_128")
results_64, time_64 = train_and_evaluate((64, 64), "model_64")

results_243, time_243 = train_and_evaluate((243, 243), "model_243")
results_162, time_162 = train_and_evaluate((162, 162), "model_162")
results_81, time_81 = train_and_evaluate((81, 81), "model_81")
results_54, time_54 = train_and_evaluate((54, 54), "model_54")






Found 8116 images belonging to 2 classes.
Found 900 images belonging to 2 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - AUC: 0.6785 - Precision: 0.5993 - Recall: 0.5833 - accuracy: 0.6226 - loss: 0.8008
Epoch 1: val_accuracy improved from -inf to 0.67444, saving model to model_224_best_model.keras
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 1s/step - AUC: 0.6799 - Precision: 0.6013 - Recall: 0.5838 - accuracy: 0.6236 - loss: 0.8028 - val_AUC: 0.7630 - val_Precision: 0.6085 - val_Recall: 0.7500 - val_accuracy: 0.6744 - val_loss: 0.5890
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - AUC: 0.7319 - Precision: 0.5568 - Recall: 0.6649 - accuracy: 0.6210 - loss: 0.8926
Epoch 2: val_accuracy improved from 0.67444 to 0.70556, saving model to model_224_best_model.keras
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 1s/step - AUC: 0.7303 - Precision: 0.5597 - Recall: 0.6625 - accuracy: 0.6212 - loss: 0.8940 - val_AUC: 0.7684 - val_Precision: 0.6934 - val_Recal

In [13]:

# Comparison of results with rounded metrics
print(f"\nComparison of models:")

print(f"Model with 224x224 resize: Accuracy={round(results_224[1], 3)}, AUC={round(results_224[2], 3)}, Precision={round(results_224[3], 3)}, Recall={round(results_224[4], 3)}, Time={time_64:.2f} seconds")
print(f"Model with 128x128 resize: Accuracy={round(results_128[1], 3)}, AUC={round(results_128[2], 3)}, Precision={round(results_128[3], 3)}, Recall={round(results_128[4], 3)}, Time={time_128:.2f} seconds")
print(f"Model with 64x64 resize: Accuracy={round(results_64[1], 3)}, AUC={round(results_64[2], 3)}, Precision={round(results_64[3], 3)}, Recall={round(results_64[4], 3)}, Time={time_64:.2f} seconds")
print("\n")

print(f"Model with 243x243 resize: Accuracy={round(results_243[1], 3)}, AUC={round(results_243[2], 3)}, Precision={round(results_243[3], 3)}, Recall={round(results_243[4], 3)}, Time={time_243:.2f} seconds")
print(f"Model with 162x162 resize: Accuracy={round(results_162[1], 3)}, AUC={round(results_162[2], 3)}, Precision={round(results_162[3], 3)}, Recall={round(results_162[4], 3)}, Time={time_162:.2f} seconds")
print(f"Model with 81x81 resize: Accuracy={round(results_81[1],3)}, AUC={round(results_81[2],3)}, Precision={round(results_81[3],3)}, Recall={round(results_81[4],3)}, Time={time_81:.2f} seconds")
print(f"Model with 54x54 resize: Accuracy={round(results_54[1],3)}, AUC={round(results_54[2],3)}, Precision={round(results_54[3],3)}, Recall={round(results_54[4],3)}, Time={time_54:.2f} seconds")




Comparison of models:
Model with 224x224 resize: Accuracy=0.771, AUC=0.869, Precision=0.708, Recall=0.825, Time=199.10 seconds
Model with 128x128 resize: Accuracy=0.761, AUC=0.824, Precision=0.797, Recall=0.62, Time=130.20 seconds
Model with 64x64 resize: Accuracy=0.68, AUC=0.762, Precision=0.715, Recall=0.465, Time=74.69 seconds


Model with 243x243 resize: Accuracy=0.759, AUC=0.858, Precision=0.715, Recall=0.76, Time=283.38 seconds
Model with 162x162 resize: Accuracy=0.722, AUC=0.805, Precision=0.795, Recall=0.505, Time=89.30 seconds
Model with 81x81 resize: Accuracy=0.727, AUC=0.79, Precision=0.698, Recall=0.678, Time=81.57 seconds
Model with 54x54 resize: Accuracy=0.637, AUC=0.708, Precision=0.566, Recall=0.788, Time=54.76 seconds
