In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import MobileNet, DenseNet121, VGG19
from tensorflow.keras.layers import (
    Activation,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    GlobalAveragePooling2D,
    MaxPooling2D,
)
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [17]:
import numpy as np


In [2]:
# Define constants
IMAGE_SIZE = (512, 512)  # Spatial size of the network input; matches the (512, 512, 3) shape passed to MobileNet below
BATCH_SIZE = 32  # Samples per gradient step, passed to `fit`
NUM_CLASSES = 3  # Width of the softmax output layer; the `pathology` labels displayed in this notebook take the values 0, 1 and 2

In [3]:
# Colab-only: expose Google Drive on the local filesystem so the dataset
# JSON can be read with ordinary file paths.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Load the dataset.
# Only df_test.json is read; despite its name, `val_df` holds the complete
# frame, which later cells split into a hold-out sample and a training frame.
# train_df = pd.read_json('/content/drive/MyDrive/Data Bootcamp/machine-learning-project/df_train.json')
DATASET_JSON = '/content/drive/MyDrive/Data Bootcamp/machine-learning-project/df_test.json'
val_df = pd.read_json(DATASET_JSON)

In [6]:
# Peek at the raw pixel data: each entry is one image stored as a nested list
# of integer intensities.
val_df.loc[:, 'image_array'].head()

Unnamed: 0,image_array
P_00016_LEFT_CC_1,"[[44447, 45371, 45602, 45098, 44804, 44867, 44..."
P_00016_LEFT_MLO_1,"[[51527, 51212, 50938, 50938, 51107, 51275, 51..."
P_00017_LEFT_CC_1,"[[31463, 31881, 31969, 31771, 31683, 31947, 30..."
P_00017_LEFT_MLO_1,"[[38178, 38662, 37385, 36791, 36548, 37055, 37..."
P_00032_RIGHT_CC_1,"[[33882, 33949, 33398, 33244, 33530, 33332, 33..."


In [7]:
# Non-null entries per column; a column whose count is lower than the others
# contains missing values.
val_df.notna().sum()

Unnamed: 0,0
mass shape,378
mass margins,361
breast_density,378
assessment,378
subtlety,378
image_array,378
pathology,378


In [10]:
# Hold out a fixed-size, reproducible random subset of rows for validation.
# NOTE(review): rows are sampled individually, so different views whose index
# shares a patient prefix (e.g. P_00016_LEFT_CC_1 / P_00016_LEFT_MLO_1) can
# land on opposite sides of the split — confirm whether that is acceptable.
HOLDOUT_ROWS = 76
SPLIT_SEED = 42
random_sample = val_df.sample(n=HOLDOUT_ROWS, random_state=SPLIT_SEED)
random_sample

Unnamed: 0,mass shape,mass margins,breast_density,assessment,subtlety,image_array,pathology
P_01395_RIGHT_CC_1,LOBULATED,CIRCUMSCRIBED,1,3,5,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0...",0
P_01623_RIGHT_MLO_1,IRREGULAR,SPICULATED,3,5,2,"[[20266, 19951, 19195, 18627, 18417, 17682, 17...",0
P_01599_LEFT_MLO_1,LOBULATED,ILL_DEFINED,2,4,3,"[[29594, 29384, 29846, 29951, 29720, 29279, 28...",0
P_00629_RIGHT_CC_1,IRREGULAR,SPICULATED,3,5,5,"[[45831, 46162, 46580, 46602, 46448, 46492, 46...",0
P_00212_RIGHT_CC_1,LOBULATED,ILL_DEFINED,3,4,3,"[[33400, 33796, 33862, 35029, 34589, 34501, 35...",2
...,...,...,...,...,...,...,...
P_01090_LEFT_MLO_3,LOBULATED,CIRCUMSCRIBED,1,3,5,"[[39763, 40357, 40644, 41766, 41943, 41832, 40...",0
P_00837_RIGHT_MLO_1,OVAL,CIRCUMSCRIBED,4,4,4,"[[27976, 28060, 27640, 27157, 27115, 27136, 27...",0
P_01566_RIGHT_MLO_3,LOBULATED,CIRCUMSCRIBED,3,3,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
P_00482_LEFT_CC_1,LOBULATED,CIRCUMSCRIBED,3,4,2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0


In [11]:
# Training frame = every row whose index label was not drawn into the
# hold-out sample.
holdout_labels = random_sample.index
train_df = val_df.drop(index=holdout_labels)
train_df

Unnamed: 0,mass shape,mass margins,breast_density,assessment,subtlety,image_array,pathology
P_00016_LEFT_MLO_1,IRREGULAR,SPICULATED,4,5,5,"[[51527, 51212, 50938, 50938, 51107, 51275, 51...",2
P_00017_LEFT_CC_1,ROUND,CIRCUMSCRIBED,2,4,4,"[[31463, 31881, 31969, 31771, 31683, 31947, 30...",2
P_00032_RIGHT_CC_1,ROUND,OBSCURED,3,0,2,"[[33882, 33949, 33398, 33244, 33530, 33332, 33...",0
P_00037_RIGHT_CC_1,ROUND,SPICULATED,3,5,5,"[[0, 661, 1167, 947, 1079, 815, 2158, 617, 107...",2
P_00037_RIGHT_MLO_1,IRREGULAR,SPICULATED,3,5,5,"[[39301, 39565, 38552, 40445, 41524, 40842, 39...",2
...,...,...,...,...,...,...,...
P_01815_RIGHT_MLO_1,IRREGULAR,ILL_DEFINED,3,4,2,"[[22656, 23404, 24197, 24153, 24505, 24461, 23...",2
P_01825_RIGHT_CC_1,OVAL,CIRCUMSCRIBED,2,3,3,"[[39983, 40798, 40930, 41370, 40864, 40357, 40...",1
P_01825_RIGHT_MLO_1,LOBULATED,MICROLOBULATED,2,3,3,"[[34479, 34369, 34457, 35205, 35668, 35888, 35...",1
P_01865_LEFT_MLO_1,IRREGULAR,ILL_DEFINED,2,4,2,"[[39762, 39762, 39636, 39405, 39384, 39489, 39...",2


In [None]:
# Rescale the raw intensities by 1/256. The values displayed above go well
# beyond 255, so this presumably maps 16-bit pixels into the 0..255 range that
# the Keras `preprocess_input` helpers expect — TODO confirm the bit depth.
# float32 is used throughout: the previous float64 arrays cost at least twice
# the memory for no benefit to the network.
PIXEL_SCALE = 1 / 256
train0 = [np.asarray(img, dtype=np.float32) * PIXEL_SCALE for img in train_df['image_array']]


def _as_rgb_batch(gray_images):
    """Return a fresh (N, H, W, 3) float32 batch built from 2-D grayscale images.

    The single channel is repeated three times because the ImageNet-pretrained
    backbones expect 3-channel input. A new array is returned on every call,
    which matters because `preprocess_input` may modify its argument in place.

    Raises:
        ValueError: if the images do not all share the same shape.
    """
    batch = np.stack(gray_images).astype(np.float32, copy=False)
    return np.repeat(batch[..., np.newaxis], 3, axis=-1)


# One vectorised preprocessing call per backbone instead of one call per image.
train1 = tf.keras.applications.densenet.preprocess_input(_as_rgb_batch(train0))
train2 = tf.keras.applications.mobilenet.preprocess_input(_as_rgb_batch(train0))

In [None]:
# Function to build and compile models
def build_model(base_model):
    """Attach a classification head to `base_model` and compile the result.

    The head is: global average pooling -> Dense(1024, relu) -> Dropout(0.5)
    -> Dense(NUM_CLASSES, softmax).

    Args:
        base_model: a Keras model exposing `.input` and `.output`, e.g. an
            application built with `include_top=False`.

    Returns:
        A compiled `Model` mapping `base_model.input` to class probabilities.
        It is compiled with the sparse categorical cross-entropy loss, so
        targets must be integer class ids (0 .. NUM_CLASSES - 1), not one-hot
        vectors.
    """
    # Collapse the spatial feature map into one feature vector per image.
    features = GlobalAveragePooling2D()(base_model.output)
    # Fully connected layer with dropout for regularisation.
    hidden = Dense(1024, activation='relu')(features)
    hidden = Dropout(0.5)(hidden)
    # Final output layer: one probability per class.
    predictions = Dense(NUM_CLASSES, activation='softmax')(hidden)

    model = Model(inputs=base_model.input, outputs=predictions)
    # The labels used for training are integer class ids; plain
    # 'categorical_crossentropy' would require one-hot targets and fails with a
    # shape mismatch on integer labels.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# ImageNet weights are only available for 3-channel inputs, so every backbone
# is built on IMAGE_SIZE plus a trailing channel count of 3.
INPUT_SHAPE = (*IMAGE_SIZE, 3)

# Create MobileNet model
base_model_mobilenet = MobileNet(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)
mobilenet_model = build_model(base_model_mobilenet)

# Create DenseNet121 model (disabled)
# base_model_densenet = DenseNet121(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)
# densenet_model = build_model(base_model_densenet)

# Create VGG19 model (disabled)
# base_model_vgg19 = VGG19(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)
# vgg19_model = build_model(base_model_vgg19)

# Train models
EPOCHS = 10


def _prepare_mobilenet_inputs(image_arrays):
    """Turn raw grayscale images into a MobileNet-ready (N, H, W, 3) float32 batch.

    Applies the same steps used to build `train2`: scale by 1/256, repeat the
    single channel three times, then run MobileNet's `preprocess_input`.
    """
    gray = np.stack([np.asarray(img, dtype=np.float32) * (1 / 256) for img in image_arrays])
    rgb = np.repeat(gray[..., np.newaxis], 3, axis=-1)
    return tf.keras.applications.mobilenet.preprocess_input(rgb)


# The held-out rows in `random_sample` are used as validation data so each
# epoch reports metrics on images the model was not trained on.
val_x = _prepare_mobilenet_inputs(random_sample['image_array'])
val_y = random_sample.pathology.to_numpy()

# Training MobileNet model
mobilenet_model.fit(
    x=train2,
    y=train_df.pathology.to_numpy(),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(val_x, val_y),
)

# Training DenseNet121 model (disabled)
# `train1` already holds the DenseNet-preprocessed training images.
# densenet_model.fit(
#     x=train1,
#     y=train_df.pathology.to_numpy(),
#     batch_size=BATCH_SIZE,
#     epochs=EPOCHS,
# )

# Training VGG19 model (disabled)
# VGG19 needs its own preprocessing (tf.keras.applications.vgg19.preprocess_input)
# applied to 3-channel copies of the images before it can be trained.
# vgg19_model.fit(
#     x=train_vgg19,
#     y=train_df.pathology.to_numpy(),
#     batch_size=BATCH_SIZE,
#     epochs=EPOCHS,
# )

In [None]:
# values for the convnet

# number of convolutional filters to use
filters = 32
# size of pooling area
pooling_area = 2
# conv kernel size
conv_kernel = 3


# We define the cnn model
def buildModelStructure(input_shape=None, num_classes=1):
    """Build a small, uncompiled convolutional classifier.

    Architecture: three Conv2D -> ReLU -> MaxPooling2D stages (`filters`,
    `filters` and 64 filters), then Flatten -> Dense(64) -> ReLU ->
    Dropout(0.5) -> output layer.

    Args:
        input_shape: (height, width, channels) of one input image. Defaults to
            `(*IMAGE_SIZE, 3)`, the shape of the 3-channel arrays prepared in
            this notebook. It used to be read from an undefined global.
        num_classes: size of the output layer. The default of 1 keeps the
            original single sigmoid unit (binary output). Pass `NUM_CLASSES`
            to get a softmax over the three `pathology` labels.

    Returns:
        An uncompiled `Sequential` model.
    """
    if input_shape is None:
        input_shape = (*IMAGE_SIZE, 3)

    kernel = (conv_kernel, conv_kernel)
    pool = (pooling_area, pooling_area)

    model = Sequential()
    model.add(Conv2D(filters, kernel, padding='valid', input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=pool))

    model.add(Conv2D(filters, kernel))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=pool))

    model.add(Conv2D(64, kernel))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=pool))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    if num_classes == 1:
        # Binary head: a single probability.
        model.add(Dense(1))
        model.add(Activation('sigmoid'))
    else:
        # Multi-class head: one probability per class.
        model.add(Dense(num_classes))
        model.add(Activation('softmax'))

    return model

In [None]:
# Show the documentation of MobileNet's input preprocessing helper.
# `preprocess_input` lives in the `mobilenet` module, not on the `MobileNet`
# constructor, and it needs an image array argument to actually be called.
help(tf.keras.applications.mobilenet.preprocess_input)

In [None]:
# Instantiate the ImageNet-pretrained MobileNet at this notebook's resolution.
# `input_shape` must include the channel axis, and the stock 1000-class
# ImageNet head only accepts the default input size, so the classifier top is
# dropped (`include_top=False`) to allow 512x512 inputs. The head-only
# arguments (`classes`, `classifier_activation`) are therefore omitted.
MobileNet(
    input_shape=(*IMAGE_SIZE, 3),
    alpha=1.0,
    depth_multiplier=1,
    dropout=0.001,
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
)