<a href="https://colab.research.google.com/github/ash8m/cost-analysis/blob/main/BinaryClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Library Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import get_file, to_categorical

In [2]:
DATA_NAME = "PneumoniaMNIST"
!wget https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/medmnist/info.py
from info import INFO
data = INFO[DATA_NAME.lower()]

--2024-03-11 18:15:50--  https://raw.githubusercontent.com/MedMNIST/MedMNIST/main/medmnist/info.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 27766 (27K) [text/plain]
Saving to: ‘info.py’


2024-03-11 18:15:51 (98.3 MB/s) - ‘info.py’ saved [27766/27766]



In [3]:
from google.colab import drive
drive.mount('/content/drive')

dataset = np.load('/content/drive/MyDrive/'+DATA_NAME.lower()+'.npz')


Mounted at /content/drive


In [4]:
# Gets the training images and labels from the NumPy object.
train_x = dataset["train_images"]
train_x = np.expand_dims(train_x, axis=-1)  # Add an extra dimension for the color channel
train_x = tf.image.resize(train_x, (32, 32))  # Resize the images to 32x32
train_x = tf.repeat(train_x, 3, axis=-1)  # Convert grayscale images to RGB
train_x = train_x / 255.0  # Normalize the images
train_y = dataset["train_labels"]

# Gets the validation images and labels from the NumPy object.
val_x = dataset["val_images"]
val_x = np.expand_dims(val_x, axis=-1)
val_x = tf.image.resize(val_x, (32, 32))
val_x = tf.repeat(val_x, 3, axis=-1)
val_x = val_x / 255.0
val_y = dataset["val_labels"]

# Gets the testing images and labels from the NumPy object.
test_x = dataset["test_images"]
test_x = np.expand_dims(test_x, axis=-1)
test_x = tf.image.resize(test_x, (32, 32))
test_x = tf.repeat(test_x, 3, axis=-1)
test_x = test_x / 255.0
test_y = dataset["test_labels"]

In [5]:
# Declares a list of labels.
labels = list(data["label"].values()) + ["total"]
print(labels)
# Gets the counts for each label in each of our datasets.
_, train_counts = np.unique(train_y, return_counts=True)
_, val_counts = np.unique(val_y, return_counts=True)
_, test_counts = np.unique(test_y, return_counts=True)

# Prints the counts for each label from each dataset.
print(pd.DataFrame(list(zip(np.append(train_counts, [sum(train_counts)]),
                            np.append(val_counts, [sum(val_counts)]),
                            np.append(test_counts, [sum(test_counts)]))),
                   index=labels, columns=["Train", "Val", "Test"]))

['normal', 'pneumonia', 'total']
           Train  Val  Test
normal      1214  135   234
pneumonia   3494  389   390
total       4708  524   624


In [6]:
# Load the pre-trained model
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(32, 32, 3))
base_model.summary()

# Freeze the base model
base_model.trainable = False

# Create new model on top
inputs = tf.keras.Input(shape=(32, 32, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(1024, activation='relu')(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs, outputs)

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
Model: "efficientnetb0"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 32, 32, 3)            0         ['input_1[0][0]']             
                                                                                                  
 normalization (Normalizati  (None, 32, 32, 3)            7         ['rescaling[0][0]']           
 on)                                                                                              
                                                                                        

In [7]:
# Train the model
history = model.fit(train_x, train_y, epochs=5, validation_data=(val_x, val_y))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
# Unfreeze the whole base model for fine-tuning
base_model.trainable = True
# unfreeze the last 15 layers
#num_layers = len(base_model.layers)
#for layer in base_model.layers[:num_layers - 15]:
#    layer.trainable = False
#for layer in base_model.layers[num_layers - 15:]:
#    layer.trainable = True


model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),  # Low learning rate
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])

# Fine-tune the model
fine_tune_epochs = 15
total_epochs = 5+15


model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 efficientnetb0 (Functional  (None, 1, 1, 1280)        4049571   
 )                                                               
                                                                 
 global_average_pooling2d (  (None, 1280)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 1024)              1311744   
                                                                 
 dense_1 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 5362340 (20.46 MB)
Trainable params: 5320317 (2

In [None]:
history_fine = model.fit(train_x, train_y,
                         epochs=total_epochs,
                         initial_epoch=history.epoch[-1],
                         validation_data=(val_x, val_y))

Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
  1/148 [..............................] - ETA: 58s - loss: 0.4147 - binary_accuracy: 0.7812