# Binary Classification Task

## Model A (CNN from TensorFlow Docs)

In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score
import pandas as pd

# Dataset locations for the heat-map images: held-out test split and training split
test_dir = './binary_test/test_set'
train_dir = './binary_test/training_set'


In [None]:
# Define parameters
# target_size is derived from img_height / img_width so the size used by the
# generators and the size used for the model input cannot drift apart.
img_height = 224
img_width = 224
target_size = (img_height, img_width)
batch_size = 32
validation_split = 0.2  # fraction of the training directory reserved for validation

# Define ImageDataGenerator
# The pixel rescaling factor is passed through the constructor (the documented
# API) instead of being assigned onto the instances after they are built.
training_gen = ImageDataGenerator(rescale=1. / 255.0,
                                  validation_split=validation_split)
test_gen = ImageDataGenerator(rescale=1. / 255.0)

In [None]:
# Build the three directory iterators (binary labels assigned per directory)
shared_flow_args = dict(target_size=target_size,
                        batch_size=batch_size,
                        class_mode='binary')

# Training and validation both read train_dir; the generator's validation
# split decides which subset each iterator yields.
train_generator = training_gen.flow_from_directory(
    train_dir, subset='training', **shared_flow_args)

validation_generator = training_gen.flow_from_directory(
    train_dir, subset='validation', **shared_flow_args)

# The test iterator is not shuffled, so its order stays fixed between passes.
test_generator = test_gen.flow_from_directory(
    test_dir, shuffle=False, **shared_flow_args)

In [None]:
# Model architecture (from TensorFlow documentation), declared as one layer list
model = models.Sequential([
    # Convolution / pooling stack
    layers.Conv2D(32, (3, 3), activation='relu',
                  input_shape=(img_height, img_width, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Flatten layer
    layers.Flatten(),

    # Dense layer
    layers.Dense(64, activation='relu'),

    # Output layer: a single sigmoid unit for the binary label
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 10 epochs, using the validation iterator as validation data
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
)

### Evaluation

In [None]:
# Loss and accuracy on the test set
# With a single 'accuracy' metric compiled in, model.evaluate returns
# [loss, accuracy]; unpack it so the value printed as accuracy really is the
# accuracy and not the whole list.
test_loss, test_acc = model.evaluate(test_generator)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

In [None]:
# Run the model over the test iterator to get its sigmoid outputs
predictions = model.predict(x=test_generator)

In [None]:
# Get expected labels
# test_generator was created with shuffle=False, so its `classes` array lists
# the label of every test image in the same order as the rows of
# `predictions`. Reading it directly replaces the previous loop over a
# hard-coded 15 batches, which was only correct for one test-set size.
true_labels = np.asarray(test_generator.classes).astype(int)

# Convert to binary predictions (sigmoid output above 0.5 -> class 1)
binary_predictions = np.where(predictions > 0.5, 1, 0).flatten()

In [None]:
# Evaluation
# labels=[0, 1] pins the matrix to 2x2, so the four-way unpack still works
# when one class is missing from both the labels and the predictions.
TN, FP, FN, TP = confusion_matrix(true_labels, binary_predictions,
                                  labels=[0, 1]).ravel()

# With no actual negatives the false positive rate is undefined; report NaN
# instead of dividing by zero.
negatives = FP + TN
FPR = FP / negatives if negatives > 0 else float('nan')

print("True Positives:", TP)
print("True Negatives:", TN)
print("False Positives:", FP)
print("False Negatives:", FN)
print("False Positive Rate:", FPR)

In [None]:
# Score the thresholded predictions against the expected labels
precision = precision_score(y_true=true_labels, y_pred=binary_predictions)
recall = recall_score(y_true=true_labels, y_pred=binary_predictions)

# Precision: share of positive predictions that are true positives
print("Precision:", precision)

# Recall: share of actual positive cases that were predicted positive
print("Recall:", recall)

## Model B (ResNet50 Transfer Learning)

In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from tensorflow.keras.applications import ResNet50
import pandas as pd

# Dataset locations for the heat-map images: held-out test split and training split
test_dir = './binary_test/test_set'
train_dir = './binary_test/training_set'

In [None]:
# Define parameters
# target_size is derived from img_height / img_width so the size used by the
# generators and the size used for the model input cannot drift apart.
img_height = 224
img_width = 224
target_size = (img_height, img_width)
batch_size = 32
validation_split = 0.2  # fraction of the training directory reserved for validation

# Define ImageDataGenerator
# The ImageNet-pretrained ResNet50 weights expect inputs prepared with
# resnet50.preprocess_input, not pixels rescaled to [0, 1]. The function is
# applied per image by the generators, and no additional `rescale` is set
# because that would be applied on top of the preprocessing output.
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input
training_gen = ImageDataGenerator(preprocessing_function=resnet_preprocess,
                                  validation_split=validation_split)
test_gen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

In [None]:
# Build the three directory iterators (binary labels assigned per directory)
shared_flow_args = dict(target_size=target_size,
                        batch_size=batch_size,
                        class_mode='binary')

# Training and validation both read train_dir; the generator's validation
# split decides which subset each iterator yields.
train_generator = training_gen.flow_from_directory(
    train_dir, subset='training', **shared_flow_args)

validation_generator = training_gen.flow_from_directory(
    train_dir, subset='validation', **shared_flow_args)

# The test iterator is not shuffled, so its order stays fixed between passes.
test_generator = test_gen.flow_from_directory(
    test_dir, shuffle=False, **shared_flow_args)

In [None]:
# ResNet50 base with ImageNet weights and without its classification top
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

# Freeze every layer of the base so only the new head has trainable weights
for layer in base_model.layers:
    layer.trainable = False

# Stack a small classification head on the frozen base (feature extractor);
# the base is called with training=False.
inputs = tf.keras.Input(shape=(img_height, img_width, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(64, activation='relu')(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = models.Model(inputs=inputs, outputs=outputs)

# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the validation iterator as validation data
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
)

### Evaluation

In [None]:
# Loss and accuracy on the test set
# With a single 'accuracy' metric compiled in, model.evaluate returns
# [loss, accuracy]; unpack it so the value printed as accuracy really is the
# accuracy and not the whole list.
test_loss, test_acc = model.evaluate(test_generator)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

In [None]:
# Run the model over the test iterator to get its sigmoid outputs
predictions = model.predict(x=test_generator)

In [None]:
# Get expected labels
# test_generator was created with shuffle=False, so its `classes` array lists
# the label of every test image in the same order as the rows of
# `predictions`. Reading it directly replaces the previous loop over a
# hard-coded 15 batches, which was only correct for one test-set size.
true_labels = np.asarray(test_generator.classes).astype(int)

# Convert probabilities to binary predictions (above 0.5 -> class 1)
binary_predictions = np.where(predictions > 0.5, 1, 0).flatten()

In [None]:
# Calculate confusion matrix
# labels=[0, 1] pins the matrix to 2x2, so the four-way unpack still works
# when one class is missing from both the labels and the predictions.
TN, FP, FN, TP = confusion_matrix(true_labels, binary_predictions,
                                  labels=[0, 1]).ravel()

# With no actual negatives the false positive rate is undefined; report NaN
# instead of dividing by zero.
negatives = FP + TN
FPR = FP / negatives if negatives > 0 else float('nan')

print("True Positives:", TP)
print("True Negatives:", TN)
print("False Positives:", FP)
print("False Negatives:", FN)
print("False Positive Rate:", FPR)

In [None]:
# Score the thresholded predictions against the expected labels
precision = precision_score(y_true=true_labels, y_pred=binary_predictions)
recall = recall_score(y_true=true_labels, y_pred=binary_predictions)

# Precision: share of positive predictions that are true positives
print("Precision:", precision)

# Recall: share of actual positive cases that were predicted positive
print("Recall:", recall)