In [None]:
 # Import libraries
!pip install split-folders
import splitfolders
import tensorflow as tf
import tensorflow_hub as hub
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from IPython.display import Image
from sklearn.model_selection import train_test_split
from matplotlib.pyplot import imread
from osgeo import gdal
from google.colab import drive
from google.colab import files

# !unzip "/content/drive/MyDrive/bioinformatics/data.zip" -d "/content/drive/MyDrive/bioinformatics/"


# Report whether TensorFlow can see at least one GPU on this runtime
gpu_devices = tf.config.list_physical_devices("GPU")
print("GPU available!" if gpu_devices else "GPU not available!")

# try:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# except ValueError:
#   raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.TPUStrategy(tpu)

# Getting Familiar with the Data

## Putting the training data filenames and labels into a CSV file
This step is performed once to get all the data in a CSV file with the appropriate label. Once that's done, the code is commented out!

In [None]:
# Create data frame of data with labels
def create_dataframe(benign_directory, malignant_directory, random_state = None):
  """Build a shuffled table of image file names and their tumor labels.

  Parameters
  ----------
  benign_directory : str, 0 or None
    Folder whose entries are labelled benign (0). Pass 0 (the sentinel this
    notebook has always used) or None to build a malignant-only table.
  malignant_directory : str, 0 or None
    Folder whose entries are labelled malignant (1). Pass 0 or None to build
    a benign-only table.
  random_state : int, optional
    Seed forwarded to DataFrame.sample so the row order can be reproduced.
    The default (None) keeps the original unseeded shuffle.

  Returns
  -------
  pandas.DataFrame
    Two columns: the file names ("ID Benign", "ID Malignant", or "ID" when
    both folders are given) and "Target" (0 = benign, 1 = malignant), with
    rows shuffled and the index reset to 0..n-1.

  Raises
  ------
  ValueError
    If neither folder is given.
  """
  BENIGN = 0
  MALIGNANT = 1

  benign_missing = benign_directory is None or benign_directory == 0
  malignant_missing = malignant_directory is None or malignant_directory == 0
  if benign_missing and malignant_missing:
    raise ValueError("create_dataframe needs at least one of benign_directory / malignant_directory")

  # A dict keyed by file name keeps one row per name, as the original code did
  if malignant_missing:
    id_column = "ID Benign"
    targets = dict.fromkeys(os.listdir(benign_directory), BENIGN)
  elif benign_missing:
    id_column = "ID Malignant"
    targets = dict.fromkeys(os.listdir(malignant_directory), MALIGNANT)
  else:
    id_column = "ID"
    targets = dict.fromkeys(os.listdir(benign_directory), BENIGN)
    # A name present in both folders ends up labelled malignant (unchanged behaviour)
    targets.update(dict.fromkeys(os.listdir(malignant_directory), MALIGNANT))

  # Create a data frame of labels, then shuffle the rows
  labels_unshuffled = pd.DataFrame({id_column: list(targets.keys()), "Target": list(targets.values())})
  return labels_unshuffled.sample(frac = 1, random_state = random_state).reset_index(drop = True)

In [None]:
# Folders holding the labelled training images, one folder per class.
benign_directory = "/content/drive/MyDrive/bioinformatics/data/train/benign"
malignant_directory = "/content/drive/MyDrive/bioinformatics/data/train/malignant"
# One-off export: uncomment the three lines below to rebuild labels.csv from the
# folders above and download it.
# labels = create_dataframe(benign_directory, malignant_directory)
# labels.to_csv('labels.csv')
# files.download('labels.csv')

In [None]:
# Load the file-name/label table from Drive; the first CSV column is used as the row index.
labels = pd.read_csv("/content/drive/MyDrive/bioinformatics/data/labels.csv", index_col = 0)
labels

In [None]:
# Tally each class with vectorised comparisons (0 = benign, 1 = malignant)
benign = int((labels["Target"] == 0).sum())
malignant = int((labels["Target"] == 1).sum())

print(f"There are {benign} benign tumors and {malignant} malignant tumors in the training data.")

## Visualizing the Data

In [None]:
# Turn images into tensors
def image_to_tensor(image_path):
  """Read the image file at image_path and return its pixel data as a constant TensorFlow tensor."""
  return tf.constant(imread(image_path))

In [None]:
# Integer class codes used in the "Target" column of the labels table
BENIGN = 0
MALIGNANT = 1

# Full path of every training image, in the same order as the rows of `labels`.
# (The bare `paths` expression that used to follow was mid-cell, so it never
# displayed anything; it has been dropped together with a dead commented import.)
directory = "/content/drive/MyDrive/bioinformatics/data/train_all/"
paths = [directory + filename for filename in labels["ID"]]

def visualize_images(paths, num_images):
  """Draw the first num_images images of paths on a 5 x 4 grid.

  Each panel is titled from row i of the global `labels` table ("Target"
  column); a value that is neither BENIGN nor MALIGNANT leaves the panel untitled.
  """
  titles = {BENIGN: "Benign", MALIGNANT: "Malignant"}
  plt.figure(figsize = (15, 15))

  for index in range(num_images):
    picture = image_to_tensor(paths[index])
    ax = plt.subplot(5, 4, index + 1)
    plt.xlabel(str(index))
    plt.imshow(picture)

    title = titles.get(labels["Target"][index])
    if title is not None:
      plt.title(title)
    plt.axis(True)


In [None]:
# Preview the first 20 training images with their labels
visualize_images(paths, 20)

### Image Shape

In [None]:
# Inspect the dimensions of the first training image
image = image_to_tensor(paths[0])
image.shape

# Building the Model

In [None]:
# Define the layers to use: four 3x3 convolutions with growing filter counts
# (32 -> 256), max-pooling after the last three, then a small dense head that
# ends in a single sigmoid unit for the benign/malignant decision.
layers = [tf.keras.layers.Conv2D(filters = 32, kernel_size = (3,3), activation = "relu", input_shape = (460, 700, 3)),

          tf.keras.layers.Conv2D(filters = 64, kernel_size = (3,3), activation = "relu"),
          tf.keras.layers.MaxPool2D(pool_size = (2,2)),

          tf.keras.layers.Conv2D(filters = 128, kernel_size = (3,3), activation = "relu"),
          tf.keras.layers.MaxPool2D(pool_size = (2,2)),

          tf.keras.layers.Conv2D(filters = 256, kernel_size = (3,3), activation = "relu"),
          tf.keras.layers.MaxPool2D(pool_size = (2,2)),

          tf.keras.layers.Dropout(rate = 0.25),
          tf.keras.layers.Flatten(),

          tf.keras.layers.Dense(units = 64, activation = "relu"),
          tf.keras.layers.Dropout(rate = 0.25),

          tf.keras.layers.Dense(units = 1, activation = "sigmoid")
          ]

# Metrics tracked for every model in this notebook: accuracy, precision, recall
# and the four confusion-matrix counts
metric_list = ["accuracy", tf.keras.metrics.Precision(), tf.keras.metrics.Recall(),
           tf.keras.metrics.TruePositives(), tf.keras.metrics.FalsePositives(),
           tf.keras.metrics.TrueNegatives(), tf.keras.metrics.FalseNegatives()]

# Feed the layers to the model
model = tf.keras.Sequential(layers)
model.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = tf.keras.optimizers.Adam(), metrics = metric_list)
# summary() prints the table itself and returns None, so wrapping it in print()
# only added a stray "None" line to the output
model.summary()

## Setting Up the Directories and Data Generators

Split the training data into training and validation data. Comment out when directory is set up.

In [None]:
# main_directory = "/content/drive/MyDrive/bioinformatics/data/train"
# splitfolders.ratio(main_directory, output = "/content/drive/MyDrive/bioinformatics/data/train_val_split", seed = 42, ratio = (0.85, 0.15), group_prefix = None)

In [None]:
# train_directory = "/content/drive/MyDrive/bioinformatics/data/train"

In [None]:
# Root of the dataset on Drive; the train/validation folders live under train_val_split
data_root = "/content/drive/MyDrive/bioinformatics/data"
train = f"{data_root}/train_val_split/train"
validate = f"{data_root}/train_val_split/val"
test = f"{data_root}/test"

In [None]:
# Create data generators
# Only the training generator augments (zoom, shear, horizontal flip); all three
# multiply pixel values by 1/255.
train_data_generation = tf.keras.preprocessing.image.ImageDataGenerator(zoom_range = 0.2, shear_range = 0.2, rescale = 1. / 255, horizontal_flip = True)
validate_data_generation = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1. / 255)
test_data_generation = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1. / 255)

train_data = train_data_generation.flow_from_directory(directory = train, target_size = (460, 700), batch_size = 16, class_mode = "binary")
validate_data = validate_data_generation.flow_from_directory(directory = validate, target_size = (460, 700), batch_size = 16, class_mode = "binary")
# The test images must come from the rescale-only generator: building them from
# train_data_generation evaluated every model on randomly augmented test images.
test_data = test_data_generation.flow_from_directory(directory = test, target_size = (460, 700), batch_size = 16, class_mode = "binary")

# Show which integer each class folder was mapped to
print("Train Data Classes: ", train_data.class_indices)
print("Validate Data Classes: ", validate_data.class_indices)
print("Test Data Classes: ", test_data.class_indices)

## Creating Early Stopping Callback and Checkpoint for Saving the Model

Early stopping helps prevent overfitting by halting training once the monitored metric (validation accuracy) stops improving. The checkpoint callback saves the model to Drive each time validation accuracy reaches a new best value.

In [None]:
# Define the callbacks
# Stop when val_accuracy has not improved by at least 0.01 for 5 epochs in a row
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_accuracy",
    min_delta = 0.01,
    patience = 5,
    verbose = 1,
    mode = "auto",
)
# Keep only the weights of the epoch with the best val_accuracy
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "/content/drive/MyDrive/bioinformatics/breast-cancer-CNN.h5",
    monitor = "val_accuracy",
    verbose = 1,
    mode = "auto",
    save_best_only = True,
)
call_backs = [early_stopping, checkpoint]

In [None]:
# Fit the model
# Train for up to 30 epochs; the callbacks may stop earlier and save the best checkpoint
model_fit = model.fit(x = train_data, epochs = 30, verbose = 1, validation_data = validate_data, callbacks = call_backs)
# Per-epoch metric values, keyed by metric name
history = model_fit.history

## Storing Metric Values for Training and Validation Data

In [None]:
# Keras auto-names metric objects "precision", "precision_1", "precision_2", ...
# depending on how many were created in this session, so the history keys change
# whenever the model cell is re-run. Strip any trailing "_<n>" so the lookups
# below work regardless of kernel history.
history_by_metric = {
    (key.rpartition("_")[0] if key.rpartition("_")[2].isdigit() else key): values
    for key, values in history.items()
}

# Training curves (one value per epoch)
train_accuracy = history_by_metric["accuracy"]
train_loss = history_by_metric["loss"]
train_precision = history_by_metric["precision"]
train_recall = history_by_metric["recall"]
train_tp = history_by_metric["true_positives"]
train_fp = history_by_metric["false_positives"]
train_tn = history_by_metric["true_negatives"]
train_fn = history_by_metric["false_negatives"]

# Validation curves (one value per epoch)
validation_accuracy = history_by_metric["val_accuracy"]
validation_loss = history_by_metric["val_loss"]
validation_precision = history_by_metric["val_precision"]
validation_recall = history_by_metric["val_recall"]
validation_tp = history_by_metric["val_true_positives"]
validation_fp = history_by_metric["val_false_positives"]
validation_tn = history_by_metric["val_true_negatives"]
validation_fn = history_by_metric["val_false_negatives"]

### Plotting Results
* Train Accuracy vs. Validation Accuracy
* Train Loss vs. Validation Loss

In [None]:
# Accuracy per epoch: training in green, validation in red
train_acc, = plt.plot(train_accuracy, c = "green", label = "Training")
validate_acc, = plt.plot(validation_accuracy, c = "red", label = "Validation")
plt.title("Train Accuracy vs. Validation Accuracy")
plt.legend()
plt.show()

In [None]:
# Loss per epoch: training in green, validation in red
train_los, = plt.plot(train_loss, c = "green", label = "Training")
validate_los, = plt.plot(validation_loss, c = "red", label = "Validation")
plt.title("Train Loss vs. Validation Loss")
plt.legend()
plt.show()

# Loading the Model and Using it on the Test Data

In [None]:
# Reload the model from the same .h5 path the ModelCheckpoint callback writes to
model_load = tf.keras.models.load_model("/content/drive/MyDrive/bioinformatics/breast-cancer-CNN.h5")
model_load.summary()

In [None]:
# Evaluate the reloaded model on the test generator; the result is indexed by position in the next cell
test_evaluation = model_load.evaluate(x = test_data)

## Storing Metric Values for Test Data

In [None]:
# Positions of each value in the list returned by evaluate()
LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN = range(8)

test_loss, test_accuracy, test_precision, test_recall = (
    test_evaluation[position] for position in (LOSS, ACCURACY, PRECISION, RECALL)
)
test_tp, test_fp, test_tn, test_fn = (
    test_evaluation[position] for position in (TP, FP, TN, FN)
)

# Creating Reports for Each Trial

In [None]:
%%capture cap
# Best value reached by each curve
best_train_accuracy, best_val_accuracy = max(train_accuracy), max(validation_accuracy)
best_train_loss, best_val_loss = min(train_loss), min(validation_loss)

# Zero-based position of the first epoch that reached each best value
good_train_index = train_accuracy.index(best_train_accuracy)
good_train_loss_index = train_loss.index(best_train_loss)
good_val_index = validation_accuracy.index(best_val_accuracy)
good_val_loss_index = validation_loss.index(best_val_loss)

# Epochs are reported 1-based
good_train_epoch, good_train_loss_epoch = good_train_index + 1, good_train_loss_index + 1
good_val_epoch, good_val_loss_epoch = good_val_index + 1, good_val_loss_index + 1


# ---- Header and model architecture ----
print("\n============================ REPORT ==================================")
# summary() prints the table itself and returns None; wrapping it in print()
# added a stray "None" line to the captured report
model.summary()

# ---- Per-epoch training accuracy and loss ----
print("\t\t\tTrain")
print("\t\tAccuracy\tLoss")
for i, accuracy_train in enumerate(train_accuracy):
  print(f"Epoch {i + 1}: {accuracy_train}\t{train_loss[i]}")

print(f"\nThe highest training accuracy value is {round(best_train_accuracy * 100, 2)}% at Epoch {good_train_epoch}")
print(f"The lowest training loss value is {round(best_train_loss, 2)} at Epoch {good_train_loss_epoch}")

# ---- Per-epoch validation accuracy and loss ----
print("\n\t\t\tValidate")

for j, accuracy_validate in enumerate(validation_accuracy):
  print(f"Epoch {j + 1}: {accuracy_validate}\t{validation_loss[j]}")

print(f"\nThe highest validation accuracy value is {round(best_val_accuracy * 100, 2)}% at Epoch {good_val_epoch}")
print(f"The lowest validation loss value is {round(best_val_loss, 2)} at Epoch {good_val_loss_epoch}")

# ---- Raw per-epoch lists for every training metric ----
print("------------------------------------------------------------------------")
print("\nTrain Accuracy: ", train_accuracy)
print("\nTrain Loss: ", train_loss)
print("\nTrain Precision: ", train_precision)
print("\nTrain Recall: ", train_recall)
print("\nTrain True Positives: ", train_tp)
print("\nTrain False Positives: ", train_fp)
print("\nTrain True Negatives: ", train_tn)
print("\nTrain False Negatives: ", train_fn)

# ---- Raw per-epoch lists for every validation metric ----
print("\n")
print("\nValidation Accuracy: ", validation_accuracy)
print("\nValidation Loss: ", validation_loss)
print("\nValidation Precision: ", validation_precision)
print("\nValidation Recall: ", validation_recall)
print("\nValidation True Positives: ", validation_tp)
print("\nValidation False Positives: ", validation_fp)
print("\nValidation True Negatives: ", validation_tn)
print("\nValidation False Negatives: ", validation_fn)

# ---- Single-value test metrics from evaluate() ----
print("\n")
print("\t\t\tTest")
print("------------------------------------------------------------------------")
print("\nTest Accuracy: ", test_accuracy)
print("\nTest Loss: ", test_loss)
print("\nTest Precision: ", test_precision)
print("\nTest Recall: ", test_recall)
print("\nTest True Positives: ", test_tp)
print("\nTest False Positives: ", test_fp)
print("\nTest True Negatives: ", test_tn)
print("\nTest False Negatives: ", test_fn)

In [None]:
# Save the captured report to a text file and download it
report_path = "/content/trial4.txt"
# Close (and therefore flush) the file before downloading it; the same absolute
# path is used for writing and downloading so the two cannot drift apart
with open(report_path, "w") as f:
  print(cap, file=f)
files.download(report_path)

## Testing on Random Images

In [None]:
# Folders holding the test images, one folder per class
test_directory_benign = "/content/drive/MyDrive/bioinformatics/data/test/benign"
test_directory_malignant = "/content/drive/MyDrive/bioinformatics/data/test/malignant"


# Build one label table per class: passing 0 for the other folder makes
# create_dataframe list only the folder that was given
test_labels_benign = create_dataframe(test_directory_benign, 0)
test_labels_malignant = create_dataframe(0, test_directory_malignant)


# Uncomment to save as CSV file and to download
# test_labels_benign.to_csv('test_labels_benign.csv')
# files.download('test_labels_benign.csv')

# test_labels_malignant.to_csv('test_labels_malignant.csv')
# files.download('test_labels_malignant.csv')

In [None]:
# Class folders (with trailing slash so file names can be appended directly)
test_directory_benign = "/content/drive/MyDrive/bioinformatics/data/test/benign/"
test_directory_malignant = "/content/drive/MyDrive/bioinformatics/data/test/malignant/"

# Per-class label tables saved on Drive
test_labels_benign_CSV = pd.read_csv("/content/drive/MyDrive/bioinformatics/data/test_labels_benign.csv", index_col = 0)
test_labels_malignant_CSV = pd.read_csv("/content/drive/MyDrive/bioinformatics/data/test_labels_malignant.csv", index_col = 0)

# Full path of every test image, per class
test_paths_benign = [test_directory_benign + name for name in test_labels_benign_CSV["ID Benign"]]
test_paths_malignant = [test_directory_malignant + name for name in test_labels_malignant_CSV["ID Malignant"]]

test_paths_benign[:5], test_paths_malignant[:5]

In [None]:
# np.random.seed(42)
from PIL import Image
def visualize_test_images(paths, num_images, model = None):
  """Predict on randomly chosen images from paths and plot each with its predicted class.

  Parameters
  ----------
  paths : list of str
    Image file paths to sample from.
  num_images : int
    How many images to show; capped at len(paths).
  model : keras model, optional
    Model used for prediction. Defaults to the global `model_load`, which keeps
    existing two-argument calls working.

  Images are drawn without repetition from the whole list (previously only the
  first num_images paths could be picked, with repeats). Each image is loaded
  at 460 x 700, because resizing the array in place truncates or zero-pads the
  pixel buffer instead of rescaling the picture.
  """
  if model is None:
    model = model_load

  num_shown = min(num_images, len(paths))
  chosen = np.random.permutation(len(paths))[:num_shown]

  # Keep the original 5 x 4 layout, adding rows only when more than 20 images are requested
  num_cols = 4
  num_rows = max(5, -(-num_shown // num_cols))
  plt.figure(figsize = (15, 15))

  for i, path_index in enumerate(chosen):
    # Load at the network's input size, then apply the same 1/255 rescale as the data generators
    image = tf.keras.utils.load_img(paths[path_index], target_size = (460, 700))
    image = tf.keras.utils.img_to_array(image) / 255

    # Add the leading batch axis expected by predict()
    image_array = image[np.newaxis, ...]

    prediction = model.predict(image_array)
    result = round(prediction[0][0])

    ax = plt.subplot(num_rows, num_cols, i + 1)
    plt.xlabel(str(i))
    if result == BENIGN:
      plt.title("Benign")
    else:
      plt.title("Malignant", color = "red")
    plt.imshow(image)
    plt.axis(True)
# Show predictions for 20 images taken from the benign test folder
visualize_test_images(test_paths_benign, 20)


# Using Transfer Learning to Compare Metrics with Pre-Built Models from TensorFlow Hub

In [None]:
# Image height and width in pixels
NUM_ROWS, NUM_COLS = 460, 700
# Batch-first input shape handed to model.build(); None leaves the batch size open
INPUT_SHAPE = [None, NUM_ROWS, NUM_COLS, 3]
# Number of units in the final Dense layer
OUTPUT_SHAPE = 1

In [None]:
def def_call_backs(model_name):
  """Return the [early-stopping, checkpoint] callback pair for one model.

  The checkpoint is written to <Drive folder>/<model_name>.h5 and keeps only
  the epoch with the best val_accuracy.
  """
  checkpoint_path = "/content/drive/MyDrive/bioinformatics/" + model_name + ".h5"
  return [
      tf.keras.callbacks.EarlyStopping(monitor = "val_accuracy", min_delta = 0.01, patience = 5, verbose = 1, mode = "auto"),
      tf.keras.callbacks.ModelCheckpoint(checkpoint_path, monitor = "val_accuracy", verbose = 1, mode = "auto", save_best_only = True),
  ]

In [None]:
def transfer_learning(model_url):
  """Build and compile a binary classifier on top of the TF Hub model at model_url.

  The hub layer is followed by a single sigmoid Dense layer; the model is
  compiled with binary cross-entropy, Adam and the shared `metric_list`, then
  built for INPUT_SHAPE.
  """
  backbone = hub.KerasLayer(model_url)
  head = tf.keras.layers.Dense(units = OUTPUT_SHAPE, activation = "sigmoid")
  network = tf.keras.Sequential([backbone, head])
  network.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = tf.keras.optimizers.Adam(), metrics = metric_list)
  network.build(INPUT_SHAPE)
  return network

In [None]:
def fit_model(model, model_name):
  """Train model on the global train/validation generators and return its history dict.

  Runs for up to 30 epochs with the callbacks from def_call_backs(model_name).
  """
  training_run = model.fit(
      x = train_data,
      epochs = 30,
      validation_data = validate_data,
      verbose = 1,
      callbacks = def_call_backs(model_name),
  )
  return training_run.history

## Inception ResNet V2

In [None]:
# TF Hub handle of the Inception-ResNet-V2 model used as the base network
inc_res_v2_model_url = "https://tfhub.dev/google/imagenet/inception_resnet_v2/classification/5"
# Add the sigmoid output layer and compile, then train; the best checkpoint is saved as IncResV2.h5
IncResV2 = transfer_learning(inc_res_v2_model_url)
IncResV2_history = fit_model(IncResV2, "IncResV2")

In [None]:
# Keras auto-names metric objects "precision", "precision_1", "precision_2", ...
# depending on how many were created in this session. The hardcoded "_2" keys
# only existed when the model cell had been run three times, and raise KeyError
# after Restart & Run All. Strip any trailing "_<n>" so the lookups are stable.
IncResV2_metrics = {
    (key.rpartition("_")[0] if key.rpartition("_")[2].isdigit() else key): values
    for key, values in IncResV2_history.items()
}

# Training curves (one value per epoch)
train_accuracy = IncResV2_metrics["accuracy"]
train_loss = IncResV2_metrics["loss"]
train_precision = IncResV2_metrics["precision"]
train_recall = IncResV2_metrics["recall"]
train_tp = IncResV2_metrics["true_positives"]
train_fp = IncResV2_metrics["false_positives"]
train_tn = IncResV2_metrics["true_negatives"]
train_fn = IncResV2_metrics["false_negatives"]

# Validation curves (one value per epoch)
validation_accuracy = IncResV2_metrics["val_accuracy"]
validation_loss = IncResV2_metrics["val_loss"]
validation_precision = IncResV2_metrics["val_precision"]
validation_recall = IncResV2_metrics["val_recall"]
validation_tp = IncResV2_metrics["val_true_positives"]
validation_fp = IncResV2_metrics["val_false_positives"]
validation_tn = IncResV2_metrics["val_true_negatives"]
validation_fn = IncResV2_metrics["val_false_negatives"]

In [None]:
# Accuracy per epoch: training in green, validation in red
train_acc, = plt.plot(train_accuracy, c = "green", label = "Training")
validate_acc, = plt.plot(validation_accuracy, c = "red", label = "Validation")
plt.title("Train Accuracy vs. Validation Accuracy")
plt.legend()
plt.show()

In [None]:
# Loss per epoch: training in green, validation in red
train_los, = plt.plot(train_loss, c = "green", label = "Training")
validate_los, = plt.plot(validation_loss, c = "red", label = "Validation")
plt.title("Train Loss vs. Validation Loss")
plt.legend()
plt.show()

In [None]:
# Reload the checkpoint saved during training. custom_objects maps the saved
# "KerasLayer" name back to hub.KerasLayer (presumably so the hub layer can be rebuilt).
IncResV2_load = tf.keras.models.load_model("/content/drive/MyDrive/bioinformatics/IncResV2.h5", custom_objects = {"KerasLayer":hub.KerasLayer})
IncResV2_load.summary()

In [None]:
# Evaluate the reloaded model on the test generator; the result is indexed by position in the next cell
IncResV2_test_evaluation = IncResV2_load.evaluate(x = test_data)

In [None]:
# Positions of each value in the list returned by evaluate()
LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN = range(8)

test_loss, test_accuracy, test_precision, test_recall = (
    IncResV2_test_evaluation[position] for position in (LOSS, ACCURACY, PRECISION, RECALL)
)
test_tp, test_fp, test_tn, test_fn = (
    IncResV2_test_evaluation[position] for position in (TP, FP, TN, FN)
)

In [None]:
%%capture cap
# Best value reached by each curve
best_train_accuracy, best_val_accuracy = max(train_accuracy), max(validation_accuracy)
best_train_loss, best_val_loss = min(train_loss), min(validation_loss)

# Zero-based position of the first epoch that reached each best value
good_train_index = train_accuracy.index(best_train_accuracy)
good_train_loss_index = train_loss.index(best_train_loss)
good_val_index = validation_accuracy.index(best_val_accuracy)
good_val_loss_index = validation_loss.index(best_val_loss)

# Epochs are reported 1-based
good_train_epoch, good_train_loss_epoch = good_train_index + 1, good_train_loss_index + 1
good_val_epoch, good_val_loss_epoch = good_val_index + 1, good_val_loss_index + 1


# ---- Header, then per-epoch training accuracy and loss ----
print("\n============================ REPORT ==================================")
print("\t\t\tTrain")
print("\t\tAccuracy\tLoss")
for i, accuracy_train in enumerate(train_accuracy):
  print(f"Epoch {i + 1}: {accuracy_train}\t{train_loss[i]}")



print(f"\nThe highest training accuracy value is {round(best_train_accuracy * 100, 2)}% at Epoch {good_train_epoch}")
print(f"The lowest training loss value is {round(best_train_loss, 2)} at Epoch {good_train_loss_epoch}")


# ---- Per-epoch validation accuracy and loss ----
print("\n\t\t\tValidate")

for j, accuracy_validate in enumerate(validation_accuracy):
  print(f"Epoch {j + 1}: {accuracy_validate}\t{validation_loss[j]}")


print(f"\nThe highest validation accuracy value is {round(best_val_accuracy * 100, 2)}% at Epoch {good_val_epoch}")
print(f"The lowest validation loss value is {round(best_val_loss, 2)} at Epoch {good_val_loss_epoch}")

# ---- Raw per-epoch lists for every training metric ----
print("------------------------------------------------------------------------")
print("\nTrain Accuracy: ", train_accuracy)
print("\nTrain Loss: ", train_loss)
print("\nTrain Precision: ", train_precision)
print("\nTrain Recall: ", train_recall)
print("\nTrain True Positives: ", train_tp)
print("\nTrain False Positives: ", train_fp)
print("\nTrain True Negatives: ", train_tn)
print("\nTrain False Negatives: ", train_fn)

# ---- Raw per-epoch lists for every validation metric ----
print("\n")
print("\nValidation Accuracy: ", validation_accuracy)
print("\nValidation Loss: ", validation_loss)
print("\nValidation Precision: ", validation_precision)
print("\nValidation Recall: ", validation_recall)
print("\nValidation True Positives: ", validation_tp)
print("\nValidation False Positives: ", validation_fp)
print("\nValidation True Negatives: ", validation_tn)
print("\nValidation False Negatives: ", validation_fn)

# ---- Single-value test metrics from evaluate() ----
print("\n")
print("\t\t\tTest")
print("------------------------------------------------------------------------")
print("\nTest Accuracy: ", test_accuracy)
print("\nTest Loss: ", test_loss)
print("\nTest Precision: ", test_precision)
print("\nTest Recall: ", test_recall)
print("\nTest True Positives: ", test_tp)
print("\nTest False Positives: ", test_fp)
print("\nTest True Negatives: ", test_tn)
print("\nTest False Negatives: ", test_fn)

In [None]:
# Save the captured report to a text file and download it
report_path = "/content/trial5.txt"
# Close (and therefore flush) the file before downloading it; the same absolute
# path is used for writing and downloading so the two cannot drift apart
with open(report_path, "w") as f:
  print(cap, file=f)
files.download(report_path)

In [None]:
# Folders holding the test images, one folder per class
test_directory_benign = "/content/drive/MyDrive/bioinformatics/data/test/benign"
test_directory_malignant = "/content/drive/MyDrive/bioinformatics/data/test/malignant"


# Build one label table per class: passing 0 for the other folder makes
# create_dataframe list only the folder that was given
test_labels_benign = create_dataframe(test_directory_benign, 0)
test_labels_malignant = create_dataframe(0, test_directory_malignant)


# Uncomment to save as CSV file and to download
# test_labels_benign.to_csv('test_labels_benign.csv')
# files.download('test_labels_benign.csv')

# test_labels_malignant.to_csv('test_labels_malignant.csv')
# files.download('test_labels_malignant.csv')

### Making Random Predictions

In [None]:
# np.random.seed(42)
from PIL import Image
def visualize_test_images(paths, num_images, model):
  """Predict on randomly chosen images from paths and plot each with its predicted class.

  Parameters
  ----------
  paths : list of str
    Image file paths to sample from.
  num_images : int
    How many images to show; capped at len(paths).
  model : keras model
    Model used for prediction.

  Images are drawn without repetition from the whole list (previously only the
  first num_images paths could be picked, with repeats). Each image is loaded
  at 460 x 700, because resizing the array in place truncates or zero-pads the
  pixel buffer instead of rescaling the picture.
  """
  num_shown = min(num_images, len(paths))
  chosen = np.random.permutation(len(paths))[:num_shown]

  # Keep the original 5 x 4 layout, adding rows only when more than 20 images are requested
  num_cols = 4
  num_rows = max(5, -(-num_shown // num_cols))
  plt.figure(figsize = (15, 15))

  for i, path_index in enumerate(chosen):
    # Load at the network's input size, then apply the same 1/255 rescale as the data generators
    image = tf.keras.utils.load_img(paths[path_index], target_size = (460, 700))
    image = tf.keras.utils.img_to_array(image) / 255

    # Add the leading batch axis expected by predict()
    image_array = image[np.newaxis, ...]

    prediction = model.predict(image_array)
    result = round(prediction[0][0])

    ax = plt.subplot(num_rows, num_cols, i + 1)
    plt.xlabel(str(i))
    if result == BENIGN:
      plt.title("Benign")
    else:
      plt.title("Malignant", color = "red")
    plt.imshow(image)
    plt.axis(True)
# Show Inception-ResNet-V2 predictions for 20 images taken from the malignant test folder
visualize_test_images(test_paths_malignant, 20, IncResV2_load)

## Efficient Net V2

In [None]:
# TF Hub handle of the EfficientNet-V2 (small, ImageNet-1k) model used as the base network
eff_net_v2_model_url = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2"
# Add the sigmoid output layer and compile, then train; the best checkpoint is saved as EffNetV2.h5
EffNetV2 = transfer_learning(eff_net_v2_model_url)
EffNetV2_history = fit_model(EffNetV2, "EffNetV2")

In [None]:
# Keras auto-names metric objects "precision", "precision_1", "precision_2", ...
# depending on how many were created in this session. The hardcoded "_2" keys
# only existed when the model cell had been run three times, and raise KeyError
# after Restart & Run All. Strip any trailing "_<n>" so the lookups are stable.
EffNetV2_metrics = {
    (key.rpartition("_")[0] if key.rpartition("_")[2].isdigit() else key): values
    for key, values in EffNetV2_history.items()
}

# Training curves (one value per epoch)
train_accuracy = EffNetV2_metrics["accuracy"]
train_loss = EffNetV2_metrics["loss"]
train_precision = EffNetV2_metrics["precision"]
train_recall = EffNetV2_metrics["recall"]
train_tp = EffNetV2_metrics["true_positives"]
train_fp = EffNetV2_metrics["false_positives"]
train_tn = EffNetV2_metrics["true_negatives"]
train_fn = EffNetV2_metrics["false_negatives"]

# Validation curves (one value per epoch)
validation_accuracy = EffNetV2_metrics["val_accuracy"]
validation_loss = EffNetV2_metrics["val_loss"]
validation_precision = EffNetV2_metrics["val_precision"]
validation_recall = EffNetV2_metrics["val_recall"]
validation_tp = EffNetV2_metrics["val_true_positives"]
validation_fp = EffNetV2_metrics["val_false_positives"]
validation_tn = EffNetV2_metrics["val_true_negatives"]
validation_fn = EffNetV2_metrics["val_false_negatives"]

In [None]:
# Accuracy per epoch: training in green, validation in red
train_acc, = plt.plot(train_accuracy, c = "green", label = "Training")
validate_acc, = plt.plot(validation_accuracy, c = "red", label = "Validation")
plt.title("Train Accuracy vs. Validation Accuracy")
plt.legend()
plt.show()

In [None]:
# Loss per epoch: training in green, validation in red
train_los, = plt.plot(train_loss, c = "green", label = "Training")
validate_los, = plt.plot(validation_loss, c = "red", label = "Validation")
plt.title("Train Loss vs. Validation Loss")
plt.legend()
plt.show()

In [None]:
# Reload the checkpoint saved during training. custom_objects maps the saved
# "KerasLayer" name back to hub.KerasLayer (presumably so the hub layer can be rebuilt).
EffNetV2_load = tf.keras.models.load_model("/content/drive/MyDrive/bioinformatics/EffNetV2.h5", custom_objects = {"KerasLayer":hub.KerasLayer})
EffNetV2_load.summary()

In [None]:
# Evaluate the reloaded model on the test generator; the result is indexed by position in the next cell
EffNetV2_test_evaluation = EffNetV2_load.evaluate(x = test_data)

In [None]:
# Positions of each value in the list returned by evaluate()
LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN = range(8)

test_loss, test_accuracy, test_precision, test_recall = (
    EffNetV2_test_evaluation[position] for position in (LOSS, ACCURACY, PRECISION, RECALL)
)
test_tp, test_fp, test_tn, test_fn = (
    EffNetV2_test_evaluation[position] for position in (TP, FP, TN, FN)
)

In [None]:
%%capture cap
# Best value reached by each curve
best_train_accuracy, best_val_accuracy = max(train_accuracy), max(validation_accuracy)
best_train_loss, best_val_loss = min(train_loss), min(validation_loss)

# Zero-based position of the first epoch that reached each best value
good_train_index = train_accuracy.index(best_train_accuracy)
good_train_loss_index = train_loss.index(best_train_loss)
good_val_index = validation_accuracy.index(best_val_accuracy)
good_val_loss_index = validation_loss.index(best_val_loss)

# Epochs are reported 1-based
good_train_epoch, good_train_loss_epoch = good_train_index + 1, good_train_loss_index + 1
good_val_epoch, good_val_loss_epoch = good_val_index + 1, good_val_loss_index + 1


# ---- Header, then per-epoch training accuracy and loss ----
print("\n============================ REPORT ==================================")
print("\t\t\tTrain")
print("\t\tAccuracy\tLoss")
for i, accuracy_train in enumerate(train_accuracy):
  print(f"Epoch {i + 1}: {accuracy_train}\t{train_loss[i]}")



print(f"\nThe highest training accuracy value is {round(best_train_accuracy * 100, 2)}% at Epoch {good_train_epoch}")
print(f"The lowest training loss value is {round(best_train_loss, 2)} at Epoch {good_train_loss_epoch}")


# ---- Per-epoch validation accuracy and loss ----
print("\n\t\t\tValidate")

for j, accuracy_validate in enumerate(validation_accuracy):
  print(f"Epoch {j + 1}: {accuracy_validate}\t{validation_loss[j]}")


print(f"\nThe highest validation accuracy value is {round(best_val_accuracy * 100, 2)}% at Epoch {good_val_epoch}")
print(f"The lowest validation loss value is {round(best_val_loss, 2)} at Epoch {good_val_loss_epoch}")

# ---- Raw per-epoch lists for every training metric ----
print("------------------------------------------------------------------------")
print("\nTrain Accuracy: ", train_accuracy)
print("\nTrain Loss: ", train_loss)
print("\nTrain Precision: ", train_precision)
print("\nTrain Recall: ", train_recall)
print("\nTrain True Positives: ", train_tp)
print("\nTrain False Positives: ", train_fp)
print("\nTrain True Negatives: ", train_tn)
print("\nTrain False Negatives: ", train_fn)

# ---- Raw per-epoch lists for every validation metric ----
print("\n")
print("\nValidation Accuracy: ", validation_accuracy)
print("\nValidation Loss: ", validation_loss)
print("\nValidation Precision: ", validation_precision)
print("\nValidation Recall: ", validation_recall)
print("\nValidation True Positives: ", validation_tp)
print("\nValidation False Positives: ", validation_fp)
print("\nValidation True Negatives: ", validation_tn)
print("\nValidation False Negatives: ", validation_fn)

# ---- Single-value test metrics from evaluate() ----
print("\n")
print("\t\t\tTest")
print("------------------------------------------------------------------------")
print("\nTest Accuracy: ", test_accuracy)
print("\nTest Loss: ", test_loss)
print("\nTest Precision: ", test_precision)
print("\nTest Recall: ", test_recall)
print("\nTest True Positives: ", test_tp)
print("\nTest False Positives: ", test_fp)
print("\nTest True Negatives: ", test_tn)
print("\nTest False Negatives: ", test_fn)

In [None]:
# Save the captured report to a text file and download it
report_path = "/content/trial6.txt"
# Close (and therefore flush) the file before downloading it; the same absolute
# path is used for writing and downloading so the two cannot drift apart
with open(report_path, "w") as f:
  print(cap, file=f)
files.download(report_path)

### Making Random Predictions

In [None]:
# Folders holding the test images, one folder per class
test_directory_benign = "/content/drive/MyDrive/bioinformatics/data/test/benign"
test_directory_malignant = "/content/drive/MyDrive/bioinformatics/data/test/malignant"


# Build one label table per class: passing 0 for the other folder makes
# create_dataframe list only the folder that was given
test_labels_benign = create_dataframe(test_directory_benign, 0)
test_labels_malignant = create_dataframe(0, test_directory_malignant)


# Uncomment to save as CSV file and to download
# test_labels_benign.to_csv('test_labels_benign.csv')
# files.download('test_labels_benign.csv')

# test_labels_malignant.to_csv('test_labels_malignant.csv')
# files.download('test_labels_malignant.csv')

In [None]:
# np.random.seed(42)
from PIL import Image
def visualize_test_images(paths, num_images, model, image_size = (460, 700), columns = 4):
  """Plot randomly chosen images, each titled with the model's predicted class.

  Args:
    paths: Sequence of image file paths to sample from (with replacement).
    num_images: Number of images to draw, predict and plot.
    model: Object exposing `predict`; it is called with one image at a time as
      a batch of shape (1, height, width, 3) scaled to the range [0, 1].
    image_size: (height, width) each image is resized to while loading.
      NOTE(review): the default mirrors the (460, 700) shape the original code
      forced; confirm it matches the model's input size.
    columns: Number of subplot columns; the row count is derived from
      `num_images` so more than 20 images no longer overflow a fixed grid.

  Raises:
    ValueError: If `paths` is empty while `num_images` is positive.
  """
  if num_images > 0 and len(paths) == 0:
    raise ValueError("paths must contain at least one image path")

  rows = max(1, -(-num_images // columns))  # ceiling division
  plt.figure(figsize = (15, 15))

  for i in range(num_images):
    # Sample from the whole list. Drawing the index from [0, num_images) only
    # ever looked at the first `num_images` paths and could run past the end
    # of a shorter list.
    path = paths[np.random.randint(0, len(paths))]

    # Resize on load and scale pixels to [0, 1]
    image = tf.keras.utils.load_img(path, target_size = image_size)
    image = tf.keras.utils.img_to_array(image) / 255

    # Add the batch axis without touching pixel data. An in-place
    # ndarray.resize would truncate or zero-pad the flat buffer whenever the
    # image is not already exactly the expected size.
    image_batch = np.expand_dims(image, axis = 0)

    prediction = model.predict(image_batch)
    result = round(float(prediction[0][0]))

    # BENIGN is read from the notebook's global namespace
    plt.subplot(rows, columns, i + 1)
    plt.xlabel(str(i))
    if result == BENIGN:
      plt.title("Benign")
    else:
      plt.title("Malignant", color = "red")
    plt.imshow(image)
    plt.axis(True)

visualize_test_images(test_paths_malignant, 20, EffNetV2_load)

## Efficient Net B7

In [None]:
# Build a model from the EfficientNet-B7 TF-Hub handle and pass it to fit_model.
# transfer_learning and fit_model are helpers defined outside this section; the
# value returned by fit_model is indexed by metric name in the next cell.
# The "EffNetB7" label presumably names the saved model file — verify in fit_model.
# NOTE(review): the handle is the `classification` variant rather than a
# `feature_vector` one — confirm that is the intended base for transfer learning.
eff_net_b7_model_url = "https://tfhub.dev/tensorflow/efficientnet/b7/classification/1"
EffNetB7 = transfer_learning(eff_net_b7_model_url)
EffNetB7_history = fit_model(EffNetB7, "EffNetB7")

In [None]:
def _history_metric(history, name, suffix = "_2"):
  """Return the per-epoch series for metric `name` from a history mapping.

  The numeric suffix on auto-named Keras metrics depends on how many metric
  objects were created earlier in the session, so a key such as "precision_2"
  is only valid for one particular execution order. The historical key
  (`name + suffix`) is used when present; otherwise the single key equal to
  `name`, or `name` followed by `_<digits>`, is used instead.

  Args:
    history: Mapping from metric name to its per-epoch values.
    name: Metric base name, e.g. "precision" or "val_recall".
    suffix: Suffix tried first, kept for compatibility with earlier runs.

  Raises:
    KeyError: If no key, or more than one key, matches `name`.
  """
  preferred = name + suffix
  if preferred in history:
    return history[preferred]

  stem = name + "_"
  matches = [key for key in history
             if key == name or (key.startswith(stem) and key[len(stem):].isdigit())]
  if len(matches) != 1:
    raise KeyError(f"Expected exactly one history key for '{name}', found {matches}")
  return history[matches[0]]


# Training curves
train_accuracy = EffNetB7_history["accuracy"]
train_loss = EffNetB7_history["loss"]
train_precision = _history_metric(EffNetB7_history, "precision")
train_recall = _history_metric(EffNetB7_history, "recall")
train_tp = _history_metric(EffNetB7_history, "true_positives")
train_fp = _history_metric(EffNetB7_history, "false_positives")
train_tn = _history_metric(EffNetB7_history, "true_negatives")
train_fn = _history_metric(EffNetB7_history, "false_negatives")

# Validation curves
validation_accuracy = EffNetB7_history["val_accuracy"]
validation_loss = EffNetB7_history["val_loss"]
validation_precision = _history_metric(EffNetB7_history, "val_precision")
validation_recall = _history_metric(EffNetB7_history, "val_recall")
validation_tp = _history_metric(EffNetB7_history, "val_true_positives")
validation_fp = _history_metric(EffNetB7_history, "val_false_positives")
validation_tn = _history_metric(EffNetB7_history, "val_true_negatives")
validation_fn = _history_metric(EffNetB7_history, "val_false_negatives")

In [None]:
# Accuracy per epoch. The x values start at 1 so they match the epoch numbers
# printed in the text report, and both axes are labelled so the figure can be
# read on its own.
train_acc, = plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, c = "green")
validate_acc, = plt.plot(range(1, len(validation_accuracy) + 1), validation_accuracy, c = "red")
plt.title("Train Accuracy vs. Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend([train_acc, validate_acc],["Training", "Validation"])
plt.show()

In [None]:
# Loss per epoch. The x values start at 1 so they match the epoch numbers
# printed in the text report, and both axes are labelled so the figure can be
# read on its own.
train_los, = plt.plot(range(1, len(train_loss) + 1), train_loss, c = "green")
validate_los, = plt.plot(range(1, len(validation_loss) + 1), validation_loss, c = "red")
plt.title("Train Loss vs. Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend([train_los, validate_los],["Training", "Validation"])
plt.show()

In [None]:
# Reload the saved .h5 model from Drive. hub.KerasLayer is registered under
# "KerasLayer" so Keras can resolve that layer class while deserializing.
EffNetB7_load = tf.keras.models.load_model(
    "/content/drive/MyDrive/bioinformatics/EffNetB7.h5",
    custom_objects = {"KerasLayer": hub.KerasLayer},
)
EffNetB7_load.summary()

In [None]:
# Evaluate the reloaded model on test_data; the result is indexed by position
# (loss first, then the metrics) in the following cell.
EffNetB7_test_evaluation = EffNetB7_load.evaluate(test_data)

In [None]:
# Positions of each value inside the evaluation result.
# NOTE(review): this order assumes the model was compiled with its metrics in
# exactly this sequence — confirm against transfer_learning.
LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN = range(8)

# Pull every test metric out of the evaluation result by position
(test_loss, test_accuracy, test_precision, test_recall,
 test_tp, test_fp, test_tn, test_fn) = (
    EffNetB7_test_evaluation[position]
    for position in (LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN)
)

In [None]:
%%capture cap
# Best value of each curve: highest accuracy, lowest loss
best_train_accuracy, best_train_loss = max(train_accuracy), min(train_loss)
best_val_accuracy, best_val_loss = max(validation_accuracy), min(validation_loss)

# Index of the first occurrence of each best value and its 1-based epoch number
good_train_index = train_accuracy.index(best_train_accuracy)
good_train_epoch = good_train_index + 1
good_train_loss_index = train_loss.index(best_train_loss)
good_train_loss_epoch = good_train_loss_index + 1

good_val_index = validation_accuracy.index(best_val_accuracy)
good_val_epoch = good_val_index + 1
good_val_loss_index = validation_loss.index(best_val_loss)
good_val_loss_epoch = good_val_loss_index + 1


# Per-epoch table for the training split
print("\n============================ REPORT ==================================")
print("\t\t\tTrain")
print("\t\tAccuracy\tLoss")
for epoch, (accuracy_train, loss_train) in enumerate(zip(train_accuracy, train_loss), start = 1):
  print(f"Epoch {epoch}: {accuracy_train}\t{loss_train}")

print(f"\nThe highest training accuracy value is {round(best_train_accuracy * 100, 2)}% at Epoch {good_train_epoch}")
print(f"The lowest training loss value is {round(best_train_loss, 2)} at Epoch {good_train_loss_epoch}")


# Per-epoch table for the validation split
print("\n\t\t\tValidate")
for epoch, (accuracy_validate, loss_validate) in enumerate(zip(validation_accuracy, validation_loss), start = 1):
  print(f"Epoch {epoch}: {accuracy_validate}\t{loss_validate}")

print(f"\nThe highest validation accuracy value is {round(best_val_accuracy * 100, 2)}% at Epoch {good_val_epoch}")
print(f"The lowest validation loss value is {round(best_val_loss, 2)} at Epoch {good_val_loss_epoch}")

# Raw series for every training metric
print("------------------------------------------------------------------------")
for label, series in (
    ("Accuracy", train_accuracy),
    ("Loss", train_loss),
    ("Precision", train_precision),
    ("Recall", train_recall),
    ("True Positives", train_tp),
    ("False Positives", train_fp),
    ("True Negatives", train_tn),
    ("False Negatives", train_fn),
):
  print(f"\nTrain {label}: ", series)

# Raw series for every validation metric
print("\n")
for label, series in (
    ("Accuracy", validation_accuracy),
    ("Loss", validation_loss),
    ("Precision", validation_precision),
    ("Recall", validation_recall),
    ("True Positives", validation_tp),
    ("False Positives", validation_fp),
    ("True Negatives", validation_tn),
    ("False Negatives", validation_fn),
):
  print(f"\nValidation {label}: ", series)

# Test-set values taken from the evaluation result
print("\n")
print("\t\t\tTest")
print("------------------------------------------------------------------------")
for label, value in (
    ("Accuracy", test_accuracy),
    ("Loss", test_loss),
    ("Precision", test_precision),
    ("Recall", test_recall),
    ("True Positives", test_tp),
    ("False Positives", test_fp),
    ("True Negatives", test_tn),
    ("False Negatives", test_fn),
):
  print(f"\nTest {label}: ", value)

In [None]:
# Write the captured report to disk, then offer it for download.
# `cap` is the output captured by the preceding `%%capture cap` cell.
report_path = "trial7.txt"
with open(report_path, "w") as f:
  print(cap, file=f)

# Download only after the `with` block has closed (and therefore flushed) the
# file; downloading while the handle is still open can ship an empty or
# truncated report. The same path is used for writing and downloading so the
# two cannot drift apart if the working directory is not /content.
files.download(report_path)

### Making Random Predictions

In [None]:
# Label tables for the test split, one per class.
# Passing 0 for the other directory asks create_dataframe for a single-class
# table (the `malignant_directory == 0` branch is visible at the top of the
# notebook; the benign == 0 case is presumably symmetric — confirm).

# Benign test images
test_directory_benign = "/content/drive/MyDrive/bioinformatics/data/test/benign"
test_labels_benign = create_dataframe(test_directory_benign, 0)

# Malignant test images
test_directory_malignant = "/content/drive/MyDrive/bioinformatics/data/test/malignant"
test_labels_malignant = create_dataframe(0, test_directory_malignant)


# Optional export: uncomment to save each table as CSV and download it
# test_labels_benign.to_csv('test_labels_benign.csv')
# files.download('test_labels_benign.csv')

# test_labels_malignant.to_csv('test_labels_malignant.csv')
# files.download('test_labels_malignant.csv')

In [None]:
# np.random.seed(42)
from PIL import Image
def visualize_test_images(paths, num_images, model, image_size = (460, 700), columns = 4):
  """Plot randomly chosen images, each titled with the model's predicted class.

  Args:
    paths: Sequence of image file paths to sample from (with replacement).
    num_images: Number of images to draw, predict and plot.
    model: Object exposing `predict`; it is called with one image at a time as
      a batch of shape (1, height, width, 3) scaled to the range [0, 1].
    image_size: (height, width) each image is resized to while loading.
      NOTE(review): the default mirrors the (460, 700) shape the original code
      forced; confirm it matches the model's input size.
    columns: Number of subplot columns; the row count is derived from
      `num_images` so more than 20 images no longer overflow a fixed grid.

  Raises:
    ValueError: If `paths` is empty while `num_images` is positive.
  """
  if num_images > 0 and len(paths) == 0:
    raise ValueError("paths must contain at least one image path")

  rows = max(1, -(-num_images // columns))  # ceiling division
  plt.figure(figsize = (15, 15))

  for i in range(num_images):
    # Sample from the whole list. Drawing the index from [0, num_images) only
    # ever looked at the first `num_images` paths and could run past the end
    # of a shorter list.
    path = paths[np.random.randint(0, len(paths))]

    # Resize on load and scale pixels to [0, 1]
    image = tf.keras.utils.load_img(path, target_size = image_size)
    image = tf.keras.utils.img_to_array(image) / 255

    # Add the batch axis without touching pixel data. An in-place
    # ndarray.resize would truncate or zero-pad the flat buffer whenever the
    # image is not already exactly the expected size.
    image_batch = np.expand_dims(image, axis = 0)

    prediction = model.predict(image_batch)
    result = round(float(prediction[0][0]))

    # BENIGN is read from the notebook's global namespace
    plt.subplot(rows, columns, i + 1)
    plt.xlabel(str(i))
    if result == BENIGN:
      plt.title("Benign")
    else:
      plt.title("Malignant", color = "red")
    plt.imshow(image)
    plt.axis(True)

visualize_test_images(test_paths_malignant, 20, EffNetB7_load)

## Inception V3

In [None]:
# Build a model from the Inception V3 TF-Hub handle and pass it to fit_model.
# transfer_learning and fit_model are helpers defined outside this section; the
# value returned by fit_model is indexed by metric name in the next cell.
# The "IncV3" label presumably names the saved model file — verify in fit_model.
# NOTE(review): the handle is the `classification` variant rather than a
# `feature_vector` one — confirm that is the intended base for transfer learning.
inc_v3_model_url = "https://tfhub.dev/google/imagenet/inception_v3/classification/5"
IncV3 = transfer_learning(inc_v3_model_url)
IncV3_history = fit_model(IncV3, "IncV3")

In [None]:
def _history_metric(history, name, suffix = "_2"):
  """Return the per-epoch series for metric `name` from a history mapping.

  The numeric suffix on auto-named Keras metrics depends on how many metric
  objects were created earlier in the session, so a key such as "precision_2"
  is only valid for one particular execution order. The historical key
  (`name + suffix`) is used when present; otherwise the single key equal to
  `name`, or `name` followed by `_<digits>`, is used instead.

  Args:
    history: Mapping from metric name to its per-epoch values.
    name: Metric base name, e.g. "precision" or "val_recall".
    suffix: Suffix tried first, kept for compatibility with earlier runs.

  Raises:
    KeyError: If no key, or more than one key, matches `name`.
  """
  preferred = name + suffix
  if preferred in history:
    return history[preferred]

  stem = name + "_"
  matches = [key for key in history
             if key == name or (key.startswith(stem) and key[len(stem):].isdigit())]
  if len(matches) != 1:
    raise KeyError(f"Expected exactly one history key for '{name}', found {matches}")
  return history[matches[0]]


# Training curves
train_accuracy = IncV3_history["accuracy"]
train_loss = IncV3_history["loss"]
train_precision = _history_metric(IncV3_history, "precision")
train_recall = _history_metric(IncV3_history, "recall")
train_tp = _history_metric(IncV3_history, "true_positives")
train_fp = _history_metric(IncV3_history, "false_positives")
train_tn = _history_metric(IncV3_history, "true_negatives")
train_fn = _history_metric(IncV3_history, "false_negatives")

# Validation curves
validation_accuracy = IncV3_history["val_accuracy"]
validation_loss = IncV3_history["val_loss"]
validation_precision = _history_metric(IncV3_history, "val_precision")
validation_recall = _history_metric(IncV3_history, "val_recall")
validation_tp = _history_metric(IncV3_history, "val_true_positives")
validation_fp = _history_metric(IncV3_history, "val_false_positives")
validation_tn = _history_metric(IncV3_history, "val_true_negatives")
validation_fn = _history_metric(IncV3_history, "val_false_negatives")

In [None]:
# Accuracy per epoch. The x values start at 1 so they match the epoch numbers
# printed in the text report, and both axes are labelled so the figure can be
# read on its own.
train_acc, = plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, c = "green")
validate_acc, = plt.plot(range(1, len(validation_accuracy) + 1), validation_accuracy, c = "red")
plt.title("Train Accuracy vs. Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend([train_acc, validate_acc],["Training", "Validation"])
plt.show()

In [None]:
# Loss per epoch. The x values start at 1 so they match the epoch numbers
# printed in the text report, and both axes are labelled so the figure can be
# read on its own.
train_los, = plt.plot(range(1, len(train_loss) + 1), train_loss, c = "green")
validate_los, = plt.plot(range(1, len(validation_loss) + 1), validation_loss, c = "red")
plt.title("Train Loss vs. Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend([train_los, validate_los],["Training", "Validation"])
plt.show()

In [None]:
# Reload the saved .h5 model from Drive. hub.KerasLayer is registered under
# "KerasLayer" so Keras can resolve that layer class while deserializing.
IncV3_load = tf.keras.models.load_model(
    "/content/drive/MyDrive/bioinformatics/IncV3.h5",
    custom_objects = {"KerasLayer": hub.KerasLayer},
)
IncV3_load.summary()

In [None]:
# Evaluate the reloaded model on test_data; the result is indexed by position
# (loss first, then the metrics) in the following cell.
IncV3_test_evaluation = IncV3_load.evaluate(test_data)

In [None]:
# Positions of each value inside the evaluation result.
# NOTE(review): this order assumes the model was compiled with its metrics in
# exactly this sequence — confirm against transfer_learning.
LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN = range(8)

# Pull every test metric out of the evaluation result by position
(test_loss, test_accuracy, test_precision, test_recall,
 test_tp, test_fp, test_tn, test_fn) = (
    IncV3_test_evaluation[position]
    for position in (LOSS, ACCURACY, PRECISION, RECALL, TP, FP, TN, FN)
)

In [None]:
%%capture cap
# Best value of each curve: highest accuracy, lowest loss
best_train_accuracy, best_train_loss = max(train_accuracy), min(train_loss)
best_val_accuracy, best_val_loss = max(validation_accuracy), min(validation_loss)

# Index of the first occurrence of each best value and its 1-based epoch number
good_train_index = train_accuracy.index(best_train_accuracy)
good_train_epoch = good_train_index + 1
good_train_loss_index = train_loss.index(best_train_loss)
good_train_loss_epoch = good_train_loss_index + 1

good_val_index = validation_accuracy.index(best_val_accuracy)
good_val_epoch = good_val_index + 1
good_val_loss_index = validation_loss.index(best_val_loss)
good_val_loss_epoch = good_val_loss_index + 1


# Per-epoch table for the training split
print("\n============================ REPORT ==================================")
print("\t\t\tTrain")
print("\t\tAccuracy\tLoss")
for epoch, (accuracy_train, loss_train) in enumerate(zip(train_accuracy, train_loss), start = 1):
  print(f"Epoch {epoch}: {accuracy_train}\t{loss_train}")

print(f"\nThe highest training accuracy value is {round(best_train_accuracy * 100, 2)}% at Epoch {good_train_epoch}")
print(f"The lowest training loss value is {round(best_train_loss, 2)} at Epoch {good_train_loss_epoch}")


# Per-epoch table for the validation split
print("\n\t\t\tValidate")
for epoch, (accuracy_validate, loss_validate) in enumerate(zip(validation_accuracy, validation_loss), start = 1):
  print(f"Epoch {epoch}: {accuracy_validate}\t{loss_validate}")

print(f"\nThe highest validation accuracy value is {round(best_val_accuracy * 100, 2)}% at Epoch {good_val_epoch}")
print(f"The lowest validation loss value is {round(best_val_loss, 2)} at Epoch {good_val_loss_epoch}")

# Raw series for every training metric
print("------------------------------------------------------------------------")
for label, series in (
    ("Accuracy", train_accuracy),
    ("Loss", train_loss),
    ("Precision", train_precision),
    ("Recall", train_recall),
    ("True Positives", train_tp),
    ("False Positives", train_fp),
    ("True Negatives", train_tn),
    ("False Negatives", train_fn),
):
  print(f"\nTrain {label}: ", series)

# Raw series for every validation metric
print("\n")
for label, series in (
    ("Accuracy", validation_accuracy),
    ("Loss", validation_loss),
    ("Precision", validation_precision),
    ("Recall", validation_recall),
    ("True Positives", validation_tp),
    ("False Positives", validation_fp),
    ("True Negatives", validation_tn),
    ("False Negatives", validation_fn),
):
  print(f"\nValidation {label}: ", series)

# Test-set values taken from the evaluation result
print("\n")
print("\t\t\tTest")
print("------------------------------------------------------------------------")
for label, value in (
    ("Accuracy", test_accuracy),
    ("Loss", test_loss),
    ("Precision", test_precision),
    ("Recall", test_recall),
    ("True Positives", test_tp),
    ("False Positives", test_fp),
    ("True Negatives", test_tn),
    ("False Negatives", test_fn),
):
  print(f"\nTest {label}: ", value)

In [None]:
# Write the captured report to disk, then offer it for download.
# `cap` is the output captured by the preceding `%%capture cap` cell.
report_path = "trial8.txt"
with open(report_path, "w") as f:
  print(cap, file=f)

# Download only after the `with` block has closed (and therefore flushed) the
# file; downloading while the handle is still open can ship an empty or
# truncated report. The same path is used for writing and downloading so the
# two cannot drift apart if the working directory is not /content.
files.download(report_path)

### Making Random Predictions

In [None]:
# Label tables for the test split, one per class.
# Passing 0 for the other directory asks create_dataframe for a single-class
# table (the `malignant_directory == 0` branch is visible at the top of the
# notebook; the benign == 0 case is presumably symmetric — confirm).

# Benign test images
test_directory_benign = "/content/drive/MyDrive/bioinformatics/data/test/benign"
test_labels_benign = create_dataframe(test_directory_benign, 0)

# Malignant test images
test_directory_malignant = "/content/drive/MyDrive/bioinformatics/data/test/malignant"
test_labels_malignant = create_dataframe(0, test_directory_malignant)


# Optional export: uncomment to save each table as CSV and download it
# test_labels_benign.to_csv('test_labels_benign.csv')
# files.download('test_labels_benign.csv')

# test_labels_malignant.to_csv('test_labels_malignant.csv')
# files.download('test_labels_malignant.csv')

In [None]:
# np.random.seed(42)
from PIL import Image
def visualize_test_images(paths, num_images, model, image_size = (460, 700), columns = 4):
  """Plot randomly chosen images, each titled with the model's predicted class.

  Args:
    paths: Sequence of image file paths to sample from (with replacement).
    num_images: Number of images to draw, predict and plot.
    model: Object exposing `predict`; it is called with one image at a time as
      a batch of shape (1, height, width, 3) scaled to the range [0, 1].
    image_size: (height, width) each image is resized to while loading.
      NOTE(review): the default mirrors the (460, 700) shape the original code
      forced; confirm it matches the model's input size.
    columns: Number of subplot columns; the row count is derived from
      `num_images` so more than 20 images no longer overflow a fixed grid.

  Raises:
    ValueError: If `paths` is empty while `num_images` is positive.
  """
  if num_images > 0 and len(paths) == 0:
    raise ValueError("paths must contain at least one image path")

  rows = max(1, -(-num_images // columns))  # ceiling division
  plt.figure(figsize = (15, 15))

  for i in range(num_images):
    # Sample from the whole list. Drawing the index from [0, num_images) only
    # ever looked at the first `num_images` paths and could run past the end
    # of a shorter list.
    path = paths[np.random.randint(0, len(paths))]

    # Resize on load and scale pixels to [0, 1]
    image = tf.keras.utils.load_img(path, target_size = image_size)
    image = tf.keras.utils.img_to_array(image) / 255

    # Add the batch axis without touching pixel data. An in-place
    # ndarray.resize would truncate or zero-pad the flat buffer whenever the
    # image is not already exactly the expected size.
    image_batch = np.expand_dims(image, axis = 0)

    prediction = model.predict(image_batch)
    result = round(float(prediction[0][0]))

    # BENIGN is read from the notebook's global namespace
    plt.subplot(rows, columns, i + 1)
    plt.xlabel(str(i))
    if result == BENIGN:
      plt.title("Benign")
    else:
      plt.title("Malignant", color = "red")
    plt.imshow(image)
    plt.axis(True)

visualize_test_images(test_paths_malignant, 20, IncV3_load)