# Mount Drive

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True requests a remount
# even when the mount point is already in use.
drive.mount('/content/drive',force_remount = True)

Mounted at /content/drive


In [2]:
# Base name for this run's artefacts (history CSV, predictions CSV, saved model).
# To prompt for it instead: name_output = input('enter the name of your output file ').strip()
name_output = "firstvgg"

# Import Modules

In [3]:
import tensorflow as tf

In [4]:
# import tensorflow as tf
# from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Reshape, Activation,Flatten,BatchNormalization,Conv2D,MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
%matplotlib inline
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Recall, Precision,TruePositives,TrueNegatives, FalsePositives,FalseNegatives,Accuracy

In [5]:
import os
import shutil
import glob
import random
import warnings
warnings.simplefilter(action='ignore',category='FutureWarning')

In [None]:
# print("GPU Name:", tf.config.list_physical_devices('GPU')[0])

# Add GPU

In [None]:
# Ask TensorFlow which GPUs it can see and report how many there are.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print('Num GPUs Available: ', len(physical_devices))

# With at least one GPU present, turn on memory growth for the first device
# (only index 0 is configured).
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Make your Edits

In [None]:
## Change the home directory to your own project root on the mounted Drive.
## Data, outputs and saved models are resolved relative to it.
home = '/content/drive/MyDrive/Projects/CXR'

# Name of the image archive, read from <home>/data.
image_zip = 'chest_xray.zip'

# Label for the backbone architecture.
model_name = 'vgg16'

# File name under which the trained model is saved.
this_model_name = f'{name_output}.h5'

In [None]:
## Move the test folder from the unzipped folder to the data folder.
## Create 3 more folders in the data folder - train, validate, withhold - split 80% / 10% / 10%.
## Move data from the train directory into those 3 folders (train, validate, withhold) using a fixed random seed value.
## Augment the data using the created augmentation function.
## Use a data generator with augmentation as well.
##


# Pull Data and Setup

In [None]:
# Local working directory for the image folders. exist_ok keeps this cell
# re-runnable: a bare os.mkdir raises FileExistsError on the second run.
image_path = 'data'
os.makedirs(image_path, exist_ok=True)

# Copy the image archive from <home>/data into the current working directory.
shutil.copy(os.path.join(home, 'data', image_zip), image_zip)

In [None]:
# Extract the zip archive into the current working directory.
shutil.unpack_archive(filename=image_zip, extract_dir=os.getcwd(), format='zip')

## Create Path and arrange

In [None]:
# Create the train/ and valid/ folders under the data directory.
# exist_ok makes the cell safe to re-run (os.mkdir raised FileExistsError).
os.makedirs(os.path.join(image_path, 'train'), exist_ok=True)
# test/ is not created here; it is brought in as a whole folder separately.
os.makedirs(os.path.join(image_path, 'valid'), exist_ok=True)

In [None]:
# Bring the test folder from the home directory on Drive into the local data folder.
# It is copied rather than moved: moving deleted the Drive copy, so the notebook
# could not be run a second time. dirs_exist_ok lets the cell be re-run.
shutil.copytree(os.path.join(home, 'data', 'test'),
                os.path.join(image_path, 'test'),
                dirs_exist_ok=True)


In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

def augment_images(input_dir, output_dir, augmentations_per_image=5):
    """Write randomly augmented JPEG copies of every image in a directory.

    Args:
        input_dir: Directory scanned (non-recursively) for files ending in
            .png, .jpg or .jpeg. The extension check is case-insensitive.
        output_dir: Directory that receives the augmented files; created if missing.
        augmentations_per_image: Number of augmented copies written per input
            image. Zero or a negative value writes nothing.
    """
    # Set up the ImageDataGenerator with the desired augmentations
    datagen = ImageDataGenerator(
        rotation_range=10,         # Rotate images by up to 10 degrees both ways
        shear_range=0.1,           # Apply a shear of 0.1
        zoom_range=0.1,            # Zoom in or out by up to 10%
        horizontal_flip=True,      # Flip the images horizontally
        height_shift_range=0.05,   # Shift the height by up to 5%
        width_shift_range=0.05,    # Shift the width by up to 5%
        fill_mode='nearest'        # Fill any pixels lost during transformation
    )

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg')
    for img_filename in os.listdir(input_dir):
        # Lower-case first so files such as "scan.JPG" are not silently skipped.
        if not img_filename.lower().endswith(image_extensions):
            continue

        # Load the image and add a leading batch axis, as datagen.flow expects a batch.
        img = load_img(os.path.join(input_dir, img_filename))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)

        flow = datagen.flow(x, batch_size=1, save_to_dir=output_dir,
                            save_prefix='aug_' + os.path.splitext(img_filename)[0],
                            save_format='jpeg')
        # Each batch drawn saves one augmented file. Drawing exactly the requested
        # number means a count of 0 writes nothing (the previous loop always wrote
        # one file before checking the limit).
        for _ in range(augmentations_per_image):
            next(flow)

    print(f"Augmentation complete. Augmented images are saved in {output_dir}")

# Example usage:
# augment_images('path_to_input_directory', 'path_to_output_directory', augmentations_per_image=5)


# Functions

### Plot Confusion matrix

In [None]:
def plot_confusion_matrix(cm,classes,
                          normalize = False,
                          title='Confusion matrix',
                          cmap = plt.cm.Blues):
  """
  Print a status line and draw the confusion matrix on the current matplotlib figure.

  cm        : 2-D array with true classes on the rows and predicted classes on the
              columns (rows are plotted on the 'True label' axis).
  classes   : tick labels, one per class, in the same order as the rows/columns of cm.
  normalize : when True, every row is divided by its row sum before plotting, and
              cell values are written with two decimals.
  title     : figure title.
  cmap      : matplotlib colormap for the image.
  """
  # Normalize first so the image, the colour bar and the cell text all show the same values.
  if normalize:
    cm = cm.astype('float')/cm.sum(axis=1) [:,np.newaxis]
    print('Normalized confusion matrix')
  else:
    print('confusion matrix without normalization')

  plt.imshow(cm,interpolation='nearest',cmap=cmap)
  plt.title(title)
  plt.colorbar()
  tickmarks = np.arange(len(classes))
  plt.xticks(tickmarks,classes,rotation=45)
  plt.yticks(tickmarks,classes)

  # Cells above half the maximum get white text so it stays readable on the dark colour.
  thresh = cm.max()/2
  for i,j in itertools.product(range(cm.shape[0]),range(cm.shape[1])):
    cell_text = format(cm[i,j],'.2f') if normalize else cm[i,j]
    # plt.text takes (x, y): x is the column (predicted class), y is the row (true class).
    # Passing (i, j) wrote each value into the transposed cell.
    plt.text(j,i,cell_text,
             horizontalalignment='center',
             color = 'white' if cm[i,j] > thresh else 'black'
             )

  # Layout and axis labels only need to be set once, after all cells are annotated.
  plt.tight_layout()
  plt.ylabel('True label')
  plt.xlabel('predicted label')

# Instantiate the Base Model

In [None]:
# Base network: VGG16 with its classifier head and ImageNet weights
# (both are the constructor defaults, spelled out here for readers).
Home_model = tf.keras.applications.vgg16.VGG16(include_top=True, weights='imagenet')

# Preprocess

In [None]:
def prepare_image(file):
  """Load one image file and return it as a VGG16-ready batch of size 1.

  file: path to the image; it is resized to 224x224 while loading.
  Returns the image array with a leading batch axis, passed through
  tf.keras.applications.vgg16.preprocess_input.
  """
  # The name `image` is never imported in this notebook, so image.load_img raised
  # NameError. Use the load_img / img_to_array helpers the notebook already imports
  # from tensorflow.keras.preprocessing.image.
  img = load_img(file, target_size = (224,224))
  img_array = img_to_array(img)
  # Add the batch axis expected by preprocess_input and model.predict.
  img_array_expanded = np.expand_dims(img_array,axis=0)
  return tf.keras.applications.vgg16.preprocess_input(img_array_expanded)

## Pull data

In [None]:
# Directories read by the train / validation / test generators.
train_path, valid_path, test_path = (
    os.path.join(image_path, split) for split in ('train', 'valid', 'test')
)

In [None]:
## TODO: add augmentations here, similar to the ones used in augment_images

# One generator per split. Each applies the VGG16 preprocessing function,
# resizes images to 224x224 and serves them in batches of 10.
train_batches = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input
).flow_from_directory(directory=train_path, target_size=(224, 224), batch_size=10)

valid_batches = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input
).flow_from_directory(directory=valid_path, target_size=(224, 224), batch_size=10)

# shuffle=False so the test batches come out in a fixed order.
test_batches = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input
).flow_from_directory(directory=test_path, target_size=(224, 224), batch_size=10,
                      shuffle=False)


## Transfer Learning

In [None]:
# Print the layer-by-layer summary of the base model.
Home_model.summary()

In [None]:
# Number of layers in the base model (useful when choosing the layer indices below).
len(Home_model.layers)

In [None]:
# Use the output of the base model's third-from-last layer as the feature tensor,
# discarding the layers after it.
x = Home_model.layers[-3].output
# Collapse each sample's features to a single axis.
flat_features = Reshape((-1,))(x)
s2l = Model(inputs=Home_model.input, outputs=flat_features)
# Size the softmax head from the training generator instead of hard-coding 4, so the
# output width always matches the number of class folders found by flow_from_directory
# (and therefore the width of the one-hot labels it yields by default).
output = Dense(units=train_batches.num_classes, activation='softmax')(s2l.output)


In [None]:
# Final network: the base model's input wired through to the new softmax output.
model = Model(inputs= Home_model.input,outputs = output)

In [None]:
# Mark every layer except the last five as non-trainable.
for layer in model.layers[:-5]:
  layer.trainable= False

In [None]:
# Print the summary of the assembled model (after the trainable flags were set).
model.summary()

In [None]:
# prompt: plot the model

# Render a diagram of the model's layer graph.
tf.keras.utils.plot_model(model)


## Train The Model

In [None]:
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    # The head is a softmax over mutually exclusive classes, and flow_from_directory
    # yields one-hot labels by default, so the matching loss is categorical
    # cross-entropy. Binary cross-entropy scores every output unit as an independent
    # yes/no problem.
    loss='categorical_crossentropy',
    metrics=['accuracy', Recall(), Precision(), TruePositives(), TrueNegatives(),
             FalsePositives(), FalseNegatives()]
)

In [None]:
# Number of passes over the training data.
epochs = 20

In [None]:
# Train on the training generator and evaluate on the validation generator;
# the returned object keeps the per-epoch metric history.
History = model.fit(train_batches, validation_data=valid_batches, epochs=epochs, verbose=2)

In [None]:
# Tabulate the training history: History.history is turned into a DataFrame.
history_df = pd.DataFrame(History.history)

In [None]:
# Make sure <home>/output exists before writing: to_csv does not create missing folders.
output_dir = os.path.join(home, 'output')
os.makedirs(output_dir, exist_ok=True)
history_df.to_csv(os.path.join(output_dir, name_output + '.csv'))

## Optionally Load Model

In [None]:
# # prompt: Load a saved model and use

# from tensorflow.keras.models import load_model
# # Load the saved model
# model = load_model(os.path.join(home,'models',this_model_name))

# # Now you can use the loaded_model for predictions or further training
# # predictions = loaded_model.predict(test_batches)


# Predict

In [None]:
# Class labels recorded by the test generator — presumably one integer index per
# test image in generator order (the generator was built with shuffle=False).
test_labels= test_batches.classes

In [None]:
# Rewind the iterator first so the images come out in the same order as
# test_batches.classes even if the generator was already partly consumed.
# The built-ins next()/len() replace .next()/.__len__(): the .next() method is
# not available on every Keras version.
test_batches.reset()
x_test = np.concatenate([next(test_batches)[0] for _ in range(len(test_batches))])

In [None]:
# Class probabilities for every test image (progress output suppressed).
predictions = model.predict(x_test, verbose=0)

In [None]:
# One row per prediction, one column per model output.
dff = pd.DataFrame(predictions)

In [None]:
# Attach the test labels as a 'test' column next to the predicted probabilities,
# then preview the first 10 rows.
dff['test'] = test_labels
dff.head(10)

# Saving prediction probabilities to CSV

In [None]:
# Make sure <home>/output exists before writing: to_csv does not create missing folders.
output_dir = os.path.join(home, 'output')
os.makedirs(output_dir, exist_ok=True)
dff.to_csv(os.path.join(output_dir, name_output + '_prediction_probabilities.csv'))

## Confusion Matrix

In [None]:
# Ground truth comes from the generator's labels; the predicted class is the
# index of the largest probability along the last axis.
y_true = test_labels
y_pred = np.argmax(predictions, axis=-1)

In [None]:
# Class names known to the test generator, used as tick labels on the confusion matrix.
cm_plot_labels = list(test_batches.class_indices.keys())

In [None]:
# `cm` was never computed anywhere in the notebook, so the plot call raised NameError.
# Build it from the true and predicted class indices first.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='confusion matrix')

## Metrics

In [None]:
# prompt: given y_pred and y_true, calculate important metrics to 4 decimal places

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Overall accuracy, plus macro-averaged precision / recall / F1 for the multiclass case.
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    scorer(y_true, y_pred, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)

# Report every metric to 4 decimal places, one per line.
print("\n".join(
    f"{metric_name}: {metric_value:.4f}"
    for metric_name, metric_value in (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("F1-score", f1),
    )
))


# Save Model

In [None]:
# Save the trained model under this_model_name, a relative path, so it lands in
# the current working directory.
model.save(this_model_name)

In [None]:
# Folder under the project home that holds saved models.
model_path = os.path.join(home,'models')
# home = '/content/drive/MyDrive/project/Tensorflow And Keras' #/content/drive/MyDrive/project/Project2.1/Script2_pure classification

In [None]:
# Create the models folder if needed: without it shutil.move would rename the saved
# file to a plain file called "models". Naming the full destination file also lets a
# re-run replace an earlier copy instead of failing because the destination exists.
os.makedirs(model_path, exist_ok=True)
shutil.move(this_model_name, os.path.join(model_path, this_model_name))