<a href="https://colab.research.google.com/github/chairiq/Pulmonary-embolism-in-ctscans-dl/blob/main/Transfer_Learning_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CT-Scan image classification with Transfer Learning

# 0) Connect to your cloud storage

In [None]:
# Mount Google Drive inside the Colab VM; the dataset is copied from it later.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

## 1) Import libraries



In [None]:
import glob
import os
import pathlib
import shutil
import urllib
import urllib.request
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from google.colab import files
from tensorflow import keras
from tensorflow.keras import layers as lay
from tensorflow.keras import regularizers
from tensorflow.keras.applications import (
    DenseNet121,
    EfficientNetB2,
    EfficientNetB3,
    InceptionResNetV2,
    InceptionV3,
    MobileNetV2,
    NASNetLarge,
    NASNetMobile,
    ResNet50,
    VGG16,
)
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2

# Reset Keras' global state, then report the library versions in use so that
# results can be tied to a specific environment.
K.clear_session()
print(
    "Tensorflow Version: {}".format(tf.__version__),
    "Keras Version: {}".format(keras.__version__),
    sep="\n",
)

## 2) Get dataset
The dataset should consist of two classes (two different subfolders, one for each class):
1.   First class: Images (from CT-Scans) with pulmonary embolism
2.   Second class: Images (from CT-Scans) without pulmonary embolism



In [None]:
G_DRIVE_DIR = 'my_path_to_the_dataset' # Path of the dataset inside the cloud storage
DATASET_DIR = 'dataset'
DATASET_NAME = 'ctscans'
dataset_sub_folder = ''
!cp -r {G_DRIVE_DIR} {DATASET_DIR}


In [None]:
#get list of classes
def listdir_nohidden(path):
  """Return the entries of *path* whose names do not start with a dot.

  The order is whatever ``os.listdir`` yields; callers sort if they need to.
  """
  visible = []
  for entry in os.listdir(path):
    if entry.startswith('.'):
      continue  # hidden entry (dot-file or dot-directory)
    visible.append(entry)
  return visible

# Class names are the visible sub-directories of the dataset folder, sorted so
# that their order matches the alphabetical class indexing Keras derives from
# the directory names. Plain files (e.g. a stray README) are skipped: counting
# them as classes would shift the labels printed at inference time.
CLASSES_LIST = sorted(
    name for name in listdir_nohidden(DATASET_DIR)
    if os.path.isdir(os.path.join(DATASET_DIR, name))
)
print(CLASSES_LIST)

## 3) Preprocess the data

In [None]:
img_width, img_height = 224, 224
train_data_dir = DATASET_DIR

batch_size = 8
seed = 13
val_split = 0.2  # fraction of every class held out for validation

# NOTE(review): every backbone receives pixels rescaled to [0, 1]. Keras'
# EfficientNet models are documented to include their own input rescaling and
# to expect raw [0, 255] pixels - confirm whether `rescale` should be dropped
# when one of those backbones is selected.

# Training images are randomly augmented on the fly.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=10,
    horizontal_flip=True,
    fill_mode='constant',
    validation_split=val_split,
)

# Validation images get the same rescaling but NO random augmentation, so the
# validation metrics are measured on undistorted scans. The generator must use
# the same `validation_split` as the training one so that the 'training' and
# 'validation' subsets stay complementary.
validation_datagen = ImageDataGenerator(
    rescale=1. / 255,
    validation_split=val_split,
)

# Arguments shared by both directory iterators.
flow_kwargs = dict(
    directory=train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    seed=seed,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    subset='training', **flow_kwargs)

validation_generator = validation_datagen.flow_from_directory(
    subset='validation', **flow_kwargs)

# Dataset statistics used later to size the classifier head and the epochs.
nb_train_samples = train_generator.n
nb_validation_samples = validation_generator.n
n_classes = train_generator.num_classes

## 4) Pretrained model selection (Transfer Learning approach)


In [None]:
use_the_model = 8   # key into BACKBONES below
model_name = 'train'
epoch_num = 20

# Selector value -> (model name, constructor, extra constructor arguments).
# The model name is reused later for the training-log and saved-model file names.
# NOTE(review): NASNetLarge is built with a fixed 331x331 input, whereas the
# data generators are configured for 224x224 images - adjust the image size
# before selecting option 5.
BACKBONES = {
    1: ('InceptionV3', InceptionV3, {}),
    2: ('VGG16', VGG16, {}),
    3: ('ResNet50', ResNet50, {}),
    4: ('InceptionResNetV2', InceptionResNetV2, {}),
    5: ('NASNetLarge', NASNetLarge, {'input_shape': (331, 331, 3)}),
    6: ('MobileNetV2', MobileNetV2, {}),
    7: ('DenseNet121', DenseNet121, {}),
    8: ('EfficientNetB2', EfficientNetB2, {}),
    9: ('EfficientNetB3', EfficientNetB3, {}),
}

# Fail here with a clear message instead of hitting an undefined (or stale)
# `base_model` in a later cell.
if use_the_model not in BACKBONES:
    raise ValueError(
        "use_the_model must be one of {}, got {!r}".format(
            sorted(BACKBONES), use_the_model))

model_name, backbone_cls, backbone_kwargs = BACKBONES[use_the_model]

# ImageNet weights without the original classification top; a new head is
# attached in the next step.
base_model = backbone_cls(weights='imagenet', include_top=False,
                          **backbone_kwargs)

print("({}) {} model loaded with {} epochs.".format(use_the_model, model_name, epoch_num))

##### 4.1) Add new top layers to the selected model

In [None]:
# New classification head stacked on the backbone's feature maps:
# global average pooling -> 512-unit ReLU layer -> dropout.
x = base_model.output
for head_layer in (
    lay.GlobalAveragePooling2D(),
    lay.Dense(512, activation='relu'),
    lay.Dropout(0.2),
):
    x = head_layer(x)

# Softmax output with one unit per class; its kernel is L2-regularised.
output_layer = lay.Dense(
    n_classes,
    activation='softmax',
    kernel_regularizer=l2(0.005),
)
predictions = output_layer(x)

# Full network: backbone input -> new head output.
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()

## 5) Compile the model
#### Compile the model with the SGD optimizer and track top-1 accuracy. Initialize a callback that writes the training logs to a CSV file

In [None]:
# SGD with momentum and a small learning rate. `learning_rate` is the supported
# argument name; the old `lr` alias is deprecated and rejected by Keras 3.
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Per-epoch metrics are written to results/<model_name>_training_log.csv.
RESULTS_DIR = 'results'
os.makedirs(RESULTS_DIR, exist_ok=True)

csv_filename = os.path.join(RESULTS_DIR, model_name + '_training_log.csv')
# append=True: repeated runs add rows to the existing file rather than
# overwriting it.
csv_logger = CSVLogger(csv_filename, separator=',', append=True)

## 6) Train model

In [None]:
# Train for `epoch_num` epochs, validating after each one. Step counts use
# integer division, so a trailing partial batch is not counted in an epoch.
# The returned History object is kept in `hist` for plotting.
hist = model.fit(
    x=train_generator,
    epochs=epoch_num,
    steps_per_epoch=nb_train_samples // batch_size,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=[csv_logger],
    verbose=1,
)

## 7) Check training results 

In [None]:
# Side-by-side learning curves: accuracy on the left, loss on the right,
# each showing the training and validation series per epoch.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,6))

panels = (
    (ax1, 'Accuracy', 'accuracy', 'val_accuracy'),
    (ax2, 'Loss', 'loss', 'val_loss'),
)
for axis, title, train_key, val_key in panels:
    axis.set_title(title)
    axis.plot(hist.history[train_key])
    axis.plot(hist.history[val_key])
    axis.set(xlabel='epoch', ylabel=train_key)
    axis.legend(['train', 'val'], loc='upper left')


## 8) Save model

In [None]:
# Save the trained model in the native Keras format. The explicit `.keras`
# extension is required by Keras 3, which rejects extension-less paths.
model.save('my_model_' + model_name + '.keras')

## 9) Inference

In [None]:
def prepare_img(img, target_size=None):
  """Load an image file, display it, and return it as a one-image batch.

  Args:
    img: Path of the image file to load.
    target_size: Optional ``(height, width)`` the image is resized to while
      loading. ``None`` keeps the image's original size.

  Returns:
    An array with a leading batch axis of length 1 whose pixel values were
    divided by 255, the same rescaling the training generator applies.
  """
  image = tf.keras.preprocessing.image.load_img(img, target_size=target_size)
  input_arr = tf.keras.preprocessing.image.img_to_array(image)
  input_arr /= 255
  input_arr = np.expand_dims(input_arr, axis=0)  # add the batch axis
  plt.imshow(image)
  plt.show()
  return input_arr

#Load image from computer
#uploaded = files.upload()
#img_infer = list(uploaded)[0]

#Load image from the VM or the Google Drive
img_infer = 'path_to_a_sample.jpg'
print('Running inference on: ' + img_infer)

#Predict image (resized to the resolution the network was trained on)
predictions = model.predict(
    prepare_img(img_infer, target_size=(img_height, img_width)))

#Show predictions for all classes
for the_class, pred in sorted(zip(CLASSES_LIST,predictions[0])):
  print('{}: {:.4f}'.format(the_class, pred))