In [1]:
from google.colab import drive
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tensorflow.python.client import device_lib
from tensorflow import keras
from tensorflow.keras.models import Model

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout
from keras import optimizers
from keras.applications import VGG16

from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score

# Mount Google Drive and make the project folder the working directory so the
# relative dataset / output paths used by later cells resolve inside it.
drive.mount("/content/drive")
# Use an absolute path: the relative form only works while the cwd is still
# /content, so re-running this cell after the first chdir raised FileNotFoundError.
os.chdir("/content/drive/MyDrive/Colab Notebooks/ML Project")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Dataset locations, relative to the current working directory.
train_dir, test_dir = "chest_xray/train/", "chest_xray/test/"

In [3]:
# Random geometric augmentation applied to training images only.
augmentation = dict(
    rotation_range = 40,
    shear_range = 0.2,
    zoom_range = 0.2,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'nearest',
)

# Both generators multiply pixel values by 1/255; the test generator applies no augmentation.
train_data = ImageDataGenerator(rescale = 1./255, **augmentation)
test_data = ImageDataGenerator(rescale = 1./255)

In [4]:
# Fixed seed so the random augmentations applied by train_data can be reproduced
# between runs (without it the extracted training features differ on every run).
SEED = 42

# Options shared by both generators. shuffle = False keeps the batch order fixed,
# which later cells rely on when pairing predictions with generator.classes.
flow_options = dict(target_size = (150, 150), batch_size = 32, class_mode = 'binary', shuffle = False)

train_generator = train_data.flow_from_directory(train_dir, seed = SEED, **flow_options)
test_generator = test_data.flow_from_directory(test_dir, **flow_options)

Found 5235 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [5]:
# ImageNet-pretrained VGG16 without its classifier head, sized for 150x150 RGB input.
cnn_base = VGG16(
    weights = 'imagenet',
    include_top = False,
    input_shape = (150, 150, 3),
)
# Freeze the base: it is used purely as a fixed feature extractor.
cnn_base.trainable = False

In [6]:
# Feature extractor: maps the VGG16 input to the output of its last layer.
last_layer = cnn_base.layers[-1]
model = Model(inputs = cnn_base.input, outputs = last_layer.output)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 150, 150, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0     

In [7]:
# Push every training batch through the frozen base to get its convolutional features.
features = model.predict(x = train_generator)

In [8]:
# Flatten each image's feature map into a single row. The row count comes from the
# array itself and -1 lets NumPy infer the width (8192 in the recorded run), so the
# cell no longer breaks when the number of images or the input size changes.
features = features.reshape((features.shape[0], -1))
print(features.shape)

(5235, 8192)


In [9]:
# Class labels as a column vector; -1 infers the image count instead of hard-coding it.
# The generator was created with shuffle = False, so the order of `classes` should
# match the order of the predicted rows.
labels = train_generator.classes.reshape((-1, 1))

# Append the label as the last column.
# NOTE: not idempotent - re-running this cell on its own appends another label column.
features = np.hstack((features, labels))
print(features.shape)

(5235, 8193)


In [10]:
import pickle

# Save the labelled training features to disk. The context manager closes the
# file even if pickle.dump raises, which the manual open/close did not guarantee.
with open("Extracted Train Features.pkl", "wb") as f:
    pickle.dump(features, f)

In [11]:
# Push every test batch through the frozen base to get its convolutional features.
test_features = model.predict(x = test_generator)

In [12]:
# Flatten each test image's feature map into a single row. The row count comes from
# the array itself and -1 lets NumPy infer the width (8192 in the recorded run), so
# the cell no longer breaks when the number of images or the input size changes.
test_features = test_features.reshape((test_features.shape[0], -1))
print(test_features.shape)

(624, 8192)


In [13]:
# Class labels as a column vector; -1 infers the image count instead of hard-coding it.
# The generator was created with shuffle = False, so the order of `classes` should
# match the order of the predicted rows.
test_labels = test_generator.classes.reshape((-1, 1))

# Append the label as the last column.
# NOTE: not idempotent - re-running this cell on its own appends another label column.
test_features = np.hstack((test_features, test_labels))
print(test_features.shape)

(624, 8193)


In [14]:
# Save the labelled test features to disk. The context manager closes the file
# even if pickle.dump raises, which the manual open/close did not guarantee.
with open("Extracted Test Features.pkl", "wb") as f:
    pickle.dump(test_features, f)