<a href="https://colab.research.google.com/github/edsondamasceno/classification-COVID-19/blob/main/Simple_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simple CNN Classification
Adapted from: How to build a simple CNN based Image classifier using Keras

Link: https://www.milindsoorya.com/blog/how-to-build-a-simple-cnn-based-image-classifier-using-keras

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used
# further down all live under that mount point (Google Colab only).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Run the `nvidia-smi` shell command and capture its output lines.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The word 'failed' in the command output is treated as "no GPU attached".
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
!pip install keras-tuner --upgrade

In [None]:
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import warnings
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras_tuner as kt

from pathlib import Path
import os.path
from IPython.display import Image, display
import matplotlib.cm as cm
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [None]:
# Set the seed so the experiment is reproducible.
seed = 1842
tf.random.set_seed(seed)
np.random.seed(seed)
# Silence warnings for a cleaner-looking notebook.
warnings.simplefilter('ignore')

In [None]:
image_dir = Path('/content/drive/MyDrive/COVID-19/COVID-QU-Ex')

# Recursively collect every PNG below image_dir. The name of the directory
# that directly contains an image is used as that image's class label.
png_paths = list(image_dir.glob(r'**/*.png'))

filepaths = pd.Series(png_paths, name='Filepath').astype(str)
labels = pd.Series([png.parent.name for png in png_paths], name='Label')

# Two-column table: one row per image (path, label).
image_df = pd.concat([filepaths, labels], axis=1)

In [None]:
# Shuffle the rows and reset the index.
# NOTE(review): no random_state is passed and image_df is overwritten in
# place, so re-running this cell reshuffles the already shuffled frame.
image_df = image_df.sample(frac=1).reset_index(drop = True)

# Show the result
image_df.head(3)

In [None]:
# Show the first 15 images of the dataset (3 x 5 grid) with their labels.
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 7),
                        subplot_kw={'xticks': [], 'yticks': []})

# Axes are visited row by row; axis i shows row i of image_df.
for i, ax in enumerate(axes.flat):
    ax.imshow(plt.imread(image_df.Filepath[i]))
    ax.set_title(image_df.Label[i])
plt.tight_layout()
plt.show()

In [None]:
# Split the data into training (80%) and test (20%) sets.
# The split uses its own fixed random_state (1), independent of `seed`.
train_df, test_df = train_test_split(image_df, train_size=0.8, shuffle=True, random_state=1)

In [None]:
# Image generator used for the training and validation iterators.
# Only pixel rescaling to [0, 1] is configured here (no augmentation
# transforms); 10% of the rows it is given are reserved for validation.
image_generator = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.1)

In [None]:
# Settings shared by the training, validation and test iterators.
flow_kwargs = dict(x_col='Filepath',
                   y_col='Label',
                   target_size=(224, 224),
                   batch_size=100,
                   class_mode='categorical')

# Training / validation iterators: both read train_df, and `subset` picks
# the side of the validation_split configured on image_generator.
train_dataset = image_generator.flow_from_dataframe(dataframe=train_df,
                                                    shuffle=True,
                                                    subset="training",
                                                    **flow_kwargs)

validation_dataset = image_generator.flow_from_dataframe(dataframe=train_df,
                                                         shuffle=True,
                                                         subset="validation",
                                                         **flow_kwargs)

# Test iterator used for predictions: rescaling only, and no shuffling so the
# batches follow the row order of test_df.
image_generator_submission = ImageDataGenerator(rescale=1./255)
test_images = image_generator_submission.flow_from_dataframe(dataframe=test_df,
                                                             shuffle=False,
                                                             **flow_kwargs)

In [None]:
# Model 0: two Conv2D(3x3, ReLU) + MaxPooling2D stages (32 then 64 filters),
# Dropout(0.3), then Flatten -> Dense(128, ReLU) -> Dense(2, softmax).
# Input: 224 x 224 images with 3 channels.
# NOTE(review): the output layer is fixed at 2 units; confirm it equals the
# number of distinct labels produced by the data iterators.
model = keras.models.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape = [224, 224,3]),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Dropout(0.3),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(2, activation ='softmax')
])

In [None]:
# Model 1: three Conv2D(3x3, ReLU) layers (32, 64, 64 filters) with
# MaxPooling2D after the first two, no dropout, then
# Flatten -> Dense(256, ReLU) -> Dense(2, softmax).
# Input: 224 x 224 images with 3 channels.
model1 = keras.models.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape = [224, 224,3]),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(2, activation ='softmax')
])

In [None]:
# Model 2: three Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D
# stages (32, 64, 128 filters), Dropout(0.2), then
# Flatten -> Dense(256, ReLU) -> Dense(128, ReLU) -> Dense(2, softmax).
# Input: 224 x 224 images with 3 channels.
model2 = keras.models.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape = [224, 224,3]),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(2, activation ='softmax')
])

In [None]:
# Model 3: four Conv2D(5x5, ReLU) + MaxPooling2D stages (32, 32, 64, 64
# filters), then Flatten -> Dense(128, ReLU) -> Dense(100, ReLU)
# -> Dense(2, softmax).
# Input: 224 x 224 images with 3 channels.
model3 = keras.models.Sequential([
    keras.layers.Conv2D(32, (5, 5), activation='relu', input_shape = [224, 224,3]),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(32, (5, 5), activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, (5, 5), activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, (5, 5), activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(2, activation ='softmax')
])

In [None]:
# Print the layer-by-layer summary of `model` (Model 0), the model that is
# compiled and trained below.
model.summary()

In [None]:
# The iterators use class_mode='categorical' (one-hot label vectors) and the
# network ends in a softmax layer, so categorical cross-entropy is the
# matching loss; it also stays correct if the number of classes changes.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Early stopping: watch the validation loss, stop after 5 epochs without
# improvement, and roll the model back to the best weights seen.
callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=5,
                                            restore_best_weights=True)

In [None]:
# Train for at most 100 epochs; early stopping (see `callback`) normally ends
# the run sooner. `callbacks` is documented as a list of callback instances.
history = model.fit(train_dataset, epochs=100, validation_data=validation_dataset, callbacks=[callback])

In [None]:
# Save the trained model twice: once to Google Drive and once to the current
# working directory.
# NOTE(review): the Drive folder is named Model_03 although `model` is the
# network defined under the "Model 0" comment - confirm the intended folder.
model.save('/content/drive/MyDrive/COVID-19/CNN/Model_03/cnn-model.h5')
model.save('cnn-model.h5')

In [None]:
# Persist the per-epoch training metrics as CSV next to the saved model.
# pandas is already imported as `pd` at the top of the notebook; to_csv
# opens and closes the target file itself when given a path.
hist_df = pd.DataFrame(history.history)
hist_csv_file = '/content/drive/MyDrive/COVID-19/CNN/Model_03/history.csv'
hist_df.to_csv(hist_csv_file)

In [None]:
# Evaluate on the held-out test iterator. With the compile settings above,
# results[0] is the loss and results[1] the accuracy metric.
results = model.evaluate(test_images, verbose=0)

print("Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

In [None]:
# Class index -> class name, obtained by inverting the mapping that the
# training iterator built from the label column.
labels = {index: name for name, index in train_dataset.class_indices.items()}

# Score the test images, keep the highest-scoring class index per row and
# translate each index to its class name.
pred = [labels[k] for k in np.argmax(model.predict(test_images), axis=1)]

In [None]:
from sklearn.metrics import classification_report
# Ground-truth labels in test_df row order. `pred` is assumed to follow the
# same order because the test iterator was created with shuffle=False.
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

In [None]:
from sklearn.metrics import confusion_matrix
# Raw (count) confusion matrix: rows are true labels, columns are predictions.
confusion_matrix(y_test,pred)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# confusion_matrix orders rows/columns by the sorted labels that occur in
# y_test OR pred. Build that list explicitly and pass it both to the matrix
# and to the tick labels, so the axes cannot drift out of sync when a class
# is predicted that never occurs in y_test.
class_names = sorted(set(y_test) | set(pred))

# normalize='true' scales every row (true class) to sum to 1.
cf_matrix = confusion_matrix(y_test, pred, labels=class_names, normalize='true')
plt.figure(figsize = (10,6))
sns.heatmap(cf_matrix, annot=True, xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Normalized Confusion Matrix')
plt.show()

# Examples of prediction

In [None]:
# Show the first 15 test images (3 x 5 grid), each titled with its true and
# predicted label.
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 7),
                        subplot_kw={'xticks': [], 'yticks': []})

# Positional (.iloc) access is used because test_df keeps the shuffled index
# it inherited from image_df; pred[i] is matched to the i-th row.
for i, ax in enumerate(axes.flat):
    ax.imshow(plt.imread(test_df.Filepath.iloc[i]))
    ax.set_title(f"True: {test_df.Label.iloc[i]}\nPredicted: {pred[i]}")
plt.tight_layout()
plt.show()

# Feature Extraction Methodology

In [None]:
import numpy as np
import glob
import pandas as pd
from keras.preprocessing import image
from keras.models import load_model, Model

In [None]:
# Reload the trained network from the copy saved on Google Drive.
# This rebinds the notebook-level name `model`.
model = load_model('/content/drive/MyDrive/COVID-19/CNN/Model_03/cnn-model.h5')

In [None]:
# Truncated copy of `model`: same input, but the output is taken from the
# layer named 'dense', so predictions are feature vectors instead of class
# scores.
# NOTE(review): 'dense' is an auto-generated layer name; presumably it is the
# first Dense layer of the saved model - confirm with model.summary().
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer('dense').output)
intermediate_layer_model.summary()

In [None]:
# Extract features for the validation iterator and wrap them in a DataFrame
# (one row per image, one unnamed column per feature).
# NOTE(review): validation_dataset was created with shuffle=True, so the row
# order here presumably does not match any fixed label order - confirm before
# joining these features with labels.
feauture_engg_data = intermediate_layer_model.predict(validation_dataset)
feauture_engg_data = pd.DataFrame(feauture_engg_data)
print('feauture_engg_data shape:', feauture_engg_data.shape)
feauture_engg_data.head(5)  # The feature columns have no names at this point

In [None]:
def extract_feature_one_image(img_path, intermediate_layer_model, input_img, rescale=1.0 / 255):
    """Return the flattened feature vector of a single image.

    The image is loaded, resized to ``input_img x input_img``, multiplied by
    ``rescale`` and passed through ``intermediate_layer_model`` as a batch of
    one.

    Args:
        img_path: path of the image file to load.
        intermediate_layer_model: model whose ``predict`` output is used as
            the feature vector.
        input_img: side length, in pixels, the image is resized to.
        rescale: factor applied to the raw pixel values. The default (1/255)
            matches the ``rescale=1./255`` used by the training generators,
            so the network sees inputs in the range it was trained on.
            Pass ``1.0`` to feed raw pixel values.

    Returns:
        1-D array holding the model output for this image.
    """
    img = image.load_img(img_path, target_size=(input_img, input_img))
    # Apply the same pixel scaling as during training, then add the batch axis.
    img_data = image.img_to_array(img) * rescale
    img_data = np.expand_dims(img_data, axis=0)

    # verbose=0: this runs once per image, so progress output would flood the cell.
    features = intermediate_layer_model.predict(img_data, verbose=0)
    return features.reshape((-1))

In [None]:
def create_model(base_model=None, layer_name='dense', input_image=224):
    """Build the feature-extraction model and report its expected image size.

    Args:
        base_model: trained model to truncate. When omitted, the
            notebook-level ``model`` is used.
        layer_name: name of the layer whose output becomes the feature
            vector.
        input_image: side length, in pixels, that callers should resize
            images to before feeding them to the returned model.

    Returns:
        Tuple ``(intermediate_layer_model, input_image)``.
    """
    if base_model is None:
        # Fall back to the model held in the notebook's global namespace.
        base_model = model

    intermediate_layer_model = Model(inputs=base_model.input,
                                     outputs=base_model.get_layer(layer_name).output)

    return intermediate_layer_model, input_image

In [None]:
model_name = 'CNN'
database_name = 'COVID-CT'

path_database = '/content/drive/MyDrive/' + database_name + '/'
path_write = '/content/drive/MyDrive/COVID-19-2021/Arquitetura-4/' + database_name + '_' + model_name + '.csv'

intermediate_layer_model, input_img = create_model()

print('Model created...')

# Each sub-directory of the database is one class. Sorting makes the class
# index assigned to every directory the same on every run (glob order is not
# guaranteed), and stray files at the top level are not counted as classes.
classes = sorted(d for d in glob.glob(path_database + '*') if os.path.isdir(d))
per_class_frames = []

for cont_classe, classe in enumerate(classes):
    images_names = sorted(glob.glob(classe + '/*.*'))
    features = [extract_feature_one_image(name, intermediate_layer_model, input_img)
                for name in images_names]

    # One row per image: the feature columns plus the numeric class index.
    features_labels = pd.DataFrame(data=features)
    features_labels['Labels'] = np.uint8(cont_classe)
    per_class_frames.append(features_labels)

# DataFrame.append was removed in pandas 2.0; collect the per-class frames
# and concatenate them once instead of growing a frame inside the loop.
if per_class_frames:
    features_labels_two_classes = pd.concat(per_class_frames, ignore_index=True)
else:
    features_labels_two_classes = pd.DataFrame()

# Make sure the output folder exists before writing the CSV.
os.makedirs(os.path.dirname(path_write), exist_ok=True)
features_labels_two_classes.to_csv(path_write,index=False)

print('Finished!!!!')

In [None]:
# Display the (rows, columns) shape of the combined feature table.
features_labels_two_classes.shape