# LOAD IMAGE AND CHECK IF SUBFOLDERS ARE BALANCED

In [1]:
import os
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import numpy as np
from skimage import filters, morphology
from scipy import ndimage
import shutil

# Define the path to the image folder
path = 'C:/Users/user/Desktop/covid/covid_set'

# Get a list of subfolders in the image folder
subfolders = os.listdir(path)

# Loop through each subfolder and print its name and the number of images it contains
for subfolder in subfolders:
    subdir_path = os.path.join(path, subfolder)
    num_images = len(os.listdir(subdir_path))
    print(f"{subfolder}: {num_images} images")

covid: 536 images
normal: 668 images
virus: 619 images


# CLEAN DATASET 

In [2]:
import cv2
import numpy as np
from skimage import filters, morphology
from scipy import ndimage

# Define the path to the image folder
path = 'C:/Users/user/Desktop/covid/covid_set'

# Load and preprocess each image
for filename in os.listdir(path):
    if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif')):
        filepath = os.path.join(path, filename)
        img = cv2.imread(filepath)

        # Remove artifacts and noise
        img = cv2.medianBlur(img, 3)
        img = cv2.GaussianBlur(img, (3, 3), 0)
        edges = filters.sobel(img[:,:,0])
        edges = morphology.binary_dilation(edges)
        edges = ndimage.binary_fill_holes(edges)
        img = np.where(edges[..., np.newaxis], img, 255)

        # Normalize the image
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.equalizeHist(img)

        # Crop and resize the image
        img = img[100:400, 100:400]
        img = cv2.resize(img, (128, 128))

        # Save the cleaned image
        cv2.imwrite(filepath, img)


# BALANCE DATASET

In [3]:

# Get a list of all the image folders in the parent folder
folders = [os.path.join(path, folder) for folder in os.listdir(path) if os.path.isdir(os.path.join(path, folder))]

# Find the smallest number of images in a folder
min_num_images = min([len(os.listdir(folder)) for folder in folders])

# Copy an equal number of images from each folder to a new folder
new_path = 'C:/Users/user/Desktop/covid/covid_set_balanced'
for folder in folders:
    images = os.listdir(folder)[:min_num_images]
    for image in images:
        source_path = os.path.join(folder, image)
        destination_path = os.path.join(new_path, os.path.basename(folder), image)
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copyfile(source_path, destination_path)

# Check the number of images in each folder
for folder in os.listdir(new_path):
    print(folder, len(os.listdir(os.path.join(new_path, folder))))


covid 536
normal 536
virus 536


# EXTRACT FEATURES USING HOG

In [4]:
import cv2
import os
import numpy as np
from skimage.feature import hog

# Define the path to the preprocessed data folder
preprocessed_path = 'C:/Users/user/Desktop/covid/covid_set_balanced'

# Define the HOG parameters
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (3, 3)
block_norm = 'L2-Hys'

# Initialize empty feature and label lists
features = []
labels = []

# Loop through each subdirectory and extract HOG features for each image
for label in ['covid', 'normal', 'virus']:
    path = os.path.join(preprocessed_path, label)
    for image_file in os.listdir(path):
        # Load the image
        img = cv2.imread(os.path.join(path, image_file))

        # Reduce the size of the image
        img = cv2.resize(img, (128, 128))

        # Extract HOG features
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        hog_features = hog(gray, orientations=orientations, pixels_per_cell=pixels_per_cell, 
                           cells_per_block=cells_per_block, block_norm=block_norm)

        # Append the features and label to the lists
        features.append(hog_features)
        labels.append(label)

# Convert the feature and label lists to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Save the extracted features
np.save('features.npy', features)

# Print the shapes of the feature and label arrays
print(features.shape)
print(labels.shape)


(1608, 15876)
(1608,)


In [None]:
FCNN

In [5]:
import cv2
import os
import numpy as np
from skimage.feature import hog
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold

# Define the path to the preprocessed data folder
preprocessed_path = 'C:/Users/user/Desktop/covid/covid_set_balanced'

# Define the HOG parameters
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (3, 3)
block_norm = 'L2-Hys'

# Initialize empty feature and label lists
features = []
labels = []

# Loop through each subdirectory and extract HOG features for each image
for label in ['covid', 'normal', 'virus']:
    path = os.path.join(preprocessed_path, label)
    for image_file in os.listdir(path):
        # Load the image
        img = cv2.imread(os.path.join(path, image_file))

        # Reduce the size of the image
        img = cv2.resize(img, (128, 128))

        # Extract HOG features
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        hog_features = hog(gray, orientations=orientations, pixels_per_cell=pixels_per_cell, cells_per_block=cells_per_block, block_norm=block_norm)

        # Append the features and label to the lists
        features.append(hog_features)
        labels.append(label)

# Convert features and labels to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Get the confusion matrix
cm = confusion_matrix(y, y_pred) 
labels=['covid', 'normal', 'virus']

# Calculate the true positives, false positives, true negatives, and false negatives
TP = np.diag(cm)
FP = cm.sum(axis=0) - TP
FN = cm.sum(axis=1) - TP
TN = cm.sum() - (TP + FP + FN)

# Calculate the specificity and sensitivity
specificity = TN / (TN + FP)
sensitivity = TP / (TP + FN)

print("Specificity:", specificity)
print("Sensitivity:", sensitivity)

Fold 1: Accuracy = 0.9503105590062112
Fold 2: Accuracy = 0.9503105590062112
Fold 3: Accuracy = 0.937888198757764
Fold 4: Accuracy = 0.968944099378882
Fold 5: Accuracy = 0.9440993788819876
Fold 6: Accuracy = 0.9627329192546584
Fold 7: Accuracy = 0.9503105590062112
Fold 8: Accuracy = 0.9440993788819876
Fold 9: Accuracy = 0.975
Fold 10: Accuracy = 0.95

Average accuracy = 0.9533695652173912


In [6]:
# Evaluate the classifier using the entire dataset and print the accuracy, f1, and cohen k scores
y_pred = clf.predict(features)
accuracy = metrics.accuracy_score(labels, y_pred)
f1 = metrics.f1_score(labels, y_pred, average='weighted')
cohen_k = metrics.cohen_kappa_score(labels, y_pred)
print(f"\nAccuracy = {accuracy}")
print(f"F1 score = {f1}")
print(f"Cohen's Kappa score = {cohen_k}")

# Calculate and print the confusion matrix for the classifier
confusion_matrix = metrics.confusion_matrix(labels, y_pred)
print("\nConfusion Matrix:")
print(confusion_matrix)



Accuracy = 0.9950248756218906
F1 score = 0.9950236785024305
Cohen's Kappa score = 0.9925373134328358

Confusion Matrix:
[[536   0   0]
 [  2 533   1]
 [  0   5 531]]


In [9]:
from sklearn.metrics import confusion_matrix

# Get the confusion matrix
confusion_matrix = metrics.confusion_matrix(labels, y_pred)

# Calculate the true positives, false positives, true negatives, and false negatives
TP = np.diag(confusion_matrix)
FP = confusion_matrix.sum(axis=0) - TP
FN = confusion_matrix.sum(axis=1) - TP
TN = confusion_matrix.sum() - (TP + FP + FN)

# Calculate the specificity and sensitivity
specificity = TN / (TN + FP)
sensitivity = TP / (TP + FN)

print("Specificity:", specificity)
print("Sensitivity:", sensitivity)

Specificity: [0.99813433 0.99533582 0.99906716]
Sensitivity: [1.         0.99440299 0.99067164]


In [None]:
CNN

In [13]:
import cv2
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.utils import to_categorical
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

# Define the path to the preprocessed data folder
preprocessed_path = 'C:/Users/user/Desktop/covid/covid_set_balanced'

# Define the input shape of the images
input_shape = (128, 128, 3)

# Initialize empty feature and label lists
features = []
labels = []

# Loop through each subdirectory and extract the images and labels
for label in ['covid', 'normal', 'virus']:
    path = os.path.join(preprocessed_path, label)
    for image_file in os.listdir(path):
        # Load the image
        img = cv2.imread(os.path.join(path, image_file))

        # Reduce the size of the image
        img = cv2.resize(img, (input_shape[0], input_shape[1]))

        # Append the image and label to the lists
        features.append(img)
        labels.append(label)

# Convert features and labels to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Encode the labels as integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Convert labels to categorical
labels = to_categorical(labels)

# Initialize the CNN classifier
model = Sequential()

# Add the first convolutional layer
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))

# Add the first pooling layer
model.add(MaxPooling2D(pool_size=(2, 2)))

# Add the second convolutional layer
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))

# Add the second pooling layer
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the output of the second pooling layer
model.add(Flatten())

# Add the first dense layer
model.add(Dense(128, activation='relu'))

# Add the output layer
model.add(Dense(3, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Initialize the StratifiedKFold cross validator
skf = KFold(n_splits=10, shuffle=True, random_state=42)

# Initialize empty lists to store the accuracy score for each fold
accuracy_scores = []

# Loop through each fold and train the classifier
for fold_idx, (train_idx, test_idx) in enumerate(skf.split(features, labels)):
    # Split the data into training and testing sets
    X_train, X_test = features[train_idx], features[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    # Train the classifier
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    # Evaluate the classifier on the testing set and calculate the accuracy score
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    accuracy_scores.append(accuracy)
    
    


    # Print the accuracy score for the current fold
    print(f"Fold {fold_idx+1}: Accuracy = {accuracy}")

# Print the average accuracy score across all folds
print(f"\nAverage accuracy = {np.mean(accuracy_scores)}")

Fold 1: Accuracy = 0.9254658222198486
Fold 2: Accuracy = 0.8322981595993042
Fold 3: Accuracy = 0.9689440727233887
Fold 4: Accuracy = 0.9875776171684265
Fold 5: Accuracy = 0.9627329111099243
Fold 6: Accuracy = 0.9689440727233887
Fold 7: Accuracy = 0.9937888383865356
Fold 8: Accuracy = 0.9813664555549622
Fold 9: Accuracy = 0.987500011920929
Fold 10: Accuracy = 1.0

Average accuracy = 0.9608617961406708


In [16]:
# Evaluate the classifier on the entire dataset
y_pred_all = np.argmax(model.predict(features), axis=1)
y_true_all = np.argmax(labels, axis=1)
accuracy = metrics.accuracy_score(y_true_all, y_pred_all)
f1_score = metrics.f1_score(y_true_all, y_pred_all, average='weighted')
cohen_k_score = metrics.cohen_kappa_score(y_true_all, y_pred_all)

# Print the accuracy, f1, and cohen k scores
print("Accuracy:", accuracy)
print("F1 Score:", f1_score)
print("Cohen Kappa Score:", cohen_k_score)

# Calculate and print the confusion matrix for the classifier
confusion_matrix = metrics.confusion_matrix(y_true_all, y_pred_all)
print("Confusion Matrix:")
print(confusion_matrix)


Accuracy: 0.9993781094527363
F1 Score: 0.9993781089115775
Cohen Kappa Score: 0.9990671641791045
Confusion Matrix:
[[535   1   0]
 [  0 536   0]
 [  0   0 536]]


In [18]:
from sklearn.metrics import confusion_matrix

# Get the confusion matrix
confusion_matrix = metrics.confusion_matrix(y_true_all, y_pred_all)

# Calculate the true positives, false positives, true negatives, and false negatives
TP = np.diag(confusion_matrix)
FP = confusion_matrix.sum(axis=0) - TP
FN = confusion_matrix.sum(axis=1) - TP
TN = confusion_matrix.sum() - (TP + FP + FN)

# Calculate the specificity and sensitivity
specificity = TN / (TN + FP)
sensitivity = TP / (TP + FN)

print("Specificity:", specificity)
print("Sensitivity:", sensitivity)

Specificity: [1.         0.99906716 1.        ]
Sensitivity: [0.99813433 1.         1.        ]
