In [1]:
import cv2
import glob
import matplotlib.pyplot as plt
import imutils
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA, KernelPCA
from sklearn.ensemble import RandomForestClassifier

In [2]:
data_no = 'data-covid/no/'
ext = ['png', 'jpg', 'gif']    # Add image formats here

files_no = []
[files_no.extend(glob.glob(data_no + '*.' + e)) for e in ext]

images_no = [cv2.imread(file) for file in files_no]

In [37]:
data_yes = 'data-covid/yes/'
ext = ['png', 'jpg', 'gif']    # Add image formats here

files_yes = []
[files_yes.extend(glob.glob(data_yes + '*.' + e)) for e in ext]

images_yes = [cv2.imread(file) for file in files_yes]

In [39]:
print(len(images_no)+ len(images_yes))


8440


In [40]:
def crop_image(image):
    
    # Convert the image to grayscale, and blur it slightly
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    # Find contours in thresholded image, then grab the largest one
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key=cv2.contourArea)
    # extreme points
    extLeft = tuple(c[c[:, :, 0].argmin()][0])
    extRight = tuple(c[c[:, :, 0].argmax()][0])
    extTop = tuple(c[c[:, :, 1].argmin()][0])
    extBot = tuple(c[c[:, :, 1].argmax()][0])
    
    # crop new image out of the original image using the four extreme points (left, right, top, bottom)
    new_image = image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]            
    
    return new_image

In [41]:
covid_imgs_croped_yes = []
covid_imgs_croped_no = []


for image in images_yes:
    x = crop_image(image)
    x_resize = cv2.resize(x, (128, 128))
    gray = cv2.cvtColor(x_resize, cv2.COLOR_BGR2GRAY)
    covid_imgs_croped_yes.append(gray)


for image in images_no:
    x = crop_image(image)
    x_resize = cv2.resize(x, (128, 128))
    gray = cv2.cvtColor(x_resize, cv2.COLOR_BGR2GRAY)
    covid_imgs_croped_no.append(gray)

In [42]:
y_yes = np.ones(len(covid_imgs_croped_yes), dtype="int8")
y_no = np.zeros(len(covid_imgs_croped_no), dtype="int8")

In [43]:
covid_imgs_croped_yes

[array([[ 53,   0,   0, ...,   0,   1,   4],
        [  1,   0,   0, ...,   0,   1,   4],
        [  0,   0,   0, ...,   0,   0,  15],
        ...,
        [ 42,  69,  98, ...,  14,   1,   0],
        [ 54,  76, 105, ...,   9,   1,   1],
        [ 73,  85, 112, ...,   4,   1,   7]], dtype=uint8),
 array([[132, 128, 128, ...,  39,  35,  28],
        [130, 124, 122, ...,  53,  53,  47],
        [140, 131, 125, ...,  53,  50,  49],
        ...,
        [  1,   2,   2, ..., 159, 156, 154],
        [  1,   4,   2, ..., 162, 158, 157],
        [  2,   4,   2, ..., 165, 162, 161]], dtype=uint8),
 array([[ 19,   0,   0, ..., 187, 193, 198],
        [ 13,   0,   0, ..., 186, 192, 197],
        [ 81,  83,  64, ..., 189, 195, 200],
        ...,
        [191, 107, 101, ...,   0,   0,   2],
        [188, 105,  96, ...,   0,   0,   2],
        [195, 109, 102, ...,   0,   0,   2]], dtype=uint8),
 array([[72, 72, 72, ...,  2,  3,  3],
        [ 0,  0,  0, ...,  2,  1,  2],
        [83, 86, 68, ...,  1

In [44]:
X = np.concatenate((covid_imgs_croped_yes, covid_imgs_croped_no), axis=0)
y = np.concatenate((y_yes, y_no), axis=0)
d1, d2, d3 = X.shape

In [45]:
X = X.reshape((d1, d2 * d3))


In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)


## scale data before train model


In [47]:
scaler_ = StandardScaler()
X_train_sc = scaler_.fit_transform(X_train)
X_test_sc = scaler_.transform(X_test)

## random forest without pca


In [48]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train_sc, y_train)
y_predict_rf = rf_model.predict(X_test_sc)

## SVC without pca


In [49]:
svc_model = SVC(kernel="linear")
svc_model.fit(X_train_sc, y_train)
y_predict_svc = svc_model.predict(X_test_sc)


## PCA

In [50]:
data_pca = PCA(n_components=12)
pca_components = data_pca.fit(X_train_sc)
X_train_pca = pca_components.fit_transform(X_train_sc)
X_test_pca = pca_components.transform(X_test_sc)

## KernelPCA


In [51]:
data_kpca = KernelPCA()
kpca_components = data_kpca.fit(X_train_sc)
X_train_kpca = kpca_components.fit_transform(X_train_sc)
X_test_kpca = kpca_components.transform(X_test_sc)


## RandomForest With PCA


In [52]:
rf_model_pca = RandomForestClassifier()
rf_model_pca.fit(X_train_pca, y_train)
y_predict_rf_pca = rf_model_pca.predict(X_test_pca)

## SVC With PCA


In [53]:
svc_model_pca = SVC(kernel="linear")
svc_model.fit(X_train_pca, y_train)
y_predict_pca = svc_model.predict(X_test_pca)

## RandomForest With KernelPCA


In [54]:
rf_model_kpca = RandomForestClassifier()
rf_model_kpca.fit(X_train_kpca, y_train)
y_predict_rf_kpca = rf_model_kpca.predict(X_test_kpca)

## SVC With KernelPCA


In [55]:
svc_model_kpca = SVC(kernel="linear")
svc_model.fit(X_train_kpca, y_train)
y_predict_kpca = svc_model.predict(X_test_kpca)

In [56]:
print("without PCA: ", accuracy_score(y_test, y_predict_svc))
print("with PCA: ", accuracy_score(y_test, y_predict_pca))

without PCA:  0.8838862559241706
with PCA:  0.8755924170616114


In [57]:
print("without PCA:")
print(classification_report(y_test, y_predict_svc))

without PCA:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      1429
           1       0.62      0.63      0.63       259

    accuracy                           0.88      1688
   macro avg       0.78      0.78      0.78      1688
weighted avg       0.89      0.88      0.88      1688



In [58]:
print("with PCA:")
print(classification_report(y_test, y_predict_pca))

with PCA:
              precision    recall  f1-score   support

           0       0.88      0.99      0.93      1429
           1       0.84      0.24      0.37       259

    accuracy                           0.88      1688
   macro avg       0.86      0.61      0.65      1688
weighted avg       0.87      0.88      0.84      1688



In [59]:
print("RandomForest without PCA:")
print(classification_report(y_test, y_predict_rf))

RandomForest without PCA:
              precision    recall  f1-score   support

           0       0.94      0.99      0.97      1429
           1       0.95      0.64      0.77       259

    accuracy                           0.94      1688
   macro avg       0.94      0.82      0.87      1688
weighted avg       0.94      0.94      0.94      1688



In [60]:
print("RandomForest with PCA:")
print(classification_report(y_test, y_predict_rf_pca))

RandomForest with PCA:
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      1429
           1       0.89      0.51      0.65       259

    accuracy                           0.91      1688
   macro avg       0.90      0.75      0.80      1688
weighted avg       0.91      0.91      0.90      1688



In [61]:
print("RandomForest without KPCA:")
print(classification_report(y_test, y_predict_rf_kpca))

RandomForest without KPCA:
              precision    recall  f1-score   support

           0       0.77      0.10      0.18      1429
           1       0.14      0.83      0.24       259

    accuracy                           0.21      1688
   macro avg       0.45      0.47      0.21      1688
weighted avg       0.67      0.21      0.19      1688



In [62]:
print("SVC with KPCA:")
print(classification_report(y_test, y_predict_kpca))

SVC with KPCA:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      1429
           1       0.62      0.63      0.63       259

    accuracy                           0.88      1688
   macro avg       0.78      0.78      0.78      1688
weighted avg       0.89      0.88      0.88      1688

