## Importing Required Packages

In [30]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## Setting up data extractor

In [31]:
def get_files(path):
    file_list = []
    for path, directories, files in os.walk(path):
        file_list = files
        return file_list

In [32]:
img_raw = cv2.imread(f'COVID19_Lung_CT/CT_COVID/2019-novel-Coronavirus-severe-adult-respiratory-dist_2020_International-Jour-p3-89%0.png',0)
print(img_raw.shape)
a = np.array(img_raw).flatten()
print(a.shape)


(335, 580)
(194300,)


## Extracting COVID-19 data, normalizing & then adding to dataframe

In [33]:
c_path = "./COVID19_Lung_CT/CT_COVID"
c_files = get_files(c_path)
c_data = []


for img in c_files:
    img_raw = cv2.imread(f'{c_path}/{img}',0)
    try:
        img_norm = cv2.resize(img_raw,(20,20)) # This needs to be improved with effective feature extraction
        img_flat = np.array(img_norm).flatten()
        c_data.append([img_flat,1]) # Assigning label 1 for COVID data
    except AssertionError: # Using try except clause makes sure we don't have any images which would not resize properly
        pass
        
print(len(c_data))

349


In [5]:
#pd.set_option("max_rows", None)
df = pd.DataFrame(c_data, columns = ['img_data', 'label'])
df

Unnamed: 0,img_data,label
0,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ...",1
1,"[7, 8, 9, 7, 8, 8, 8, 8, 9, 8, 9, 8, 7, 6, 8, ...",1
2,"[251, 245, 74, 87, 86, 77, 76, 63, 82, 67, 120...",1
3,"[24, 69, 63, 64, 67, 111, 101, 83, 95, 99, 101...",1
4,"[0, 0, 0, 0, 62, 35, 189, 195, 186, 33, 130, 1...",1
...,...,...
344,"[52, 51, 51, 53, 52, 61, 76, 104, 55, 66, 54, ...",1
345,"[0, 45, 41, 46, 45, 41, 43, 46, 43, 45, 42, 44...",1
346,"[77, 77, 80, 78, 74, 248, 225, 251, 254, 228, ...",1
347,"[242, 255, 255, 242, 237, 253, 255, 255, 255, ...",1


## Extracting Non COVID-19 data, normalizing & then adding to dataframe

In [6]:
nc_path = "./COVID19_Lung_CT/CT_NonCOVID"
nc_files = get_files(nc_path)
nc_data = []

for img in nc_files:
    img_raw = cv2.imread(f'{nc_path}/{img}',0)
    try:
        img_norm = cv2.resize(img_raw,(20,20))
        img_flat = np.array(img_norm).flatten()
        nc_data.append([img_flat,0])
    except AssertionError:
        pass
    
print(len(nc_data))

397


In [7]:
df2 = pd.DataFrame(nc_data, columns = ['img_data', 'label'])
df2

Unnamed: 0,img_data,label
0,"[70, 63, 67, 66, 195, 187, 190, 182, 182, 127,...",0
1,"[0, 0, 60, 71, 61, 63, 83, 60, 85, 94, 76, 247...",0
2,"[19, 19, 19, 19, 19, 19, 19, 20, 21, 18, 21, 1...",0
3,"[1, 0, 0, 1, 220, 213, 215, 213, 215, 214, 207...",0
4,"[0, 0, 45, 43, 45, 44, 47, 53, 45, 50, 51, 48,...",0
...,...,...
392,"[1, 1, 15, 44, 44, 44, 43, 44, 44, 45, 44, 44,...",0
393,"[89, 234, 225, 232, 247, 243, 236, 244, 249, 2...",0
394,"[210, 241, 235, 215, 240, 224, 165, 247, 231, ...",0
395,"[42, 42, 42, 42, 42, 42, 42, 43, 44, 68, 45, 5...",0


### Merging data frames

In [8]:
img_master = df.append(df2, ignore_index=True)
img_master

Unnamed: 0,img_data,label
0,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ...",1
1,"[7, 8, 9, 7, 8, 8, 8, 8, 9, 8, 9, 8, 7, 6, 8, ...",1
2,"[251, 245, 74, 87, 86, 77, 76, 63, 82, 67, 120...",1
3,"[24, 69, 63, 64, 67, 111, 101, 83, 95, 99, 101...",1
4,"[0, 0, 0, 0, 62, 35, 189, 195, 186, 33, 130, 1...",1
...,...,...
741,"[1, 1, 15, 44, 44, 44, 43, 44, 44, 45, 44, 44,...",0
742,"[89, 234, 225, 232, 247, 243, 236, 244, 249, 2...",0
743,"[210, 241, 235, 215, 240, 224, 165, 247, 231, ...",0
744,"[42, 42, 42, 42, 42, 42, 42, 43, 44, 68, 45, 5...",0


In [9]:
img_final = img_master.sample(frac=1).reset_index(drop=True) #Shuffling dataframe and resetting index values
img_final

Unnamed: 0,img_data,label
0,"[84, 247, 233, 245, 255, 255, 248, 225, 231, 2...",1
1,"[232, 233, 212, 230, 231, 227, 207, 219, 209, ...",0
2,"[1, 1, 56, 54, 52, 50, 65, 57, 57, 54, 55, 51,...",0
3,"[1, 1, 60, 65, 58, 61, 61, 68, 72, 58, 82, 75,...",0
4,"[59, 51, 58, 232, 218, 255, 252, 244, 217, 225...",0
...,...,...
741,"[215, 55, 55, 52, 59, 54, 173, 169, 59, 57, 55...",1
742,"[6, 5, 5, 4, 4, 4, 3, 3, 4, 4, 3, 3, 3, 3, 3, ...",1
743,"[77, 77, 78, 173, 227, 232, 234, 232, 214, 172...",0
744,"[1, 1, 45, 44, 45, 44, 44, 42, 45, 43, 41, 44,...",0


In [10]:
writer = pd.ExcelWriter('total.xlsx')
# write dataframe to excel
.to_excel(writer)
# save the excel
writer.save()
print('DataFrame is written successfully to Excel File.')

SyntaxError: invalid syntax (<ipython-input-10-a2fbe2f7715a>, line 3)

In [11]:
img_final.columns


Index(['img_data', 'label'], dtype='object')

In [12]:
import numpy as np
import pandas as pd

def train_validate_test_split(df, train_percent=.7, validate_percent=.1, seed=None):
    np.random.seed(seed)
    perm = np.random.permutation(df.index)
    m = len(df.index)
    train_end = int(train_percent * m)
    validate_end = int(validate_percent * m) + train_end
    train = df.iloc[perm[:train_end]]
    validate = df.iloc[perm[train_end:validate_end]]
    test = df.iloc[perm[validate_end:]]
    return train, validate, test

In [13]:
train, validate, test = train_validate_test_split(img_final)

In [14]:
train

Unnamed: 0,img_data,label
238,"[1, 1, 28, 25, 43, 26, 25, 28, 27, 26, 40, 28,...",0
169,"[66, 58, 64, 62, 65, 119, 236, 230, 240, 230, ...",0
319,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 91, 138, ...",0
384,"[2, 62, 37, 119, 109, 112, 140, 134, 119, 112,...",1
591,"[67, 64, 66, 66, 77, 74, 75, 80, 72, 68, 67, 6...",0
...,...,...
254,"[0, 0, 79, 77, 86, 74, 132, 213, 216, 225, 174...",0
733,"[1, 1, 1, 44, 44, 45, 44, 44, 45, 45, 44, 48, ...",0
599,"[0, 0, 2, 231, 212, 214, 210, 219, 198, 78, 85...",0
674,"[1, 57, 53, 54, 119, 184, 208, 217, 217, 216, ...",0


In [15]:
writer = pd.ExcelWriter('train.xlsx')
# write dataframe to excel
train.to_excel(writer)
# save the excel
writer.save()
print('DataFrame is written successfully to Excel File.')

DataFrame is written successfully to Excel File.


In [16]:
test

Unnamed: 0,img_data,label
665,"[21, 21, 25, 220, 253, 221, 233, 214, 226, 37,...",1
261,"[28, 136, 203, 226, 31, 54, 37, 215, 196, 205,...",1
520,"[142, 174, 171, 170, 176, 177, 174, 177, 182, ...",0
536,"[1, 1, 2, 89, 101, 84, 246, 254, 254, 250, 250...",1
341,"[51, 51, 50, 53, 51, 49, 99, 55, 51, 55, 51, 5...",1
...,...,...
611,"[68, 81, 164, 223, 253, 228, 242, 236, 216, 20...",0
404,"[7, 8, 9, 7, 8, 8, 8, 8, 9, 8, 9, 8, 7, 6, 8, ...",1
692,"[1, 1, 1, 11, 26, 30, 25, 28, 28, 28, 29, 30, ...",0
450,"[50, 50, 46, 45, 47, 46, 48, 44, 50, 45, 47, 4...",1


In [17]:
writer = pd.ExcelWriter('test.xlsx')
# write dataframe to excel
test.to_excel(writer)
# save the excel
writer.save()
print('DataFrame is written successfully to Excel File.')

DataFrame is written successfully to Excel File.


In [18]:
validate

Unnamed: 0,img_data,label
214,"[62, 64, 64, 63, 63, 83, 95, 166, 131, 60, 65,...",1
275,"[63, 243, 217, 209, 215, 214, 232, 247, 246, 2...",1
628,"[215, 211, 208, 215, 235, 243, 252, 250, 248, ...",1
741,"[215, 55, 55, 52, 59, 54, 173, 169, 59, 57, 55...",1
127,"[54, 54, 54, 53, 70, 49, 66, 62, 66, 59, 56, 5...",0
...,...,...
88,"[218, 226, 234, 216, 219, 241, 221, 228, 244, ...",0
117,"[192, 215, 214, 196, 208, 215, 217, 205, 216, ...",1
3,"[1, 1, 60, 65, 58, 61, 61, 68, 72, 58, 82, 75,...",0
313,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38, 47, 1...",0


In [19]:
writer = pd.ExcelWriter('validate.xlsx')
# write dataframe to excel
validate.to_excel(writer)
# save the excel
writer.save()
print('DataFrame is written successfully to Excel File.')

DataFrame is written successfully to Excel File.


### Training & Testing Split

In [19]:
y_df = img_final[['label']]
X_df = img_final[['img_data']]


In [20]:
X = np.asarray(X_df)
y = np.asarray(y_df)
y = y.flatten()

In [21]:
X_final = []
for i in X: # Just use reshape here instead
    for g in i:
        X_final.append(g)
    

In [22]:
X_f = np.array(X_final)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(
     X_f, y, random_state=600)

print(f'X_train.shape = {X_train.shape}')
print(f'X_test.shape  = {X_test.shape}')
print(f'y_train.shape = {y_train.shape}')
print(f'y_test.shape  = {y_test.shape}')

X_train.shape = (559, 400)
X_test.shape  = (187, 400)
y_train.shape = (559,)
y_test.shape  = (187,)


### Implementing SVM Model

In [24]:
sv = SVC(C=1,kernel='poly',gamma = 'auto')

In [25]:
sv.fit(X_train, y_train)

SVC(C=1, gamma='auto', kernel='poly')

In [26]:
predict = sv.predict(X_test)

In [27]:
print(f'Model accuracy is {accuracy_score(y_test, predict)*100:.2f}%')

Model accuracy is 67.91%


In [28]:
predict

array([1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0])

In [29]:
y_test

array([0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0])