## Importing Required Packages

In [1]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd

## Setting up data extractor

In [2]:
def get_files(path):
    """Return the names of the files located directly inside ``path``.

    Only the top level of the directory is inspected; files that live in
    subdirectories are not included.

    Parameters
    ----------
    path : str
        Directory to list.

    Returns
    -------
    list of str
        Bare file names (without the directory prefix), in the order
        reported by ``os.walk``. An empty list is returned when ``path``
        does not exist or is not a directory, so callers can always
        iterate over the result.
    """
    # os.walk yields the top-level directory first. It yields nothing at all
    # for a missing/invalid path, hence the default tuple with an empty list.
    _root, _subdirs, files = next(os.walk(path), (path, [], []))
    return files

## Extracting COVID-19 data, normalizing image size (50×50) & then adding to a DataFrame

In [3]:
# Directory holding the COVID-positive CT scans.
c_path = "./COVID19_Lung_CT/CT_COVID"
c_files = get_files(c_path)
c_data = []

# Turn every scan into one [flattened_pixels, label] record; label 1 = COVID.
for img in c_files:
    img_raw = cv2.imread(f'{c_path}/{img}')
    # cv2.imread does not raise on failure: it returns None for files it
    # cannot decode (e.g. stray non-image files), which would make
    # cv2.resize crash. Skip those instead of aborting the whole load.
    if img_raw is None:
        print(f'Skipping unreadable file: {img}')
        continue
    # Bring all scans to a common 50x50 size so every record has the same length.
    img_norm = cv2.resize(img_raw,(50,50))
    img_flat = np.array(img_norm).flatten()
    c_data.append([img_flat,1])

print(len(c_data))

349


In [4]:
# Wrap the COVID records in a two-column frame: the flattened pixel
# array of each scan and its class label.
df = pd.DataFrame(data=c_data, columns=['img_data', 'label'])
df

Unnamed: 0,img_data,label
0,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ...",1
1,"[6, 8, 8, 6, 8, 9, 5, 7, 8, 8, 7, 9, 9, 8, 10,...",1
2,"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 8...",1
3,"[1, 1, 1, 6, 6, 6, 59, 59, 59, 65, 65, 65, 69,...",1
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
...,...,...
344,"[50, 50, 50, 51, 51, 51, 52, 52, 52, 52, 52, 5...",1
345,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ...",1
346,"[0, 0, 0, 76, 76, 76, 76, 76, 76, 79, 79, 79, ...",1
347,"[236, 236, 236, 237, 237, 237, 237, 237, 237, ...",1


## Extracting Non-COVID-19 data, normalizing image size (50×50) & then adding to a DataFrame

In [5]:
# Directory holding the non-COVID CT scans.
nc_path = "./COVID19_Lung_CT/CT_NonCOVID"
nc_files = get_files(nc_path)
nc_data = []

# Turn every scan into one [flattened_pixels, label] record; label 0 = non-COVID.
for img in nc_files:
    img_raw = cv2.imread(f'{nc_path}/{img}')
    # cv2.imread does not raise on failure: it returns None for files it
    # cannot decode (e.g. stray non-image files), which would make
    # cv2.resize crash. Skip those instead of aborting the whole load.
    if img_raw is None:
        print(f'Skipping unreadable file: {img}')
        continue
    # Bring all scans to a common 50x50 size so every record has the same length.
    img_norm = cv2.resize(img_raw,(50,50))
    img_flat = np.array(img_norm).flatten()
    nc_data.append([img_flat,0])

print(len(nc_data))

397


In [6]:
# Wrap the non-COVID records in a two-column frame: the flattened pixel
# array of each scan and its class label.
df2 = pd.DataFrame(data=nc_data, columns=['img_data', 'label'])
df2

Unnamed: 0,img_data,label
0,"[70, 70, 70, 63, 63, 63, 68, 68, 68, 66, 66, 6...",0
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0
2,"[18, 18, 18, 20, 20, 20, 19, 19, 19, 19, 19, 1...",0
3,"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, ...",0
4,"[3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",0
...,...,...
392,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0
393,"[84, 84, 84, 90, 90, 90, 83, 83, 83, 118, 118,...",0
394,"[214, 219, 224, 207, 210, 215, 225, 229, 234, ...",0
395,"[42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 4...",0


In [7]:
# Stack the COVID and non-COVID frames into one dataset with a fresh 0..n-1 index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and gives the same result here.
img_master = pd.concat([df, df2], ignore_index=True)
img_master

Unnamed: 0,img_data,label
0,"[255, 255, 255, 255, 255, 255, 255, 255, 255, ...",1
1,"[6, 8, 8, 6, 8, 9, 5, 7, 8, 8, 7, 9, 9, 8, 10,...",1
2,"[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 8...",1
3,"[1, 1, 1, 6, 6, 6, 59, 59, 59, 65, 65, 65, 69,...",1
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
...,...,...
741,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0
742,"[84, 84, 84, 90, 90, 90, 83, 83, 83, 118, 118,...",0
743,"[214, 219, 224, 207, 210, 215, 225, 229, 234, ...",0
744,"[42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 4...",0


In [8]:
# Shuffle all rows so the two classes are interleaved rather than stored in
# two contiguous runs, then renumber the index. A fixed random_state keeps the
# shuffle identical across kernel restarts, making later results reproducible.
img_final = img_master.sample(frac=1, random_state=42).reset_index(drop=True)
img_final

Unnamed: 0,img_data,label
0,"[45, 45, 45, 46, 46, 46, 45, 45, 45, 47, 47, 4...",0
1,"[31, 29, 29, 36, 31, 32, 81, 80, 81, 47, 47, 4...",1
2,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",1
3,"[68, 68, 68, 62, 62, 62, 64, 64, 64, 64, 64, 6...",1
4,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0
...,...,...
741,"[46, 46, 46, 40, 40, 40, 46, 46, 46, 46, 46, 4...",1
742,"[1, 1, 1, 1, 1, 1, 14, 14, 14, 51, 51, 51, 50,...",1
743,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0
744,"[0, 0, 0, 77, 77, 77, 83, 83, 83, 79, 79, 79, ...",1
