# Differentiating Regular and Pneumonia X-rays with ML
### By Haoyuan Pang and Bradley Russavage

In [None]:
# Import required libraries
import os
import cv2 # opencv
import requests
import io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

## Define Functions

In [None]:
# Dataset locations, resolved against the directory the notebook is run from.
# Each path keeps its trailing slash because later code appends names directly.
main_path = f"{os.getcwd()}/data/covid_xray/"
train_path = f"{main_path}train/"
test_path = f"{main_path}test/"

# Side length, in pixels, that every image is resized to
max_dim = 200

def precision_recall(confmatrix):
    """Compute per-class precision and recall from a confusion matrix.

    Args:
        confmatrix: Square array-like where entry ``[i, j]`` counts samples of
            true class ``i`` predicted as class ``j`` (the layout returned by
            ``sklearn.metrics.confusion_matrix``).

    Returns:
        A ``(precision, recall)`` tuple of float arrays with one entry per
        class. A class that was never predicted (precision) or never present
        (recall) gets 0.0 instead of NaN.
    """
    # Use the argument itself; the previous body read an unrelated global name.
    matrix = np.asarray(confmatrix, dtype=float)

    tp = np.diag(matrix)
    predicted_totals = np.sum(matrix, axis=0)  # tp + fp, per column
    actual_totals = np.sum(matrix, axis=1)     # tp + fn, per row

    # Divide only where the denominator is non-zero so 0/0 yields 0.0
    # rather than NaN plus a RuntimeWarning.
    precision = np.divide(
        tp, predicted_totals, out=np.zeros_like(tp), where=predicted_totals > 0
    )
    recall = np.divide(
        tp, actual_totals, out=np.zeros_like(tp), where=actual_totals > 0
    )

    return precision, recall

# Requires images to be downloaded
def convert_images(subpath):
    """Load, resize and label every X-ray image found under *subpath*.

    ``subpath`` must contain the sub-directories ``NORMAL/`` and
    ``PNEUMONIA/``. Each readable image is shrunk to ``max_dim`` by
    ``max_dim`` pixels and converted from OpenCV's BGR channel order to RGB.

    Args:
        subpath: Directory path ending in a path separator (for example
            ``train_path``); class folder names are appended to it directly.

    Returns:
        A list of ``[image, label]`` pairs, where ``label`` is 0 for images
        from ``NORMAL/`` and 1 for images from ``PNEUMONIA/``.

    Files that cannot be read or converted are reported with ``print`` and
    skipped, so one bad file does not abort the whole conversion.
    """
    class_labels = (('NORMAL/', 0), ('PNEUMONIA/', 1))

    data = []
    for class_dir, label in class_labels:
        path = subpath + class_dir
        for xray in os.listdir(path):
            # cv2.imread reports failure by returning None, not by raising
            img = cv2.imread(path + xray)
            if img is None:
                print("Could not read image: " + path + xray)
                continue

            try:
                # interpolation must be a keyword argument: the third
                # positional parameter of cv2.resize is the output array.
                img_resize = cv2.resize(
                    img, (max_dim, max_dim), interpolation=cv2.INTER_AREA
                )
                # cvtColor returns a new array, so the result must be kept
                img_rgb = cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB)
            except cv2.error as e:
                print(e)
                continue

            data.append([img_rgb, label])

        print("Finished " + class_dir)
    return data
        



## Data preprocessing

In [None]:
# Only run these steps if you have downloaded the image data manually

#data = convert_images(train_path)
#data += convert_images(test_path)

# Write to a file for ease of data access
#data = np.asarray(data, dtype=object)
#np.save(main_path + "data.npy", data)

# Load data from data.npy
# data = np.load(main_path + "data.npy", allow_pickle=True)
# print(data[0])

In [None]:
# Load data from silk server. This may take a while
URL = "https://brussava.w3.uvm.edu/cs254/data.npy"

# Always pass a timeout: without one, requests waits indefinitely on a stalled
# connection. The tuple is (connect, read) in seconds; the read timeout limits
# the gap between received bytes, not the total download time.
resp = requests.get(URL, timeout=(10, 120))
resp.raise_for_status()

# SECURITY NOTE: allow_pickle=True unpickles the downloaded bytes, which can
# execute arbitrary code. Only load this file from a source you trust.
data = np.load(io.BytesIO(resp.content), allow_pickle=True)

print(f"Data size: {len(data)}")

# One row per sample: the image array and its 0/1 pneumonia label
df = pd.DataFrame(data, columns=["Image", "Pneumonia"])
df.head()

In [None]:
# Inspect one X-ray: dump the raw pixel array of the second sample
sample_image = data[1][0]
print(sample_image)

# To view it as a picture instead (opens a popup window):
#cv2.imshow('Test', data[1][0])
#cv2.waitKey(0) # Do not delete this line or the notebook kernel will crash

In [None]:
# Separate the (image, label) pairs into a feature array and a label array
X = np.array([image for image, _ in data])
y = np.array([label for _, label in data])

# Hold out 25% of the samples for testing; fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Scale pixel values from the 0-255 range down to 0-1
x_train, x_test = (np.array(subset) / 255 for subset in (x_train, x_test))

## Model here

In [None]:
# code

## Results

In [None]:
# Confusion matrix
# NOTE: y_predicted must be produced by the model cell before this cell runs.
confmatrix = confusion_matrix(y_test, y_predicted)

# Plot the matrix that was just computed (the name must match the assignment)
cm_plt = plt.imshow(confmatrix)
plt.title("Confusion Matrix")
# sklearn lays the matrix out with true labels on rows, predictions on columns
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.colorbar(cm_plt)
plt.show()

In [None]:
# Precision and recall
# precision_recall returns two arrays (one value per class), derived from the
# diagonal of the confusion matrix and its column / row sums.
precision, recall = precision_recall(confmatrix)

# Display in a df
# Optional: show both metrics side by side, one row per class
# prec_df = pd.DataFrame(precision, columns=['Precision'])
# recall_df = pd.DataFrame(recall, columns=['Recall'])
# print(pd.concat([prec_df, recall_df], axis=1))