# COVisualize-19

## COMS 4771 Final Project

Classifying COVID-19 patients from lung scans.

Anthony Krivonos

### Library Imports

In [9]:
import os, sys

from os import mkdir, chdir
from os.path import realpath, join, dirname, exists

import cv2
import numpy as np
import pandas as pd

### Import Data

In [10]:
"""
    Read CSVs into DataFrames.
"""

# Load the train manifest first, then the test manifest, each into its own DataFrame.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

# Last expression of the cell: shows the test manifest as the cell output.
test_data

Unnamed: 0,id,filename
0,0,test/img-0.jpeg
1,1,test/img-1.jpeg
2,2,test/img-2.jpeg
3,3,test/img-3.jpeg
4,4,test/img-4.jpeg
...,...,...
479,479,test/img-479.jpeg
480,480,test/img-480.jpeg
481,481,test/img-481.jpeg
482,482,test/img-482.jpeg


### Quick Settings

Keep these updated, so we only have to do certain tasks (like preprocessing) once.

In [11]:
# Master switch: when False, none of the preprocessing cells do any work.
SETTINGS = {
    "PREPROCESS": True,
}

# Per-method switches: each one gates the cell that writes that method's images.
# A method only runs when both its own flag and "PREPROCESS" are True.
SETTINGS.update({
    "PREPROCESS_METHOD_A": False,
    "PREPROCESS_METHOD_B": False,
    "PREPROCESS_METHOD_C": False,
    "PREPROCESS_METHOD_D": False,
})

### Image Preprocessing

#### Method A – Square images w/ padding

1. Find smallest-dimension training image dimension. This will be the standard image size for both training and testing.
2. Resize every image into a square whose side length is that smallest dimension. While doing this, center the image and pad its shorter axis (top and bottom for wide images, left and right for tall images).

#### Method B – Crop images, ignoring aspect ratio

1. Find smallest-dimension training image dimensions. This will be the standard image size for both training and testing.
2. Resize every image to this height and width, ignoring the aspect ratio of each image.

#### Method C – Crop images to smallest size, maintaining aspect ratio

1. Find smallest-dimension training image dimensions. This will be the standard image size for both training and testing.
2. Resize and crop every image to this height and width, maintaining the aspect ratio of each image and centering its source.

#### Method D – Crop images to square, maintaining aspect ratio

1. Find smallest-dimension training image dimension. This will be the standard image size for both training and testing.
2. Resize and crop every image to this size, maintaining the aspect ratio of each image and centering its source.

##### Tools
- Uses OpenCV (`cv2`)

In [12]:
##
#   Directory Names
##

# Root folder that receives every preprocessed image
PROCESSED_DIRECTORY = 'processed'

# One subfolder name per preprocessing method (A through D)
(
    METHOD_A_DIRECTORY,
    METHOD_B_DIRECTORY,
    METHOD_C_DIRECTORY,
    METHOD_D_DIRECTORY,
) = ('method_a', 'method_b', 'method_c', 'method_d')


##
#   Image Writing Helper Function
##
def relative_imwrite(relative_filepath, img):
    """
    Call cv2.imwrite(..., img) on a relative file path, creating any missing
    parent directories first.
    :param relative_filepath: The path (i.e. 'processed/train/img-2.jpeg').
    :param img: The cv2 image.
    """

    # Create the whole directory chain in one call; exist_ok makes re-runs
    # harmless. A bare file name has no directory part, so nothing to create.
    directory = dirname(relative_filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Write straight to the target path rather than chdir-ing into it, so the
    # process working directory is never modified (and therefore can never be
    # left pointing somewhere else if directory creation or the write raises).
    cv2.imwrite(relative_filepath, img)


if SETTINGS["PREPROCESS"]:

    # Maps of image ids (the 'id' CSV column) to CV2 images
    train_images = {}
    test_images = {}

    min_train_image_width = sys.maxsize
    min_train_image_height = sys.maxsize

    ##
    #   Traverse train images and record smallest dimension
    ##

    # Find the smallest training image, storing the images as they're traversed
    for image_id, filename in zip(train_data['id'], train_data['filename']):

        # Read image; cv2.imread signals failure by returning None rather than
        # raising, so fail here with the offending path instead of crashing
        # later with an opaque AttributeError on `.shape`.
        image = cv2.imread(filename)
        if image is None:
            raise OSError("Could not read training image: " + str(filename))
        train_images[image_id] = image

        # Record minimum height and width (tracked independently, so they may
        # come from two different images)
        height, width = image.shape[:2]
        min_train_image_width = min(min_train_image_width, width)
        min_train_image_height = min(min_train_image_height, height)

    # Instantiate the smallest size of the two dimensions in another variable
    min_train_image_size = min(min_train_image_width, min_train_image_height)

    ##
    #   Traverse test images only
    ##

    for image_id, filename in zip(test_data['id'], test_data['filename']):

        # Read image, refusing to silently store None for an unreadable file
        image = cv2.imread(filename)
        if image is None:
            raise OSError("Could not read test image: " + str(filename))
        test_images[image_id] = image

In [13]:
    
##
#   Method A
##

def resize_and_pad(cv2_img, to_size, padding_color=0):
    """
    Resize the given CV2 image to fit inside a square with the given side length, and then pad it with the given color.
    The image keeps its aspect ratio: its longer side is scaled to to_size and the shorter axis is padded evenly
    on both sides (any odd leftover pixel goes to the bottom/right).
    Adapted from https://stackoverflow.com/questions/44720580/resize-image-canvas-to-maintain-square-aspect-ratio-in-python-opencv.
    :param cv2_img: The CV2 image obtained via cv2.imread(...).
    :param to_size: The desired size int.
    :param padding_color: A color int, list, tuple, or ndarray.
    :return: The padded image.
    """

    # Create height and width variables for better naming
    to_height = to_width = to_size

    # Get actual image dimensions
    height, width = cv2_img.shape[:2]
    aspect_ratio = width / height

    # Interpolate differently based on the image's relative size
    if height > to_height or width > to_width:
        # Shrink image via inter area as its too large
        interp = cv2.INTER_AREA
    else:
        # Stretch image via inter cubic as its too small
        interp = cv2.INTER_CUBIC

    # Height and width we're resizing the image to
    new_height, new_width = to_height, to_width

    # Padding around the new image's inner edges
    pad_left = pad_right = pad_top = pad_bot = 0

    if aspect_ratio > 1:
        # Image is horizontal, so requires vertical padding.
        # Clamp to 1 so a very wide image never rounds to a zero-height resize.
        new_height = max(1, int(round(new_width / aspect_ratio)))
        pad_total = to_height - new_height
        pad_top = pad_total // 2
        pad_bot = pad_total - pad_top

    elif aspect_ratio < 1:
        # Image is vertical, so requires horizontal padding.
        # Clamp to 1 so a very tall image never rounds to a zero-width resize.
        new_width = max(1, int(round(new_height * aspect_ratio)))
        pad_total = to_width - new_width
        pad_left = pad_total // 2
        pad_right = pad_total - pad_left

    # If only one color is provided and the image has a channel axis, then set the padding color to an array of length 3.
    # (Compared with ==, not `is`: identity of int literals is an implementation detail.)
    if len(cv2_img.shape) == 3 and not isinstance(padding_color, (list, tuple, np.ndarray)):
        padding_color = [padding_color] * 3

    # Resize the image to the newly calculated dimensions and interpolation strategy
    new_img = cv2.resize(cv2_img, (new_width, new_height), interpolation=interp)

    # Add the calculated borders around the image
    new_img = cv2.copyMakeBorder(new_img, pad_top, pad_bot, pad_left, pad_right, borderType=cv2.BORDER_CONSTANT, value=padding_color)

    return new_img

if SETTINGS["PREPROCESS"] and SETTINGS["PREPROCESS_METHOD_A"]:

    # Border color handed to resize_and_pad (0 = black)
    COLOR = 0

    ##
    #   Training set: pad to square, then write under <processed>/<method_a>/train
    ##
    for image_id, source_image in train_images.items():
        padded_image = resize_and_pad(source_image, min_train_image_size, COLOR)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_A_DIRECTORY, "train", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, padded_image)

    ##
    #   Test set: same square size as the training set, written under .../test
    ##
    for image_id, source_image in test_images.items():
        padded_image = resize_and_pad(source_image, min_train_image_size, COLOR)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_A_DIRECTORY, "test", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, padded_image)

In [14]:
    
##
#   Method B
##

def resize_ignoring_aspect_ratio(cv2_img, to_width, to_height):
    """
    Stretch or squash the image to exactly the requested dimensions, without preserving its aspect ratio.
    :param cv2_img: The image to resize.
    :param to_width: The desired width.
    :param to_height: The desired height.
    :return: A new, resized cv2 image.
    """
    # cv2.resize takes the target size as a (width, height) pair
    target_dims = (to_width, to_height)
    stretched_img = cv2.resize(cv2_img, target_dims, interpolation=cv2.INTER_AREA)
    return stretched_img

if SETTINGS["PREPROCESS"] and SETTINGS["PREPROCESS_METHOD_B"]:

    ##
    #   Training set: stretch to the minimum training width/height and write under .../train
    ##
    for image_id, source_image in train_images.items():
        stretched_image = resize_ignoring_aspect_ratio(source_image, min_train_image_width, min_train_image_height)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_B_DIRECTORY, "train", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, stretched_image)

    ##
    #   Test set: same target dimensions as the training set, written under .../test
    ##
    for image_id, source_image in test_images.items():
        stretched_image = resize_ignoring_aspect_ratio(source_image, min_train_image_width, min_train_image_height)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_B_DIRECTORY, "test", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, stretched_image)
        

In [15]:
    
##
#   Method C
##

def resize_maintaining_aspect_ratio(cv2_img, to_width, to_height):
    """
    Crop the given cv2 image to the desired width and height, maintaining the image's original aspect ratio.
    The image is first scaled by the smallest factor that makes it cover the whole target box, then the
    overflow is trimmed equally from both sides so the result is centered on the source.
    :param cv2_img: The image to crop.
    :param to_width: The desired width.
    :param to_height: The desired height.
    :return: The new resized and cropped image.
    """

    # Create height and width variables for better naming
    height, width = cv2_img.shape[:2]
    aspect_ratio = width / height

    # Resizing: pick the side that limits coverage. Comparing the cross products is the
    # integer-exact form of "to_height / height >= to_width / width".
    if width * to_height >= to_width * height:
        # Image is at least as wide (relative to its height) as the target: match heights,
        # the width overflows and gets cropped.
        new_height = to_height
        # max(...) guards against float truncation landing one pixel short of the target
        new_width = max(to_width, int(aspect_ratio * new_height))
    else:
        # Image is relatively taller than the target: match widths, the height overflows.
        new_width = to_width
        new_height = max(to_height, int(new_width / aspect_ratio))
    resized_img = cv2.resize(cv2_img, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Cropping: centered window of exactly to_height x to_width
    # (an odd leftover pixel is trimmed from the bottom/right)
    left_padding = (new_width - to_width) // 2
    top_padding = (new_height - to_height) // 2
    cropped_img = resized_img[top_padding:(top_padding + to_height), left_padding:(left_padding + to_width)]

    return cropped_img

if SETTINGS["PREPROCESS"] and SETTINGS["PREPROCESS_METHOD_C"]:

    ##
    #   Training set: resize + center-crop to the minimum training width/height, write under .../train
    ##
    for image_id, source_image in train_images.items():
        cropped_image = resize_maintaining_aspect_ratio(source_image, min_train_image_width, min_train_image_height)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_C_DIRECTORY, "train", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, cropped_image)

    ##
    #   Test set: same target dimensions as the training set, written under .../test
    ##
    for image_id, source_image in test_images.items():
        cropped_image = resize_maintaining_aspect_ratio(source_image, min_train_image_width, min_train_image_height)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_C_DIRECTORY, "test", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, cropped_image)

        

In [16]:
    
##
#   Method D
##

def resize_to_square_maintaining_aspect_ratio(cv2_img, to_size):
    """
    Produce a cropped square image by delegating to resize_maintaining_aspect_ratio with equal width and height.
    :param cv2_img: The image to crop.
    :param to_size: The desired square side size.
    :return: The new resized and cropped image.
    """
    # A square is just the width == height case of the general resize-and-crop
    square_img = resize_maintaining_aspect_ratio(cv2_img, to_width=to_size, to_height=to_size)
    return square_img

if SETTINGS["PREPROCESS"] and SETTINGS["PREPROCESS_METHOD_D"]:

    ##
    #   Training set: resize + center-crop to a square of the smallest training dimension, write under .../train
    ##
    for image_id, source_image in train_images.items():
        square_image = resize_to_square_maintaining_aspect_ratio(source_image, min_train_image_size)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_D_DIRECTORY, "train", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, square_image)

    ##
    #   Test set: same square size as the training set, written under .../test
    ##
    for image_id, source_image in test_images.items():
        square_image = resize_to_square_maintaining_aspect_ratio(source_image, min_train_image_size)
        target_path = "/".join((PROCESSED_DIRECTORY, METHOD_D_DIRECTORY, "test", "img-" + str(image_id) + ".jpeg"))
        relative_imwrite(target_path, square_image)

