# 1 Prerequisites

In [32]:
import gdown
from pickle import dump, load
import shutil
import os
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam

## 1.1 Required global functions

In [11]:
def download_from_drive(filename, file_id):
    url = f"https://drive.google.com/uc?export=download&id={file_id}"
    gdown.download(url, filename, quiet=False)

def load_dataset(image_size=(150, 150)):

    categories = ["NORMAL", "COVID"]
    datasets_name_list = ["test", "train"]
    X = [[], []] # 0 for test & 1 for train
    y = [[], []] # 0 for test & 1 for train

    for i, dataset_name in enumerate(datasets_name_list):
        for label, category in enumerate(categories):
            dir_path = "/kaggle/working/dataset1/" + dataset_name + '/' + category + '/'
            for filename in os.listdir(dir_path):
                img_path = os.path.join(dir_path, filename)
                img = Image.open(img_path).convert("RGB")
                img = img.resize(image_size)
                img_array = np.array(img)
                X[i].append(img_array)
                y[i].append(label) # NORMAL = 0, COVID = 1

    X_train = np.array(X[1])
    y_train = np.array(y[1])
    X_test = np.array(X[0])
    y_test = np.array(y[0])

    return X_train, y_train, X_test, y_test
        

## 1.2 Downloading & Loading the dataset

In [7]:
download_from_drive(
    filename="Datasets.rar",
    file_id="1wM1NufVrRtbHuLmeBjiOls_e80Qrsoy-"
)

Downloading...
From (original): https://drive.google.com/uc?export=download&id=1wM1NufVrRtbHuLmeBjiOls_e80Qrsoy-
From (redirected): https://drive.google.com/uc?export=download&id=1wM1NufVrRtbHuLmeBjiOls_e80Qrsoy-&confirm=t&uuid=4c6dd7af-157c-41c9-bdd4-0caa4dc59bae
To: /kaggle/working/Datasets.rar
100%|██████████| 220M/220M [00:02<00:00, 81.0MB/s] 


In [8]:
# Extract the Datasets.rar file in the current directory
!unrar x "Datasets.rar" ./


UNRAR 6.11 beta 1 freeware      Copyright (c) 1993-2022 Alexander Roshal


Extracting from Datasets.rar

Creating    ./dataset2                                                OK
Creating    ./dataset2/test                                           OK
Creating    ./dataset2/test/NORMAL                                    OK
Extracting  ./dataset2/test/NORMAL/NORMAL(1266).jpg                      0  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1267).jpg                      0  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1268).jpg                      0  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1269).jpg                      1  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1270).jpg                      1  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1271).jpg                      1  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1272).jpg                      2  OK 
Extracting  ./dataset2/test/NORMAL/NORMAL(1273).jpg                      2  OK 
Extracting  ./dataset2/test/NORMAL/

In [12]:
X_train, y_train, X_test, y_test = load_dataset()

In [23]:
num_train = len(X_train[0])
num_test = len(X_test[0])
num_covid = sum(y_train[y_train==1]) + sum(y_test[y_test==1])
print(f"Number of Training Samples: {num_train}\nNumber of Test Samples: {num_test}\nNumber of COVID samples: {num_covid}\nNumber of Normal samples: {num_train + num_test - num_covid}")

Number of Training Samples: 150
Number of Test Samples: 150
Number of COVID samples: 94
Number of Normal samples: 206


## 1.3 CNN Architecture

In [41]:
def define_model(input_shape=(150,150,3), lr=0.001):
    model = Sequential(
        [
            # Input
            Input(shape=input_shape),
            
            # Conv1
            Conv2D(64, kernel_size=(3,3), activation='relu', padding='same'),
            BatchNormalization(axis=-1),
            MaxPooling2D(pool_size=(2,2)),
            Dropout(0.2),

            # Conv2
            Conv2D(64, kernel_size=(3,3), activation='relu', padding='same'),
            BatchNormalization(axis=-1),
            MaxPooling2D(pool_size=(2,2)),
            Dropout(0.2),

            # Conv3
            Conv2D(128, kernel_size=(3,3), activation='relu', padding='same'),
            BatchNormalization(axis=-1),
            MaxPooling2D(pool_size=(2,2)),
            Dropout(0.2),

            # Conv4
            Conv2D(128, kernel_size=(3,3), activation='relu', padding='same'),
            BatchNormalization(axis=-1),
            MaxPooling2D(pool_size=(2,2)),
            Dropout(0.2),

            # Conv5
            Conv2D(256, kernel_size=(3,3), activation='relu', padding='same'),
            BatchNormalization(axis=-1),
            MaxPooling2D(pool_size=(2,2)),
            Dropout(0.2),

            # Conv6
            Conv2D(256, kernel_size=(3,3), activation='relu', padding='same'),
            BatchNormalization(axis=-1),
            MaxPooling2D(pool_size=(2,2)),
            Dropout(0.2),

            # Flatten Layer
            Flatten(),

            # FCL
            Dense(512, activation='relu'),
            BatchNormalization(axis=-1),
            Dense(256, activation='relu'),
            BatchNormalization(axis=-1),

            # Output
            Dense(1, activation='sigmoid')
        ]
    )

    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

In [43]:
model = define_model(
    input_shape=(150,150,3),
    lr=0.001
)
model.summary()

# 2 Data Collection and Image Preprocessing (25 points)

## 2.1 Flipping

## 2.2 Rotating 90 degrees

## 2.3 Rotating 180 degrees

## 2.4 Rotating 270 degrees