# Detecting Covid-19

In [None]:
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.applications import NASNetLarge
from tensorflow.keras.optimizers import Nadam, SGD
from tensorflow.keras.callbacks import TensorBoard
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
from skimage.io import imread
from sklearn.metrics import classification_report, confusion_matrix
import os
import shutil

## Preparing dataset

In [None]:
# Create the dataset directory; exist_ok=True keeps the cell safe to re-run
# when the directory is already present.
os.makedirs("dataset/covid-chestray", exist_ok=True)

In [None]:
# NOTE(review): no repository URL is given — `git clone` treats its first
# argument as the source repository, so this presumably should read
# `!git clone <repository-url> dataset/covid-chestray`. TODO confirm the URL.
!git clone dataset/covid-chestray

In [None]:
# Training hyper-parameters.
batch_size, epochs = 64, 100

# Category lookup (empty at this point) and the class count derived from it.
categories = dict()
num_classes = len(categories)

# Echo the configuration so it is visible in the cell output.
print("Categories: ", categories)
print("Number of classes: ", num_classes)

class COVSequence(Sequence):
    """Keras ``Sequence`` that serves image batches named in a list file.

    Every entry of the list file is a space-separated line whose first field
    is the image path and whose second field is the integer class label
    (``<path> <label>``). Images are loaded on demand, converted to RGB,
    resized to ``IMAGE_SIZE`` and scaled to the ``[0, 1]`` range; labels are
    returned one-hot encoded.
    """

    # (height, width) every image is resized to before batching.
    IMAGE_SIZE = (128, 128)

    def __init__(self, list_path, num_classes, batch_size, shuffle=True, transform=None, data_aug=None, nrows=None):
        """Read the list file and store the batching configuration.

        Args:
            list_path: Path of the text file listing ``<path> <label>`` entries.
            num_classes: Number of classes used for the one-hot encoding.
            batch_size: Maximum number of samples per batch.
            shuffle: Shuffle the entries on load and after every epoch.
            transform: Stored on the instance; not applied by this class.
            data_aug: Stored on the instance; not applied by this class.
            nrows: Optional cap on the number of entries read from
                ``list_path`` (``None`` reads the whole file).
        """
        super().__init__()
        self.list_path = list_path
        self.num_classes = num_classes
        self.batch_size = batch_size
        # Must be set before loading: load_imageset_from_file reads it.
        self.shuffle = shuffle
        self.transform = transform
        self.data_aug = data_aug
        self.file_paths = self.load_imageset_from_file(list_path, nrows=nrows)

    def __len__(self):
        """Return the number of batches per epoch (the last may be partial)."""
        # Ceiling division so a trailing partial batch is still served.
        return (len(self.file_paths) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, one_hot_labels)``."""
        start = idx * self.batch_size
        batch_paths = self.file_paths[start:start + self.batch_size]
        height, width = self.IMAGE_SIZE
        # Size the buffers by the actual slice so a short final batch never
        # carries uninitialised rows from np.empty.
        batch_images = np.empty((len(batch_paths), height, width, 3))
        batch_labels = np.empty(len(batch_paths), dtype=int)
        for i, entry in enumerate(batch_paths):
            batch_images[i], batch_labels[i] = self.process_image(entry)
        return batch_images, to_categorical(batch_labels, num_classes=self.num_classes)

    def on_epoch_end(self):
        """Reshuffle the entries in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.file_paths)

    @staticmethod
    def _parse_entry(entry):
        """Split a ``<path> <label>`` entry into ``(path, int_label)``."""
        fields = entry.split(' ')
        return fields[0].strip(), int(fields[1])

    def process_image(self, img_path):
        """Load one list entry and return ``(image_array, label)``.

        ``img_path`` is a full list-file entry (path and label), not a bare
        path. The image comes back as an RGB array scaled to ``[0, 1]``.
        """
        path, label = self._parse_entry(img_path)
        # load_img handles both the RGB conversion and the resize.
        img = image.load_img(path, color_mode="rgb", target_size=self.IMAGE_SIZE)
        return np.array(img) / 255, label

    def get_labels(self):
        """Return the integer labels of all entries, in current order."""
        return np.array([self._parse_entry(entry)[1] for entry in self.file_paths])

    def get_sample(self, size):
        """Return the first ``size`` images (fewer if the set is smaller)."""
        entries = self.file_paths[:size]
        height, width = self.IMAGE_SIZE
        sample_images = np.empty((len(entries), height, width, 3))
        for i, entry in enumerate(entries):
            # process_image returns (image, label); only the image is kept.
            sample_images[i] = self.process_image(entry)[0]
        return sample_images

    def load_imageset_from_file(self, path, nrows=None):
        """Read list entries from ``path`` into a NumPy array of strings.

        Blank lines are skipped. At most ``nrows`` entries are read when it
        is given. The result is shuffled when ``self.shuffle`` is true.
        """
        entries = []
        with open(path) as f:
            for line in f:
                if nrows is not None and len(entries) >= nrows:
                    break
                entry = line.rstrip()
                if entry:
                    entries.append(entry)
        files = np.array(entries)
        if self.shuffle:
            np.random.shuffle(files)
        return files

## Defining model

In [None]:
# Pre-trained Xception backbone without its ImageNet classification head.
# Layers are reached through the `keras` alias imported at the top of the file.
base_model = keras.applications.Xception(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
x = keras.layers.GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = keras.layers.Dense(256, activation='relu')(x)
# and a dropout to prevent overfitting
x = keras.layers.Dropout(0.2)(x)
# and a softmax output layer with one unit per category
predictions = keras.layers.Dense(len(categories), activation='softmax')(x)

# this is the model we will train
model = keras.Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional Xception layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])