In [None]:
!pip install imutils

# **1. Data manipulation**

## **1.1 Import libraries**

In [None]:

import time
import os
import cv2
import random
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchvision import models
from torch import optim
from sklearn.metrics import *
from PIL import Image
import shutil
import copy
from collections import OrderedDict

In [None]:
num_workers = 4 #change this parameter based on your system configuration
batch_size = 32 #change this parameter based on your system configuration
seed = 24
# Seeds only Python's `random` module; NumPy and torch are seeded in the per-model setup cells.
random.seed(seed)
# Column names of the metric tables built by calc_metrics.
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-score']
# Class names; they are also the sub-folder names read by showDatasetSize.
classes = ['covid', 'normal', 'pneumonia']
num_classes = len(classes)
# Split folder names expected under DATA_PATH.
splits = ['train', 'validation', 'testing']

In [None]:
# Output directory for generated files.
DATA_FILES_PATH = "../working"
# Root of the image dataset; showDatasetSize and load_data read <split>/<class> folders below it.
DATA_PATH="../input/mycovid19gc/"
DATA_FILES_FULL_PATH = os.path.expanduser(DATA_FILES_PATH)
# Per-epoch checkpoint path template; `{}` is filled with the epoch number by OnEpochEndCallback.
SAVED_MODEL_FILE_NAME = os.path.join(DATA_FILES_FULL_PATH, "keras_spell_e{}.h5") # an HDF5 file


## **2. Statistics**

In [None]:
def showDatasetSize(data_path=DATA_PATH):
    """Count the image files per class and split, print the table and return it.

    Args:
        data_path: dataset root containing one folder per entry of `splits`,
            each with one sub-folder per entry of `classes`.

    Returns:
        DataFrame indexed by class name plus a final 'TOTAL' row, with columns
        'label', one column per split, and 'total'. 'label' holds the class
        index as a string and is empty for the 'TOTAL' row.
    """
    counts = {
        split: [len(os.listdir(os.path.join(data_path, split, cls))) for cls in classes]
        for split in splits
    }
    df = pd.DataFrame(counts, index=classes, columns=splits)
    df['total'] = df.sum(axis=1)
    df.loc['TOTAL'] = df.sum(axis=0)
    # Row enlargement may upcast; keep the counts as integers.
    df = df.astype(int)
    # Labels are derived from `classes` so the table stays valid if classes are added or removed.
    df.insert(0, 'label', [str(i) for i in range(len(classes))] + [''])
    print(df)
    return df
df_dataset = showDatasetSize()

## **3. Load data**

In [None]:
def load_data(data_path=DATA_PATH, num_workers=num_workers):
    """Create ImageFolder datasets and DataLoaders for the three dataset splits.

    Every split gets the same preprocessing (resize to 224, centre crop 224,
    conversion to tensor). Only the training loader shuffles.

    Args:
        data_path: dataset root containing 'train', 'validation' and 'testing'.
        num_workers: worker processes used by each DataLoader.

    Returns:
        (train_data, train_loader, val_data, val_loader, test_data, test_loader)
    """
    preprocess = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    def make_split(folder, shuffle):
        # One dataset/loader pair for a single split folder.
        split_data = datasets.ImageFolder(root=data_path + folder, transform=preprocess)
        split_loader = torch.utils.data.DataLoader(
            split_data, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
        return split_data, split_loader

    train_data, train_loader = make_split('/train', True)
    val_data, val_loader = make_split('/validation', False)
    test_data, test_loader = make_split('/testing', False)
    return train_data, train_loader, val_data, val_loader, test_data, test_loader

train_data, train_loader, val_data, val_loader, test_data, test_loader = load_data()
# All three splits chained into one dataset.
dataset = torch.utils.data.ConcatDataset([train_data, val_data, test_data])

In [None]:
# NOTE(review): this cell repeats the two statements that end the previous cell; it rebuilds the same objects.
train_data, train_loader, val_data, val_loader, test_data, test_loader = load_data()
dataset = torch.utils.data.ConcatDataset([train_data, val_data, test_data])

In [None]:
# Shuffled loader over the concatenated train+validation+testing dataset; train_all_model iterates it.
dataset_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)


In [None]:
# Total number of samples across the three concatenated splits.
len(dataset)

In [None]:
def show_batch_images(dataset):
    """Plot up to 8 sample images per class from one random batch and save each grid as a PDF.

    A single shuffled batch of 25 * 8 images is drawn; for each class index the
    first 8 matching images are tiled into a grid, saved to 'x-ray[<index>].pdf'
    and displayed.

    Args:
        dataset: dataset yielding (image_tensor, label) pairs.
    """
    n_images, scale = 8, 3
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=25*n_images, shuffle = True)
    images, labels = next(iter(data_loader))
    for i, class_name in enumerate(classes):
        print(f'Class: {class_name}')
        images_category = images[labels==i][:n_images]
        if len(images_category) == 0:
            # make_grid cannot tile an empty selection; skip classes absent from this batch.
            print(f'No images of class {class_name} in the sampled batch')
            continue
        grid = torchvision.utils.make_grid(images_category, padding=20)
        npgrid = grid.cpu().numpy()
        fig = plt.figure(figsize=(40*scale/n_images, 20*scale/n_images))
        # make_grid returns channels-first; imshow expects channels-last.
        plt.imshow(np.transpose(npgrid, (1, 2, 0)), interpolation='nearest')
        # Save before show(): once the figure has been shown it may already be closed,
        # which produced blank files. One file per class instead of a single fixed name.
        fig.savefig(f'x-ray[{i}].pdf')
        plt.show()

show_batch_images(train_data)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'You are using {device}')

# **Model Development: using pre-trained modules**

In [None]:
def calc_metrics(predictions, actuals, row):
    """Compute accuracy and macro-averaged precision/recall/F1 as a one-row table.

    Args:
        predictions: sequence of per-batch prediction arrays.
        actuals: sequence of per-batch ground-truth arrays.
        row: index label for the resulting row.

    Returns:
        DataFrame with the columns listed in `metrics` and a single row `row`.
    """
    y_pred = np.concatenate(predictions, axis=0)
    y_true = np.concatenate(actuals, axis=0)
    macro = {"average": "macro"}
    scores = (
        ('Accuracy', accuracy_score(y_true, y_pred)),
        ('Precision', precision_score(y_true, y_pred, **macro)),
        ('Recall', recall_score(y_true, y_pred, **macro)),
        ('F1-score', f1_score(y_true, y_pred, **macro)),
    )
    result = pd.DataFrame(columns =metrics)
    for column, value in scores:
        result.loc[row, column] = value
    return result

def create_confusion_matrix(preds, y_test):
    """Cross-tabulate actual against predicted labels.

    Args:
        preds: sequence of per-batch predicted labels.
        y_test: sequence of per-batch true labels.

    Returns:
        DataFrame from pandas.crosstab with rows named 'ACTUAL' and columns
        named 'PREDICTED'.
    """
    # Flatten the per-batch containers into plain lists of Python ints.
    flat_pred = [int(item) for batch in preds for item in batch]
    flat_true = [int(item) for batch in y_test for item in batch]
    pairs = pd.DataFrame(
        {'y_Actual': flat_true, 'y_Predicted': flat_pred},
        columns=['y_Actual', 'y_Predicted'],
    )
    return pd.crosstab(
        pairs['y_Actual'], pairs['y_Predicted'],
        rownames=['ACTUAL'], colnames=['PREDICTED'],
    )

def plot_train_val_losses(df):
    """Line-plot the per-epoch loss curves stored in a training history frame.

    Plots whichever of 'train_loss' and 'validation_loss' are present, so the
    frame returned by train_all_model (training loss only) is accepted as well
    as the one returned by train_val_model.

    Args:
        df: DataFrame with an 'epoch' column and at least one loss column.

    Raises:
        KeyError: if neither loss column is present.
    """
    marker_by_series = {'train_loss': 'o', 'validation_loss': '<'}
    loss_columns = [name for name in marker_by_series if name in df.columns]
    if not loss_columns:
        raise KeyError("expected a 'train_loss' and/or 'validation_loss' column")
    df2 = pd.melt(df, id_vars=['epoch'], value_vars=loss_columns, var_name='process', value_name='loss')
    # Pass only the markers for the series that exist so their count matches the style levels.
    markers = {name: marker_by_series[name] for name in loss_columns}
    sns.lineplot(x = "epoch", y = "loss", data = df2, hue = "process",
                style = "process", palette = "hot", dashes = False,
                markers = markers,  legend="brief").set_title("Train and Validation Losses by Epoch")
    plt.show()

## **1.1 Define models**

In [None]:
def train_all_model(model):
    """Train `model` on the global `dataset_loader` and reload the best-scoring weights.

    Reads these notebook globals: model_name, n_epochs, dataset_loader, device,
    optimizer, criterion, use_scheduler and, only when use_scheduler is truthy,
    scheduler. There is no validation pass: the "best" epoch is the one with the
    highest mean per-batch training accuracy (ties go to the later epoch).

    Args:
        model: torch module already placed on `device`.

    Returns:
        (model, df): the model with the best epoch's weights loaded, and a
        DataFrame with one row per epoch and columns
        'model_name', 'epoch', 'train_loss', 'train_acc'.
    """
    t_start = time.time()
    # These module-level names are (re)set exactly as before; nothing in this function reads them.
    global best_val_model
    global best_val_loss
    global best_val_epoch
    best_val_loss = 1
    best_val_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    df = pd.DataFrame(columns = ['model_name', 'epoch', 'train_loss','train_acc'])
    # Report what is actually iterated: every sample behind dataset_loader, with no validation pass.
    print(f"Training model {model_name} with {len(dataset_loader.dataset)} samples and max of {n_epochs} epochs\n")
    train_size = len(dataset_loader)  # number of batches per epoch
    for epoch in range(1, n_epochs+1):
        t0 = time.time()
        model.train()
        train_loss, train_acc = 0.0, 0.0
        for data, target in dataset_loader:
            target = target.to(device)
            data = data.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            _, predicted = torch.max(outputs, dim = 1)
            # Fraction of correct predictions in this batch.
            train_acc += (predicted == target).double().mean().item()
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        epoch_loss = train_loss/train_size
        epoch_acc = train_acc/train_size
        print(f"Epoch {epoch}:\t train loss={epoch_loss:.5f} \t train acc={epoch_acc:.5f} \t time={(time.time() - t0):.2f}s")
        df.loc[len(df)] = [model_name, epoch, epoch_loss, epoch_acc]
        if epoch_acc >= best_acc :
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
        # No validation loss exists here (the old code divided by an undefined `val_size`),
        # so the optional scheduler is stepped on the mean training loss.
        if use_scheduler: scheduler.step(epoch_loss)
    time_elapsed = time.time() - t_start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, df

In [None]:
def train_val_model(model):
    """Train on `train_loader`, validate on `val_loader`, reload the best-validation weights.

    Reads these notebook globals: model_name, n_epochs, df_dataset, train_loader,
    val_loader, device, optimizer, criterion, use_scheduler and, only when
    use_scheduler is truthy, scheduler. The best epoch is the one with the
    highest mean per-batch validation accuracy (ties go to the later epoch).

    Args:
        model: torch module already placed on `device`.

    Returns:
        (model, df): the model with the best epoch's weights loaded, and a
        DataFrame with one row per epoch and columns 'model_name', 'epoch',
        'train_loss', 'train_acc', 'validation_loss', 'validation_acc'.
    """
    t_start = time.time()
    # These module-level names are (re)set exactly as before; nothing in this function reads them.
    global best_val_model
    global best_val_loss
    global best_val_epoch
    best_val_loss = 1
    best_val_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    df = pd.DataFrame(columns = ['model_name', 'epoch', 'train_loss','train_acc', 'validation_loss','validation_acc'])
    print(f"Training model {model_name} with {df_dataset.loc['TOTAL', 'train']} samples and max of {n_epochs} epochs, and validating with {df_dataset.loc['TOTAL', 'validation']} samples\n")
    train_size, val_size = len(train_loader), len(val_loader)  # batches per epoch
    for epoch in range(1, n_epochs+1):
        # Training step
        t0 = time.time()
        model.train()
        train_loss, val_loss,train_acc,val_acc = 0.0, 0.0, 0.0, 0.0
        for data, target in train_loader:
            target = target.to(device)
            data = data.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            _, predicted = torch.max(outputs, dim = 1)
            train_acc += (predicted == target).double().mean().item()
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        # Evaluation step: no gradients are needed, so skip building the autograd graph.
        model.eval()
        with torch.no_grad():
            for data, target in val_loader:
                target = target.to(device)
                data = data.to(device)
                outputs = model(data)
                val_loss += criterion(outputs, target).item()
                _, predicted = torch.max(outputs, dim = 1)
                val_acc += (predicted == target).double().mean().item()
        print(f"Epoch {epoch}:\t train loss={train_loss/train_size:.5f} \t train acc={train_acc/train_size:.5f}\t val loss={val_loss/val_size:.5f} \t val acc={val_acc/val_size:.5f} \t time={(time.time() - t0):.2f}s")
        epoch_acc=val_acc/val_size
        df.loc[len(df)] = [model_name, epoch, train_loss/train_size,train_acc/train_size, val_loss/val_size,val_acc/val_size]
        if epoch_acc >= best_acc :
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
        if use_scheduler: scheduler.step(val_loss/val_size)   # Optional to use scheduler for dynamic learning rate
    time_elapsed = time.time() - t_start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, df

In [None]:
def train_val_model_kfold(model, train_loader_kfold):
    """Train `model` for `n_epochs` on one fold's training loader.

    Reads these notebook globals: model_name, n_epochs, df_dataset, device,
    optimizer, criterion.

    Args:
        model: torch module already placed on `device`.
        train_loader_kfold: DataLoader yielding (data, target) batches for the fold.

    Returns:
        (model, df): the trained model and a DataFrame with one row per epoch and
        columns 'model_name', 'epoch', 'train' (mean training loss per batch).
    """
    t_start = time.time()
    df = pd.DataFrame(columns = ['model_name', 'epoch', 'train'])
    print(f"Training model {model_name} with {df_dataset.loc['TOTAL', 'train']} samples and max of {n_epochs} epochs")
    train_size = len(train_loader_kfold)
    for epoch in range(1, n_epochs+1):
        t0 = time.time()
        # Set training mode explicitly so a model left in eval mode by an earlier
        # evaluation is not trained with dropout/batch-norm in inference mode.
        model.train()
        train_loss = 0.0
        for data, target in train_loader_kfold:
            target = target.to(device)
            data = data.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print(f"Epoch {epoch}:\t train loss={train_loss/train_size:.5f} \t time={(time.time() - t0):.2f}s")
        df.loc[len(df)] = [model_name, epoch, train_loss/train_size]
    print(f"Total time training and evaluating: {(time.time()-t_start):.2f}s")
    return model, df


In [None]:

def calc_pred_actuals(model, loader):
    """Run `model` over `loader` and collect predicted and true labels per batch.

    The model is switched to eval mode for the pass and its previous
    training/eval mode is restored afterwards, so callers that forget
    `model.eval()` still get inference-mode predictions and a following
    training step is unaffected.

    Args:
        model: torch module on `device`.
        loader: iterable of (data, target) batches.

    Returns:
        (predictions, actuals): two lists with one int8 array of shape
        (batch_len, 1) per batch.
    """
    predictions, actuals = [], []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, target in loader:
                data = data.to(device)
                _, Y_pred_tag = torch.max(model(data), dim = 1)
                predictions.append(Y_pred_tag.detach().cpu().numpy().reshape(-1, 1).astype('int8'))
                actuals.append(target.detach().cpu().numpy().reshape(-1, 1).astype('int8'))
    finally:
        model.train(was_training)
    return predictions, actuals

In [None]:
def accuracy_model(model, loader):
    """Evaluate `model` on `loader`; print and return its metric table and confusion matrix.

    Puts the model in eval mode. Uses the globals `model_name` and `df_dataset`
    for the banner message only.

    Returns:
        (df_test, conf_matrix): one-row float metrics table labelled
        'Test Results', and the actual-vs-predicted crosstab.
    """
    model.eval()
    print(f"Testing the model {model_name} with {df_dataset.loc['TOTAL', 'testing']} samples \n")
    preds, truths = calc_pred_actuals(model, loader)
    metrics_table = calc_metrics(preds, truths, 'Test Results').astype(float)
    confusion = create_confusion_matrix(preds, truths)
    print(metrics_table)
    return metrics_table, confusion


In [None]:
def generate_resnet34( num_classes, use_pretrained=True):
    """Build a ResNet-34 with frozen backbone weights and a new classification head.

    The head is Linear(in, 224) -> ReLU -> Dropout(0.4) -> Linear(224, num_classes)
    -> LogSoftmax; it is created after the freeze loop, so its parameters keep
    the default requires_grad=True.

    Returns:
        The model, moved to `device`.
    """
    backbone = models.resnet34(pretrained=use_pretrained)
    for weight in backbone.parameters():
        weight.requires_grad = False
    in_features = backbone.fc.in_features
    head_layers = [
        nn.Linear(in_features, 224),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(224, num_classes),
        nn.LogSoftmax(dim=1),
    ]
    backbone.fc = nn.Sequential(*head_layers)
    backbone.to(device)
    return backbone

In [None]:
def vgg16_v2(num_classes,use_pretrained=True):
    """Build a VGG16-BN whose last classifier layer is replaced by a small head.

    The head is Linear(in, 224) -> ReLU -> Dropout(0.4) -> Linear(224, num_classes)
    -> LogSoftmax.

    NOTE(review): unlike the other builders in this notebook, no parameter is
    frozen here, so the whole network is trainable — confirm this is intended.

    Returns:
        The model, moved to `device`.
    """
    model_ft = models.vgg16_bn(pretrained=use_pretrained)
    # The previously built `layers_vgg16` head was never attached to the model
    # (its only use was commented out), so it has been removed.
    num_ftrs = model_ft.classifier[6].in_features
    model_ft.classifier[6] = nn.Sequential(
                      nn.Linear(num_ftrs, 224),
                      nn.ReLU(),
                      nn.Dropout(0.4),
                      nn.Linear(224, num_classes),
                      nn.LogSoftmax(dim=1))
    model_ft.to(device)
    return model_ft

In [None]:
def vgg16(num_classes,use_pretrained=True):
    """Build a VGG16-BN with frozen weights and a fresh linear output layer.

    Only the replaced final classifier layer (index 6), created after the
    freeze loop, is left trainable.

    Returns:
        The model, moved to `device`.
    """
    net = models.vgg16_bn(pretrained=use_pretrained)
    for weight in net.parameters():
        weight.requires_grad = False
    in_features = net.classifier[6].in_features
    output_layer = nn.Linear(in_features, num_classes)
    net.classifier[6] = output_layer
    net.to(device)
    return net

In [None]:
def generate_vgg16( num_classes, use_pretrained=True):
    """Build a frozen VGG-BN backbone with a new head, wrapped in nn.DataParallel.

    NOTE(review): despite the function name, the backbone loaded here is
    `vgg19_bn` — confirm which architecture is intended.

    The head replacing classifier[6] is Linear(in, 224) -> ReLU -> Dropout(0.4)
    -> Linear(224, num_classes) -> LogSoftmax.

    Returns:
        nn.DataParallel wrapping the model, which is moved to `device` first.
    """
    backbone = models.vgg19_bn(pretrained=use_pretrained)
    for weight in backbone.parameters():
        weight.requires_grad = False
    in_features = backbone.classifier[6].in_features
    head_layers = [
        nn.Linear(in_features, 224),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(224, num_classes),
        nn.LogSoftmax(dim=1),
    ]
    backbone.classifier[6] = nn.Sequential(*head_layers)
    backbone.to(device)
    return nn.DataParallel(backbone)

In [None]:
def generate_inception(  num_classes, use_pretrained=True):
    """Build an Inception-v3 with frozen weights, a new `fc` head and aux logits switched off.

    The head is Linear(in, 224) -> ReLU -> Dropout(0.4) -> Linear(224, num_classes)
    -> LogSoftmax. `aux_logits` is set to False on the model after the head is
    attached.

    Returns:
        The model, moved to `device`.
    """
    backbone = models.inception_v3(pretrained=use_pretrained)
    for weight in backbone.parameters():
        weight.requires_grad = False
    in_features = backbone.fc.in_features
    head_layers = [
        nn.Linear(in_features, 224),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(224, num_classes),
        nn.LogSoftmax(dim=1),
    ]
    backbone.fc = nn.Sequential(*head_layers)
    backbone.aux_logits = False
    backbone.to(device)
    return backbone

## **1.2 Pre-trained model example: vgg16**

In [None]:
# `model_name` is a global read by the training/evaluation helpers for logging and result tables.
model_name="vgg16"
vgg16_model=vgg16_v2(num_classes,use_pretrained=True)

In [None]:
# Seed NumPy and torch, then define the globals the training helpers read
# (n_epochs, criterion, optimizer, use_scheduler).
np.random.seed(seed)
torch.manual_seed(seed)
n_epochs = 20
learning_rate = 1e-2
criterion = nn.CrossEntropyLoss()
#optimizer=torch.optim.Adam(vgg16_model.parameters(), lr=learning_rate,  weight_decay=learning_rate/n_epochs)
optimizer = torch.optim.SGD(vgg16_model.parameters(), lr=learning_rate)
# NOTE(review): no `scheduler` object is defined in the visible notebook; one must exist before setting this to True.
use_scheduler = False   # Set True if using scheduler


In [None]:
# Trains on the combined train+validation+testing loader (dataset_loader); the history frame has training columns only.
pretrained_model, df_vgg16_epochs = train_all_model(vgg16_model)

In [None]:
# Persist only the weights (state_dict) next to the notebook.
MODEL_PATH="./"
torch.save(pretrained_model.state_dict(), os.path.join(MODEL_PATH, 'finalourmodel.pth'))

In [None]:
# NOTE(review): `pretrain_model` is not referenced again in the visible notebook; the next cell
# loads the checkpoint into `vgg16_model` instead — confirm which object was intended.
pretrain_model = vgg16_v2(num_classes,True)

In [None]:
# map_location lets a checkpoint saved on a GPU load on whatever `device` this session uses (including CPU-only).
vgg16_model.load_state_dict(torch.load('../input/modelg/finalourmodel.pth', map_location=device))
# Empty submission table; filled by the prediction cell below.
my_submission = pd.DataFrame(columns=["case","class"])

In [None]:
def load_test_data(data_path="../input/testfolder/", num_workers=num_workers):
    """Create an ImageFolder dataset and a non-shuffling DataLoader for a test folder.

    Images are resized to 224, centre-cropped to 224 and converted to tensors.

    Returns:
        (test_data, test_loader)
    """
    preprocess = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    folder_data = datasets.ImageFolder(root=data_path, transform=preprocess)
    folder_loader = torch.utils.data.DataLoader(
        folder_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return  folder_data, folder_loader

# Rebinds the global test_data/test_loader to the folder above.
test_data, test_loader = load_test_data()
dataset_test = torch.utils.data.ConcatDataset([test_data])

In [None]:

# Predict a class for every image in test_loader and build the submission table.
# The model is put in eval mode first so dropout/batch-norm run in inference mode.
vgg16_model.eval()
pred = []
with torch.no_grad():
    for data, _target in test_loader:
        data = data.to(device)
        _, Y_pred_tag = torch.max(vgg16_model(data), dim = 1)
        pred.extend(Y_pred_tag.cpu().tolist())

# Cases are named 0001.jpg, 0002.jpg, ... in loader order. The table is rebuilt in
# one go, so re-running this cell no longer appends duplicate rows.
my_submission = pd.DataFrame(
    {
        "case": [f"{case_no:04d}.jpg" for case_no in range(1, len(pred) + 1)],
        "class": pred,
    },
    columns=["case", "class"],
)

In [None]:
import os
# Remove a stale submission file if there is one; a fresh run has none, and an
# unconditional os.remove would raise FileNotFoundError.
if os.path.exists("./submission.csv"):
    os.remove("./submission.csv")

In [None]:
# Load the submission template and write the predictions collected in `pred` into it.
test_csv = pd.read_csv("../input/mycovid19gc/submission.csv")
# NOTE(review): the column key ' class' has a leading space; presumably it matches the template's header — verify.
test_csv[' class'] = pred

In [None]:
# NOTE(review): input() waits for the user, so this cell blocks a non-interactive "Run All".
name = input("Enter the name")
# Written as comma-separated text with a .txt extension.
test_csv.to_csv(str(name)+".txt", index = False,sep=",")

In [None]:
# Display the filled-in template.
test_csv

In [None]:
# DataFrame.to_csv takes the separator as `sep`; `delimiter` is not an accepted keyword and raised TypeError.
my_submission.to_csv('submission.txt', index=False, sep="\t")

<a href="./submission.txt"> Download File </a>

In [None]:
# Display the submission table.
my_submission

In [None]:
# df_vgg16_epochs comes from train_all_model, so it carries training loss only (no 'validation_loss' column).
plot_train_val_losses(df_vgg16_epochs)

In [None]:
# NOTE(review): `test_loader` was rebound by the load_test_data() cell above, so this evaluates on
# "../input/testfolder/" with folder-derived labels, not on the dataset's 'testing' split.
test_vgg_results, conf_vgg_matrix_test = accuracy_model(pretrained_model, test_loader)

## **RESNET34**

In [None]:
# Switch the logging name and build the frozen-backbone ResNet-34.
model_name="resnet34"
resnet34_model = generate_resnet34(  num_classes, use_pretrained=True)

In [None]:
# Re-seed and rebind the training globals so the optimizer targets resnet34_model.
np.random.seed(seed)
torch.manual_seed(seed)
n_epochs = 20
learning_rate = 1e-2
criterion = nn.CrossEntropyLoss()
#optimizer=torch.optim.Adam(vgg16_model.parameters(), lr=learning_rate,  weight_decay=learning_rate/n_epochs)
optimizer = torch.optim.SGD(resnet34_model.parameters(), lr=learning_rate)
use_scheduler = False   # Set True if using scheduler

In [None]:

# Train on train_loader and validate on val_loader; returns the best-validation weights and the epoch history.
pretrained_model_resnet34, df_resnet34_epochs = train_val_model(resnet34_model)

In [None]:
# Loss curves per epoch for the ResNet-34 run.
plot_train_val_losses(df_resnet34_epochs)

In [None]:
# Metrics and confusion matrix on whatever `test_loader` currently refers to.
test_resnet34_results, conf_resnet34_matrix_test = accuracy_model(pretrained_model_resnet34, test_loader)

## **Inception**

In [None]:
# Switch the logging name and build the frozen-backbone Inception-v3.
model_name="inception"
inception_model = generate_inception(  num_classes, use_pretrained=True)

In [None]:
# Re-seed and rebind the training globals so the optimizer targets inception_model.
np.random.seed(seed)
torch.manual_seed(seed)
n_epochs = 20
learning_rate = 1e-2
criterion = nn.CrossEntropyLoss()
#optimizer=torch.optim.Adam(vgg16_model.parameters(), lr=learning_rate,  weight_decay=learning_rate/n_epochs)
optimizer = torch.optim.SGD(inception_model.parameters(), lr=learning_rate)
use_scheduler = False   # Set True if using scheduler

In [None]:
# Train on train_loader and validate on val_loader; returns the best-validation weights and the epoch history.
pretrained_model_inception, df_inception_epochs = train_val_model(inception_model)

In [None]:
# Loss curves per epoch for the Inception run.
plot_train_val_losses(df_inception_epochs)


In [None]:
# Metrics and confusion matrix on whatever `test_loader` currently refers to.
test_inception_results, conf_inception_matrix_test = accuracy_model(pretrained_model_inception, test_loader)

## **CNN**

In [None]:

# Channel/unit widths: conv1, conv2, conv3 output channels, hidden linear units, output units.
layer = [128, 128, 256, 256, 3]

class Net(nn.Module):
    """Small CNN: three conv/batch-norm/ReLU/max-pool stages followed by two linear layers.

    The first pool divides the spatial size by 4 and the shared second pool by 2
    (applied after conv2 and conv3). `linear1` expects a 14 x 14 feature map,
    which is what a 224 x 224 input produces. The forward pass returns the raw
    output of `linear2`.
    """

    def __init__(self):
        super().__init__()
        c1, c2, c3, hidden, n_out = layer
        self.conv1 = nn.Conv2d(3, c1, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(c1)
        self.pool1 = nn.MaxPool2d(kernel_size=4, stride=4)
        self.conv2 = nn.Conv2d(c1, c2, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(c2)
        self.conv3 = nn.Conv2d(c2, c3, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(c3)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.linear1 = nn.Linear(14 * 14 * c3, hidden)
        self.linear2 = nn.Linear(hidden, n_out)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()

    def forward(self, x):
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool2),
        )
        for conv, norm, pool in stages:
            x = pool(self.relu(norm(conv(x))))
        # Flatten everything except the batch dimension.
        flat = x.reshape(x.size(0), -1)
        hidden = self.dropout(self.relu(self.linear1(flat)))
        return self.linear2(hidden)

In [None]:
# Seed before constructing Net so its weight initialisation is reproducible,
# then rebind the training globals for the custom CNN.
np.random.seed(seed)
torch.manual_seed(seed)
net = Net().to(device)
model_name = 'DLH_COVID'
n_epochs = 20
learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)
use_scheduler = False   # Set True if using scheduler

In [None]:
# Train on train_loader and validate on val_loader; returns the best-validation weights and the epoch history.
our_model, df_epochs = train_val_model(net)

In [None]:
# Loss curves per epoch for the custom CNN run.
plot_train_val_losses(df_epochs)

In [None]:
# Metrics and confusion matrix on whatever `test_loader` currently refers to.
test_results, conf_matrix_test = accuracy_model(our_model, test_loader)

In [None]:
from sklearn.model_selection import KFold
def validate_model_kfold(model, model_name, k_folds):
    """Run k-fold training/evaluation of `model` over the global `dataset`.

    For every fold the model is trained with train_val_model_kfold on the fold's
    training indices and scored on its held-out indices.

    NOTE(review): the same model object is trained fold after fold (the weight
    reset is commented out), so later folds start from weights that have already
    seen their held-out samples — confirm this is intended.

    Args:
        model: torch module on `device`.
        model_name: name used in the banner message of this function only.
        k_folds: number of folds passed to KFold.

    Returns:
        (df, conf_matrix): float DataFrame with one row per fold ('FOLD 1', ...)
        plus an 'Average' row, and the confusion matrix of the LAST fold only.
    """
    print(f"Validating the model {model_name} with {df_dataset.loc['TOTAL', 'train']} samples and {k_folds}-folds \n")
    kfold = KFold(n_splits=k_folds, shuffle=True)
    print(len(dataset))
    fold_frames = []
    conf_matrix = None
    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
        train_kfold_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, sampler=train_subsampler)
        test_kfold_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, sampler=test_subsampler)
        #model.apply(reset_weights)
        model, _ = train_val_model_kfold(model, train_kfold_loader)
        predictions, actuals = calc_pred_actuals(model, test_kfold_loader)
        conf_matrix = create_confusion_matrix(predictions, actuals)
        fold_frames.append(calc_metrics(predictions, actuals, 'FOLD '+str(fold+1)))
    # DataFrame.append no longer exists in pandas 2; concatenate once instead.
    # Cast to float before averaging so the mean is not taken over object columns.
    df = pd.concat(fold_frames).astype(float)
    df.loc['Average'] = df.mean(axis=0)
    print(df)
    return df, conf_matrix


In [None]:
# Fresh VGG16-BN model for the k-fold run.
model_name="vgg16"
vgg16_model=vgg16_v2(num_classes,use_pretrained=True)

In [None]:
# Re-seed and rebind the training globals so the optimizer targets the new vgg16_model.
np.random.seed(seed)
torch.manual_seed(seed)
n_epochs = 20
learning_rate = 1e-2
criterion = nn.CrossEntropyLoss()
#optimizer=torch.optim.Adam(vgg16_model.parameters(), lr=learning_rate,  weight_decay=learning_rate/n_epochs)
optimizer = torch.optim.SGD(vgg16_model.parameters(), lr=learning_rate)
use_scheduler = False   # Set True if using scheduler


In [None]:
# 5-fold run; each fold trains for n_epochs.
validation_our_model_results, conf_matrix_our_model_val = validate_model_kfold(vgg16_model, model_name = 'vgg16', k_folds = 5)

In [None]:
# `load_test_data` is already defined earlier in this notebook with this exact body,
# so it is reused here instead of being defined a second time.
test_data, test_loader = load_test_data()
dataset_test = torch.utils.data.ConcatDataset([test_data])

In [None]:
def calc_pred(model, loader):
    """Run `model` over `loader` and collect the predicted class indices per batch.

    The model is switched to eval mode for the pass and its previous
    training/eval mode is restored afterwards, so predictions never depend on
    the mode the caller left the model in.

    Args:
        model: torch module on `device`.
        loader: iterable of (data, target) batches; targets are ignored.

    Returns:
        List with one int8 array of shape (batch_len, 1) per batch.
    """
    predictions = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, _target in loader:
                data = data.to(device)
                _, Y_pred_tag = torch.max(model(data), dim = 1)
                predictions.append(Y_pred_tag.detach().cpu().numpy().reshape(-1, 1).astype('int8'))
    finally:
        model.train(was_training)
    return predictions

In [None]:
### print(f"Testing the model {model_name} with {dataset_test['TOTAL', 'test']} samples \n") 
# Per-batch predictions of the first trained model on the current test_loader.
predictions= calc_pred(pretrained_model, test_loader)

In [None]:
# Rebinds my_submission to a new, empty table with the two submission columns.
my_submission = pd.DataFrame(columns=["case","class"])


In [None]:
# you could use any filename. We choose submission here
# my_submission was just re-created empty, so this writes a header-only file.
my_submission.to_csv('submission.csv', index=False)

In [None]:
# Write one "<predicted class>, <file name>" line per test image.
# The ImageFolder loader yields (data, target) pairs only, so file names are taken
# from the dataset's `samples` list; test_loader is built with shuffle=False, so
# batches follow that order.
# NOTE(review): the base file name is written — confirm whether the full path is expected.
pretrained_model.eval()
sample_names = [os.path.basename(path) for path, _ in test_loader.dataset.samples]
with open(MODEL_PATH+"submission.csv", "w") as f, torch.no_grad():
    offset = 0
    for data, _target in test_loader:
        data = data.to(device)
        _, Y_pred_tag = torch.max(pretrained_model(data), dim = 1)
        batch_preds = Y_pred_tag.cpu().tolist()
        batch_names = sample_names[offset:offset + len(batch_preds)]
        offset += len(batch_preds)
        f.write("".join(f"{p}, {fname}\n" for p, fname in zip(batch_preds, batch_names)))

In [None]:
model_name="vgg16"
vgg16_model = generate_vgg16(  num_classes, use_pretrained=True)
# train_val_model steps the global `optimizer`, which was still bound to the
# parameters of the previous model; rebuild it for the model created above so
# that training actually updates this model's weights.
optimizer = torch.optim.SGD(vgg16_model.parameters(), lr=learning_rate)
pretrained_vgg16_model, df_vgg16_epochs = train_val_model(vgg16_model)

In [None]:
# Loss curves per epoch for the run above.
plot_train_val_losses(df_vgg16_epochs)

In [None]:
# Metrics and confusion matrix on whatever `test_loader` currently refers to.
test_vgg_results, conf_vgg_matrix_test = accuracy_model(pretrained_vgg16_model, test_loader)

In [None]:
from tensorflow.keras.callbacks import Callback

In [None]:
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder

In [None]:
class Configuration:
    """Plain attribute container for the Keras training settings."""

CONFIG = Configuration()
# Training parameters read by generate_model, generator and itarative_train.
vars(CONFIG).update(
    batch_size=100,           # samples per batch yielded by generator()
    epochs=30,
    steps_per_epoch=1000,     # generator batches consumed per epoch
    validation_steps=10,
    number_of_iterations=10,
    INIT_LR=1e-3,             # initial learning rate for Adam
)
# Hard-coded stand-ins for command-line arguments.
# NOTE(review): these paths point at "covid19gc", not the "mycovid19gc" root used by DATA_PATH — confirm.
args = dict(
    dataset="../input/covid19gc/train",
    validation="../input/covid19gc/validation",
    testing="../input/covid19gc/testing",
    plot="plot.png",
    model="covid19.model",
)



In [None]:
def generate_model():
    """Build and compile a Keras VGG16 (ImageNet weights, no top) with a 3-class softmax head.

    The head is AveragePooling2D(4x4) -> Flatten -> Dense(64, relu) -> Dropout(0.5)
    -> Dense(3, softmax). All base-model layers are frozen before compiling, so
    only the head is trained. Compiled with Adam (learning rate CONFIG.INIT_LR,
    decay CONFIG.INIT_LR / CONFIG.epochs) and categorical cross-entropy.

    Returns:
        The compiled Model.
    """
    backbone = VGG16(weights="imagenet", include_top=False,
        input_tensor=Input(shape=(224, 224, 3)))

    # Head layers, applied in order on top of the backbone output.
    head_layers = [
        AveragePooling2D(pool_size=(4, 4)),
        Flatten(name="flatten"),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(3, activation="softmax"),
    ]
    features = backbone.output
    for head_layer in head_layers:
        features = head_layer(features)

    model = Model(inputs=backbone.input, outputs=features)

    # Freeze the backbone so it is not updated during training.
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    print("[INFO] compiling model...")
    optimizer_ = Adam(lr=CONFIG.INIT_LR , decay=CONFIG.INIT_LR  / CONFIG.epochs )
    model.compile(loss="categorical_crossentropy", optimizer=optimizer_,
        metrics=["accuracy"])
    return model

In [None]:
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class images
def loadIamge(path):
    """Load every image found under `path` as a 224x224 RGB array, labelled by its parent folder.

    Files that OpenCV cannot decode are skipped with a printed warning instead
    of crashing the colour conversion.

    Args:
        path: directory searched by imutils.paths.list_images.

    Returns:
        (data, labels): list of image arrays and list of label strings, aligned by index.
    """
    print("[INFO] loading images...")
    data = []
    labels = []
    for imagePath in paths.list_images(path):
        # The class label is the name of the directory that contains the file.
        label = imagePath.split(os.path.sep)[-2]

        image = cv2.imread(imagePath)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"[WARN] could not read image, skipping: {imagePath}")
            continue
        # OpenCV loads BGR; convert to RGB and resize to 224x224, ignoring aspect ratio.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (224, 224))
        data.append(image)
        labels.append(label)
    return data,labels



In [None]:
def dataTransformation(data,labels):
    """Shuffle images and labels together and one-hot encode the labels.

    The global LabelEncoder `lb` is re-fitted on the labels passed in on every call.

    Returns:
        (data_shuffled, labels_cate): the shuffled images and the categorical label matrix.
    """
    shuffled = shuffle(data, labels)
    data_shuffled = shuffled[0]
    label_array = np.array(shuffled[1])
    encoded = lb.fit_transform(label_array)
    return data_shuffled, to_categorical(encoded)

In [None]:
def generator(data, labels):
    """Yield (inputs, targets) batches of CONFIG.batch_size forever.

    Walks the data front to back in consecutive slices (the last slice of a pass
    may be shorter) and then starts over, so it never stops by itself. Inputs
    are converted to a NumPy array per batch; targets are sliced as they are.
    """
    while True:
        start = 0
        while start < len(labels):
            stop = start + CONFIG.batch_size
            batch_inputs = np.array(data[start:stop])
            batch_targets = labels[start:stop]
            yield batch_inputs, batch_targets
            start = stop
                    

In [None]:
# Shared encoder used (and re-fitted) by dataTransformation.
lb = LabelEncoder()

In [None]:

# Load and encode each split from the folders configured in `args`.
# NOTE(review): `data` and `labels` are first bound to raw lists and then rebound to the transformed values.
data,labels=loadIamge(args["dataset"])
data,labels=dataTransformation(data,labels)
data_val,labels_val=loadIamge(args["validation"])
data_val,labels_val=dataTransformation(data_val,labels_val)
data_test,labels_test=loadIamge(args["testing"])
data_test,labels_test=dataTransformation(data_test,labels_test)
# The split below is disabled, so `valX`/`testX`/`valY`/`testY` are not created by this cell.
#(valX, testX, valY, testY) = train_test_split(data_val, labels_val,test_size=0.20, stratify=labels_val, random_state=42)
    

In [None]:
class OnEpochEndCallback(Callback):
    """Keras callback: after each epoch, score one validation batch and save a checkpoint."""

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate on the first validation batch, then save the model for this epoch.

        Uses the globals `data_val`/`labels_val` (the validation split loaded in
        this notebook); the previously referenced `valX`/`valY` are never
        defined because the split that created them is commented out.
        """
        X_val, y_val = next(generator(data_val, labels_val))
        print_random_predictions(self.model,  X_val, y_val)
        self.model.save(SAVED_MODEL_FILE_NAME.format(epoch))

In [None]:

def print_random_predictions(model, X_val, y_val):
    """Evaluate `model` on the given batch and print its second metric as a percentage.

    Calls model.evaluate(X_val, y_val, verbose=0) and prints
    "<metrics_names[1]>: <scores[1] * 100>%" between two blank lines.
    Despite the name, no sampling or per-sample prediction happens here.
    """
    print()
    print("[INFO] evaluating network...")
    evaluation = model.evaluate(X_val, y_val, verbose=0)
    metric_name = model.metrics_names[1]
    metric_percent = evaluation[1]*100
    print("%s: %.2f%%" % (metric_name, metric_percent))
    print()


In [None]:
# Single callback instance passed to the training call in itarative_train.
ON_EPOCH_END_CALLBACK = OnEpochEndCallback()

In [None]:
def itarative_train(model,data,labels,data_val,labels_val):
    """Fit `model` from endless batch generators over the training and validation data.

    Batches come from generator(); epoch count, steps per epoch and validation
    steps are taken from CONFIG, and ON_EPOCH_END_CALLBACK runs after every epoch.
    The value returned by fit_generator is not returned.
    """
    train_batches = generator(data,labels)
    val_batches = generator(data_val,labels_val)
    fit_options = dict(
        steps_per_epoch=CONFIG.steps_per_epoch,
        epochs=CONFIG.epochs,
        verbose=1,
        callbacks=[ON_EPOCH_END_CALLBACK, ],
        validation_data=val_batches,
        validation_steps=CONFIG.validation_steps,
        class_weight=None,
        max_queue_size=10,
        workers=1,
        initial_epoch=0,
    )
    model.fit_generator(train_batches, **fit_options)


In [None]:
# Build and compile the Keras VGG16-based classifier.
model=generate_model()


In [None]:
# `valX`/`valY` are never defined (the split that would create them is commented out);
# validate on the validation split that was actually loaded.
itarative_train(model,data,labels,data_val,labels_val)