# Train Binary Classifiers

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import itertools
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
from lung_dataset import Lung_Dataset
from torch.utils.data import DataLoader
from functions import train, Net, Net4, plot_learning_curve,save_metrics

In [3]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

## Normal vs Infected

In [5]:
# Grid search over batch size, learning rate and dropout probability for the
# first-stage (normal vs infected) binary classifier. Every combination is
# trained from scratch, plotted and has its metrics saved under a name built
# from its hyper-parameters.
batchsizes = [8, 16, 20]
l_rates = [0.001, 0.005, 0.0001, 0.0005]
p_dropout = [0.1, 0.25, 0.5, 0.75, 0.9]

# Settings shared by every run of the search.
num_epochs = 10
save_path = './actual_saved_models'
img_path = './plots'
results_path = './results'
binary = True
verbose = False
criterion = nn.CrossEntropyLoss()

# The datasets do not depend on any searched hyper-parameter, so they are
# built once here instead of once per combination.
ld_train = Lung_Dataset("train", "normal/infected")
ld_test = Lung_Dataset("test", "normal/infected")

# Highest test accuracy seen so far and the name of the run that reached it.
max_w_acc = 0
best_model = "None"

for bs_val, lrate, pdrop in itertools.product(batchsizes, l_rates, p_dropout):
    # Only the loaders depend on the batch size.
    trainloader = DataLoader(ld_train, batch_size=bs_val, shuffle=True)
    testloader = DataLoader(ld_test, batch_size=bs_val, shuffle=True)

    # Fresh network and optimizer for every combination.
    first_net = Net(num_classes=2, pd=pdrop)
    optimizer = optim.Adam(first_net.parameters(), lr=lrate)

    print("Train batches {}, Test batches {}".format(len(trainloader), len(testloader)))
    model_name = "{}_{}_{}_ni".format(bs_val, lrate, pdrop)

    loss_list, acc_list, test_loss_list, test_acc_list, path_list, num_param = train(
        num_epochs, first_net, trainloader, testloader, criterion, optimizer,
        save_path, model_name, binary, verbose)

    # A run is scored by the best test accuracy found in its returned list.
    run_best_acc = max(test_acc_list)
    if run_best_acc > max_w_acc:
        max_w_acc = run_best_acc
        best_model = model_name

    plot_learning_curve(loss_list, acc_list, test_loss_list, test_acc_list, img_path, model_name)
    save_metrics(results_path, model_name, loss_list, acc_list, test_loss_list, test_acc_list,
                 path_list, num_param)

# Report the winner so the outcome of the search is visible in the output.
# NOTE(review): `first_net` still refers to the LAST combination trained, not
# to the best one - confirm this is what later cells expect.
print("Best normal/infected model {} with test accuracy {}".format(best_model, max_w_acc))

## Infected vs Covid

In [20]:
# Grid search over batch size, learning rate and dropout probability for the
# second-stage (covid vs non-covid) binary classifier. Every combination is
# trained from scratch, plotted and has its metrics saved under a name built
# from its hyper-parameters.
batchsizes = [8, 16, 20]
l_rates = [0.001, 0.005, 0.0001, 0.0005]
p_dropout = [0.1, 0.25, 0.5, 0.75, 0.9]

# Settings shared by every run of the search.
num_epochs = 10
save_path = './actual_saved_models'
img_path = './plots'
results_path = './results'
binary = True
verbose = False
# Defined here so this cell does not depend on a `criterion` left behind by
# an earlier cell (it would raise NameError when run on its own).
criterion = nn.CrossEntropyLoss()

# The datasets do not depend on any searched hyper-parameter, so they are
# built once here instead of once per combination.
ld_train = Lung_Dataset("train", "covid/non-covid")
ld_test = Lung_Dataset("test", "covid/non-covid")

# Highest test accuracy seen so far and the name of the run that reached it.
max_w_acc = 0
best_model = "None"

for bs_val, lrate, pdrop in itertools.product(batchsizes, l_rates, p_dropout):
    # Fresh network and optimizer for every combination.
    second_net = Net4(num_classes=2, pd=pdrop)

    # Only the loaders depend on the batch size.
    trainloader = DataLoader(ld_train, batch_size=bs_val, shuffle=True)
    testloader = DataLoader(ld_test, batch_size=bs_val, shuffle=True)

    optimizer = optim.Adam(second_net.parameters(), lr=lrate)

    print("Train batches {}, Test batches {}".format(len(trainloader), len(testloader)))
    model_name = "{}_{}_{}_cnc".format(bs_val, lrate, pdrop)

    loss_list, acc_list, test_loss_list, test_acc_list, path_list, num_param = train(
        num_epochs, second_net, trainloader, testloader, criterion, optimizer,
        save_path, model_name, binary, verbose)

    # A run is scored by the best test accuracy found in its returned list.
    run_best_acc = max(test_acc_list)
    if run_best_acc > max_w_acc:
        max_w_acc = run_best_acc
        best_model = model_name

    plot_learning_curve(loss_list, acc_list, test_loss_list, test_acc_list, img_path, model_name)
    save_metrics(results_path, model_name, loss_list, acc_list, test_loss_list, test_acc_list,
                 path_list, num_param)

# Report the winner so the outcome of the search is visible in the output.
# NOTE(review): `second_net` still refers to the LAST combination trained, not
# to the best one - confirm this is what later cells expect.
print("Best covid/non-covid model {} with test accuracy {}".format(best_model, max_w_acc))

## Two stage classification

In [14]:
import collections

def two_stage_testing(testloader, first_model, second_model):
    """Evaluate a two-stage classifier cascade and report its accuracy.

    Both models are run on every batch and their per-sample argmax
    predictions are merged into a single three-way label:

    * first-stage prediction 0                      -> 0
    * first-stage non-zero, second-stage 0          -> 2
    * first-stage non-zero, second-stage non-zero   -> 1

    The first ten per-stage, combined and actual labels, the distributions
    of combined and actual labels, and the overall accuracy are printed.

    Args:
        testloader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        first_model: First-stage ``nn.Module``.
        second_model: Second-stage ``nn.Module``.

    Returns:
        Fraction of samples whose combined label equals the actual label,
        or ``0.0`` when the loader yields no samples.
    """
    first_model.eval()
    second_model.eval()
    # `device` is the module-level torch.device selected at the top of the file.
    first_model = first_model.to(device)
    second_model = second_model.to(device)

    first_stage_labels = []
    second_stage_labels = []
    actual_labels = []

    # Inference only: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)

            # tolist() transfers a whole batch of predictions to Python ints
            # at once instead of one .item() call per sample.
            _, predicted = torch.max(first_model(inputs), 1)
            first_stage_labels.extend(predicted.tolist())

            _, predicted = torch.max(second_model(inputs), 1)
            second_stage_labels.extend(predicted.tolist())

            # Labels are only compared on the host, so they never need to
            # be moved to the device.
            actual_labels.extend(label.item() for label in labels)

    predicted_labels = [
        0 if not first_label else 2 if not second_label else 1
        for first_label, second_label in zip(first_stage_labels, second_stage_labels)
    ]

    print(first_stage_labels[:10])
    print(second_stage_labels[:10])
    print(predicted_labels[:10])
    print(actual_labels[:10])

    print(collections.Counter(predicted_labels))
    print(collections.Counter(actual_labels))

    # An empty loader would otherwise end in a ZeroDivisionError.
    if not actual_labels:
        print("accuracy undefined: the test loader yielded no samples")
        return 0.0

    correct = sum(actual == predicted
                  for actual, predicted in zip(actual_labels, predicted_labels))
    accuracy = correct / len(actual_labels)
    print("accuracy {:.3f}".format(accuracy))
    return accuracy

Run the two-stage classifier on the three-class (normal / non-covid / covid) test set.

In [18]:
# Evaluate the two-stage pipeline on the three-class test split.
# NOTE(review): `bs_val`, `first_net` and `second_net` are whatever the
# grid-search cells above left behind after their final iteration - confirm
# these are the models intended for the final evaluation.
ld = Lung_Dataset("test", "normal/non-covid/covid")
testloader = DataLoader(dataset=ld, batch_size=bs_val, shuffle=True)

two_stage_testing(
    testloader=testloader,
    first_model=first_net,
    second_model=second_net,
)