In [1]:
import pydub # faster than librosa
import numpy      as np
import pandas     as pd
import time

from coughvid.pytorch.coughvid_dataset import CoughvidDataset
from coughvid.pytorch.coughvid_dataset import CoughvidDataset
from coughvid.pytorch.coswara_dataset import CoswaraDataset
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision.models import resnet50, resnet18

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import copy


#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter()

In [2]:
dir = 'C:/COUGHVID_public_dataset/public_dataset/'

full_dataset = CoughvidDataset(dir, 'metadata_compiled.csv', get_features=True)

dataframe = full_dataset.dataframe

minority_class_count = len(dataframe[dataframe['status']==1])

print(f'{minority_class_count} samples in the minority class.')

sample_dataset = CoughvidDataset(dir, 'metadata_compiled.csv', get_features=True, samples_per_class=minority_class_count)

Mask file C:/COUGHVID_public_dataset/public_dataset/ does not exist. Calculating masks on the fly.
8997 records ready to load across 2 groups.
699 samples in the minority class.
Mask file C:/COUGHVID_public_dataset/public_dataset/ does not exist. Calculating masks on the fly.
1398 records ready to load across 2 groups.


In [2]:
dir = './data/coswara/'

full_dataset = CoswaraDataset(dir, 'filtered_data.csv', get_features=True)

dataframe = full_dataset.dataframe

minority_class_count = len(dataframe[dataframe['covid_status']==1])

print(f'{minority_class_count} samples in the minority class.')

sample_dataset = CoswaraDataset(dir, 'filtered_data.csv', get_features=True, samples_per_class=minority_class_count)

1758 records ready to load across 2 groups.
379 samples in the minority class.
758 records ready to load across 2 groups.


In [3]:
# split data into training and test samples
train_indices, test_indices = train_test_split(np.arange(0,len(sample_dataset)-1), test_size=0.25)

batch_size  = 1
num_workers = 2

train_loader  = DataLoader(sample_dataset, 
                           num_workers=num_workers,
                           sampler=SubsetRandomSampler(train_indices)
                          )

test_loader   = DataLoader(sample_dataset, 
                           num_workers=num_workers,
                           sampler=SubsetRandomSampler(test_indices)
                          )

dataloaders = {
    "train": train_loader,
    "test": test_loader
}

In [4]:
# load model and change output shape for binary prediction
model = resnet18()

model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.10,inplace=True),
    torch.nn.Linear(
        in_features=512, #2048 for resnet50
        out_features=1
    ),
    torch.nn.Sigmoid()
)

model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=True)


optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()

model.double()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2):

In [7]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        outputs = self.linear(x)
        return outputs
    
batch_size = 100
n_iters = 3000
EPOCHS = int(n_iters / (len(sample_dataset) / batch_size))

lr_rate = 0.00001    
    
model = LogisticRegression(4050,1)
optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate)
criterion = torch.nn.BCELoss()
model.double()


LogisticRegression(
  (linear): Linear(in_features=4050, out_features=1, bias=True)
)

In [5]:
'''RESNET training code adapted from https://www.kaggle.com/gxkok21/resnet50-with-pytorch'''

#if torch.cuda.is_available():
#    model = model.cuda()
#    print('Using GPU.')

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

EPOCHS = 100

for i in range(EPOCHS):
    for phase in ["train", "test"]:
        if phase == "train":
            model.train()
        else:
            model.eval()
        
        samples = 0
        loss_sum = 0
        correct_sum = 0
        for j, batch in enumerate(dataloaders[phase]):
            X,labels = batch
            #print(labels)
            #print(len(X),len(X[0]),len(X[1]))
            #print(labels[None,...].double())
            #X = torch.Tensor(X)
            #print(X.shape)
            #labels = 1-labels
            if torch.cuda.is_available():
                X = X.cuda()
                labels = labels.cuda()
                model = model.cuda()

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                y = model(X[None,...].double())
                #y = model(X.flatten().double())
                #print(y,labels)
                loss = criterion(
                    y,
                    #torch.clip(y,0,1), 
                    labels[None,...].double()
                    #labels.double()
                )
                #print(loss.item())
                #print(labels[None,...].double())
                
                #writer.add_scalar("Loss/train", loss, i)

                if phase == "train":
                    loss.backward()
                    optimizer.step()
                    
                loss_sum += loss.item() * X.shape[0] # We need to multiple by batch size as loss is the mean loss of the samples in the batch
                samples += X.shape[0]
                num_corrects = torch.sum((y >= 0.5).float() == labels[0].float())
                correct_sum += num_corrects
                
                # Print batch statistics every 50 batches
                if j % 50 == 49 and phase == "train":
                    print("{}:{} - loss: {}, acc: {}".format(
                        i + 1, 
                        j + 1, 
                        float(loss_sum) / float(samples), 
                        float(correct_sum) / float(samples)
                    ))
                
        # Print epoch statistics
        epoch_acc = float(correct_sum) / float(samples)
        epoch_loss = float(loss_sum) / float(samples)
        print("epoch: {} - {} loss: {}, {} acc: {}".format(i + 1, phase, epoch_loss, phase, epoch_acc))
        
        # Deep copy the model
        if phase == "test" and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "resnet18_coswara2.pth")

1:50 - loss: 0.8269391575306768, acc: 0.54
1:100 - loss: 0.8007805970205196, acc: 0.54
1:150 - loss: 0.7589026590127247, acc: 0.5466666666666666
1:200 - loss: 0.765497807714462, acc: 0.545
1:250 - loss: 0.7692785551749199, acc: 0.532
1:300 - loss: 0.7680838852590592, acc: 0.5133333333333333
1:350 - loss: 0.7801158084574296, acc: 0.5085714285714286
1:400 - loss: 0.7741961137682348, acc: 0.515
1:450 - loss: 0.7683172856743179, acc: 0.52
1:500 - loss: 0.771655032604613, acc: 0.51
1:550 - loss: 0.7721034887728451, acc: 0.5054545454545455
1:600 - loss: 0.7725281898275577, acc: 0.5016666666666667
1:650 - loss: 0.7702950134569199, acc: 0.5030769230769231
1:700 - loss: 0.7716291003629021, acc: 0.5
1:750 - loss: 0.7666966620427148, acc: 0.5093333333333333
1:800 - loss: 0.7689655251296785, acc: 0.5025
1:850 - loss: 0.7676761053436842, acc: 0.508235294117647
1:900 - loss: 0.7703142958751942, acc: 0.5066666666666667
1:950 - loss: 0.7731526118027576, acc: 0.49894736842105264
1:1000 - loss: 0.770804

8:100 - loss: 0.7047864834085682, acc: 0.52
8:150 - loss: 0.6954771950890262, acc: 0.54
8:200 - loss: 0.6963243593377613, acc: 0.54
8:250 - loss: 0.7035103052434206, acc: 0.524
8:300 - loss: 0.7038057289919544, acc: 0.52
8:350 - loss: 0.7034293392769965, acc: 0.5171428571428571
8:400 - loss: 0.703688295222219, acc: 0.52
8:450 - loss: 0.7037035359533019, acc: 0.5133333333333333
8:500 - loss: 0.701281804378787, acc: 0.528
8:550 - loss: 0.7025403846480507, acc: 0.5272727272727272
8:600 - loss: 0.7048186170310602, acc: 0.5133333333333333
8:650 - loss: 0.7034872867974259, acc: 0.5215384615384615
8:700 - loss: 0.7020020574590516, acc: 0.5271428571428571
8:750 - loss: 0.702836937060866, acc: 0.524
8:800 - loss: 0.7033285439115268, acc: 0.525
8:850 - loss: 0.7043462355351569, acc: 0.5211764705882352
8:900 - loss: 0.7047383639401397, acc: 0.5177777777777778
8:950 - loss: 0.7039643744927112, acc: 0.5221052631578947
8:1000 - loss: 0.7042889678925687, acc: 0.522
epoch: 8 - train loss: 0.7042871398

epoch: 14 - test loss: 0.6897806521008446, test acc: 0.52
15:50 - loss: 0.7020997133142372, acc: 0.46
15:100 - loss: 0.7066923864325787, acc: 0.49
15:150 - loss: 0.7307390412879211, acc: 0.49333333333333335
15:200 - loss: 0.7226274207275816, acc: 0.51
15:250 - loss: 0.7244939763502465, acc: 0.512
15:300 - loss: 0.7207207547511274, acc: 0.5066666666666667
15:350 - loss: 0.7201535386994248, acc: 0.49714285714285716
15:400 - loss: 0.7183589399974576, acc: 0.4925
15:450 - loss: 0.7140894242066238, acc: 0.5066666666666667
15:500 - loss: 0.7107478277907272, acc: 0.514
15:550 - loss: 0.7098367478868374, acc: 0.5163636363636364
15:600 - loss: 0.7077328036967196, acc: 0.52
15:650 - loss: 0.707721327412376, acc: 0.5184615384615384
15:700 - loss: 0.706536168826129, acc: 0.52
15:750 - loss: 0.7054127828535458, acc: 0.5226666666666666
15:800 - loss: 0.7052663332899651, acc: 0.52
15:850 - loss: 0.7048378465344046, acc: 0.5164705882352941
15:900 - loss: 0.7046270867241079, acc: 0.5166666666666667
15:

epoch: 21 - train loss: 0.6601983161412964, train acc: 0.6045845272206304
epoch: 21 - test loss: 0.6873000051020562, test acc: 0.5257142857142857
22:50 - loss: 0.6141985283614253, acc: 0.62
22:100 - loss: 0.6555332538471872, acc: 0.58
22:150 - loss: 0.6740660484697962, acc: 0.5933333333333334
22:200 - loss: 0.6478381080033941, acc: 0.635
22:250 - loss: 0.643901933157816, acc: 0.636
22:300 - loss: 0.6448946758965356, acc: 0.64
22:350 - loss: 0.6455425450202955, acc: 0.6428571428571429
22:400 - loss: 0.647034483599192, acc: 0.6425
22:450 - loss: 0.6390241957362777, acc: 0.6555555555555556
22:500 - loss: 0.6430751700473487, acc: 0.648
22:550 - loss: 0.6404810121684715, acc: 0.6527272727272727
22:600 - loss: 0.6433461274366284, acc: 0.6483333333333333
22:650 - loss: 0.6408120435394521, acc: 0.6523076923076923
22:700 - loss: 0.6426466985797389, acc: 0.6471428571428571
22:750 - loss: 0.6428420127965123, acc: 0.652
22:800 - loss: 0.6459138925963644, acc: 0.6425
22:850 - loss: 0.64379027005363

28:950 - loss: 0.46961989822038447, acc: 0.7957894736842105
28:1000 - loss: 0.4769744145188776, acc: 0.793
epoch: 28 - train loss: 0.4780825081204299, train acc: 0.789875835721108
epoch: 28 - test loss: 0.8066051146815828, test acc: 0.5171428571428571
29:50 - loss: 0.39539131086636947, acc: 0.8
29:100 - loss: 0.36746038321083296, acc: 0.85
29:150 - loss: 0.3969618907077236, acc: 0.8333333333333334
29:200 - loss: 0.3795605498970643, acc: 0.845
29:250 - loss: 0.3861571685426831, acc: 0.836
29:300 - loss: 0.40385531119314116, acc: 0.8233333333333334
29:350 - loss: 0.39810957444053113, acc: 0.8257142857142857
29:400 - loss: 0.40288526108747613, acc: 0.8225
29:450 - loss: 0.4009835115913209, acc: 0.8266666666666667
29:500 - loss: 0.4110343409261006, acc: 0.82
29:550 - loss: 0.410949280579464, acc: 0.82
29:600 - loss: 0.40570014035052654, acc: 0.8183333333333334
29:650 - loss: 0.4144292622140151, acc: 0.816923076923077
29:700 - loss: 0.40952382849477664, acc: 0.82
29:750 - loss: 0.4120181095

35:750 - loss: 0.21217942127034492, acc: 0.9226666666666666
35:800 - loss: 0.22347450544696693, acc: 0.9175
35:850 - loss: 0.2305957808091033, acc: 0.9129411764705883
35:900 - loss: 0.22260549113159303, acc: 0.9177777777777778
35:950 - loss: 0.21526753005826343, acc: 0.9210526315789473
35:1000 - loss: 0.2116075283267791, acc: 0.922
epoch: 35 - train loss: 0.21135832365623203, train acc: 0.9216809933142311
epoch: 35 - test loss: 1.128479567283923, test acc: 0.49142857142857144
36:50 - loss: 0.12143467003711697, acc: 0.94
36:100 - loss: 0.11401965406738991, acc: 0.95
36:150 - loss: 0.09182168377077828, acc: 0.96
36:200 - loss: 0.09046469074192523, acc: 0.965
36:250 - loss: 0.09591598527048337, acc: 0.964
36:300 - loss: 0.088528197805955, acc: 0.9666666666666667
36:350 - loss: 0.08145342900845653, acc: 0.9685714285714285
36:400 - loss: 0.08474327074211478, acc: 0.97
36:450 - loss: 0.08777868512530121, acc: 0.9688888888888889
36:500 - loss: 0.10582821614003007, acc: 0.96
36:550 - loss: 0.1

KeyboardInterrupt: 