In [1]:
import pydub # faster than librosa
import numpy      as np
import pandas     as pd
import time

from coughvid.pytorch.coughvid_dataset import CoughvidDataset
from coughvid.pytorch.coughvid_dataset import CoughvidDataset
from coughvid.pytorch.coswara_dataset import CoswaraDataset
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision.models import resnet50, resnet18

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import copy


#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter()

In [2]:
dir = 'C:/COUGHVID_public_dataset/public_dataset/'

full_dataset = CoughvidDataset(dir, 'metadata_compiled.csv', get_features=True)

dataframe = full_dataset.dataframe

minority_class_count = len(dataframe[dataframe['status']==2])

print(f'{minority_class_count} samples in the minority class.')

sample_dataset = CoughvidDataset(dir, 'metadata_compiled.csv', get_features=True, samples_per_class=minority_class_count)

Mask file C:/COUGHVID_public_dataset/public_dataset/ does not exist. Calculating masks on the fly.
8997 records ready to load across 2 groups.
699 samples in the minority class.


In [2]:
dir = './data/coswara/'

full_dataset = CoswaraDataset(dir, 'filtered_data.csv', get_features=True)

dataframe = full_dataset.dataframe

minority_class_count = len(dataframe[dataframe['covid_status']==1])

print(f'{minority_class_count} samples in the minority class.')

sample_dataset = CoswaraDataset(dir, 'filtered_data.csv', get_features=True, samples_per_class=minority_class_count)

1758 records ready to load across 2 groups.
379 samples in the minority class.
758 records ready to load across 2 groups.


In [3]:
# split data into training and test samples
train_indices, test_indices = train_test_split(np.arange(0,len(sample_dataset)-1), test_size=0.25)

batch_size  = 1
num_workers = 2

train_loader  = DataLoader(sample_dataset, 
                           num_workers=num_workers,
                           sampler=SubsetRandomSampler(train_indices)
                          )

test_loader   = DataLoader(sample_dataset, 
                           num_workers=num_workers,
                           sampler=SubsetRandomSampler(test_indices)
                          )

dataloaders = {
    "train": train_loader,
    "test": test_loader
}

In [4]:
# load model and change output shape for binary prediction
model = resnet50()

model.fc = torch.nn.Sequential(
    torch.nn.Linear(
        in_features=2048, #2048 for resnet50
        out_features=1
    ),
    torch.nn.Sigmoid()
)

model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=True)

model.double()

optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()

In [7]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        outputs = self.linear(x)
        return outputs
    
batch_size = 100
n_iters = 3000
EPOCHS = int(n_iters / (len(sample_dataset) / batch_size))

lr_rate = 0.00001    
    
model = LogisticRegression(4050,1)
optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate)
criterion = torch.nn.BCELoss()
model.double()


LogisticRegression(
  (linear): Linear(in_features=4050, out_features=1, bias=True)
)

In [None]:
'''RESNET training code adapted from https://www.kaggle.com/gxkok21/resnet50-with-pytorch'''

#if torch.cuda.is_available():
#    model = model.cuda()
#    print('Using GPU.')

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

EPOCHS = 100

for i in range(EPOCHS):
    for phase in ["train", "test"]:
        if phase == "train":
            model.train()
        else:
            model.eval()
        
        samples = 0
        loss_sum = 0
        correct_sum = 0
        for j, batch in enumerate(dataloaders[phase]):
            X,labels = batch
            #print(labels)
            #print(len(X),len(X[0]),len(X[1]))
            #print(labels[None,...].double())
            #X = torch.Tensor(X)
            #print(X.shape)
            #labels = 1-labels
            if torch.cuda.is_available():
                X = X.cuda()
                labels = labels.cuda()
                model = model.cuda()

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                y = model(X[None,...].double())
                #y = model(X.flatten().double())
                #print(y,labels)
                loss = criterion(
                    y,
                    #torch.clip(y,0,1), 
                    labels[None,...].double()
                    #labels.double()
                )
                #print(loss.item())
                #print(labels[None,...].double())
                
                #writer.add_scalar("Loss/train", loss, i)

                if phase == "train":
                    loss.backward()
                    optimizer.step()
                    
                loss_sum += loss.item() * X.shape[0] # We need to multiple by batch size as loss is the mean loss of the samples in the batch
                samples += X.shape[0]
                num_corrects = torch.sum((y >= 0.5).float() == labels[0].float())
                correct_sum += num_corrects
                
                # Print batch statistics every 50 batches
                if j % 50 == 49 and phase == "train":
                    print("{}:{} - loss: {}, acc: {}".format(
                        i + 1, 
                        j + 1, 
                        float(loss_sum) / float(samples), 
                        float(correct_sum) / float(samples)
                    ))
                
        # Print epoch statistics
        epoch_acc = float(correct_sum) / float(samples)
        epoch_loss = float(loss_sum) / float(samples)
        print("epoch: {} - {} loss: {}, {} acc: {}".format(i + 1, phase, epoch_loss, phase, epoch_acc))
        
        # Deep copy the model
        if phase == "test" and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "resnet50.pth")

1:50 - loss: 1.3773438241659657, acc: 0.52
1:100 - loss: 1.210422534143371, acc: 0.5
1:150 - loss: 1.103712671869177, acc: 0.5133333333333333
1:200 - loss: 1.1194825138055535, acc: 0.495
1:250 - loss: 1.0582234638692185, acc: 0.492
1:300 - loss: 1.036168053782516, acc: 0.48333333333333334
1:350 - loss: 1.0162485492627167, acc: 0.4828571428571429
1:400 - loss: 0.9997444971665211, acc: 0.4775
1:450 - loss: 1.0008785378209286, acc: 0.4688888888888889
1:500 - loss: 0.9949616565729389, acc: 0.47
1:550 - loss: 0.9756683694603254, acc: 0.4709090909090909
epoch: 1 - train loss: 0.9722796260657434, train acc: 0.4691358024691358
epoch: 1 - test loss: 0.705419147142651, test acc: 0.5789473684210527
2:50 - loss: 0.7777113691118274, acc: 0.54
2:100 - loss: 0.8208505019383524, acc: 0.54
2:150 - loss: 0.8166100903143694, acc: 0.58
2:200 - loss: 0.8130358767633598, acc: 0.575
2:250 - loss: 0.8015454872175428, acc: 0.568
2:300 - loss: 0.7883982937300786, acc: 0.56
2:350 - loss: 0.7891802490850887, acc:

epoch: 12 - test loss: 0.6497733136009725, test acc: 0.5789473684210527
13:50 - loss: 0.6459336890685639, acc: 0.68
13:100 - loss: 0.6881361544733993, acc: 0.59
13:150 - loss: 0.7006788023286343, acc: 0.54
13:200 - loss: 0.6986750918813897, acc: 0.505
13:250 - loss: 0.6927291265910736, acc: 0.516
13:300 - loss: 0.7030358818236778, acc: 0.52
13:350 - loss: 0.7075149721593521, acc: 0.52
13:400 - loss: 0.7166119093916383, acc: 0.5125
13:450 - loss: 0.712567375207528, acc: 0.5155555555555555
13:500 - loss: 0.713166963153416, acc: 0.522
13:550 - loss: 0.7163538515616824, acc: 0.509090909090909
epoch: 13 - train loss: 0.7163495609513182, train acc: 0.5079365079365079
epoch: 13 - test loss: 0.670345557303403, test acc: 0.6210526315789474
14:50 - loss: 0.6314504265135681, acc: 0.7
14:100 - loss: 0.6651339576913995, acc: 0.61
14:150 - loss: 0.6802941735349299, acc: 0.6066666666666667
14:200 - loss: 0.6870106328468043, acc: 0.59
14:250 - loss: 0.6927060865885185, acc: 0.56
14:300 - loss: 0.69011

24:550 - loss: 0.7012299602022262, acc: 0.5145454545454545
epoch: 24 - train loss: 0.6981589346159892, train acc: 0.5220458553791887
epoch: 24 - test loss: 0.8882429895381964, test acc: 0.4631578947368421
25:50 - loss: 0.7164761710052149, acc: 0.58
25:100 - loss: 0.7175332925762434, acc: 0.57
25:150 - loss: 0.7126058916661332, acc: 0.54
25:200 - loss: 0.6996134398137192, acc: 0.575
25:250 - loss: 0.7021722682850365, acc: 0.556
25:300 - loss: 0.701345206787489, acc: 0.53
25:350 - loss: 0.7008874712099208, acc: 0.5285714285714286
25:400 - loss: 0.7011319000850168, acc: 0.51
25:450 - loss: 0.6995986829868895, acc: 0.52
25:500 - loss: 0.7006153934417052, acc: 0.524
25:550 - loss: 0.7013042543503863, acc: 0.5236363636363637
epoch: 25 - train loss: 0.7006199886643599, train acc: 0.5255731922398589
epoch: 25 - test loss: 0.7006045699863187, test acc: 0.5210526315789473
26:50 - loss: 0.6901898606985469, acc: 0.54
26:100 - loss: 0.6963570495158491, acc: 0.48
26:150 - loss: 0.6924813865871045, a

36:450 - loss: 0.6967925814708688, acc: 0.54
36:500 - loss: 0.6975158467137941, acc: 0.538
36:550 - loss: 0.6974719005140779, acc: 0.5345454545454545
epoch: 36 - train loss: 0.695372229761262, train acc: 0.5396825396825397
epoch: 36 - test loss: 0.7152334613513028, test acc: 0.46842105263157896
37:50 - loss: 0.7311657259266338, acc: 0.52
37:100 - loss: 0.7158984860306568, acc: 0.55
37:150 - loss: 0.7189938340528269, acc: 0.49333333333333335
37:200 - loss: 0.7162933681715085, acc: 0.49
37:250 - loss: 0.7144279920181017, acc: 0.48
37:300 - loss: 0.7125314472694965, acc: 0.48333333333333334
37:350 - loss: 0.7109087868279047, acc: 0.48857142857142855
37:400 - loss: 0.7092613519211731, acc: 0.495
37:450 - loss: 0.7057228869635774, acc: 0.5088888888888888
37:500 - loss: 0.7021457016797473, acc: 0.518
37:550 - loss: 0.6998010595644683, acc: 0.5236363636363637
epoch: 37 - train loss: 0.6946481040810284, train acc: 0.5308641975308642
epoch: 37 - test loss: 1.6558484749160847, test acc: 0.463157

48:350 - loss: 0.6003172181738955, acc: 0.7028571428571428
48:400 - loss: 0.5873342143304776, acc: 0.7125
48:450 - loss: 0.5930284361425381, acc: 0.7022222222222222
48:500 - loss: 0.5885370514420438, acc: 0.702
48:550 - loss: 0.5781107280927805, acc: 0.7
epoch: 48 - train loss: 0.5722905040869994, train acc: 0.7054673721340388
epoch: 48 - test loss: 1.7094089076699264, test acc: 0.5105263157894737
49:50 - loss: 0.5881008464833698, acc: 0.78
49:100 - loss: 0.5558975905252046, acc: 0.77
49:150 - loss: 0.557953758688539, acc: 0.7733333333333333
49:200 - loss: 0.5683528411804802, acc: 0.76
49:250 - loss: 0.5747035179457198, acc: 0.74
49:300 - loss: 0.5862895970803156, acc: 0.7166666666666667
49:350 - loss: 0.5862765252574, acc: 0.7114285714285714
49:400 - loss: 0.5816830447003042, acc: 0.705
49:450 - loss: 0.5754372337565681, acc: 0.7022222222222222
49:500 - loss: 0.5653453963377285, acc: 0.712
49:550 - loss: 0.5631372932779359, acc: 0.7090909090909091
epoch: 49 - train loss: 0.56018563108

60:250 - loss: 0.32123315043222883, acc: 0.88
60:300 - loss: 0.32762987995487397, acc: 0.8666666666666667
60:350 - loss: 0.3261254626140917, acc: 0.8657142857142858
60:400 - loss: 0.31964412405847276, acc: 0.8725
60:450 - loss: 0.3104597997177522, acc: 0.8777777777777778
60:500 - loss: 0.31532099597582325, acc: 0.88
