In [34]:
import torch as pt
from torch import nn
import torchaudio as ta
from torchsummary import summary
from torch.utils.data import Dataset,DataLoader

import numpy as numpy

import os
import subprocess
import tqdm as tqdm
import json

In [30]:
# Prefer the GPU when PyTorch reports one; otherwise run everything on the CPU.
device='cuda' if pt.cuda.is_available() else 'cpu'

In [31]:
# Show which device string ('cuda' or 'cpu') was selected above.
print(device)

cuda


In [21]:
# Rename every *.webm file in the dataset folder so that it carries a .wav
# extension.
# NOTE(review): os.rename only changes the file NAME; the bytes on disk are not
# transcoded, so the renamed files are presumably still WebM data. The ffmpeg
# cells in this notebook look for a 'webm' suffix and will no longer find these
# files once this cell has run -- confirm this cell is really wanted.
# The path is built with os.path.join because the literal 'data\public_dataset'
# contains the invalid escape sequence '\p' and only resolves on Windows.
path=os.path.join('data','public_dataset')
for file in os.listdir(path):
    if file.endswith('webm'):
        name,ext=os.path.splitext(file)
        new_name=name+'.wav'
        os.rename(os.path.join(path,file),os.path.join(path,new_name))

In [13]:
def convert_webm_to_wav(file):
    """Transcode ``file`` to a sibling ``.wav`` file by invoking ffmpeg.

    The output path is the input path with its extension replaced by
    ``.wav``. The extension is removed with ``os.path.splitext`` rather than
    by chopping a fixed five characters, so inputs whose extension is not
    exactly ``.webm`` still get a sensible output name.

    Args:
        file: Path of the audio file to convert.

    Returns:
        None. The ffmpeg exit status is not inspected, so a failed conversion
        does not raise.
    """
    stem,_=os.path.splitext(file)
    command=['ffmpeg','-i',file,stem+'.wav']
    subprocess.run(command)

In [19]:
# Transcode every .webm recording in the raw dataset folder into a .wav file
# under 'test_df' using ffmpeg.
# The source path is built with os.path.join because the literal
# 'data\public_dataset' contains the invalid escape sequence '\p' and only
# resolves on Windows.
path=os.path.join('data','public_dataset')
out_dir='test_df'
# ffmpeg does not create missing output directories, so make sure it exists.
os.makedirs(out_dir,exist_ok=True)
for file in os.listdir(path):
    if file.endswith('.webm'):
        # Drop the extension by name instead of slicing off a fixed 5 characters.
        stem=os.path.splitext(file)[0]
        subprocess.run(['ffmpeg','-i',os.path.join(path,file),os.path.join(out_dir,stem+'.wav')])

In [2]:
# Gather the names of all converted recordings (entries ending in 'wav') in
# the output folder and report how many there are.
path='test_df'
name_set={entry for entry in os.listdir(path) if entry.endswith('wav')}
print(len(name_set))

25984


In [24]:
# Pick one recording and load the JSON metadata that shares its base name.
# Which element comes first is arbitrary: a set has no defined order.
# NOTE(review): relies on `path` and `name_set` from an earlier cell; `path` is
# presumably still 'test_df' here -- confirm when running top to bottom.
t=os.path.join(path,list(name_set)[0])
# os.path.join avoids the invalid '\p' escape of the literal 'data\public_dataset'.
label_path=os.path.join('data','public_dataset')
# Strip directory and extension by name rather than by hard-coded character
# counts, so this keeps working if the audio folder name changes length.
fname=os.path.splitext(os.path.basename(t))[0]
l=os.path.join(label_path,fname+'.json')
print(fname)
with open(l,'r') as f:
    content=json.load(f)
print(content)

2d71f678-6687-4a2f-bfed-e40fd23b9752
{'datetime': '2020-04-14T18:46:15.579555+00:00', 'cough_detected': '0.8779', 'latitude': '37.8', 'longitude': '29.1', 'age': '8', 'gender': 'male', 'respiratory_condition': 'False', 'fever_muscle_pain': 'False', 'status': 'healthy'}


In [27]:
# Show the file name that list(name_set)[0] resolves to in this kernel session.
print(list(name_set)[0])

2d71f678-6687-4a2f-bfed-e40fd23b9752.wav


In [6]:
# Load the example recording selected earlier (`t` comes from a previous cell);
# the call yields the audio tensor and its sample rate.
signal,sr=ta.load(t)

In [16]:
# Show the 'cough_detected' field of the metadata loaded earlier; in the JSON
# shown above it is stored as a string such as '0.8779'.
print(content['cough_detected'])

0.9902


In [66]:
class CoughVidDataset(Dataset):
    """Dataset pairing each .wav recording with its 'cough_detected' score.

    Every item is loaded from disk, moved to ``device``, resampled to
    ``target_sample_rate``, averaged down to a single channel, cut or
    zero-padded on the right to exactly ``num_samples`` samples and finally
    passed through ``transformation``.

    Args:
        audio_path: Directory containing the ``.wav`` files.
        label_path: Directory containing one ``<name>.json`` per recording,
            each with a ``'cough_detected'`` entry.
        transformation: Module applied to the fixed-length waveform (it is
            moved to ``device`` here).
        target_sample_rate: Sample rate every waveform is converted to.
        num_samples: Exact number of samples each waveform is cut/padded to.
        device: Device on which the per-item processing runs.
    """

    def __init__(self,audio_path,label_path,transformation,target_sample_rate,num_samples,device):
        # Sorted so that index -> file is the same on every run; the previous
        # list(set(...)) order could change between kernel restarts.
        self.datalist=sorted(file for file in os.listdir(audio_path) if file.endswith('wav'))
        self.audio_path=audio_path
        self.label_path=label_path
        self.device=device
        self.transformation=transformation.to(device)
        self.target_sample_rate=target_sample_rate
        self.num_samples=num_samples

    def __len__(self):
        """Return the number of recordings found in ``audio_path``."""
        return len(self.datalist)

    def __getitem__(self,idx):
        """Return ``(transformed_waveform, label)`` for recording ``idx``.

        ``label`` is the recording's 'cough_detected' value converted to float.
        """
        file_name=self.datalist[idx]
        audio_file_path=os.path.join(self.audio_path,file_name)
        label_file_path=os.path.join(self.label_path,os.path.splitext(file_name)[0]+'.json')
        # The with-block closes the file; no explicit close() is needed.
        with open(label_file_path,'r') as f:
            content=json.load(f)
        label=content['cough_detected']
        waveform,sample_rate=ta.load(audio_file_path) # (num_channels,samples)
        waveform=waveform.to(self.device)
        waveform=self._resample(waveform,sample_rate)
        waveform=self._mix_down(waveform) # (num_channels,samples) -> (1,samples)
        waveform=self._cut(waveform)
        waveform=self._right_pad(waveform)
        waveform=self.transformation(waveform)
        return waveform,float(label)

    def _resample(self,waveform,sample_rate):
        """Convert ``waveform`` from ``sample_rate`` to the target sample rate."""
        if sample_rate==self.target_sample_rate:
            # Already at the target rate: nothing to do.
            return waveform
        # Keep the resampler on the same device as the waveform so the two
        # never end up on different devices.
        resampler=ta.transforms.Resample(sample_rate,self.target_sample_rate).to(waveform.device)
        return resampler(waveform)

    def _mix_down(self,waveform):
        """Average over the channel dimension, keeping it as size 1."""
        return pt.mean(waveform,dim=0,keepdim=True)

    def _cut(self,waveform):
        """Drop samples beyond ``num_samples``."""
        if waveform.shape[1]>self.num_samples:
            waveform=waveform[:,:self.num_samples]
        return waveform

    def _right_pad(self,waveform):
        """Zero-pad on the right up to ``num_samples`` samples."""
        signal_length=waveform.shape[1]
        if signal_length<self.num_samples:
            num_padding=self.num_samples-signal_length
            # (left, right) padding for the last dimension only.
            last_dim_padding=(0,num_padding)
            waveform=pt.nn.functional.pad(waveform,last_dim_padding)
        return waveform



In [35]:
class CNNNetwork(nn.Module):
    """Four conv stages followed by two linear layers and a sigmoid.

    Each stage is Conv2d(kernel 3, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
    ``linear1`` expects 128*5*4 flattened features, which is what the conv
    stack yields for a (1, 64, 44) input. There is no activation between
    ``linear1`` and ``linear2``. The forward pass returns one value per
    sample, squashed by the sigmoid.
    """

    @staticmethod
    def _conv_stage(channels_in,channels_out):
        """Build one Conv2d -> ReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=channels_in,out_channels=channels_out,kernel_size=3,stride=1,padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def __init__(self):
        super().__init__()
        # Stages are created in order so parameter names and initialisation
        # order stay conv1..conv4, linear1, linear2.
        self.conv1=self._conv_stage(1,16)
        self.conv2=self._conv_stage(16,32)
        self.conv3=self._conv_stage(32,64)
        self.conv4=self._conv_stage(64,128)
        self.flatten=nn.Flatten()
        self.linear1=nn.Linear(in_features=128*5*4,out_features=128)
        self.linear2=nn.Linear(in_features=128,out_features=1)
        self.output=nn.Sigmoid()

    def forward(self,input_data):
        """Map a batch of inputs to sigmoid-activated scores of shape (batch, 1)."""
        features=input_data
        for stage in (self.conv1,self.conv2,self.conv3,self.conv4):
            features=stage(features)
        hidden=self.linear1(self.flatten(features))
        return self.output(self.linear2(hidden))


In [80]:
def train_single_epoch(model,dataloader,loss_fn,optimizer,device):
    """Run one optimisation pass over every batch in ``dataloader``.

    For each ``(waveform, label)`` batch the model output is compared with the
    labels (reshaped to a column so they line up with the model's
    ``(batch, 1)`` output), the loss is back-propagated and the optimizer
    takes one step. The loss of the final batch is printed afterwards.

    Args:
        model: Module being trained.
        dataloader: Iterable yielding ``(waveform, label)`` batches.
        loss_fn: Callable taking ``(predictions, targets)``.
        optimizer: Optimizer over ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        None.
    """
    # Stays None when the dataloader yields nothing, so the report below
    # cannot hit an unbound variable.
    loss=None
    for waveform,label in tqdm.tqdm(dataloader):
        waveform=waveform.to(device)
        label=label.to(device)
        # forward pass and loss
        predictions=model(waveform)
        loss=loss_fn(predictions.float(),label.float().view(-1,1))
        # back-propagate the loss and update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if loss is None:
        print("loss:n/a (dataloader yielded no batches)")
        return
    print(f"loss:{loss.item()}")

In [48]:
def train(model,dataloader,loss_fn,optimizer,device,epochs):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    Prints a 1-based epoch banner before each pass, delegates the pass to
    ``train_single_epoch``, prints a separator line after it, and announces
    completion once all epochs are done.
    """
    separator='-------------------------------------------'
    for epoch_number,_ in enumerate(tqdm.tqdm(range(epochs)),start=1):
        print(f"epoch:{epoch_number}")
        train_single_epoch(model,dataloader,loss_fn,optimizer,device)
        print(separator)
    print('Finished Training')

In [82]:
# Paths and hyper-parameters shared by the dataset, dataloader and training cells.
audio_path='test_df'                                # folder the ffmpeg cell writes .wav files to
# os.path.join avoids the invalid '\p' escape of the literal 'data\public_dataset'
# and also resolves on non-Windows systems.
label_path=os.path.join('data','public_dataset')    # folder holding the per-recording .json files
SAMPLE_RATE=22050                                   # sample rate every clip is resampled to
NUM_SAMPLES=22050                                   # clips are cut/padded to this length (1 s at SAMPLE_RATE)
BATCH_SIZE=128
EPOCHS=1

In [67]:
# Mel-spectrogram front end that the dataset applies to every fixed-length clip.
melspectogram=ta.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=64,
)
# Dataset over the converted recordings and their JSON labels.
coughvid_dataset=CoughVidDataset(
    audio_path=audio_path,
    label_path=label_path,
    transformation=melspectogram,
    target_sample_rate=SAMPLE_RATE,
    num_samples=NUM_SAMPLES,
    device=device,
)

In [68]:
# Shuffled mini-batches of BATCH_SIZE items drawn from the dataset.
# NOTE(review): the dataset moves tensors to `device` inside __getitem__, so
# adding worker processes here would presumably need that moved out first.
train_dataloader=DataLoader(coughvid_dataset,batch_size=BATCH_SIZE,shuffle=True)

In [72]:
# Sanity check: pull a single batch and show the label and input shapes,
# then stop without iterating over the rest of the data.
for waveform,label in train_dataloader:
    print(label.shape,waveform.shape)
    break

torch.Size([128]) torch.Size([128, 1, 64, 44])


In [29]:
# Sanity check on a single item: the transformed signal's shape is what the
# network's input size has to match.
signal,label=coughvid_dataset[0]
print(signal.shape)

torch.Size([1, 64, 44])


In [86]:
# Build the network on the device selected at the top of the notebook instead
# of hard-coding CUDA, so this cell also runs on CPU-only machines.
model=CNNNetwork().to(device)
# Trailing semicolon stops the notebook from echoing the call's return value;
# the saved output showed the table twice, presumably once printed by
# summary() itself and once as the cell's result.
summary(model,(1,64,44));

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 16, 33, 23]          --
|    └─Conv2d: 2-1                       [-1, 16, 66, 46]          160
|    └─ReLU: 2-2                         [-1, 16, 66, 46]          --
|    └─MaxPool2d: 2-3                    [-1, 16, 33, 23]          --
├─Sequential: 1-2                        [-1, 32, 17, 12]          --
|    └─Conv2d: 2-4                       [-1, 32, 35, 25]          4,640
|    └─ReLU: 2-5                         [-1, 32, 35, 25]          --
|    └─MaxPool2d: 2-6                    [-1, 32, 17, 12]          --
├─Sequential: 1-3                        [-1, 64, 9, 7]            --
|    └─Conv2d: 2-7                       [-1, 64, 19, 14]          18,496
|    └─ReLU: 2-8                         [-1, 64, 19, 14]          --
|    └─MaxPool2d: 2-9                    [-1, 64, 9, 7]            --
├─Sequential: 1-4                        [-1, 128, 5, 4]           --
|    └─

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 16, 33, 23]          --
|    └─Conv2d: 2-1                       [-1, 16, 66, 46]          160
|    └─ReLU: 2-2                         [-1, 16, 66, 46]          --
|    └─MaxPool2d: 2-3                    [-1, 16, 33, 23]          --
├─Sequential: 1-2                        [-1, 32, 17, 12]          --
|    └─Conv2d: 2-4                       [-1, 32, 35, 25]          4,640
|    └─ReLU: 2-5                         [-1, 32, 35, 25]          --
|    └─MaxPool2d: 2-6                    [-1, 32, 17, 12]          --
├─Sequential: 1-3                        [-1, 64, 9, 7]            --
|    └─Conv2d: 2-7                       [-1, 64, 19, 14]          18,496
|    └─ReLU: 2-8                         [-1, 64, 19, 14]          --
|    └─MaxPool2d: 2-9                    [-1, 64, 9, 7]            --
├─Sequential: 1-4                        [-1, 128, 5, 4]           --
|    └─

In [87]:
# Mean-squared error between the model's output and the 'cough_detected' score.
loss_fn=nn.MSELoss()
# SGD with momentum over all model parameters.
optimizer=pt.optim.SGD(
    params=model.parameters(),
    lr=0.1,
    momentum=0.9,
)

In [88]:
# Run the full training loop for EPOCHS epochs; progress bars and the
# per-epoch loss line are printed below.
train(model,train_dataloader,loss_fn,optimizer,device,EPOCHS)

  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/203 [00:00<?, ?it/s][Aepoch:1

  0%|          | 1/203 [00:11<38:19, 11.39s/it][A
  1%|          | 2/203 [00:21<37:12, 11.11s/it][A
  1%|▏         | 3/203 [00:32<36:23, 10.92s/it][A
  2%|▏         | 4/203 [00:41<34:55, 10.53s/it][A
  2%|▏         | 5/203 [00:52<34:37, 10.49s/it][A
  3%|▎         | 6/203 [01:01<33:29, 10.20s/it][A
  3%|▎         | 7/203 [01:12<33:45, 10.33s/it][A
  4%|▍         | 8/203 [01:22<33:05, 10.18s/it][A
  4%|▍         | 9/203 [01:32<32:33, 10.07s/it][A
  5%|▍         | 10/203 [01:42<32:31, 10.11s/it][A
  5%|▌         | 11/203 [01:52<32:25, 10.13s/it][A
  6%|▌         | 12/203 [02:03<32:37, 10.25s/it][A
  6%|▋         | 13/203 [02:13<32:22, 10.23s/it][A
  7%|▋         | 14/203 [02:23<32:09, 10.21s/it][A
  7%|▋         | 15/203 [02:33<31:34, 10.08s/it][A
  8%|▊         | 16/203 [02:42<31:01,  9.96s/it][A
  8%|▊         | 17/203 [02:52<30:42,  9.91s/it][A
  9%|▉         | 18/203 [03:03<31:04