In [None]:
# Report the GPU(s) visible to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Wed Jun  7 14:21:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   60C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!unzip /content/drive/MyDrive/coughvid_cnn/balanced_dataset.zip > /dev/null

In [None]:
# Directory the .wav recordings are read from (passed to CoughDataset as audio_path).
cough_audio_data_path = "/content/balanced_dataset"

In [None]:
# Parquet file holding one row per recording: its uuid and its integer status label.
features_path = "/content/drive/MyDrive/coughvid_cnn/metadata_cnn.parquet"

In [None]:
import pandas as pd
# Load the label table and preview its first rows.
df = pd.read_parquet(features_path)
df.head()

Unnamed: 0,uuid,status
0,001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f,1
1,001d8e33-a4af-4edb-98ba-b03f891d9a6c,0
2,0029d048-898a-4c70-89c7-0815cdcf7391,2
3,002d28bc-7806-4dfb-9c9b-afa8cb623cac,1
4,00357712-dd5a-4c0a-90a4-39f1f4b9d5fd,0


In [None]:
# Check column dtypes and confirm there are no missing values in the label table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6469 entries, 0 to 6468
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   uuid    6469 non-null   object
 1   status  6469 non-null   int32 
dtypes: int32(1), object(1)
memory usage: 75.9+ KB


In [None]:
# Gather every status label into a list and display how many labels there are.
status_column = df["status"]
status_labels_list = list(status_column)
len(status_labels_list)

6469

In [None]:
!pip3 install torch torchvision torchaudio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import os

In [None]:
# Define the dataset class
class CoughDataset(Dataset):
    """Map-style dataset yielding ``(transformed_signal, label)`` pairs.

    Each clip is loaded from ``<audio_path>/<id>.wav``, resampled to
    ``target_sample_rate``, averaged down to a single channel, and truncated or
    zero-padded on the right to exactly ``num_samples`` samples before
    ``transformation`` is applied.

    Args:
        audio_path: Directory containing the ``.wav`` files.
        labels_path: Parquet file read with ``pd.read_parquet``; column 0
            supplies the file stem and column 1 the label.
        transformation: Callable applied to the fixed-length waveform.
        target_sample_rate: Sample rate every clip is converted to.
        num_samples: Length (in samples) every waveform is cut/padded to.
    """

    def __init__(self, audio_path, labels_path, transformation, target_sample_rate, num_samples):
        self.audio_path = audio_path
        self.labels_df = pd.read_parquet(labels_path)
        self.transformation = transformation
        self.target_sample_rate = target_sample_rate
        self.num_samples = num_samples

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels_df)

    def __getitem__(self, index):
        """Load and preprocess the clip at ``index``; return ``(features, label)``."""
        wav_path = self._get_audio_path(index)
        label = self._get_audio_label(index)

        waveform, source_rate = torchaudio.load(wav_path)
        waveform = self._resample_if_necessary(waveform, source_rate)

        # The remaining steps only need the waveform; they run in this fixed order:
        # mono mix-down -> truncate -> right-pad -> feature transformation.
        remaining_steps = (
            self._mix_down_if_necessary,
            self._cut_if_necessary,
            self._right_pad_if_necessary,
            self.transformation,
        )
        for step in remaining_steps:
            waveform = step(waveform)

        return waveform, label

    def _get_audio_path(self, index):
        """Build the ``.wav`` path for row ``index`` from the first table column."""
        stem = self.labels_df.iloc[index, 0]
        return os.path.join(self.audio_path, f"{stem}.wav")

    def _get_audio_label(self, index):
        """Return the label stored in the second table column for row ``index``."""
        return self.labels_df.iloc[index, 1]

    def _resample_if_necessary(self, signal, sr):
        """Resample ``signal`` from ``sr`` to the target rate unless it already matches."""
        if sr == self.target_sample_rate:
            return signal
        to_target_rate = torchaudio.transforms.Resample(sr, self.target_sample_rate)
        return to_target_rate(signal)

    def _mix_down_if_necessary(self, signal):
        """Average all channels into one when ``signal`` has more than one channel."""
        channel_count = signal.shape[0]
        if channel_count <= 1:
            return signal
        return signal.mean(dim=0, keepdim=True)

    def _cut_if_necessary(self, signal):
        """Keep only the first ``num_samples`` samples of an over-long ``signal``."""
        if signal.shape[1] <= self.num_samples:
            return signal
        return signal[:, :self.num_samples]

    def _right_pad_if_necessary(self, signal):
        """Zero-pad a too-short ``signal`` on the right up to ``num_samples`` samples."""
        shortfall = self.num_samples - signal.shape[1]
        if shortfall <= 0:
            return signal
        # (left, right) padding of the last dimension.
        return torch.nn.functional.pad(signal, (0, shortfall))


In [None]:
# Inputs for CoughDataset. Despite its name, AUDIO_LABELS_DIRECTORY is the path
# of the parquet label file, not a directory.
AUDIO_DIRECTORY = cough_audio_data_path
AUDIO_LABELS_DIRECTORY = features_path

# Every clip is resampled to SAMPLE_RATE Hz and cut/padded to NUM_SAMPLES samples,
# i.e. exactly one second of audio per example.
SAMPLE_RATE = 22050
NUM_SAMPLES = 22050

# Device selection is disabled in this cell; the active version lives in a later cell.
#if torch.cuda.is_available():
#    device = "cuda"
#else:
#    device = "cpu"
#print(f"Using device {device}")


In [None]:
# Feature extractor handed to the dataset: a 64-band mel spectrogram.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE, n_fft=1024, hop_length=512, n_mels=64
)

# Build the dataset over the full label table.
coughdata = CoughDataset(
    audio_path=AUDIO_DIRECTORY,
    labels_path=AUDIO_LABELS_DIRECTORY,
    transformation=mel_spectrogram,
    target_sample_rate=SAMPLE_RATE,
    num_samples=NUM_SAMPLES,
)
print(f"The number of files are equal to : {len(coughdata)}")

# Sanity check: pull the first three samples and show the label of each.
healthy_audio, healthy_label = coughdata[0]
cough_audio, covid_label = coughdata[1]
symptomatic_audio, symptomatic_label = coughdata[2]

print(f"Healthy_label corresponds to label number: {healthy_label}")
print(f"Covid_label corresponds to label number: {covid_label}")
print(f"Symptomatic_label corresponds to label number: {symptomatic_label}")

The number of files are equal to : 6469
Healthy_label corresponds to label number: 1
Covid_label corresponds to label number: 0
Symptomatic_label corresponds to label number: 2


In [None]:
from torch import nn
from torchsummary import summary

class CNNNetwork(nn.Module):
    """Four-block 2-D CNN classifier for single-channel spectrogram inputs.

    Each block is ``Conv2d(kernel_size=3, stride=1, padding=2) -> ReLU ->
    MaxPool2d(2)``, taking the channel count 1 -> 16 -> 32 -> 64 -> 128. The
    flattened feature map feeds a linear layer with 3 outputs (one per class).

    The linear layer is sized for a ``128 x 5 x 4`` feature map, which is what
    the conv stack produces for a ``(1, 64, 44)`` input.

    ``forward`` returns raw, unnormalised logits. ``nn.CrossEntropyLoss``
    applies log-softmax internally, so feeding it already-softmaxed values
    would normalise twice and flatten the gradients. Apply ``self.softmax``
    to the output when class probabilities are needed; ``argmax`` over logits
    and over probabilities selects the same class.
    """

    def __init__(self):
        super().__init__()
        # 4 conv blocks / flatten / linear
        self.conv1 = self._conv_block(1, 16)
        self.conv2 = self._conv_block(16, 32)
        self.conv3 = self._conv_block(32, 64)
        self.conv4 = self._conv_block(64, 128)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(128 * 5 * 4, 3)
        # Kept as a convenience for turning logits into probabilities at
        # inference time; intentionally NOT applied inside forward().
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv2d -> ReLU -> MaxPool2d block (same layout for every stage)."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, input_data):
        """Return class logits of shape ``(batch, 3)`` for ``input_data``.

        Args:
            input_data: Float tensor of shape ``(batch, 1, H, W)``; ``H`` and
                ``W`` must reduce to a ``5 x 4`` map after the four blocks
                (e.g. ``64 x 44``).
        """
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        logits = self.linear(x)
        return logits


In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

Using device cuda


In [None]:
# Instantiate the network on the selected device and print a per-layer summary for
# one spectrogram-shaped input: 1 channel x 64 mel bands (n_mels=64) x 44 frames.
# NOTE(review): 44 frames presumably follows from 22050 samples at hop_length=512 — confirm.
cnn = CNNNetwork().to(device)
summary(cnn, (1, 64, 44))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 66, 46]             160
              ReLU-2           [-1, 16, 66, 46]               0
         MaxPool2d-3           [-1, 16, 33, 23]               0
            Conv2d-4           [-1, 32, 35, 25]           4,640
              ReLU-5           [-1, 32, 35, 25]               0
         MaxPool2d-6           [-1, 32, 17, 12]               0
            Conv2d-7           [-1, 64, 19, 14]          18,496
              ReLU-8           [-1, 64, 19, 14]               0
         MaxPool2d-9             [-1, 64, 9, 7]               0
           Conv2d-10           [-1, 128, 11, 9]          73,856
             ReLU-11           [-1, 128, 11, 9]               0
        MaxPool2d-12            [-1, 128, 5, 4]               0
          Flatten-13                 [-1, 2560]               0
           Linear-14                   

In [None]:
from torch.utils.data import DataLoader
def create_data_loader(train_data, batch_size, shuffle=True):
    """Wrap ``train_data`` in a ``DataLoader`` for training.

    Args:
        train_data: Dataset (or any indexable with ``__len__``) to batch.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle the samples every epoch. Defaults to ``True`` so
            that mini-batches are not tied to the on-disk row order; pass
            ``False`` for a deterministic, sequential pass (e.g. evaluation).

    Returns:
        The configured ``DataLoader``.
    """
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)
    return train_dataloader

In [None]:
def train_single_epoch(model, data_loader, loss_fn, optimiser, device):
    """Run one optimisation pass over ``data_loader`` and print the last batch's loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        data_loader: Iterable of ``(inputs, target)`` batches.
        loss_fn: Criterion called as ``loss_fn(prediction, target)``.
        optimiser: Optimiser bound to ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    If ``data_loader`` yields no batches, nothing is trained and a notice is
    printed instead of a loss value.
    """
    # An earlier inference call may have left the model in eval mode; make
    # sure training-time behaviour is active for this pass.
    model.train()

    loss = None
    for inputs, target in data_loader:
        inputs = inputs.to(device)
        # Class-index criteria such as nn.CrossEntropyLoss require int64 targets.
        target = target.to(device=device, dtype=torch.long)

        # calculate loss
        prediction = model(inputs)
        loss = loss_fn(prediction, target)

        # backpropagate error and update weights
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    if loss is None:
        # Empty loader: there is no batch loss to report.
        print("loss: n/a (data_loader yielded no batches)")
        return

    print(f"loss: {loss.item()}")

In [None]:
def train(model, data_loader, loss_fn, optimiser, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``data_loader``.

    Prints a 1-based epoch header before each pass, delegates the pass to
    ``train_single_epoch``, prints a separator afterwards, and finishes with
    a completion message.
    """
    separator = "---------------------------"
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        train_single_epoch(model, data_loader, loss_fn, optimiser, device)
        print(separator)
    print("Finished training")

In [None]:
# --- Training configuration -------------------------------------------------
AUDIO_DIRECTORY = cough_audio_data_path
AUDIO_LABELS_DIRECTORY = features_path

SAMPLE_RATE = 22050
NUM_SAMPLES = 22050
BATCH_SIZE = 128
EPOCHS = 20
LEARNING_RATE = 0.003

# Use the GPU when CUDA is available; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

# Feature extractor applied to every fixed-length waveform.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE, n_fft=1024, hop_length=512, n_mels=64
)

# Dataset over the full label table, then the batched loader used for training.
coughdata = CoughDataset(
    audio_path=AUDIO_DIRECTORY,
    labels_path=AUDIO_LABELS_DIRECTORY,
    transformation=mel_spectrogram,
    target_sample_rate=SAMPLE_RATE,
    num_samples=NUM_SAMPLES,
)
train_dataloader = create_data_loader(coughdata, BATCH_SIZE)


Using device cuda


In [None]:
# Construct a fresh, untrained model on the selected device (this rebinds `cnn`)
# and print its layer structure.
cnn = CNNNetwork().to(device)
print(cnn)

CNNNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=2560, out_features=3, bias=True)
  (softmax): Softmax(dim=1)
)


In [None]:
# Initialise the loss function and the optimiser: cross-entropy over the class
# outputs, and Adam over all model parameters at LEARNING_RATE.
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(cnn.parameters(), lr = LEARNING_RATE)

In [None]:
# Train the model for EPOCHS passes over train_dataloader; the loss of the last
# batch of each epoch is printed.
train(cnn, train_dataloader, loss_fn, optimiser, device, EPOCHS)


Epoch 1
loss: 1.0546574592590332
---------------------------
Epoch 2
loss: 1.0648887157440186
---------------------------
Epoch 3
loss: 0.9603337645530701
---------------------------
Epoch 4
loss: 0.9742236137390137
---------------------------
Epoch 5
loss: 0.9610645174980164
---------------------------
Epoch 6
loss: 0.9526959657669067
---------------------------
Epoch 7
loss: 0.9606091976165771
---------------------------
Epoch 8


KeyboardInterrupt: ignored

In [None]:
# Save only the learned parameters (the state_dict) to cnnnet.pth in the current
# working directory.
torch.save(cnn.state_dict(), "cnnnet.pth")
print("Trained custom cnn model saved at cnnnet.pth")

Trained custom cnn model saved at cnnnet.pth


In [None]:
# Human-readable name for each integer status label / model output index.
class_mapping = {
    0 : "covid",
    1 : "healthy",
    2 : "symptomatic"
}

In [None]:
def predict(model, input, target, class_mapping):
    """Run ``model`` on ``input`` without gradients and return ``(predicted, expected)``.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        input: Batch passed to ``model``; only the first row of the output is used.
        target: Ground-truth label, returned unchanged as ``expected``.
        class_mapping: Accepted for interface compatibility but currently
            unused; raw indices are returned rather than class names.

    Returns:
        ``(predicted, expected)`` where ``predicted`` is a 0-dim tensor holding
        the argmax index over the first output row.
    """
    model.eval()
    with torch.no_grad():
        # Output has one row of per-class scores per input sample, e.g.
        # Tensor (1, n_classes) -> [ [0.1, 0.3, 0.6] ]; take the best class of row 0.
        first_row_scores = model(input)[0]
        best_index = first_row_scores.argmax(0)
    return best_index, target

In [None]:
import numpy as np
# Restore the trained weights. map_location remaps the stored tensors onto the
# device selected for this session, so a checkpoint written on a GPU runtime
# also loads on a CPU-only one.
state_dict = torch.load("cnnnet.pth", map_location=device)
cnn.load_state_dict(state_dict)

# Same feature extractor and dataset configuration as used for training.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=64
)

coughdata = CoughDataset(
    audio_path=AUDIO_DIRECTORY,
    labels_path=AUDIO_LABELS_DIRECTORY,
    transformation=mel_spectrogram,
    target_sample_rate=SAMPLE_RATE,
    num_samples=NUM_SAMPLES,
)

# Fetch the sample once: every coughdata[i] access reloads and re-processes
# the audio file, so indexing twice would do the work twice.
sample, label = coughdata[0]

# Add the batch dimension the model expects and move everything to the device.
input = sample.unsqueeze(0).to(device)
target = torch.tensor(label, dtype=torch.long, device=device)

# make an inference
predicted, expected = predict(cnn, input, target, class_mapping)
# .item() prints the plain class indices instead of tensor reprs.
print(f"Predicted: '{predicted.item()}', expected: '{expected.item()}'")

Predicted: '2', expected: '1'
