In [1]:
import pickle
import os
import pandas as pd
import librosa
import librosa.display
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import tqdm.notebook as tqdm
from torchsummary import summary
import torch.optim as optim

# Model_definition and Dataloaders

In [9]:
class FreeSound_Sense(torch.nn.Module):
    
    def __init__(self):
        super(FreeSound_Sense, self).__init__()
        self.conv1d_1_16_9 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=9)
        self.conv1d_16_16_9 = nn.Conv1d(in_channels=16, out_channels=16, kernel_size=9)
        self.conv1d_16_32_3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv1d_32_32_3_1 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3)
        self.conv1d_32_32_3_2 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3)
        self.conv1d_32_32_3_3 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3)
        self.conv1d_32_64_3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv1d_64_64_3 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3)
        
        self.maxpool_16 = nn.MaxPool1d(16)
        self.maxpool_8 = nn.MaxPool1d(8)
        self.maxpool_4 = nn.MaxPool1d(4)
        
        self.relu = nn.ReLU()
        self.sigm = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.1)
        
        self.fc_64_64 = nn.Linear(in_features=64, out_features=64)
        self.fc_64_512 = nn.Linear(in_features=64, out_features=512)
        self.fc_512_42 = nn.Linear(in_features=512, out_features=42)
        
        
    def forward(self, x):
        
        # First Block
        x = self.conv1d_1_16_9(x)
        x = self.relu(x)
        x = self.conv1d_16_16_9(x)
        x = self.relu(x)
        x = self.maxpool_16(x)
        x = self.dropout(x)
        
        # Second Block
        x = self.conv1d_16_32_3(x)
        x = self.relu(x)
        x = self.conv1d_32_32_3_1(x)
        x = self.relu(x)
        x = self.maxpool_4(x)
        x = self.dropout(x)
        
        # Third Block
        x = self.conv1d_32_32_3_2(x)
        x = self.relu(x)
        x = self.conv1d_32_32_3_3(x)
        x = self.relu(x)
        x = self.maxpool_4(x)
        x = self.dropout(x)
        
        # Fourth Block
        x = self.conv1d_32_64_3(x)
        x = self.relu(x)
        x = self.conv1d_64_64_3(x)
        x = self.relu(x)
        x = self.maxpool_4(x)
        x = torch.mean(x, 2)

        # Final Layers
        x = torch.flatten(x, start_dim=1)
        x = self.fc_64_64(x)
        x = self.relu(x)
        x = self.fc_64_512(x)
        x = self.relu(x)
        x = self.fc_512_42(x)
        x = self.softmax(x)
        
        return x


In [10]:
def audio_norm(data):
    max_data = np.max(data)
    min_data = np.min(data)
    data = (data-min_data)/(max_data-min_data+1e-6)
    return data-0.5

def load_audio_file(file_path, input_length=4096):
    
    if file_path not in Loaded_data:
        data = librosa.core.load(file_path, sr=None) 
        data = librosa.core.resample(data[0], data[1], 11025)
        Loaded_data[file_path] = data
    else:
        data = Loaded_data[file_path]
    
    if len(data)>input_length:
        st_idx = int((len(data)/2)-(input_length/2))
        data = data[st_idx:st_idx+input_length]
        
    else:
        T = np.zeros(input_length, dtype=float)
        T[:len(data)] = data
        data = T
       
    data = audio_norm(data)
    return np.array([data])

class CoughDataset(torch.utils.data.Dataset):
    def __init__(self, cough_data_path, non_cough_data_path):
        
        # Load Data
        self.input_length = 4096
        self.cough_data = pickle.load(open(cough_data_path, 'rb'))
        self.keys = list(self.cough_data.keys())
        self.non_cough_df = pd.read_csv(non_cough_data_path)
        

        # shuffle Data
        self.non_cough_data = pd.read_csv(non_cough_data_path)
        self.non_cough_data = self.non_cough_data[self.non_cough_data.label!='Cough'][:len(self.keys)]
        
        self.non_cough_idx = 0
        self.cough_idx = 0
        self.max_len = len(self.keys)
        
        self.neg = True

    def __len__(self):
        return len(self.keys)*2
    
    def __getitem__(self, idx):
        
        # rnadomly select positive of negetaive instannce
        if self.neg:
            self.neg = False
            non_cough_file_path = 'data/freesound-audio-tagging/audio_train/' + list(self.non_cough_df[self.non_cough_idx: self.non_cough_idx+1].fname)[0]
            self.non_cough_idx = 0 if self.non_cough_idx+1>=self.max_len else self.non_cough_idx+1
            return load_audio_file(non_cough_file_path), np.array([0], float)
        
        else:
            self.neg = True
            cough_data_base = np.zeros(self.input_length)
            cough_data_instance = self.cough_data[self.keys[self.cough_idx]]
            self.cough_idx = 0 if self.cough_idx+1>=self.max_len else self.cough_idx+1
            
            instance_length = len(cough_data_instance)
            
            if instance_length/self.input_length < 1:
                start_idx =  int(self.input_length/2 - int(instance_length/2))
                cough_data_base[start_idx:instance_length+start_idx] = cough_data_instance
            
            else:
                start_idx = int(instance_length/2)-int(self.input_length/2)
                cough_data_base = cough_data_instance[start_idx: start_idx+self.input_length]
            
            return np.array([audio_norm(cough_data_base)]), np.array([1], float)
                

# Loading Data

In [11]:
# Load Non-cough samples at 11025Hz
Loaded_data = {}
if os.path.exists('data/freesound-audio-tagging/free_sound_11025.pkl'):
    Loaded_data = pickle.load(open('data/freesound-audio-tagging/free_sound_11025.pkl', 'rb'))

# non Cough csv for the filenames
data_non_cough_instance_csv_path = 'data/freesound-audio-tagging/train.csv'
    
# COugh instance_data file path
data_cough_instances_path = 'data/collected/data/collected_data_11025.pkl'


Cough_Data = CoughDataset(data_cough_instances_path, data_non_cough_instance_csv_path)
mini_batch_size = 64
Cough_Dataloader = DataLoader(Cough_Data, batch_size=mini_batch_size, shuffle=False)

# Loading Weights

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Model = FreeSound_Sense()
Model.fc_512_42 = nn.Linear(in_features=512, out_features=1)
Model.softmax = nn.Sigmoid()

Model.load_state_dict(torch.load("model_weights/Cough_Data_1D_conv_smaller_43_epoch_85Acc_sigm.stDict"))
Model.float()
Model.to(device)
summary(Model, (1, 4096))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 16, 4088]             160
              ReLU-2             [-1, 16, 4088]               0
            Conv1d-3             [-1, 16, 4080]           2,320
              ReLU-4             [-1, 16, 4080]               0
         MaxPool1d-5              [-1, 16, 255]               0
           Dropout-6              [-1, 16, 255]               0
            Conv1d-7              [-1, 32, 253]           1,568
              ReLU-8              [-1, 32, 253]               0
            Conv1d-9              [-1, 32, 251]           3,104
             ReLU-10              [-1, 32, 251]               0
        MaxPool1d-11               [-1, 32, 62]               0
          Dropout-12               [-1, 32, 62]               0
           Conv1d-13               [-1, 32, 60]           3,104
             ReLU-14               [-1,

# Evaluation

In [13]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Model.to(device)
Model.eval()

data_progress_bar = tqdm.tqdm(Cough_Dataloader)
positives=0
for data, targets in data_progress_bar:
    data = data.float().to(device)
    targets = targets.numpy().reshape(-1)==1
    
    outputs = Model(data)
    outputs = outputs.detach().cpu().numpy().reshape(-1)>0.5
    
    positives += np.sum(targets==outputs)

print('Valid Acc ', str(positives*100/(len(Cough_Dataloader)*mini_batch_size)))

HBox(children=(IntProgress(value=0, max=228), HTML(value='')))


Valid Acc  85.92379385964912


In [14]:
from thop import profile
macs, params = profile(Model, inputs=(torch.randn(1, 1, 4096).to(device), ))
macs, params

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv1d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool1d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[WARN] Cannot find rule for <class 'torch.nn.modules.activation.Sigmoid'>. Treat it as zero Macs and zero Params.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[WARN] Cannot find rule for <class '__main__.FreeSound_Sense'>. Treat it as zero Macs and zero Params.


(11897152.0, 69873.0)