<center> 

# **Cough sound analysis using Deep Learning methods for COVID-19 diagnosis**

### Division of Information Transmission Systems and Material Technology

## Christina Ntourma
</center>




# Imports

In [None]:
import csv
import matplotlib
import numpy as np
import csv
import json
import os

import sys
import pickle
import time
import scipy

#for loading and visualizing audio files
import librosa
import librosa.display
import pywt
import statistics

import warnings
from os import listdir
from os.path import isfile, join
import random

import pandas as pd
import subprocess
from pathlib import Path
from matplotlib import pyplot as plt
from math import sqrt

In [None]:
import os
import sys
import subprocess
import numpy as np
import glob
import json
import pandas as pd
import glob
import shutil

# Data Preprocessing

## Import and extract dataset

In [None]:
!git clone https://github.com/iiscleap/Coswara-Data.git

In [None]:
cd "Coswara-Data"

/content/Coswara-Data


In [None]:
!git reset --hard 365767c

HEAD is now at 365767c Edited combined_data


In [None]:
cd ..

/content


In [None]:
'''
This script creates a folder "Extracted_data" inside which it extracts all the wav files in the directories date-wise
Link to github: https://github.com/iiscleap/Coswara-Data/blob/365767c9b3d52df5d002b363a7ead2e743ef61f4/extract_data.py
'''

coswara_data_dir = "Coswara-Data"
extracted_data_dir = os.path.join(coswara_data_dir, 'Extracted_data')

if not os.path.exists(coswara_data_dir):
    # Raising a plain string is itself a TypeError in Python 3, which would hide this message.
    raise FileNotFoundError("Check the Coswara dataset directory!")

# Creates the Extracted_data folder if it doesn't exist
os.makedirs(extracted_data_dir, exist_ok=True)

# Date-named folders (202*) already present in Extracted_data vs. available in the dataset checkout
dirs_extracted = set(map(os.path.basename, glob.glob('{}/202*'.format(extracted_data_dir))))
dirs_all = set(map(os.path.basename, glob.glob('{}/202*'.format(coswara_data_dir))))

# Only extract what is missing; sorted so the processing order is reproducible
dirs_to_extract = sorted(dirs_all - dirs_extracted)

failed_dirs = []
for d in dirs_to_extract:
    # Each date folder holds a split archive (*.tar.gz.*): concatenate the parts and untar the stream
    p = subprocess.Popen('cat {}/{}/*.tar.gz.* |tar -xvz -C {}/'.format(coswara_data_dir, d, extracted_data_dir), shell=True)
    # The pipeline's exit status is the one of its last command (tar)
    if p.wait() != 0:
        failed_dirs.append(d)

if failed_dirs:
    print("Extraction failed for: {}".format(", ".join(failed_dirs)))
else:
    print("Extraction process complete!")

Extraction process complete!


## Convert audio to image

In [None]:
# Possible values: cough-heavy, cough-shallow
# Declares the type of coughs being converted
# Read as a global by convert_Coswara (destination folder and file names are built from it)
# and passed to create_labels_csv at the end of the notebook.
cough_type = "cough-shallow"

In [None]:
def audio_to_mel(signal, sr, fig_name, destination_folder):
    """Render the mel spectrogram of an audio signal and save it as an image.

    Args:
        signal: Audio samples, passed to ``librosa.feature.melspectrogram`` as ``y``.
        sr: Sampling rate of ``signal``.
        fig_name: Name of the image file; when it has no extension matplotlib
            appends its default format.
        destination_folder: Folder the image is written to.
    """
    fig, ax = plt.subplots()
    try:
        S = librosa.feature.melspectrogram(y=signal, sr=sr, n_fft=512, hop_length=64, win_length=128)
        S_dB = librosa.power_to_db(S, ref=np.max)
        # NOTE(review): fmax=20000 is used for display only; the spectrogram itself is
        # computed with librosa's default fmax - confirm this mismatch is intended.
        librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=20000, ax=ax)
        # Let the axes fill the whole figure so the saved image has no margins
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon='false'` was dropped: the string is truthy (so it never disabled the
        # frame) and savefig no longer accepts a `frameon` argument.
        fig.savefig(os.path.join(destination_folder, fig_name), dpi=300)
    finally:
        # One figure is created per audio file; close it so figures do not pile up in memory
        plt.close(fig)

In [None]:
def audio_to_hcqt(signal, sr, fig_name, destination_folder):
    """Render the hybrid constant-Q transform of an audio signal and save it as an image.

    Args:
        signal: Audio samples, passed to ``librosa.hybrid_cqt``.
        sr: Sampling rate of ``signal``.
        fig_name: Name of the image file; when it has no extension matplotlib
            appends its default format.
        destination_folder: Folder the image is written to.
    """
    fig, ax = plt.subplots()
    try:
        C = np.abs(librosa.hybrid_cqt(signal, sr=sr))
        librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                                 sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)

        # Let the axes fill the whole figure so the saved image has no margins
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon='false'` was dropped: the string is truthy (so it never disabled the
        # frame) and savefig no longer accepts a `frameon` argument.
        fig.savefig(os.path.join(destination_folder, fig_name), dpi=300)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so figures do not pile up in memory
        plt.close(fig)

In [None]:
def audio_to_stft(signal, sr, fig_name, destination_folder):
    """Render the short-time Fourier transform of an audio signal and save it as an image.

    Args:
        signal: Audio samples, passed to ``librosa.stft``.
        sr: Sampling rate of ``signal``.
        fig_name: Name of the image file; when it has no extension matplotlib
            appends its default format.
        destination_folder: Folder the image is written to.
    """
    fig, ax = plt.subplots()
    try:
        X = librosa.stft(signal)
        Xdb = librosa.amplitude_to_db(abs(X))

        # Draw on the axes created above explicitly instead of relying on the "current axes"
        librosa.display.specshow(Xdb, sr=sr, cmap='magma', x_axis='time', y_axis='log', ax=ax)
        # Let the axes fill the whole figure so the saved image has no margins
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon='false'` was dropped: the string is truthy (so it never disabled the
        # frame) and savefig no longer accepts a `frameon` argument.
        fig.savefig(os.path.join(destination_folder, fig_name), dpi=300)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so figures do not pile up in memory
        plt.close(fig)

In [None]:
def audio_to_cqt(signal, sr, fig_name, destination_folder):
    """Render the constant-Q transform of an audio signal and save it as an image.

    Args:
        signal: Audio samples, passed to ``librosa.cqt``.
        sr: Sampling rate of ``signal``.
        fig_name: Name of the image file; when it has no extension matplotlib
            appends its default format.
        destination_folder: Folder the image is written to.
    """
    fig, ax = plt.subplots()
    try:
        C = np.abs(librosa.cqt(signal, sr=sr))
        librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                                 sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)

        # Let the axes fill the whole figure so the saved image has no margins
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon='false'` was dropped: the string is truthy (so it never disabled the
        # frame) and savefig no longer accepts a `frameon` argument.
        # NOTE(review): dpi is 1000 here but 300 in the other audio_to_* converters - confirm intended.
        fig.savefig(os.path.join(destination_folder, fig_name), dpi=1000)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so figures do not pile up in memory
        plt.close(fig)

In [None]:
## Convert to image

def convert_audio(path_to_folder, path_to_destination_folder, files_to_convert, transform_name, dataset):
    """Convert the selected audio files of a folder into spectrogram images.

    Args:
        path_to_folder: Folder containing the audio files.
        path_to_destination_folder: Existing folder where the images are saved.
        files_to_convert: File names (e.g. "unique_id.wav") that should be converted.
        transform_name: Transformation to apply: "mel", "hcqt", "stft" or "cqt".
        dataset: Name of the dataset; currently unused.

    Raises:
        ValueError: If ``transform_name`` is not one of the supported transformations.
    """
    # Previously audio_to_hcqt was always used, whatever transform_name was requested
    converters = {
        "mel": audio_to_mel,
        "hcqt": audio_to_hcqt,
        "stft": audio_to_stft,
        "cqt": audio_to_cqt,
    }
    if transform_name not in converters:
        raise ValueError("Unknown transformation '{}', expected one of: {}".format(
            transform_name, ", ".join(sorted(converters))))
    audio_to_image = converters[transform_name]

    warnings.filterwarnings('ignore') # ignore warnings for reading audio files

    # Files from user with ID pWFMPFBys1bBerYz5Si4Gb8brGn1 did not contain cough sounds and
    # created problems during conversion, hence they are not converted
    excluded_files = {
        'pWFMPFBys1bBerYz5Si4Gb8brGn1_cough-shallow.wav',
        'pWFMPFBys1bBerYz5Si4Gb8brGn1_cough-heavy.wav',
    }
    wanted_files = set(files_to_convert)
    # List the destination once instead of once per file; kept up to date below
    existing_images = set(os.listdir(path_to_destination_folder))

    for file in os.listdir(path_to_folder):
        if file not in wanted_files:
            continue

        print(file)
        image_name = file.split(".")[0] ## assuming file names is of the form: "unique_id.wav"
        image_file = image_name + ".png"

        # Decide whether to skip before paying the cost of loading the audio
        if file in excluded_files or image_file in existing_images:
            continue

        audio_signal, sr = librosa.load(os.path.join(path_to_folder, file), sr=None)
        if len(audio_signal) == 0:
            continue

        audio_to_image(audio_signal, sr, image_name, path_to_destination_folder)
        existing_images.add(image_file)

In [None]:
############## Needed only when running in google colab to delete unnecessary folders in order to create more space in the available disk
# Remove the date-named folders of 2020 first, then those of 2021
for year_pattern in ('Coswara-Data/2020*', 'Coswara-Data/2021*'):
    for folder in glob.glob(year_pattern):
        shutil.rmtree(folder)

In [None]:
### Create two folders, Cough_Heavy and Cough_Shallow, that hold all cough-heavy and cough-shallow samples respectively

path_to_cough_heavy = 'Cough_Heavy'
path_to_cough_shallow = 'Cough_Shallow'
dir_cough_heavy_exists = os.path.exists(path_to_cough_heavy)
dir_cough_shallow_exists = os.path.exists(path_to_cough_shallow)

## create each directory if it does not exist
# (the Cough_Shallow folder used to be created based on whether Cough_Heavy existed)
os.makedirs(path_to_cough_heavy, exist_ok=True)
os.makedirs(path_to_cough_shallow, exist_ok=True)

initial_path = "Coswara-Data/Extracted_data/"
for folder in os.listdir(initial_path):
    folder_path = os.path.join(initial_path, folder)
    if not os.path.isdir(folder_path):
        continue  # stray file next to the extracted folders

    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for file in os.listdir(subfolder_path):
            recording = file.split(".")[0]
            if recording == "cough-heavy":
                target_dir = path_to_cough_heavy
            elif recording == "cough-shallow":
                target_dir = path_to_cough_shallow
            else:
                continue

            # Prefix the copy with the subfolder name so that files from different
            # subfolders do not overwrite each other ("<subfolder>_cough-heavy.wav")
            shutil.copyfile(os.path.join(subfolder_path, file),
                            os.path.join(target_dir, subfolder + "_" + file))

In [None]:
# Connect to Google Drive (only available when running in Google Colab).
# The drive is mounted at /content/drive; later cells read and write images
# through the relative path "drive/MyDrive/...".
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def convert_Coswara(transform_name):
    """Convert the Coswara cough recordings of the selected cough type into images.

    The cough type is read from the global ``cough_type``. Images are saved to
    "drive/MyDrive/<transform_name>_<cough_type>/"; samples whose image already
    exists there are not converted again.

    Args:
        transform_name: Name of the transformation, forwarded to ``convert_audio``
            and used to name the destination folder.

    Raises:
        ValueError: If the global ``cough_type`` is not "cough-heavy" or "cough-shallow".
    """
    # Path to the sound files. This used to be "Cough_Shallow/" for every cough type,
    # so no file name ever matched when cough_type was "cough-heavy".
    source_folders = {"cough-heavy": "Cough_Heavy/", "cough-shallow": "Cough_Shallow/"}
    if cough_type not in source_folders:
        raise ValueError("Cough type is not valid: {}".format(cough_type))
    path_to_folder = source_folders[cough_type]

    csv_path = "Coswara-Data/combined_data.csv" # path to "combine_data.csv" which is provided with the dataset and contains metadata information about the samples
    dataset = "Coswara"
    path_to_destination_folder = "drive/MyDrive/" + transform_name + "_" + cough_type + "/" # path to the destination folder where the created images will be saved

    ## create directory if it does not exist
    os.makedirs(path_to_destination_folder, exist_ok=True)

    # List the destination once instead of once per csv row
    existing_images = set(os.listdir(path_to_destination_folder))

    files_to_convert = []
    with open(csv_path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            if not row:
                continue  # blank line in the csv
            sample_id = row[0]
            # 'file_name' marks the header row
            if sample_id != 'file_name' and sample_id + "_" + cough_type + ".png" not in existing_images:
                files_to_convert.append(sample_id + "_" + cough_type + ".wav")

    convert_audio(path_to_folder, path_to_destination_folder, files_to_convert, transform_name, dataset)

In [None]:
# Repeat for each different transformation
convert_Coswara("hcqt")

# Create csv files for classification

In [None]:
# Metadata file provided with the Coswara dataset.
# Read as a global by find_COVID_labeled and create_labels_csv.
csv_path = "Coswara-Data/combined_data.csv"

In [None]:
def find_COVID_labeled():
    """Split the converted samples into COVID / non-COVID lists per cough type.

    Reads the metadata file at the global ``csv_path`` (first column: sample id,
    fourth column: covid status) and keeps only the samples whose image exists in
    the corresponding folder. A sample is COVID when its status is
    'positive_mild', 'positive_moderate' or 'positive_asymptomatic'; any other
    non-empty status is non-COVID.

    Returns:
        Tuple of four lists of sample ids: (cough_heavy_COVID,
        cough_heavy_non_COVID, cough_shallow_COVID, cough_shallow_non_COVID).
    """
    path_to_cough_heavy_images = "drive/MyDrive/hcqt_cough-heavy/" # path to the folder containing the cough heavy converted samples using one of the 4 transformations
    path_to_cough_shallow_images = "drive/MyDrive/hcqt_cough-shallow/" # path to the folder containing the cough shallow converted samples using one of the 4 transformations

    positive_statuses = {'positive_mild', 'positive_moderate', 'positive_asymptomatic'}

    def list_images(folder):
        # A folder is missing when that cough type has not been converted yet
        if not os.path.isdir(folder):
            print("Image folder not found, no samples taken from: " + folder)
            return set()
        return set(os.listdir(folder))

    # List each folder once instead of once per csv row
    cough_heavy_images = list_images(path_to_cough_heavy_images)
    cough_shallow_images = list_images(path_to_cough_shallow_images)

    cough_heavy_COVID = []
    cough_heavy_non_COVID = []
    cough_shallow_COVID = []
    cough_shallow_non_COVID = []

    with open(csv_path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        for row in csv_reader:
            if len(row) < 4:
                continue  # blank or truncated line
            sample_id, covid_status = row[0], row[3]
            # Skip the header row and the samples without a status
            if covid_status == '' or covid_status == 'covid_status':
                continue
            is_positive = covid_status in positive_statuses

            # The two cough types are checked independently: a sample with a heavy-cough
            # image used to be left out of the shallow-cough lists.
            if sample_id + "_cough-heavy.png" in cough_heavy_images:
                if is_positive:
                    cough_heavy_COVID.append(sample_id)
                else:
                    cough_heavy_non_COVID.append(sample_id)

            if sample_id + "_cough-shallow.png" in cough_shallow_images:
                if is_positive:
                    cough_shallow_COVID.append(sample_id)
                else:
                    cough_shallow_non_COVID.append(sample_id)

    return cough_heavy_COVID, cough_heavy_non_COVID, cough_shallow_COVID, cough_shallow_non_COVID

In [None]:
def create_labels_csv(cough_type):
    """Write the labels csv ('file_name', 'label') for one cough type.

    Every row of the metadata file at ``csv_path`` whose id was classified by
    ``find_COVID_labeled`` produces one line: the image file name and 'pos' for a
    COVID sample or 'neg' for a non-COVID sample. The output file is
    'Coswara_cough_heavy_labels.csv' or 'Coswara_cough_shallow_labels.csv'.

    Args:
        cough_type: "cough-heavy" or "cough-shallow"; for any other value a
            message is printed and no file is written.
    """
    heavy_pos, heavy_neg, shallow_pos, shallow_neg = find_COVID_labeled()

    # Pick the output file, the id lists and the image suffix of the requested cough type
    if cough_type == "cough-heavy":
        labels_file = 'Coswara_cough_heavy_labels.csv'
        positive_ids, negative_ids = heavy_pos, heavy_neg
        image_suffix = "_cough-heavy.png"
    elif cough_type == "cough-shallow":
        labels_file = 'Coswara_cough_shallow_labels.csv'
        positive_ids, negative_ids = shallow_pos, shallow_neg
        image_suffix = "_cough-shallow.png"
    else:
        print("Cough type is not valid")
        return

    with open(labels_file, mode='w', newline='') as labels_handle:
        writer = csv.writer(labels_handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['file_name', 'label'])

        # Walk the metadata again so the labels keep the order of the csv rows
        with open(csv_path, mode='r') as metadata_handle:
            for record in csv.reader(metadata_handle, delimiter=','):
                sample_id = record[0]
                if sample_id in positive_ids:
                    writer.writerow([sample_id + image_suffix, 'pos'])
                elif sample_id in negative_ids:
                    writer.writerow([sample_id + image_suffix, 'neg'])

In [None]:
create_labels_csv(cough_type)