<center> 

# **Cough sound analysis using Deep Learning methods for COVID-19 diagnosis**

### Division of Information Transmission Systems and Material Technology

## Christina Ntourma
</center>

# Data preprocessing

In [None]:
import json
import os
import shutil
from collections import Counter
import csv

In [None]:
# connect to google drive
# Colab-only step: mounts the drive at /content/drive.
# NOTE(review): the data paths below are relative ("drive/MyDrive/..."), so this
# presumably relies on /content being the working directory - confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root of the shared data set; every other source path is built from it.
path_to_folder = "drive/MyDrive/KDD_paper_data/" # path to data folder as shared by Cambridge University
# Android cough folders, one per category.
path_to_covidandroidnocough = path_to_folder + "covidandroidnocough/cough/"
path_to_covidandroidwithcough = path_to_folder + "covidandroidwithcough/cough/"
path_to_healthyandroidnosymp = path_to_folder + "healthyandroidnosymp/cough/"

# Web cough folders, one per category.
# NOTE: these three names are reassigned in the "Web files" section further
# down, where they point to the category roots without the "cough/" suffix.
path_to_covidwebnocough = path_to_folder + "covidwebnocough/cough/"
path_to_covidwebwithcough = path_to_folder + "covidwebwithcough/cough/"
path_to_healthywebnosymp = path_to_folder + "healthywebnosymp/cough/"

Move files from initial folder to new one <br>
Move only the files that exist in the "files.json" file and are cough files

In [None]:
# Collect the Android cough recordings listed in files.json, per category.
# The file is only needed for parsing, so it is closed before the processing starts.
with open(path_to_folder + 'files.json') as json_file:
    data = json.load(json_file)

# Bookkeeping lists shared by the three Android categories (kept index-aligned).
cough_files = []       # full file names
cough_ids = []         # "<user id>_<timestamp>"
cough_id_number = []   # "<timestamp>" part of the name
cough_id_user_id = []  # "<user id>" part of the name

covid_android_no_cough_cough_files = []
covid_android_with_cough_cough_files = []
healthy_android_no_symp_cough_files = []

# files.json key -> list that receives the cough file names of that category.
android_category_lists = {
    "covidandroidnocough": covid_android_no_cough_cough_files,
    "covidandroidwithcough": covid_android_with_cough_cough_files,
    "healthyandroidnosymp": healthy_android_no_symp_cough_files,
}

for i in data.keys():
    if i not in android_category_lists:
        continue
    # Each category holds a list of lists of file names.
    for lists in data[i]:
        for f in lists:
            name_fields = f.split("_")
            if name_fields[0] != 'cough':
                continue
            # Same fields with the extension stripped: [type, user id, timestamp].
            stem_fields = f.split(".")[0].split("_")
            android_category_lists[i].append(f)
            cough_files.append(f)
            cough_ids.append(stem_fields[1] + "_" + stem_fields[2])
            cough_id_user_id.append(name_fields[1])
            cough_id_number.append(stem_fields[2])

print("Covid android no cough:", len(covid_android_no_cough_cough_files), "samples initially")
print("\nCovid android with cough:", len(covid_android_with_cough_cough_files), "samples initially")
print("\nHealthy android no symp:", len(healthy_android_no_symp_cough_files), "samples initially")

Covid android no cough: 64 samples initially

Covid android with cough: 46 samples initially

Healthy android no symp: 137 samples initially


## Android files

### Returning users

Find the returning users and delete the samples that were given by the same user but in less than 24 hours after the previous sample.

In [None]:
import time
import datetime

In [None]:
# Number of cough samples contributed by each user id.
freq_cough = Counter(cough_id_user_id)

# Users with more than one sample, in first-seen order.
users_with_many_samples = [
    user_id for user_id, n_samples in freq_cough.items() if n_samples > 1
]
cnt_cough = len(users_with_many_samples)

# Users that contributed exactly one sample.
number_of_android_users_cough = sum(
    1 for n_samples in freq_cough.values() if n_samples == 1
)

print(len(users_with_many_samples), "users gave more than one sample")
print("User ids are:", users_with_many_samples)

30 users gave more than one sample
User ids are: ['0OWWZZ7muU', '2DDMc0SESm', '8PmvbJ4U3o', 'CNz7PwFNQz', 'f75Ucc81Xk', 'JL7oF7Y33d', 'm1KiFqWh9K', 'pIs0irifZL', 'plVETYLnwj', 'wI0AtSFKGI', 'crxRiqIPHi', 'RZPXvUslJL', 'zv02Ygabqh', '1fxxVyop57', '9me0RMtVww', 'b9Tpe9jOFB', 'BaSLS8Z02S', 'BbY4BJdm24', 'Cb2EI7vzqD', 'dnwjNTd0fc', 'IxBgm3RtB3', 'iXqxFSQFhW', 'mYtwaX7NGZ', 'Pf3lZHDYTV', 'PWUKaMIaFV', 'r57l51XPwo', 't1ufxgEal7', 'VN8n8tjozE', 'WxGj36J04J', 'Yarb5WSBeD']


In [None]:
# Entries currently present in each Android cough folder:
# coughs_1 = covid / no cough, coughs_2 = covid / with cough, coughs_3 = healthy / no symptoms.
coughs_1 = os.listdir(path_to_covidandroidnocough)
coughs_2 = os.listdir(path_to_covidandroidwithcough)
coughs_3 = os.listdir(path_to_healthyandroidnosymp)

In [None]:
def save_dict_to_json(dictionary, path_to_destination, destination_file_name):
    """Write `dictionary` as JSON to `<path_to_destination><destination_file_name>.json`.

    The two path arguments are joined by plain string concatenation, so
    `path_to_destination` has to end with a path separator already. The file
    is written with an indentation of one space per nesting level.
    """
    target_path = path_to_destination + destination_file_name + '.json'
    with open(target_path, 'w') as out_file:
        json.dump(dictionary, out_file, indent=1)

In [None]:
# Create timestamp dict for breath and cough
def create_timestamp_dict(users_with_many_samples, folders, sample_type):
    """Group the recording timestamps of the given users by user id.

    The earlier version also opened and parsed 'android_breath2cough.json'
    without ever using its content; that read has been dropped, so the function
    no longer depends on the data folder being reachable.

    Args:
        users_with_many_samples: user ids to collect timestamps for; each one
            becomes a key of the result.
        folders: one collection of file names per folder. Names are expected to
            look like "<type>_<user id>_<timestamp>.<ext>".
        sample_type: only 'cough' is handled; for any other value every user
            keeps an empty list.

    Returns:
        dict mapping each user id to the list of its timestamps (kept as
        strings, in the order the files were encountered).
    """
    timestamp_dict = {k: [] for k in users_with_many_samples}
    if sample_type != 'cough':
        return timestamp_dict

    for folder in folders:
        for sample in folder:
            name_fields = sample.split("_")
            stem_fields = sample.split(".")[0].split("_")
            # Skip stray entries that do not follow the naming pattern instead
            # of failing with an IndexError.
            if len(name_fields) < 2 or len(stem_fields) < 3:
                continue
            user_id = name_fields[1]
            if user_id in timestamp_dict:
                timestamp_dict[user_id].append(stem_fields[2])
    return timestamp_dict

In [None]:
# timestamp unix to normal format

def convert_unix_timestamp(timestamp_dict):
    """Turn millisecond Unix timestamps into naive UTC `datetime` objects.

    Args:
        timestamp_dict: maps a key (user id) to a list of timestamps in
            milliseconds since the epoch; each value must be accepted by int().

    Returns:
        dict with the same keys, each mapped to the list of converted
        datetimes in the same order as the input timestamps.
    """
    epoch = datetime.datetime(1970, 1, 1)
    return {
        user_id: [
            epoch + datetime.timedelta(milliseconds=int(raw_timestamp))
            for raw_timestamp in raw_timestamps
        ]
        for user_id, raw_timestamps in timestamp_dict.items()
    }

In [None]:
def save_diff_to_json(date_time_dict, timestamp_dict, sample_type):
    """Summarise, per user, the gaps between consecutive samples and save them as JSON.

    For every user in `date_time_dict` the result holds:
      * 'timestamp' - the user's timestamps as ints, in ascending order;
      * 'datetime'  - the user's datetimes, sorted and rendered with str();
      * 'days_diff' - whole days (timedelta.days) between consecutive sorted
                      datetimes; 0 therefore means "less than 24 hours apart";
      * 'folder'    - for each timestamp, the label of the Android folder
                      listing that contains the matching file, or "unknown"
                      when none does (keeps this list aligned with 'timestamp').

    The summary is written to "./date_diff_<sample_type>.json" through
    save_dict_to_json, printed, and returned. The folder lookup uses the
    module-level listings coughs_1, coughs_2 and coughs_3.

    Unlike the earlier version, the input dictionaries are not modified:
    sorting and int conversion are done on copies.

    Args:
        date_time_dict: user id -> list of datetime objects.
        timestamp_dict: user id -> list of timestamps (anything int() accepts).
        sample_type: e.g. "cough"; "breath" files use the plural prefix "breaths_".

    Returns:
        dict mapping each user id to the nested summary described above.
    """
    # The label strings are kept exactly as before (including their spelling)
    # because they end up in the saved JSON file.
    folder_labels = (
        (coughs_1, "covid android no cough"),
        (coughs_2, "covid android with cough"),
        (coughs_3, "healhty android no symptop"),
    )
    if sample_type == "breath":
        prefix = sample_type + "s_"
    else:
        prefix = sample_type + "_"

    final_dict = {}
    for user_id, date_times in date_time_dict.items():
        # Sorted copies: the caller's lists stay as they were passed in.
        ordered_dates = sorted(date_times)
        ordered_timestamps = sorted(int(ts) for ts in timestamp_dict[user_id])

        # Differences between neighbouring dates, reduced to whole days.
        gaps = [later - earlier for earlier, later in zip(ordered_dates[:-1], ordered_dates[1:])]

        folder_list = []
        for ts in ordered_timestamps:
            file_name = prefix + user_id + "_" + str(ts) + ".wav"
            for listing, label in folder_labels:
                if file_name in listing:
                    folder_list.append(label)
                    break
            else:
                folder_list.append("unknown")

        final_dict[user_id] = {
            'timestamp': ordered_timestamps,
            'datetime': [str(moment) for moment in ordered_dates],
            'days_diff': [gap.days for gap in gaps],
            'folder': folder_list,
        }
    save_dict_to_json(final_dict, "./", "date_diff_" + sample_type)
    print(final_dict)
    return final_dict

In [None]:
def print_results(final_dict, sample_type):
    """Print a per-user report of the sample gaps and return the gaps by user.

    Args:
        final_dict: user id -> dict with the keys 'timestamp', 'datetime',
            'days_diff' and 'folder' (as produced by save_diff_to_json).
        sample_type: label used in the printed text, e.g. "cough".

    Returns:
        dict mapping each user id to its 'days_diff' list.
    """
    days_dict = {}
    for user_id, summary in final_dict.items():
        gaps = summary['days_diff']
        # A gap of 0 whole days means the two samples are less than 24 hours apart.
        zeros = sum(1 for gap in gaps if gap == 0)
        greater_than_0 = sum(1 for gap in gaps if gap > 0)
        days_dict[user_id] = gaps
        print("User with ID:", user_id, "gave", len(summary['timestamp']), sample_type, "samples")
        print(summary['datetime'])
        print("The time distance between the samples in ascending chronological order is", gaps, "days")
        print(zeros, "time distances between samples were less than 24 hours")
        # Previously read "greater less than 24 hours", which contradicted itself.
        print(greater_than_0, "time distances between samples were 24 hours or more")
        print(summary['timestamp'])
        print(summary['folder'])
        print()
    return days_dict

In [None]:
# The three Android folder listings that are searched for returning users' files.
cough_folders = [coughs_1, coughs_2, coughs_3]

# user id -> raw timestamps of that user's cough files.
timestamp_cough_dict = create_timestamp_dict(users_with_many_samples, cough_folders, "cough")

# Same timestamps converted to datetime objects.
date_time_cough_dict = convert_unix_timestamp(timestamp_cough_dict)

# Per-user gaps between consecutive samples; also saved to ./date_diff_cough.json and printed.
final_cough_dict = save_diff_to_json(date_time_cough_dict, timestamp_cough_dict, "cough")

{'0OWWZZ7muU': {'timestamp': [1588233167879, 1588572683666, 1588837383148], 'datetime': ['2020-04-30 07:52:47.879000', '2020-05-04 06:11:23.666000', '2020-05-07 07:43:03.148000'], 'days_diff': [3, 3], 'folder': ['covid android no cough', 'covid android no cough', 'covid android no cough']}, '2DDMc0SESm': {'timestamp': [1586853741017, 1587018711166, 1587299604576, 1587454317988, 1588143205284, 1588670910844, 1589180456392], 'datetime': ['2020-04-14 08:42:21.017000', '2020-04-16 06:31:51.166000', '2020-04-19 12:33:24.576000', '2020-04-21 07:31:57.988000', '2020-04-29 06:53:25.284000', '2020-05-05 09:28:30.844000', '2020-05-11 07:00:56.392000'], 'days_diff': [1, 3, 1, 7, 6, 5], 'folder': ['covid android with cough', 'covid android with cough', 'covid android with cough', 'covid android with cough', 'covid android with cough', 'covid android with cough', 'covid android no cough']}, '8PmvbJ4U3o': {'timestamp': [1587892244740, 1587970252151, 1588054955527, 1588144326527, 1588229636769, 15884

In [None]:
# Print the per-user report and keep each user's list of day gaps.
print("Results for cough samples")
days_cough_dict = print_results(final_cough_dict, "cough")

Results for cough samples
User with ID: 0OWWZZ7muU gave 3 cough samples
['2020-04-30 07:52:47.879000', '2020-05-04 06:11:23.666000', '2020-05-07 07:43:03.148000']
The time distance between the samples in ascending chronological order is [3, 3] days
0 time distances between samples were less than 24 hours
2 time distances between samples greater less than 24 hours
[1588233167879, 1588572683666, 1588837383148]
['covid android no cough', 'covid android no cough', 'covid android no cough']

User with ID: 2DDMc0SESm gave 7 cough samples
['2020-04-14 08:42:21.017000', '2020-04-16 06:31:51.166000', '2020-04-19 12:33:24.576000', '2020-04-21 07:31:57.988000', '2020-04-29 06:53:25.284000', '2020-05-05 09:28:30.844000', '2020-05-11 07:00:56.392000']
The time distance between the samples in ascending chronological order is [1, 3, 1, 7, 6, 5] days
0 time distances between samples were less than 24 hours
6 time distances between samples greater less than 24 hours
[1586853741017, 1587018711166, 15872

**Removing process**

Only users who have at least two consecutive samples less than 24 hours apart need processing. For each such pair, the sample that causes the time distance to be less than 24 hours is discarded.

In [None]:
# Split the returning users by whether any two consecutive samples are less
# than one whole day apart (a 0 entry in 'days_diff').
users_with_good_time_distances = []
users_that_need_preprocessing = []
for user_id, summary in final_cough_dict.items():
    has_short_gap = any(gap == 0 for gap in summary['days_diff'])
    if has_short_gap:
        users_that_need_preprocessing.append(user_id)
    else:
        users_with_good_time_distances.append(user_id)

print("User ids of users with good time distances for all of their samples:", users_with_good_time_distances)
print(len(users_with_good_time_distances), "users in total")
print("\nUser ids of users with bad time distances for all of their samples:", users_that_need_preprocessing)
print(len(users_that_need_preprocessing), "users in total")

User ids of users with good time distances for all of their samples: ['0OWWZZ7muU', '2DDMc0SESm', 'f75Ucc81Xk', 'JL7oF7Y33d', 'plVETYLnwj', 'wI0AtSFKGI', '1fxxVyop57', '9me0RMtVww', 'b9Tpe9jOFB', 'dnwjNTd0fc', 'IxBgm3RtB3', 'mYtwaX7NGZ', 'Pf3lZHDYTV', 'r57l51XPwo', 't1ufxgEal7', 'VN8n8tjozE', 'WxGj36J04J']
17 users in total

User ids of users with bad time distances for all of their samples: ['8PmvbJ4U3o', 'CNz7PwFNQz', 'm1KiFqWh9K', 'pIs0irifZL', 'crxRiqIPHi', 'RZPXvUslJL', 'zv02Ygabqh', 'BaSLS8Z02S', 'BbY4BJdm24', 'Cb2EI7vzqD', 'iXqxFSQFhW', 'PWUKaMIaFV', 'Yarb5WSBeD']
13 users in total


In [None]:
# One entry per returning user, so the dict size is the number of such users.
print(len(final_cough_dict), "users gave more than one samples")

30 users gave more than one samples


From 286 users, 30 users gave more than one sample. Of these 30 users, 17 always left at least 24 hours between consecutive samples, while the remaining 13 gave at least two samples less than 24 hours apart.
`users_that_need_preprocessing` --> contains the IDs of the users that gave samples with a time distance smaller than 24 hours. Some of these samples will not be used, because samples given by the same user within 24 hours do not provide any extra information.

In [None]:
# Hard-coded names of the recordings to leave out because they are too close in
# time to another sample of the same user.
# NOTE(review): presumably compiled by hand from the report printed above - confirm.
# Nothing is removed from disk here, despite the "Deleted" wording of the message:
# the cells below only skip these names when building the per-category sample lists.
coughs_to_be_ignored = ["cough_8PmvbJ4U3o_1587970252151.wav","cough_8PmvbJ4U3o_1588229636769.wav","cough_8PmvbJ4U3o_1588749596379.wav","cough_CNz7PwFNQz_1587622517669.wav","cough_CNz7PwFNQz_1587795233563.wav","cough_CNz7PwFNQz_1588140467941.wav","cough_CNz7PwFNQz_1588314464492.wav","cough_CNz7PwFNQz_1588658896379.wav","cough_CNz7PwFNQz_1589263941654.wav","cough_CNz7PwFNQz_1589436648061.wav","cough_m1KiFqWh9K_1588686054731.wav","cough_pIs0irifZL_1588424717259.wav","cough_crxRiqIPHi_1588687422136.wav","cough_RZPXvUslJL_1589697402295.wav","cough_zv02Ygabqh_1587291131315.wav","cough_zv02Ygabqh_1587712162451.wav","cough_zv02Ygabqh_1588064017866.wav","cough_BaSLS8Z02S_1589173559338.wav","cough_BbY4BJdm24_1587186094542.wav","cough_Cb2EI7vzqD_1587107534334.wav","cough_iXqxFSQFhW_1587457638246.wav","cough_iXqxFSQFhW_1587539831818.wav","cough_PWUKaMIaFV_1587272539398.wav","cough_PWUKaMIaFV_1587450421284.wav","cough_PWUKaMIaFV_1587877311116.wav","cough_PWUKaMIaFV_1588065464618.wav","cough_PWUKaMIaFV_1588224156757.wav","cough_PWUKaMIaFV_1588514456570.wav","cough_PWUKaMIaFV_1588747237057.wav","cough_PWUKaMIaFV_1589005404391.wav","cough_PWUKaMIaFV_1589176976421.wav","cough_PWUKaMIaFV_1589347840646.wav","cough_PWUKaMIaFV_1590071279367.wav","cough_Yarb5WSBeD_1588226072210.wav","cough_Yarb5WSBeD_1588483584608.wav","cough_Yarb5WSBeD_1588832877170.wav","cough_Yarb5WSBeD_1588668288571.wav", "cough_Yarb5WSBeD_1589179842997.wav","cough_Yarb5WSBeD_1589606161469.wav","cough_Yarb5WSBeD_1590126575899.wav"]
print("Deleted", len(coughs_to_be_ignored), "samples because of the time distance they had with other samples of the same user")

Deleted 40 samples because of the time distance they had with other samples of the same user


In [None]:
# Disabled debugging snippet: the whole cell is a bare string literal, so none
# of the code inside it is executed.
'''fs = ["cough/android/covidandroidnocough", "cough/android/covidandroidwithcough", "cough/android/healthyandroidnosymp", "cough/web/covidwebnocough", "cough/web/covidwebwithcough", "cough/web/healthywebnosymp"]
fs = [path_to_covidandroidnocough, path_to_covidandroidwithcough, path_to_healthyandroidnosymp]

for file in coughs_to_be_ignored:
    for f in fs:
        print(os.listdir(f))
        if file.split(".")[0] + ".wav" in os.listdir(f) :
            print(file)'''

In [None]:
# The files contained in these three lists will be moved to another directory containing android files divided by category
# Each list holds the entries of one Android folder minus the ignored recordings.
covidandroidnocough_samples = [
    name for name in os.listdir(path_to_covidandroidnocough)
    if name not in coughs_to_be_ignored
]
covidandroidwithcough_samples = [
    name for name in os.listdir(path_to_covidandroidwithcough)
    if name not in coughs_to_be_ignored
]
healthyandroidnosymp_samples = [
    name for name in os.listdir(path_to_healthyandroidnosymp)
    if name not in coughs_to_be_ignored
]

n_covid_no_cough = len(covidandroidnocough_samples)
n_covid_with_cough = len(covidandroidwithcough_samples)
n_healthy = len(healthyandroidnosymp_samples)

print("Covid android no cough samples to be used:", n_covid_no_cough)
print("Covid android with cough samples to be used:", n_covid_with_cough)
print("Healhty android no symptom samples to be used:", n_healthy)
print("Total android cough samples to be used:", n_covid_no_cough + n_covid_with_cough + n_healthy)

Covid android no cough samples to be used: 52
Covid android with cough samples to be used: 41
Healhty android no symptom samples to be used: 115
Total android cough samples to be used: 208


## Web files

In [None]:
# Web category roots; the copy loops below expect one sub-folder per submission inside each.
# NOTE: this reassigns the three path_to_*web* names defined near the top of
# the notebook, this time without the "cough/" suffix.
path_to_covidwebnocough = path_to_folder + "covidwebnocough/"
path_to_covidwebwithcough = path_to_folder + "covidwebwithcough/"
path_to_healthywebnosymp = path_to_folder + "healthywebnosymp/"

# destination paths
# One flat destination folder per web category.
destination_path_to_folder = "drive/MyDrive/cough/web/"
destination_path_to_covidwebnocough = destination_path_to_folder + "covidwebnocough/"
destination_path_to_covidwebwithcough = destination_path_to_folder + "covidwebwithcough/"
destination_path_to_healthywebnosymp = destination_path_to_folder + "healthywebnosymp/"

In [None]:
# create destination folder for web files if it does not exist
# web files are grouped by user but we want to group them by category
# and name them according to the date they were given on and the user ID

# (source category root, destination folder) pairs; all three are handled the same way.
web_categories = [
    (path_to_covidwebnocough, destination_path_to_covidwebnocough),
    (path_to_covidwebwithcough, destination_path_to_covidwebwithcough),
    (path_to_healthywebnosymp, destination_path_to_healthywebnosymp),
]

for source_root, destination_root in web_categories:
    # exist_ok replaces the separate "does it exist?" check.
    os.makedirs(destination_root, exist_ok=True)

    for folder in os.listdir(source_root):
        submission_folder = source_root + folder
        # A stray file next to the submission folders would make os.listdir fail.
        if not os.path.isdir(submission_folder):
            continue
        if 'audio_file_cough.wav' in os.listdir(submission_folder):
            # The copy is renamed after its submission folder so names stay unique.
            file_name = "cough_" + folder + ".wav"
            shutil.copy(submission_folder + "/" + 'audio_file_cough.wav', destination_root + file_name)

## Some samples from healthy web no symptom folders do not exist in the "files.json" file so they will be deleted:

# The JSON is fully loaded first so the file is closed before anything is deleted.
with open(path_to_folder + 'files.json') as json_file:
    data = json.load(json_file)

healthy_web_entries = data["healthywebnosymp"]

for file in os.listdir(destination_path_to_healthywebnosymp):
    # "cough_<a>_<b>_<c>_<d>.wav" -> "<a>_<b>_<c>_<d>", the id looked up in files.json.
    stem_fields = file.split(".")[0].split("_")
    if len(stem_fields) < 5:
        # The name does not have the expected five "_"-separated fields;
        # leave the file alone instead of failing with an IndexError.
        continue
    recording_id = "_".join(stem_fields[1:5])
    if not any(recording_id in entry for entry in healthy_web_entries):
        os.remove(destination_path_to_healthywebnosymp + file)

### Copy android files to the correct directory

In [None]:
# destination paths
# NOTE: destination_path_to_folder is reassigned here; in the "Web files"
# section it was set to "drive/MyDrive/cough/web/".
destination_path_to_folder = "drive/MyDrive/cough/android/"
destination_path_to_covidandroidnocough = destination_path_to_folder + "covidandroidnocough/"
destination_path_to_covidandroidwithcough = destination_path_to_folder + "covidandroidwithcough/"
destination_path_to_healthyandroidnosymp = destination_path_to_folder + "healthyandroidnosymp/"

In [None]:
# create destination folder for android files if it does not exist

# (source folder, destination folder, names selected for copying) per category.
android_categories = [
    (path_to_covidandroidnocough, destination_path_to_covidandroidnocough, covidandroidnocough_samples),
    (path_to_covidandroidwithcough, destination_path_to_covidandroidwithcough, covidandroidwithcough_samples),
    (path_to_healthyandroidnosymp, destination_path_to_healthyandroidnosymp, healthyandroidnosymp_samples),
]

for source_folder, destination_folder, selected_samples in android_categories:
    # exist_ok replaces the separate "does it exist?" check.
    os.makedirs(destination_folder, exist_ok=True)

    # Set membership instead of scanning the list for every file.
    selected_names = set(selected_samples)
    for file in os.listdir(source_folder):
        if file in selected_names:
            # os.path.join avoids the doubled "/" (the source paths already end with one).
            shutil.copy(os.path.join(source_folder, file), destination_folder + file)

## Create csv file with labels for classification

In [None]:
path_to_android = "drive/MyDrive/cough/android/"
path_to_web = "drive/MyDrive/cough/web/"

covidnocough_android = path_to_android + "covidandroidnocough/"
covidwithcough_android = path_to_android + "covidandroidwithcough/"
healthy_android = path_to_android + "healthyandroidnosymp/"

covidnocough_web = path_to_web + "covidwebnocough/"
covidwithcough_web = path_to_web + "covidwebwithcough/"
healthy_web = path_to_web + "healthywebnosymp/"

# (folder, label) in the order the rows are written: COVID folders first, then healthy.
labelled_folders = [
    (covidnocough_android, 'pos'),
    (covidwithcough_android, 'pos'),
    (covidnocough_web, 'pos'),
    (covidwithcough_web, 'pos'),
    (healthy_android, 'neg'),
    (healthy_web, 'neg'),
]

# The handle is no longer called `data`, which elsewhere in the notebook names
# the parsed files.json content.
with open('Cambridge_labels.csv', mode='w', newline='') as labels_file:
    data_writer = csv.writer(labels_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    data_writer.writerow(['file_name', 'label'])

    for folder, label in labelled_folders:
        # os.listdir returns entries in arbitrary order; sorting makes the CSV reproducible.
        for file in sorted(os.listdir(folder)):
            # Rows name the image that the audio file is converted to later on.
            data_writer.writerow([file.split(".")[0] + ".png", label])

# Audio to image conversion

In [None]:
import matplotlib
import numpy as np
import sys
import pickle
import time
import scipy

#for loading and visualizing audio files
import librosa
import librosa.display
import pywt
import statistics

import warnings
from os import listdir
from os.path import isfile, join
import random

import pandas as pd
import subprocess
from pathlib import Path
from matplotlib import pyplot as plt
from math import sqrt

In [None]:
def audio_to_hcqt(signal, sr, fig_name, destination_folder):
    """Render the hybrid constant-Q transform of `signal` and save it as an image.

    Args:
        signal: audio samples, passed to librosa.hybrid_cqt.
        sr: sampling rate of `signal`.
        fig_name: file name of the image, e.g. "<id>.png".
        destination_folder: target folder; concatenated with `fig_name` as a
            plain string, so it must end with a path separator.
    """
    fig, ax = plt.subplots()
    try:
        C = np.abs(librosa.hybrid_cqt(signal, sr=sr))
        librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                                 sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)

        # Stretch the axes over the whole figure area.
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon` is no longer passed: newer Matplotlib releases do not accept
        # it, and the old value 'false' was a truthy string that never hid the frame.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so they do not pile up in memory.
        plt.close(fig)

In [None]:
def audio_to_cqt(signal, sr, fig_name, destination_folder):
    """Render the constant-Q transform of `signal` and save it as an image.

    Args:
        signal: audio samples, passed to librosa.cqt.
        sr: sampling rate of `signal`.
        fig_name: file name of the image, e.g. "<id>.png".
        destination_folder: target folder; concatenated with `fig_name` as a
            plain string, so it must end with a path separator.
    """
    fig, ax = plt.subplots()
    try:
        C = np.abs(librosa.cqt(signal, sr=sr))
        librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                                 sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)

        # Stretch the axes over the whole figure area.
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon` is no longer passed: newer Matplotlib releases do not accept
        # it, and the old value 'false' was a truthy string that never hid the frame.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so they do not pile up in memory.
        plt.close(fig)

In [None]:
def audio_to_stft(signal, sr, fig_name, destination_folder):
    """Render the STFT magnitude of `signal` (in dB) and save it as an image.

    Args:
        signal: audio samples, passed to librosa.stft.
        sr: sampling rate of `signal`.
        fig_name: file name of the image, e.g. "<id>.png".
        destination_folder: target folder; concatenated with `fig_name` as a
            plain string, so it must end with a path separator.
    """
    fig, ax = plt.subplots()
    try:
        X = librosa.stft(signal)
        Xdb = librosa.amplitude_to_db(abs(X))

        # Draw on the axes created above (previously implied via the current axes).
        librosa.display.specshow(Xdb, sr=sr, cmap='magma', x_axis='time', y_axis='log', ax=ax)

        # Stretch the axes over the whole figure area.
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon` is no longer passed: newer Matplotlib releases do not accept
        # it, and the old value 'false' was a truthy string that never hid the frame.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so they do not pile up in memory.
        plt.close(fig)

In [None]:
def audio_to_mel(signal, sr, fig_name, destination_folder):
    """Render the mel spectrogram of `signal` (power in dB) and save it as an image.

    Args:
        signal: audio samples, passed to librosa.feature.melspectrogram.
        sr: sampling rate of `signal`.
        fig_name: file name of the image, e.g. "<id>.png".
        destination_folder: target folder; concatenated with `fig_name` as a
            plain string, so it must end with a path separator.
    """
    fig, ax = plt.subplots()
    try:
        S = librosa.feature.melspectrogram(y=signal, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)

        # NOTE(review): y_axis='log' is kept from the original; confirm whether
        # a mel-scaled axis was intended for this mel spectrogram.
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='log', ax=ax)

        # Stretch the axes over the whole figure area.
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # `frameon` is no longer passed: newer Matplotlib releases do not accept
        # it, and the old value 'false' was a truthy string that never hid the frame.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        # One figure is created per audio file; close it so they do not pile up in memory.
        plt.close(fig)

In [None]:
## Convert to image
        
def convert_audio(path_to_folder, path_to_destination_folder, files_to_convert, transform_name):
    """Convert the selected audio files of one folder into spectrogram images.

    Previously `transform_name` was ignored and the HCQT converter was always
    used; the converter is now chosen from `transform_name`.

    Args:
        path_to_folder: folder holding the audio files; concatenated with the
            file name as a plain string, so it must end with a path separator.
        path_to_destination_folder: folder the images are written to (same
            trailing-separator requirement).
        files_to_convert: names of the audio files that should be converted.
        transform_name: one of "hcqt", "cqt", "stft" or "mel".

    Raises:
        ValueError: if `transform_name` is not one of the supported names.
    """
    # Looked up at call time so the converters defined in this notebook are used.
    converters = {
        "hcqt": audio_to_hcqt,
        "cqt": audio_to_cqt,
        "stft": audio_to_stft,
        "mel": audio_to_mel,
    }
    if transform_name not in converters:
        raise ValueError(
            "Unknown transform_name {!r}; expected one of {}".format(transform_name, sorted(converters))
        )
    audio_to_image = converters[transform_name]

    warnings.filterwarnings('ignore') # ignore warnings for reading audio files
    wanted = set(files_to_convert)  # constant-time membership checks
    for file in os.listdir(path_to_folder):
        print(file)
        if file in wanted:
            ## assuming file names is of the form: "unique_id.wav"
            audio_signal, sr = librosa.load(path_to_folder + file, sr=16000)

            # Empty recordings are skipped.
            if len(audio_signal) > 0:
                audio_to_image(audio_signal, sr, file.split(".")[0] + ".png", path_to_destination_folder)
              

In [None]:
def convert_Cambridge(transform_name):
    """Create the images for every file listed in Cambridge_labels.csv.

    Images are written to the folder "<transform_name>/" (created if missing).
    Files whose image already exists there are skipped, so an interrupted run
    can be resumed. The audio is searched two levels below
    "drive/MyDrive/cough/" and each folder found there is handed to
    convert_audio.

    Args:
        transform_name: name of the transformation; also the name of the
            destination folder. It is passed on to convert_audio.
    """
    path_to_folder = "drive/MyDrive/cough/"

    csv_path = "Cambridge_labels.csv"
    path_to_destination_folder = transform_name + "/"

    os.makedirs(path_to_destination_folder, exist_ok=True)

    # List the destination once instead of once per CSV row.
    already_converted = set(os.listdir(path_to_destination_folder))

    files_to_convert = []
    with open(csv_path) as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if not row:
                continue  # tolerate blank lines in the CSV
            if row[0] != 'file_name' and row[0] not in already_converted:
                # The CSV lists image names; map them back to the audio file names.
                files_to_convert.append(row[0].split(".")[0] + ".wav")

    for folders in os.listdir(path_to_folder):
        group_path = os.path.join(path_to_folder, folders)
        if not os.path.isdir(group_path):
            continue  # stray file at the top level
        for f in os.listdir(group_path):
            category_path = os.path.join(group_path, f)
            if not os.path.isdir(category_path):
                continue  # stray file next to the category folders
            convert_audio(category_path + "/", path_to_destination_folder, files_to_convert, transform_name)

In [None]:
# Build the image set stored under the "hcqt/" folder.
convert_Cambridge("hcqt")

In [None]:
# Build the image set stored under the "cqt/" folder.
convert_Cambridge("cqt")

In [None]:
# Build the image set stored under the "mel/" folder.
convert_Cambridge("mel")

In [None]:
# Build the image set stored under the "stft/" folder.
convert_Cambridge("stft")