<center> 

# **Cough sound analysis using Deep Learning methods for COVID-19 diagnosis**

### Division of Information Transmission Systems and Material Technology

## Christina Ntourma
</center>

# Imports

In [None]:
import matplotlib
import subprocess
import numpy as np
import csv
import json
import os

import sys
import pickle
import time

#for loading and visualizing audio files
import librosa
import librosa.display
import pywt
import statistics

import warnings
from os import listdir
from os.path import isfile, join
import random

import pandas as pd
import subprocess
from pathlib import Path
from matplotlib import pyplot as plt
from math import sqrt

# Data Preprocessing

## Download COUGHVID dataset (version 2.0)
Download the dataset directly from Zenodo (useful when running on Google Colab).

In [None]:
# Install the zenodo_get command-line helper used by the download command below.
!pip install zenodo-get
DOI = '10.5281/zenodo.4498364' # DOI for downloading the correct version from zenodo
# Fetch the files of the Zenodo record identified by DOI ($DOI is interpolated by IPython).
!zenodo_get $DOI

Collecting zenodo-get
  Downloading zenodo_get-1.3.2-py2.py3-none-any.whl (17 kB)
Collecting wget
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9672 sha256=bde8bd7216bd088630f8aa9999d98b9f2e35a6d857e8576fb04450d90d5a7170
  Stored in directory: /root/.cache/pip/wheels/a1/b6/7c/0e63e34eb06634181c63adacca38b79ff8f35c37e3c13e3c02
Successfully built wget
Installing collected packages: wget, zenodo-get
Successfully installed wget-3.2 zenodo-get-1.3.2
Title: The COUGHVID crowdsourcing dataset: A corpus for the study of large-scale cough analysis algorithms
Keywords: COVID-19, cough sound database, automatic cough classification
Publication date: 2021-02-03
DOI: 10.5281/zenodo.4498364
Total size: 1271.1 MB

Link: https://zenodo.org/api/files/9c9fadb1-c9ed-4cfa-a0c3-1209d2c93c10/public_dataset.zip   size: 1271.1 MB

Checksum is correct. (1

In [None]:
!unzip "/content/public_dataset.zip" 

[1;30;43mΗ έξοδος ροής περικόπηκε στις τελευταίες 5000 γραμμές.[0m
  inflating: public_dataset/e8e7a8fc-a199-459b-8b47-96aae07cafdc.webm  
  inflating: public_dataset/e8eaf768-9997-4a63-ad2e-4a01c5adee86.json  
  inflating: public_dataset/e8eaf768-9997-4a63-ad2e-4a01c5adee86.webm  
  inflating: public_dataset/e8ebf8aa-107c-4f94-b24b-526e4f5f1b64.json  
  inflating: public_dataset/e8ebf8aa-107c-4f94-b24b-526e4f5f1b64.ogg  
  inflating: public_dataset/e8f0479e-9e75-41e7-806d-dfad3b86ac82.json  
  inflating: public_dataset/e8f0479e-9e75-41e7-806d-dfad3b86ac82.webm  
  inflating: public_dataset/e8f243e4-d456-4b40-bf1e-b6bab899453f.json  
  inflating: public_dataset/e8f243e4-d456-4b40-bf1e-b6bab899453f.webm  
  inflating: public_dataset/e8f45f08-6e1d-4c06-ae96-ce9fe43c91d1.json  
  inflating: public_dataset/e8f45f08-6e1d-4c06-ae96-ce9fe43c91d1.webm  
  inflating: public_dataset/e8f4b378-f21c-40a6-add1-2831323f544e.json  
  inflating: public_dataset/e8f4b378-f21c-40a6-add1-2831323f544e.web

## Preprocessing

In [None]:
# For EPFL dataset: convert .ogg and .webm files to .wav files
def convert_files(folder):
    """Convert the recordings listed in the metadata from .webm/.ogg to .wav.

    Parameters
    ----------
    folder : str
        Path to the COUGHVID folder that contains both the recordings and
        ``metadata_compiled.csv``. A trailing separator is optional.

    Notes
    -----
    * Each ``uuid`` of the metadata is looked up as ``<uuid>.webm`` first and
      ``<uuid>.ogg`` second; if neither exists an error line is printed.
    * Recordings whose ``.wav`` already exists are skipped, so the function
      can be re-run safely after an interruption.
    * ffmpeg is invoked through ``subprocess`` with an argument list (no shell),
      so file names never need quoting. Its output is only shown on failure.
    """
    df = pd.read_csv(os.path.join(folder, 'metadata_compiled.csv'))
    names_to_convert = df.uuid.to_numpy()
    total = len(names_to_convert)

    for counter, name in enumerate(names_to_convert):
        if counter % 1000 == 0:
            print("Finished {0}/{1}".format(counter, total))

        # Locate the source recording (.webm takes precedence over .ogg).
        path_to_old_file = None
        for extension in (".webm", ".ogg"):
            candidate = os.path.join(folder, name + extension)
            if os.path.isfile(candidate):
                path_to_old_file = candidate
                break
        if path_to_old_file is None:
            print("Error: No file name {0}".format(name))
            continue

        path_to_new_file = os.path.join(folder, name + ".wav")
        if os.path.isfile(path_to_new_file):
            # Already converted on a previous run.
            continue

        # -nostdin / -n: never wait for user input and never overwrite;
        # -loglevel error: keep ffmpeg's stderr limited to real problems.
        completed = subprocess.run(
            ["ffmpeg", "-hide_banner", "-nostdin", "-loglevel", "error", "-n",
             "-i", path_to_old_file, path_to_new_file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if completed.returncode != 0:
            print("Error: ffmpeg failed for {0}: {1}".format(name, completed.stderr.strip()))

In [None]:
# Convert the recordings of the extracted dataset folder to .wav (see convert_files).
convert_files('public_dataset/')

[1;30;43mΗ έξοδος ροής περικόπηκε στις τελευταίες 5000 γραμμές.[0m
Output #0, wav, to 'public_dataset/fcf92aa7-26ea-4cd1-a771-18b5f3024342.wav':
  Metadata:
    ISFT            : Lavf57.83.100
    Stream #0:0(eng): Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s (default)
    Metadata:
      encoder         : Lavc57.107.100 pcm_s16le
size=     405kB time=00:00:04.31 bitrate= 768.2kbits/s speed= 339x    
video:0kB audio:405kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.018808%
Input #0, matroska,webm, from 'public_dataset/fcfa2d24-1802-464c-9c0b-3497f85892cc.webm':
  Metadata:
    encoder         : Chrome
  Duration: N/A, start: 0.000000, bitrate: N/A
    Stream #0:0(eng): Audio: opus, 48000 Hz, mono, fltp (default)
Stream mapping:
  Stream #0:0 -> #0:0 (opus (native) -> pcm_s16le (native))
Press [q] to stop, [?] for help
Output #0, wav, to 'public_dataset/fcfa2d24-1802-464c-9c0b-3497f85892cc.wav':
  Metadata:
    ISFT            : Lavf

In [None]:
# Samples with cough detected
# Path to "metadata_compiled.csv", which is provided with the dataset and
# contains metadata information about the samples.
csv_path = "public_dataset/metadata_compiled.csv"

with open(csv_path) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')

    cough = []      # cough_detected value of every retained sample
    cough_ids = []  # uuid (column 0) of every retained sample, same order as `cough`

    for row in csv_reader:
        # Skip the header row and rows without a cough_detected value
        # (float('') would raise ValueError).
        if row[2] in ('cough_detected', ''):
            continue
        cough_probability = float(row[2])
        if cough_probability >= 0.8:  # samples with p>=0.8 are considered to contain cough
            cough.append(cough_probability)
            cough_ids.append(row[0])

    # Report duplicated ids in a single pass (each duplicated id is reported once).
    seen_ids = set()
    reported_ids = set()
    for i in cough_ids:
        if i in seen_ids and i not in reported_ids:
            print(i, "occurs more than once")
            reported_ids.add(i)
        seen_ids.add(i)

    # Show only a preview; the full list is available in `cough_ids`.
    print("Cough samples (first 10):", cough_ids[:10])
    print(len(cough), "samples containing cough were detected")

Cough samples: ['00039425-7f3a-42aa-ac13-834aaa2b6b92', '0009eb28-d8be-4dc1-92bb-907e53bc5c7a', '001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f', '0028b68c-aca4-4f4f-bb1d-cb4ed5bbd952', '00291cce-36a0-4a29-9e2d-c1d96ca17242', '0029d048-898a-4c70-89c7-0815cdcf7391', '002d28bc-7806-4dfb-9c9b-afa8cb623cac', '002db0bd-e57f-4c30-ade0-16640d424eb7', '0033d1d5-ae8c-42f6-a557-822d6ab691cf', '00343395-c86c-47e3-a10f-fa3036f4572f', '0037f67c-0d6f-42e6-ab94-66499d4d2bf4', '003c7941-a3ef-4daf-b8a8-5c77c82504b0', '003e1bf6-2ecf-4a00-9198-29db34e8d7ac', '00420d07-89f8-464d-88dd-8e893f0c3f56', '0044964d-f7e9-43e3-831f-ae3e6374a9a4', '0044cb7b-448c-44e5-8302-ad8bd106fe3e', '004c24d8-e8cd-4755-86f6-5a1d8c7920c7', '005887c9-4bb1-4f13-86b2-1c7b3cee0881', '005ae1c2-7bc2-44c9-bc88-cf0af4ca762f', '005b8518-03ba-4bf5-86d2-005541442357', '005bca13-18d1-4514-a086-cc8c3fafeeb2', '005d63f8-a8e2-4684-a2bc-d65ffbb13707', '0063937c-5758-4d7b-803f-0c53f102e5fb', '0066b126-104a-45a6-a88e-0697c6baa0aa', '006d8d1c-2bf6-46a6-8ef2

### Experts' annotation

In [None]:
# Re-declared here so this section can be run without the cell that first assigns csv_path.
csv_path = "public_dataset/metadata_compiled.csv" # path to "metadata_compiled.csv" which is provided with the dataset and contains metadata information about the samples

In [None]:
# Find the cough samples that have been labeled by an expert at least once
# If the diagnosis field does not contain any value then the sample is either not labeled or it did not contain cough sound

with open(csv_path) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')

    # Set copy of cough_ids: constant-time membership test for every CSV row.
    cough_id_lookup = set(cough_ids)

    # (column index, header text) of each expert's diagnosis column.
    diagnosis_columns = (
        (19, 'diagnosis_1'),
        (29, 'diagnosis_2'),
        (39, 'diagnosis_3'),
        (49, 'diagnosis_4'),
    )

    labeled_samples = []

    for row in csv_reader:
        if row[0] not in cough_id_lookup:
            continue
        # A sample counts as labeled when at least one diagnosis cell holds a real value
        # (neither the column header nor an empty string).
        if any(row[column] != header and row[column] != '' for column, header in diagnosis_columns):
            labeled_samples.append(row[0])

    print(len(labeled_samples), "samples were labeled by an expert")

2804 samples were labeled by an expert


In [None]:
# Cough samples labeled by experts

with open(csv_path) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')

    # One dictionary per expert:
    #   key: sample uuid, value: (status declared by the user, diagnosis given by that expert)
    expert_1 = {}
    expert_2 = {}
    expert_3 = {}
    expert_4 = {}
    COVID_labeled = []  # uuids diagnosed as COVID-19 by at least one expert (no duplicates)

    # Set copy of cough_ids: constant-time membership test for every CSV row.
    cough_id_lookup = set(cough_ids)

    # (target dictionary, diagnosis column index, header text of that column)
    expert_columns = (
        (expert_1, 19, 'diagnosis_1'),
        (expert_2, 29, 'diagnosis_2'),
        (expert_3, 39, 'diagnosis_3'),
        (expert_4, 49, 'diagnosis_4'),
    )

    for row in csv_reader:
        if row[0] not in cough_id_lookup:
            continue
        for expert, column, header in expert_columns:
            expert_diagnosis = row[column]
            if expert_diagnosis == header or expert_diagnosis == '':
                continue  # this expert did not label the sample
            expert[row[0]] = (row[10], expert_diagnosis)  # row[10] --> status declared by user
            # The same duplicate guard is applied to every expert (the first
            # expert used to append unconditionally).
            if expert_diagnosis == 'COVID-19' and row[0] not in COVID_labeled:
                COVID_labeled.append(row[0])

In [None]:
# Consolidate the experts' diagnoses into a single label per annotated sample.
#
# For every sample the diagnoses are collected in expert order (1 -> 4):
#   * one diagnosis             -> it becomes the label
#   * several, all identical    -> that shared diagnosis becomes the label
#   * several, not all equal    -> the sample is recorded as a contradiction and the
#                                  diagnosis of the highest-numbered expert wins
#                                  (the agreement figures printed further down in this
#                                  notebook increase from expert 1 to expert 4).
#
# The winning diagnosis is always the LAST collected one. Fixed indices such as
# diagnosis[3] on a two- or three-element list raise IndexError.
with open(csv_path) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    contradictions = []  # uuids on which the experts disagree
    labels = {}          # uuid -> consolidated diagnosis

    # (tag printed by the sanity check, expert dictionary), in expert order.
    ranked_experts = (
        ("exp1", expert_1),
        ("exp2", expert_2),
        ("exp3", expert_3),
        ("exp4", expert_4),
    )

    for row in csv_reader:
        sample_id = row[0]

        diagnosis = []  # diagnoses of the experts that labeled this sample
        tags = []       # matching expert tags, same order as `diagnosis`
        for tag, expert in ranked_experts:
            if sample_id in expert:
                diagnosis.append(expert[sample_id][1])
                tags.append(tag)

        if not diagnosis:
            continue  # header row or sample not labeled by any expert

        if len(diagnosis) == 1:
            # Sanity check: a sample labeled by a single expert should carry a diagnosis.
            if diagnosis[0] == '':
                print(tags[0])
            labels[sample_id] = diagnosis[0]
        elif diagnosis.count(diagnosis[0]) < len(diagnosis):
            # At least one expert disagrees with the first one.
            contradictions.append(sample_id)
            labels[sample_id] = diagnosis[-1]  # highest-numbered expert has the best precision
        else:
            labels[sample_id] = diagnosis[0]  # all experts have given the same diagnosis

print("Files with contradictions:", contradictions)
print("Contradictions", len(contradictions))
print("Total number of annotated samples:", len(labels))

Files with contradictions: ['01567151-7bb2-45ee-9aa8-a1332b5941ea', '01ff40e8-63e6-4570-a463-9778ea30cad7', '0f8fb3e0-1a30-4bd3-982a-24342a0bdc70', '19cb6e8b-d747-4770-8770-9211d31f2fc6', '1dd3b212-e969-4ede-a9d9-f24b711e2028', '1ed9491a-4036-4308-bc44-5036fc2e9f28', '1f02c230-3a22-4ee1-81e4-7e7237b36c03', '218f522f-d3b0-4370-a93f-e8a70d958950', '21aee478-6d13-45ea-be4d-4f29fd244798', '29426ec2-55bf-4ce0-a591-bd7ea2dd9a6c', '2cc2fd2e-6314-424a-977b-7237f935fb65', '2e4456dd-bb47-45f7-b2ef-7b2d2f2859c6', '2f651988-a4c4-4042-a123-f0cc68a961a5', '30b282bb-affd-4449-81b3-f5bcc8877bee', '30d6b101-9736-4522-a2d9-4f010d3e72b0', '314cd5c3-0030-4ea1-83b3-d6845897b903', '31e5a9cf-6a3e-43a3-8e0f-65c99f5748e9', '336b15df-b282-4956-bb68-548ec950bb2f', '3733c427-2c04-48ee-8e8d-82f38a184b04', '37ac823b-4e39-4e85-a6c1-5e1e79979a13', '38be0751-98d7-45a2-a325-4853efd0a174', '3a283026-4937-4cc3-ac9e-9d9382a56d10', '4260e6fd-0a12-48de-9bfd-1b8644c2186b', '481443c9-17a9-4194-9525-8c5fc2cd15fc', '48b287c0-5c

In [None]:
# List every distinct value found in the status column and in each expert's
# diagnosis column, in order of first appearance (header cell included).
with open(csv_path) as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    status, exp_1, exp_2, exp_3, exp_4 = [], [], [], [], []

    # (column index, list collecting the distinct values of that column)
    tracked_columns = (
        (10, status),  # status declared by the user
        (19, exp_1),   # labels given by expert 1
        (29, exp_2),   # labels given by expert 2
        (39, exp_3),   # labels given by expert 3
        (49, exp_4),   # labels given by expert 4
    )

    for row in csv_reader:
        for column, distinct_values in tracked_columns:
            cell = row[column]
            if cell not in distinct_values:
                distinct_values.append(cell)

    print("All possible status values:", status)
    for expert_number, distinct_values in enumerate((exp_1, exp_2, exp_3, exp_4), start=1):
        print("All different labels given by expert {}:".format(expert_number), distinct_values)

All possible status values: ['status', '', 'healthy', 'COVID-19', 'symptomatic']
All different labels given by expert 1: ['diagnosis_1', '', 'healthy_cough', 'lower_infection', 'COVID-19', 'obstructive_disease', 'upper_infection']
All different labels given by expert 2: ['diagnosis_2', '', 'lower_infection', 'COVID-19', 'obstructive_disease', 'healthy_cough', 'upper_infection']
All different labels given by expert 3: ['diagnosis_3', '', 'healthy_cough', 'lower_infection', 'upper_infection', 'obstructive_disease', 'COVID-19']
All different labels given by expert 4: ['diagnosis_4', '', 'upper_infection', 'obstructive_disease', 'lower_infection', 'healthy_cough', 'COVID-19']


In [None]:
# Agreement between each expert's diagnosis and the status declared by the user.
experts = [expert_1, expert_2, expert_3, expert_4]

# Expert diagnoses that count as agreeing with a user status of 'symptomatic'.
symptomatic_diagnoses = ('lower_infection', 'upper_infection', 'obstructive_disease')

cnt = 1
for expert in experts:
    correct = 0
    wrong = 0
    accuracy = 0
    for (k, v) in expert.items():
        user_status, expert_diagnosis = v
        # If the user gave no status or the expert gave no diagnosis, the file is
        # not taken into account when computing the expert's accuracy.
        if user_status == '' or expert_diagnosis == '':
            continue

        if user_status == expert_diagnosis:
            correct = correct + 1
        elif user_status == 'healthy' and expert_diagnosis == 'healthy_cough':
            correct = correct + 1
        elif user_status == 'symptomatic' and expert_diagnosis in symptomatic_diagnoses:
            correct = correct + 1
        else:
            wrong = wrong + 1

    compared = correct + wrong
    if compared == 0:
        # No sample with both a status and a diagnosis: avoid dividing by zero.
        print("Expert's", cnt, "precision is undefined (no comparable samples)")
    else:
        accuracy = correct / compared
        print("Expert's", cnt ,"precision is", round(accuracy*100, 4) , "%")
    cnt = cnt + 1

Expert's 1 precision is 32.656 %
Expert's 2 precision is 37.2434 %
Expert's 3 precision is 40.0894 %
Expert's 4 precision is 44.026 %


### Create the labels csv for the annotated subset of the dataset

In [None]:
# Destination of the label file for the expert-annotated subset.
path_to_labels_csv = 'EPFL_small_labels.csv'

with open(path_to_labels_csv, mode='w', newline='') as data:
    data_writer = csv.writer(data, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    data_writer.writerow(['file_name', 'label'])

    # Walk the metadata so the output rows follow the order of the original CSV.
    with open(csv_path, mode='r') as input_data:
        csv_reader = csv.reader(input_data, delimiter=',')
        for row in csv_reader:
            sample_id = row[0]
            if sample_id == 'uuid' or sample_id not in labels:
                continue  # header row or sample without a consolidated label
            # Binary target: 'pos' for a COVID-19 diagnosis, 'neg' for anything else.
            target = 'pos' if labels[sample_id] == 'COVID-19' else 'neg'
            data_writer.writerow([sample_id + ".png", target])

### Create the labels csv for the whole dataset

In [None]:
def find_COVID_labeled():
    """Split the cough samples by the status declared by the user.

    Reads the CSV at the module-level ``csv_path`` and keeps only rows whose
    cough_detected value (column 2) is at least 0.8.

    Returns
    -------
    (COVID, non_COVID) : tuple of lists of uuids (column 0)
        ``COVID`` holds samples whose status (column 10) is 'COVID-19';
        ``non_COVID`` holds samples with any other non-empty status.
        Samples with an empty status are in neither list.
    """
    with open(csv_path) as csv_file:
        COVID, non_COVID = [], []

        for record in csv.reader(csv_file, delimiter=','):
            cough_score = record[2]
            # Drop the header row and every sample below the cough threshold.
            if cough_score == 'cough_detected' or not float(cough_score) >= 0.8:
                continue

            user_status = record[10]
            if user_status == 'COVID-19':
                COVID.append(record[0])
            elif user_status not in ('', 'status'):
                non_COVID.append(record[0])

    return COVID, non_COVID

In [None]:
def create_labels_csv():
    """Write 'EPFL_labels.csv' with one ``file_name,label`` row per usable sample.

    The samples come from ``find_COVID_labeled()``: uuids in its first list are
    written with label 'pos', uuids in its second list with label 'neg'. Rows
    follow the order of the metadata CSV at the module-level ``csv_path`` and
    every file name is the uuid followed by '.png'.
    """
    COVID, non_COVID = find_COVID_labeled()
    # Sets give constant-time membership tests while scanning the metadata rows.
    covid_ids = set(COVID)
    non_covid_ids = set(non_COVID)

    labels_csv_path = 'EPFL_labels.csv' # path to the new csv containing the labels of the data

    with open(labels_csv_path, mode='w', newline='') as data:
        data_writer = csv.writer(data, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        data_writer.writerow(['file_name', 'label'])

        with open(csv_path, mode='r') as input_data:
            csv_reader = csv.reader(input_data, delimiter=',')
            for row in csv_reader:
                if row[0] in covid_ids:
                    data_writer.writerow([row[0] + ".png", 'pos'])
                elif row[0] in non_covid_ids:
                    data_writer.writerow([row[0] + ".png", 'neg'])

In [None]:
# Write EPFL_labels.csv (labels derived from the status declared by the users).
create_labels_csv()

# Audio to image conversion

In [None]:
def audio_to_mel(signal, sr, fig_name, destination_folder):
    """Render the mel spectrogram of an audio signal and save it as an image.

    Parameters
    ----------
    signal : audio samples, passed to ``librosa.feature.melspectrogram`` as ``y``
    sr : sampling rate of ``signal``
    fig_name : file name of the image; no extension is added here, so Matplotlib
        picks its default output format (presumably PNG -- the label CSVs of this
        notebook reference '<uuid>.png')
    destination_folder : folder prefix, concatenated with ``fig_name`` as-is
        (it must therefore end with a path separator)

    The figure is always closed after saving so that converting many files does
    not accumulate open figures.
    """
    fig, ax = plt.subplots()
    try:
        S = librosa.feature.melspectrogram(y=signal, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)  # power -> dB relative to the maximum
        librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=20000, ax=ax)
        # Let the axes fill the whole canvas (no margins around the spectrogram).
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # NOTE: the former frameon='false' argument was dropped: the string is truthy
        # (so it never disabled the frame) and newer Matplotlib rejects the keyword.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        plt.close(fig)

In [None]:
def audio_to_hcqt(signal, sr, fig_name, destination_folder):
    """Render the hybrid constant-Q transform of an audio signal and save it as an image.

    Parameters
    ----------
    signal : audio samples, passed to ``librosa.hybrid_cqt``
    sr : sampling rate of ``signal``
    fig_name : file name of the image; no extension is added here, so Matplotlib
        picks its default output format (presumably PNG -- the label CSVs of this
        notebook reference '<uuid>.png')
    destination_folder : folder prefix, concatenated with ``fig_name`` as-is
        (it must therefore end with a path separator)

    The figure is always closed after saving so that converting many files does
    not accumulate open figures.
    """
    fig, ax = plt.subplots()
    try:
        C = np.abs(librosa.hybrid_cqt(signal, sr=sr))  # magnitude of the transform
        librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                                 sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)
        # Let the axes fill the whole canvas (no margins around the spectrogram).
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # NOTE: the former frameon='false' argument was dropped: the string is truthy
        # (so it never disabled the frame) and newer Matplotlib rejects the keyword.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        plt.close(fig)

In [None]:
def audio_to_stft(signal, sr, fig_name, destination_folder):
    """Render the STFT spectrogram of an audio signal and save it as an image.

    Parameters
    ----------
    signal : audio samples, passed to ``librosa.stft``
    sr : sampling rate of ``signal``
    fig_name : file name of the image; no extension is added here, so Matplotlib
        picks its default output format (presumably PNG -- the label CSVs of this
        notebook reference '<uuid>.png')
    destination_folder : folder prefix, concatenated with ``fig_name`` as-is
        (it must therefore end with a path separator)

    The figure is always closed after saving so that converting many files does
    not accumulate open figures.
    """
    fig, ax = plt.subplots()
    try:
        X = librosa.stft(signal)
        Xdb = librosa.amplitude_to_db(np.abs(X))  # magnitude -> dB

        # Draw on the axes created above explicitly instead of relying on the
        # implicit "current axes".
        librosa.display.specshow(Xdb, sr=sr, cmap='magma', x_axis='time', y_axis='log', ax=ax)
        # Let the axes fill the whole canvas (no margins around the spectrogram).
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # NOTE: the former frameon='false' argument was dropped: the string is truthy
        # (so it never disabled the frame) and newer Matplotlib rejects the keyword.
        fig.savefig(destination_folder + fig_name, dpi=300)  # dpi = resolution in dots per inch
    finally:
        plt.close(fig)

In [None]:
def audio_to_cqt(signal, sr, fig_name, destination_folder):
    """Render the constant-Q transform of an audio signal and save it as an image.

    Parameters
    ----------
    signal : audio samples, passed to ``librosa.cqt``
    sr : sampling rate of ``signal``
    fig_name : file name of the image; no extension is added here, so Matplotlib
        picks its default output format (presumably PNG -- the label CSVs of this
        notebook reference '<uuid>.png')
    destination_folder : folder prefix, concatenated with ``fig_name`` as-is
        (it must therefore end with a path separator)

    The figure is always closed after saving so that converting many files does
    not accumulate open figures.
    """
    fig, ax = plt.subplots()
    try:
        C = np.abs(librosa.cqt(signal, sr=sr))  # magnitude of the transform
        librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                                 sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)
        # Let the axes fill the whole canvas (no margins around the spectrogram).
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # NOTE: the former frameon='false' argument was dropped: the string is truthy
        # (so it never disabled the frame) and newer Matplotlib rejects the keyword.
        # NOTE(review): dpi is 1000 here but 300 in the other transforms -- confirm this is intended.
        fig.savefig(destination_folder + fig_name, dpi=1000)  # dpi = resolution in dots per inch
    finally:
        plt.close(fig)

In [None]:
# Name of the dataset being processed.
# NOTE(review): convert_EPFL assigns its own local `dataset`; this global is not read by the cells shown here.
dataset = "EPFL"

In [None]:
## Convert to image

def convert_audio(path_to_folder, path_to_destination_folder, files_to_convert, transform_name, dataset):
    """Convert the selected .wav files of a folder to spectrogram images.

    Parameters
    ----------
    path_to_folder : folder that contains the .wav files
    path_to_destination_folder : folder prefix handed to the transform function
    files_to_convert : iterable of file stems (file name without extension) to convert;
        every other file in ``path_to_folder`` is ignored
    transform_name : which transform to apply -- one of 'mel', 'hcqt', 'stft', 'cqt'
        (case-insensitive)
    dataset : accepted for interface compatibility; not used by the conversion

    Raises
    ------
    ValueError
        If ``transform_name`` is not a known transform. The function used to ignore
        this argument and always apply the hybrid CQT, whatever the destination
        folder was called.
    """
    transforms = {
        "mel": audio_to_mel,
        "hcqt": audio_to_hcqt,
        "stft": audio_to_stft,
        "cqt": audio_to_cqt,
    }
    transform_key = str(transform_name).lower()
    if transform_key not in transforms:
        raise ValueError(
            "Unknown transform_name {0!r}; expected one of {1}".format(transform_name, sorted(transforms))
        )
    audio_to_image = transforms[transform_key]

    warnings.filterwarnings('ignore') # ignore warnings for reading audio files

    wanted_stems = set(files_to_convert)  # constant-time membership test per file
    for file in os.listdir(path_to_folder):
        # splitext copes with names that contain no dot at all (e.g. sub-folders).
        image_name, extension = os.path.splitext(file)  # file names are of the form "unique_id.wav"
        if extension != ".wav" or image_name not in wanted_stems:
            continue

        audio_signal, sr = librosa.load(os.path.join(path_to_folder, file), sr=None)  # sr=None is passed through to librosa.load
        print(file)
        if len(audio_signal) > 0:  # nothing to draw for an empty recording
            audio_to_image(audio_signal, sr, image_name, path_to_destination_folder)
          

In [None]:
def convert_EPFL(transform_name):
    """Convert the EPFL (COUGHVID) recordings listed in the labels CSV to images.

    Parameters
    ----------
    transform_name : name of the transform; it names the destination folder
        ("drive/MyDrive/<transform_name>_small/") and is forwarded to ``convert_audio``

    Samples whose image file already exists in the destination folder are skipped,
    so an interrupted conversion can be resumed.
    """
    path_to_folder = "public_dataset/"  # path to sound files

    # path to csv file that will be used in order for the appropriate samples to be converted
    # could be either for the annotated subset or for the whole dataset
    csv_path = "EPFL_small_labels.csv"

    path_to_destination_folder = "drive/MyDrive/" + transform_name + "_small/"
    dataset = "EPFL"

    ## create directory if it does not exist
    os.makedirs(path_to_destination_folder, exist_ok=True)

    # List the destination folder once instead of once per CSV row.
    already_converted = set(os.listdir(path_to_destination_folder))

    with open(csv_path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # Column 0 holds the image file name ("<uuid>.png"); keep the stem of
        # every image that has not been produced yet.
        files_to_convert = [
            row[0].split(".")[0]
            for row in csv_reader
            if row[0] != 'file_name' and row[0] not in already_converted
        ]

    convert_audio(path_to_folder, path_to_destination_folder, files_to_convert, transform_name, dataset)

In [None]:
# Colab-only: mount Google Drive at /content/drive.
# convert_EPFL writes its images under the relative path "drive/MyDrive/...".
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Repeat for each different transformation.
# The argument names the destination folder ("drive/MyDrive/<name>_small/").
convert_EPFL("hcqt")

88bad32c-a9b8-427d-abb9-a9b14202805a.wav
8cb53fee-67d8-48f6-85eb-b74eab71d4bf.wav
0c4d0b5c-1007-4426-8444-e71a1c2732f3.wav
1853c64d-a058-471d-8409-84fd2afb5edd.wav
e6264c42-d888-4a82-97ac-6c5a40c1cc19.wav
efe41fcb-0a86-4fd3-ad6c-e85f4cde8550.wav
aa78817e-f5e3-470b-b516-6006c59fb1fa.wav
abf2af4d-9369-40e9-873b-73dca08ae7a1.wav
16af8331-f890-4f2e-bf49-2b42985d379e.wav
76f9d2cf-0ee1-4c38-9f49-02e59b8d6fb3.wav
6978f6b4-6485-46c2-90a3-70cf5b944868.wav
36d918eb-8f0c-4ab2-a7de-7aa27f58800c.wav
db72efa4-78a9-40e7-a450-0affcd4c96b6.wav
4ff82172-fda0-453c-80f2-1cf5aaffa8f1.wav
ba6f354e-2189-4b93-b037-c07ee2e72f40.wav
9e949ada-abdf-41f2-8d03-bbd3d1ea4754.wav
b4acf2a8-1a5d-4fcc-9f7f-26b510041553.wav
82879368-f357-4032-b87a-5268db64e247.wav
d293ce7d-1898-4c66-bfd2-c3c329e82408.wav
4ff8d59c-1714-4de5-bdd0-15e6f5bafb6b.wav
93e121aa-1c8b-4b9a-b446-cbe4ddbcdaf4.wav
6cc200db-e499-4074-906e-8c335c298fcd.wav
39dde9fb-3225-40e4-b8dd-9173f435930d.wav
5b2342d2-01f8-4ad7-bf13-217601cd3c13.wav
e206e88e-4a9e-4e