In [1]:
import os
import pandas as pd
import librosa
import librosa.display
import numpy as np
from tqdm.notebook import tqdm
import webrtcvad
from matplotlib import pyplot as plt
import soundfile
import pickle

In [2]:
# Extend the module search path three directory levels up, then import VAT.
# NOTE(review): presumably VAT is a project-local module that lives in that
# directory — confirm, since the relative path is resolved against the
# kernel's working directory.
import sys
sys.path.append(r'../../..')
import VAT

In [3]:
# Input/output locations. All paths are relative, so they resolve against
# the kernel's working directory.
dataset_path = ".."
out_path = "raw"

# Folder holding one sub-directory per recording id (see the extraction loop).
rawdata_path = os.path.join(dataset_path, "Extracted_data")
# CSV that is loaded into the `metadata` frame.
metadata_path = os.path.join(dataset_path, "combined_data.csv")

In [4]:
# Load the metadata table; the file is comma-separated, which is
# read_csv's default delimiter.
metadata = pd.read_csv(metadata_path)

In [5]:
# Derive a binary `covid` label from `covid_status`:
#   healthy -> 0, positive_mild / positive_moderate -> 1.
# Any other (or missing) status maps to NaN and the row is dropped.
# Mapping straight to numbers avoids a column that starts out as the string
# sentinel 'X' and then has integers written into it, and it leaves `covid`
# with a proper integer dtype.
covid_label_by_status = {
    "healthy": 0,
    "positive_mild": 1,
    "positive_moderate": 1,
}
metadata["covid"] = metadata["covid_status"].map(covid_label_by_status)
metadata = metadata.loc[metadata["covid"].notna()].astype({"covid": int})

In [6]:
# Class sizes: number of non-null ids for each covid label.
entries_per_label = (
    metadata[['covid', 'id']]
    .groupby(['covid'])
    .count()
    .rename(columns={'id': 'N_entries'})
)
print(entries_per_label)

       N_entries
covid           
0           1433
1            591


In [7]:
# Balance the classes by undersampling the healthy rows down to the number of
# COVID-positive rows. The draw is seeded so that a fresh "Restart & Run All"
# selects the same healthy rows every time (an unseeded sample would make the
# saved dataset differ from run to run).
balance_seed = 42

positive_rows = metadata.loc[metadata["covid"] == 1]
healthy_rows = metadata.loc[metadata["covid"] == 0]
num = len(positive_rows)

balanced_data = pd.concat([
    positive_rows,
    healthy_rows.sample(n = num, random_state = balance_seed),
])

print(balanced_data[['covid','id']].groupby(['covid']).count().rename(columns={'id':'N_entries'}))

       N_entries
covid           
0            591
1            591


In [18]:
# One entry is appended to each list for every recording that yields a sample.
dataVAT = {"filename": [], "signal": [], "label": []}

sampleRate = 48000      # every signal is brought to this rate before VAD
frame_duration = 0.01   # forwarded unchanged to the VAT helpers
vad_mode = 3            # forwarded unchanged to VAT.detectVoiceActivity


def _extract_voiced_sample(uuid):
    """Load one recording's "cough-heavy.wav" and return its sampled segment.

    Returns None when the file is missing, the audio is empty, or
    VAT.sampleVoiceActivity returns None for it.
    """
    file_path = os.path.join(rawdata_path, uuid, "cough-heavy.wav")
    if not os.path.exists(file_path):
        print(f"could not find audio file for uuid: {uuid}")
        return None

    # sr=None keeps the file's native rate; resampling is done explicitly below.
    signal, sr = librosa.load(file_path, sr = None)
    if signal.size == 0:
        print("Empty audio")
        return None

    if sr != sampleRate:
        print("Resampling...")
        signal = librosa.resample(signal, orig_sr = sr, target_sr = sampleRate)

    # The detector receives the PCM16 conversion; sampling uses the float signal.
    vad_res = VAT.detectVoiceActivity(VAT.float_to_pcm16(signal), sampleRate, frame_duration, vad_mode)
    sample, _start = VAT.sampleVoiceActivity(signal, vad_res, frame_duration, sampleRate)
    return sample


id_label_pairs = zip(balanced_data["id"].values, balanced_data["covid"].values)
for uuid, covid in tqdm(id_label_pairs, total = len(balanced_data)):
    sample = _extract_voiced_sample(uuid)
    if sample is None:
        continue

    dataVAT["filename"].append(uuid)
    dataVAT["signal"].append(sample)
    dataVAT["label"].append(covid)


  0%|          | 0/1182 [00:00<?, ?it/s]

ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
Empty audio
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
Resampling...
ERROR: No active voice detected.
Resampling...
Resampling...
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
Resampling...
ERROR: No active voice detected.
Resampling...
ERROR: No active voice detected.
ERROR: No active voice detected.
Resampling...
Resampling...
ERROR: No active voice detected.
Resampling...
Resampling...
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERROR: No active voice detected.
ERRO

In [19]:
# Report how many collected samples carry each label.
labels = np.asarray(dataVAT["label"])
for label_value in (0, 1):
    print(f"covid: {label_value}, {np.sum(labels == label_value)}")

covid: 0, 539
covid: 1, 553


In [20]:
# Convert the collected samples to a single array; the bare last expression
# displays its shape as the cell output.
voiced_samples = dataVAT["signal"]
signals = np.asarray(voiced_samples)
signals.shape

(1092, 48000)

In [21]:
# Persist the collected samples. The file name is derived from `sampleRate`
# so it always matches the rate the signals were resampled to; for
# sampleRate = 48000 this is still "vat_48000.pkl".
vat_pickle_name = f"vat_{sampleRate}.pkl"
with open(vat_pickle_name, "wb") as f:
    pickle.dump(dataVAT, f)