In [1]:
from scipy.signal import butter, lfilter, freqz, filtfilt, sosfiltfilt
import scipy.io.wavfile

import librosa
import librosa.display

from pydub import AudioSegment

import numpy as np
import glob 
import os

In [2]:
# gets all file_locations in a directory 
def get_file_locations(dir_location):
    return [i for i in glob.glob(dir_location + "/*.wav")]

# loads all the files and returns a list
def load_files(file_locations):
    loaded_files = []
    for i in file_locations:
        file, sr = librosa.load(i)
        loaded_files.append(file)
        
    return loaded_files 

In [3]:
# orig_copd = get_file_locations("/Users/home/Documents/Respiratory Illness AI/Official_dataset/youtube_copd_coughs")
# orig_asthma = get_file_locations("/Users/home/Documents/Respiratory Illness AI/Official_dataset/original_coughs/Asthma_coughs_Coswara")
# orig_healthy = get_file_locations("/Users/home/Documents/Respiratory Illness AI/Official_dataset/healthy")
orig_COVID = get_file_locations("/Users/home/Documents/Respiratory Illness AI/recordings/unfiltered_COVID")

In [4]:
# functions to standardize the amplitude 
def match_target_amplitude(sound, target_dBFS):
    change_in_dBFS = target_dBFS - sound.dBFS
    return sound.apply_gain(change_in_dBFS)

def normalize(file_name, target_dBFS, location):
    sound = AudioSegment.from_file(file_name, "wav")
    normalized_sound = match_target_amplitude(sound, target_dBFS)
    normalized_sound.export(f"{location}/normalized_{os.path.basename(file_name)}", format="wav")

def normalize_amplitude(audio, location):
    for i in range (len(audio)):
        normalize(audio[i], -30, location)

In [5]:
# normalize_amplitude(orig_copd, "/Users/home/Documents/Respiratory Illness AI/normalizing")
# normalize_amplitude(orig_asthma, "/Users/home/Documents/Respiratory Illness AI/Official_dataset/normalized_coughs/asthma_coswara")
# normalize_amplitude(orig_healthy, "/Users/home/Documents/Respiratory Illness AI/Official_dataset/temporary")
normalize_amplitude(orig_COVID, "/Users/home/Documents/Respiratory Illness AI/recordings/temporary")

In [6]:
# normalized_copd = get_file_locations("/Users/home/Documents/Respiratory Illness AI/normalizing")
# normalized_asthma = get_file_locations("/Users/home/Documents/Respiratory Illness AI/Official_dataset/normalized_coughs/asthma_coswara")
# normalized_healthy = get_file_locations("/Users/home/Documents/Respiratory Illness AI/Official_dataset/temporary")
normalized_COVID = get_file_locations("/Users/home/Documents/Respiratory Illness AI/recordings/temporary")

# loaded_norm_copd = load_files(normalized_copd)
# loaded_norm_asthma = load_files(normalized_asthma, 3)
# loaded_norm_healthy = load_files(normalized_healthy)
loaded_norm_COVID = load_files(normalized_COVID)

In [7]:
# functions to determine first time value

# low pass filter
def butter_lowpass(cutoff, fs, order):
    normal_cutoff = cutoff / (0.5*fs)
    sos = butter(order, normal_cutoff,
                 btype="low", output="sos")
    return sos

def butter_lowpass_filtfilt(data, cutoff, fs, order):
    sos = butter_lowpass(cutoff, fs, order=order)
    y = sosfiltfilt(sos, data)
    return y

def get_low_pass(data, cutoff, fs, order):
    low_passed = []

    for i in range(len(data)):
        b = butter_lowpass_filtfilt(data[i], cutoff, fs, order)
        low_passed.append(b)
    
    return low_passed


# get amplitude envelope
def amplitude_envelope(signal, frame_size, hop_length):
    amplitude_envelope = []

    for i in range(0, len(signal), hop_length): 
        amplitude_envelope_current_frame = max(signal[i:i+frame_size]) 
        amplitude_envelope.append(amplitude_envelope_current_frame)
    
    return np.array(amplitude_envelope)


# calculate first time
def calculate_first_time(cough_values, t):
    segmentation_values = []
    for i, j in zip(t, cough_values):
        if j > 0.018:
            segmentation_values.append(i)
    return segmentation_values

# calls function: amplitude_envelope
def get_first_time(low_passed_data):
    first_times = []
    
    # define variables for amplitude envelope 
    frame_size = 400
    hop_length = 210
    

    for i in range(len(low_passed_data)):
        ae_data = amplitude_envelope(low_passed_data[i], frame_size, hop_length)
        
        frames1 = range(len(ae_data))
        t1 = librosa.frames_to_time(frames1, hop_length = hop_length)
        
        first_time = calculate_first_time(ae_data, t1)
        print(len(first_time), normalized_COVID[i])

        if len(first_time) == 0:
            first_times.append(0)
        else: 
            first_times.append(first_time[0])
        
    return first_times

In [8]:
cutoff = 2500
fs = 48000
order = 20

# lowpassed_copd = get_low_pass(loaded_norm_copd, cutoff, fs, order)
# lowpassed_asthma = get_low_pass(loaded_norm_asthma, cutoff, fs, order)
# lowpassed_healthy = get_low_pass(loaded_norm_healthy, cutoff, fs, order)
lowpassed_COVID = get_low_pass(loaded_norm_COVID, cutoff, fs, order)

# first_times_copd = get_first_time(lowpassed_copd)
# first_times_asthma = get_first_time(lowpassed_asthma)
# first_times_healthy = get_first_time(lowpassed_healthy)
first_times_COVID = get_first_time(lowpassed_COVID)

103 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_b97f011c-062b-4fdd-a65d-4f1d8ab1bd09.wav
115 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_BDygu23lyUbBq2NlqfzRxGoMm9B2-1877.wav
55 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_C1jXleNkfzaI6sbPJL883EGGRLj2-2161.wav
210 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_AVbhXf3KiUf9ZmkjvWzDOxevAPH2-1974.wav
165 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_bbe5b8d6-21cf-4718-b263-0fb779d13784.wav
278 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_b3add103-8854-4aba-8448-18361df4dac2.wav
129 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_BT453L7uDzL5IH0BOlDRJhWZWEL2-2518.wav
371 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_b7dc909a-c1e6-4c75-b312-060b1ff136e0.wav
595 /Users/home/Documents/Respiratory

142 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_az5zELVN7ObicxGrBLMoX8ki1LF2-2226.wav
87 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_b1945513-b993-4398-95b8-15c742513e12.wav
169 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_BT453L7uDzL5IH0BOlDRJhWZWEL2-2755.wav
200 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_bx564wHvi9d0HyJTtdi0haZuU493-520.wav
216 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_c54e2543-7e25-4a57-83fe-6b227220d00c.wav
383 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_bcaf041b-32c6-4c56-b9a3-1d76d1afc99f.wav
281 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_bac8326b-2652-443d-8b58-af96cdf5284d.wav
115 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_b9d3a42e-caae-49a0-87c0-9e766112ee5d.wav
192 /Users/home/Documents/Respirato

111 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_akdaeGnDlhgMKnIjAhpslMiMJO43-3619.wav
198 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_bH6WQVvClVRDer6iOFvaGiz0luf1-3533.wav
328 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_c1d86c76-5622-4dbb-9da1-65aebf6a0f0b.wav
88 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_b74176fe-cc78-4daa-8541-10ba8968439d.wav
127 /Users/home/Documents/Respiratory Illness AI/recordings/temporary/normalized_be265d6b-9bc3-4499-a099-940ad4479dde.wav


In [10]:
# segment normalized coughs
def segment_signal(normalized_filenames, original_filenames, t1, location):
    for i in range(len(normalized_filenames)):
        time_1 = t1[i] * 1000
        time_2 = int(time_1) + 330
        sound = AudioSegment.from_wav(normalized_filenames[i])
        
        name = os.path.basename(original_filenames[i])
        
        new = sound[time_1:time_2]
        new.export(f"{location}/segmented_{name}", format="wav")

In [11]:
segment_signal(normalized_COVID, 
               orig_COVID, 
               first_times_COVID, 
               "/Users/home/Documents/Respiratory Illness AI/recordings/unfiltered_COVID")

# segment_signal(normalized_asthma, 
#                orig_asthma, 
#                first_times_asthma, 
#                "/Users/home/Documents/Respiratory Illness AI/Official_dataset/segmented_coughs/asthma_coswara")

In [None]:
for i in filenames:
    y, sr = librosa.load(i)
    print(librosa.get_duration(y))