# Load Libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')

import ast
import os
import time
import csv
import librosa
import librosa.display

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from IPython.display import Audio, display
from tqdm import tqdm
from collections import Counter
from pprint import pprint
%matplotlib inline

from functions.functions_cough import (
    get_cough, 
    convert_events_to_seconds, 
    label_generator
    )

# Load data

In [None]:
# Read the combined metadata table; the trailing bare expression renders it
# as this cell's output.
data_all_csv = 'Results/Data/data_all.csv'
df_all = pd.read_csv(data_all_csv)
df_all

In [None]:
# Number of rows per value of the 'dataset' column.
Counter(df_all['dataset'])

# Details

In [None]:
# Dataset identifiers; each one is substituted into
# 'Results/Data/data_summary_{dataset_name}.csv' by the preview cells below.
list_dataset_name = ['coswara', 'coughvid', 'esc50', 'fsdkaggle', 'virufy']

# Parameters handed to the helpers imported from functions.functions_cough.
# NOTE(review): segment_length is presumably in seconds and fs in Hz — confirm
# against the helper implementations.
segment_length = 0.1
fs = 22050

# Print Non-Cough Examples

In [None]:
# For every dataset: load its summary table, keep the rows whose label is 0,
# then plot the waveform of (at most) the first such recording and embed an
# audio player for it.
for dataset_name in list_dataset_name:
    print(dataset_name)

    summary_csv = f'Results/Data/data_summary_{dataset_name}.csv'
    df_all = pd.read_csv(summary_csv)
    df_all = df_all.loc[df_all['label'] == 0].reset_index(drop=True)
    df_all['label_onset'] = ''

    # Preview at most one recording per dataset (zero if none matched).
    total_len = min(len(df_all), 1)

    for i in tqdm(range(total_len)):

        # Metadata of the i-th row of the filtered table.
        filepath = df_all.at[i, 'filepath']  # Audio path
        dataset = df_all.at[i, 'dataset']  # Dataset name
        filename = df_all.at[i, 'filename']

        label = df_all.at[i, 'label']
        age = df_all.at[i, 'age']
        gender = df_all.at[i, 'gender']
        status = df_all.at[i, 'status']

        print(f'{dataset} {filename} {label}')

        # Label 1 is titled as cough; every other label as non-cough.
        title = 'Cough Audio Waveform' if label == 1 else 'Non-Cough Audio Waveform'

        # Decode the audio with librosa's default loading options.
        y, sr = librosa.load(filepath)
        duration = librosa.get_duration(y=y, sr=sr)

        # Single-panel figure holding the waveform.
        fig, ax = plt.subplots(figsize=(6, 2))
        librosa.display.waveshow(y, sr=sr, ax=ax)
        ax.set(title=title, xlabel='Time (s)', ylabel='Amplitude')
        ax.grid(True)
        ax.minorticks_on()  # minor ticks give a finer time/amplitude reading

        fig.tight_layout()
        plt.show()

        # Inline audio player for the same samples.
        display(Audio(data=y, rate=sr))

# Print Audio + Absolute Curve + Spectrogram

In [None]:
# For every dataset: take (at most) the first recording labelled 1 and draw a
# three-panel figure -- waveform, the `energy` curve returned by get_cough, and
# a dB spectrogram -- followed by an audio player.
for dataset_name in list_dataset_name:
    print(dataset_name)

    summary_csv = f'Results/Data/data_summary_{dataset_name}.csv'
    df_all = pd.read_csv(summary_csv)
    df_all = df_all.loc[df_all['label'] == 1].reset_index(drop=True)
    df_all['label_onset'] = ''

    # Preview at most one recording per dataset (zero if none matched).
    total_len = min(len(df_all), 1)

    for i in tqdm(range(total_len)):

        # Metadata of the i-th row of the filtered table.
        filepath = df_all.at[i, 'filepath']  # Audio path
        dataset = df_all.at[i, 'dataset']  # Dataset name
        filename = df_all.at[i, 'filename']

        label = df_all.at[i, 'label']
        age = df_all.at[i, 'age']
        gender = df_all.at[i, 'gender']
        status = df_all.at[i, 'status']

        print(f'{dataset} {filename} {label}')

        # Decode the audio with librosa's default loading options.
        y, sr = librosa.load(filepath)
        duration = librosa.get_duration(y=y, sr=sr)

        # Project helper; only hop_length and energy are plotted in this cell.
        detection = get_cough(y, segment_length, fs)
        cough_events, silent_events, hop_length, energy, threshold_cough = detection

        # Three stacked panels on a common time axis.
        fig, axs = plt.subplots(3, 1, figsize=(6, 5), sharex=True)

        # Label 1 is titled as cough; every other label as non-cough.
        title = 'Cough Audio Waveform' if label == 1 else 'Non-Cough Audio Waveform'

        # Panel 0: waveform.
        librosa.display.waveshow(y, sr=sr, ax=axs[0])
        axs[0].set(title=title, ylabel='Amplitude')
        axs[0].grid(True)

        # Panel 1: energy curve, one value per hop, placed on a seconds axis.
        frame_index = np.arange(len(energy))
        times = frame_index * hop_length / sr
        axs[1].plot(times, energy, color='purple')
        axs[1].set(title='Absolute Amplitude Curve', ylabel='|Amplitude|')
        axs[1].grid(True)

        # Panel 2: STFT magnitude in dB relative to its maximum.
        magnitude = np.abs(librosa.stft(y))
        D = librosa.amplitude_to_db(magnitude, ref=np.max)
        img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log', ax=axs[2])
        axs[2].set(title='Spectrogram (dB)', xlabel='Time (s)', ylabel='Frequency (Hz)')

        # A handful of widely spaced frequency ticks keeps the log axis readable.
        reduced_ticks = [0, 128, 1024, 8192]
        axs[2].set_yticks(reduced_ticks)
        axs[2].set_yticklabels(list(map(str, reduced_ticks)))

        # Horizontal colour bar underneath the spectrogram.
        cbar = fig.colorbar(img, ax=axs[2], orientation='horizontal', pad=0.3)
        cbar.set_label('Amplitude (dB)')

        fig.tight_layout()
        plt.show()

        # Inline audio player for the same samples.
        display(Audio(data=y, rate=sr))

# Print Audio + Absolute Curve + Highlights

In [None]:
# For every dataset: take up to three recordings labelled 1, run the project
# cough detector on each, store the generated per-segment labels in
# df_all['label_onset'], and draw the waveform (with detected cough / silent
# spans shaded) above the energy curve with its threshold line.
for dataset_name in list_dataset_name:
    print(dataset_name)

    df_all = pd.read_csv(f'Results/Data/data_summary_{dataset_name}.csv')
    df_all = df_all[df_all['label'] == 1].reset_index(drop=True)
    # Explicit object dtype so one cell can hold the whole value returned by
    # label_generator, independent of pandas' default string dtype.
    df_all['label_onset'] = pd.Series('', index=df_all.index, dtype=object)

    # Preview up to three recordings, but never more than the table holds.
    # (Forcing the bound to 3 fails with a KeyError when only 2 rows exist.)
    total_len = min(len(df_all), 3)

    for i in tqdm(range(total_len)):

        filepath = df_all['filepath'][i] # Audio path
        dataset = df_all['dataset'][i] # Dataset name
        filename = df_all['filename'][i]

        label = df_all['label'][i]
        age = df_all['age'][i]
        gender = df_all['gender'][i]
        status = df_all['status'][i]

        print(f'{dataset} {filename} {label}')

        (y, sr) = librosa.load(filepath) # mono=True
        duration = librosa.get_duration(y=y, sr=sr)

        # NOTE(review): get_cough receives the configured `fs`, while the audio
        # is decoded at librosa's default rate `sr`; presumably both are meant
        # to be the same sampling rate -- confirm against get_cough.
        (
            cough_events,
            silent_events,
            hop_length,
            energy,
            threshold_cough) = get_cough(y, segment_length, fs)

        cough_events_pp = convert_events_to_seconds(cough_events, segment_length, hop_length, sr)

        time_intervals, labels = label_generator(cough_events_pp, duration, segment_length)
        # Single-cell write on the frame itself. Chained assignment
        # (df['col'][i] = ...) may write to a temporary copy and be lost.
        df_all.at[i, 'label_onset'] = labels

        # Two stacked panels on a common time axis.
        fig, axs = plt.subplots(2, 1, figsize=(6, 4), sharex=True)

        if label == 1:
            title = 'Cough Audio Waveform'
        else:
            title = 'Non-Cough Audio Waveform'

        # Panel 0: waveform
        librosa.display.waveshow(y, sr=sr, ax=axs[0])
        axs[0].set_title(title)
        axs[0].set_ylabel('Amplitude')
        axs[0].grid(True)
        axs[0].minorticks_on()  # Enable minor ticks for finer control

        # Panel 1: energy curve, one value per hop, plus the detector threshold
        times = np.arange(len(energy)) * hop_length / sr
        axs[1].plot(times, energy, color='blue', label='Mean Absolute')
        axs[1].axhline(y=threshold_cough, color='black', linestyle='dotted', linewidth=2, label='Dotted Line')
        axs[1].set_title('Absolute Curve')
        axs[1].set_xlabel('Time (s)')
        axs[1].set_ylabel('|Amplitude|')
        axs[1].grid(True)
        # Attach the legend to the energy panel explicitly instead of relying
        # on pyplot's implicit "current axes".
        axs[1].legend(loc='upper right')

        alpha = 0.3

        def highlight(events, color, label):
            """Shade each (start, end) event on the waveform panel.

            `start` and `end` are multiplied by hop_length / sr to place them
            on the time axis; `end` is treated as inclusive (end + 1).
            """
            for start, end in events:
                t_start = start * hop_length / sr
                t_end = (end + 1) * hop_length / sr
                axs[0].axvspan(t_start, t_end, color=color, alpha=alpha, label=label)

        highlight(silent_events, 'green', 'Silent')
        highlight(cough_events, 'red', 'Cough')

        # One legend entry per category (rather than one per shaded span),
        # shown in the top-right corner of the waveform panel.
        handles = [
            plt.Line2D([0], [0], color='green', lw=4, alpha=alpha, label='Silent'),
            plt.Line2D([0], [0], color='red', lw=4, alpha=alpha, label='Cough'),
        ]
        axs[0].legend(
            handles=handles,
            loc='upper right',
            fontsize='small',       # or use a numeric value like 8
            handlelength=1.5,       # shorten the length of the legend lines
            frameon=True)

        plt.tight_layout()
        plt.show()

        # Play audio
        display(Audio(data=y, rate=sr))