# Load Libraries

In [2]:
import warnings
warnings.filterwarnings('ignore')

import os
import time
import joblib
import json
import csv
import pathlib
import librosa
import librosa.display
import scipy.stats
from scipy.stats import skew, kurtosis
from scipy.signal import hilbert

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm
from PIL import Image
from matplotlib import pyplot
from collections import Counter
from pydub import AudioSegment # sudo apt install ffmpeg
from pprint import pprint
%matplotlib inline

from concurrent.futures import ThreadPoolExecutor

In [3]:
def slice_audio(file_path, window_size, new_sample_rate):
    """Split a WAV file into consecutive fixed-length windows.

    The file is loaded with pydub, converted to ``new_sample_rate`` and cut
    into back-to-back segments of ``window_size`` seconds. The last segment
    holds whatever audio remains, so it may be shorter than the window.

    Parameters
    ----------
    file_path : str or path-like
        Path of the WAV file to read.
    window_size : int or float
        Length of each window in seconds. Fractional values (e.g. ``0.5``)
        are accepted and rounded to the nearest millisecond.
    new_sample_rate : int
        Frame rate in Hz passed to ``set_frame_rate`` before slicing.

    Returns
    -------
    list
        The sliced segments in chronological order; empty when the audio
        has zero length.

    Raises
    ------
    ValueError
        If ``window_size`` rounds to less than one millisecond.
    """
    # range() only accepts integers, so convert the window to whole
    # milliseconds once; this is what makes fractional windows usable.
    window_ms = int(round(window_size * 1000))
    if window_ms <= 0:
        raise ValueError(
            f"window_size must be at least 0.001 seconds, got {window_size!r}"
        )

    audio = AudioSegment.from_wav(file_path)
    audio = audio.set_frame_rate(new_sample_rate)  # Change sample rate
    duration_ms = len(audio)  # Duration in milliseconds

    return [
        audio[start:min(start + window_ms, duration_ms)]
        for start in range(0, duration_ms, window_ms)
    ]

In [4]:
# Datasets to slice; each one needs a summary CSV at
# Results/Data/data_summary_<name>.csv.
list_dataset_name = [
    'coswara',
    'coughvid',
    'esc50',
    'fsdkaggle',
    'virufy',
]

window_size = 1  # Window size in seconds
new_sample_rate = 16000  # New sample rate in Hz

# Column order of the per-slice metadata table written for every dataset.
columns = ['dataset',
           'filepath', 'filename',
           'filepathslice', 'filenameslice',
           'age', 'gender', 'label', 'status']

for dataset_name in list_dataset_name:
    print(dataset_name)

    # Sliced WAV files of this dataset are written below this directory.
    file_path_save = f"Dataset/Sliced_Wav/{dataset_name}_{window_size}/"
    os.makedirs(file_path_save, exist_ok=True)

    df_all = pd.read_csv(f'Results/Data/data_summary_{dataset_name}.csv')

    slice_rows = []  # one metadata record per exported slice
    failures = []    # (source path, exception) for every file that was skipped

    for row in tqdm(df_all.itertuples(index=False), total=len(df_all)):
        try:
            data_set = row.dataset
            file_path = row.filepath
            file_name = row.filename

            age = row.age
            gender = row.gender
            label = row.label
            status = row.status

            # Get slice of audio
            segments = slice_audio(file_path, window_size, new_sample_rate)

            for j, segment in enumerate(segments):
                # NOTE: file_name is used as-is, so slice names look like
                # "<name>.wav_<j>.wav"; kept unchanged so existing outputs
                # and any downstream readers stay valid.
                file_name_save = f"{file_name}_{j}.wav"
                segment.export(file_path_save + file_name_save, format="wav")

                slice_rows.append([data_set,
                                   file_path, file_name,
                                   file_path_save + file_name_save, file_name_save,
                                   age, gender, label, status])

        except Exception as error:
            # Best effort: one unreadable file must not abort the dataset,
            # but remember it so the loss is visible instead of silent.
            failures.append((getattr(row, 'filepath', None), error))

    if failures:
        print(f"{dataset_name}: skipped {len(failures)} of {len(df_all)} files")
        for failed_path, error in failures[:5]:
            print(f"  {failed_path}: {error!r}")

    df_results = pd.DataFrame(slice_rows, columns=columns)
    # Name the CSV after the dataset being processed. The per-row `data_set`
    # value is undefined (or left over from the previous dataset) when the
    # summary is empty or every row failed, which would crash or overwrite
    # another dataset's CSV.
    df_results.to_csv(f'Dataset/Sliced_Wav/dataset_{dataset_name}_{window_size}.csv', index=False)


coswara


  0%|          | 0/24712 [00:00<?, ?it/s]

coughvid


  0%|          | 0/34434 [00:00<?, ?it/s]

esc50


  0%|          | 0/2000 [00:00<?, ?it/s]

fsdkaggle


  0%|          | 0/11073 [00:00<?, ?it/s]

virufy


  0%|          | 0/121 [00:00<?, ?it/s]

In [5]:
# Preview the slice metadata table. The loop above rebinds df_results for
# every dataset, so this shows only the last dataset that was processed.
df_results

Unnamed: 0,dataset,filepath,filename,filepathslice,filenameslice,age,gender,label,status
0,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0421-083-cough-m-53-1.wav,Dataset/Sliced_Wav/virufy_1/neg-0421-083-cough...,neg-0421-083-cough-m-53-1.wav_0.wav,53,male,1,negative
1,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0421-083-cough-m-53-1.wav,Dataset/Sliced_Wav/virufy_1/neg-0421-083-cough...,neg-0421-083-cough-m-53-1.wav_1.wav,53,male,1,negative
2,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0421-083-cough-m-53-17.wav,Dataset/Sliced_Wav/virufy_1/neg-0421-083-cough...,neg-0421-083-cough-m-53-17.wav_0.wav,53,male,1,negative
3,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0421-083-cough-m-53-17.wav,Dataset/Sliced_Wav/virufy_1/neg-0421-083-cough...,neg-0421-083-cough-m-53-17.wav_1.wav,53,male,1,negative
4,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0421-083-cough-m-53-13.wav,Dataset/Sliced_Wav/virufy_1/neg-0421-083-cough...,neg-0421-083-cough-m-53-13.wav_0.wav,53,male,1,negative
...,...,...,...,...,...,...,...,...,...
237,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0422-098-cough-f-24-1.wav,Dataset/Sliced_Wav/virufy_1/neg-0422-098-cough...,neg-0422-098-cough-f-24-1.wav_1.wav,24,female,1,negative
238,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0422-098-cough-f-24-0.wav,Dataset/Sliced_Wav/virufy_1/neg-0422-098-cough...,neg-0422-098-cough-f-24-0.wav_0.wav,24,female,1,negative
239,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0422-098-cough-f-24-0.wav,Dataset/Sliced_Wav/virufy_1/neg-0422-098-cough...,neg-0422-098-cough-f-24-0.wav_1.wav,24,female,1,negative
240,virufy,Dataset/virufy-data/clinical/segmented/neg/neg...,neg-0422-098-cough-f-24-5.wav,Dataset/Sliced_Wav/virufy_1/neg-0422-098-cough...,neg-0422-098-cough-f-24-5.wav_0.wav,24,female,1,negative
