In [1]:
import os
import random
import librosa
import numpy as np
import pandas as pd

from tqdm import tqdm

In [2]:
HOP_LENGTH = 512        # number of samples between successive frames
WINDOW_LENGTH = 512     # length of the window in samples
N_MEL = 128             # number of Mel bands to generate


def compute_melspectrogram_with_fixed_length(audio, sampling_rate, num_of_samples=128):
    try:
        # compute a mel-scaled spectrogram
        melspectrogram = librosa.feature.melspectrogram(y=audio, 
                                                        sr=sampling_rate, 
                                                        hop_length=HOP_LENGTH,
                                                        win_length=WINDOW_LENGTH, 
                                                        n_mels=N_MEL)

        # convert a power spectrogram to decibel units (log-mel spectrogram)
        melspectrogram_db = librosa.power_to_db(melspectrogram, ref=np.max)
        
        melspectrogram_length = melspectrogram_db.shape[1]
        
        # pad or fix the length of spectrogram 
        if melspectrogram_length != num_of_samples:
            melspectrogram_db = librosa.util.fix_length(melspectrogram_db, 
                                                        size=num_of_samples, 
                                                        axis=1, 
                                                        constant_values=(0, -80.0))
    except Exception as e:
        print("\nError encountered while parsing files\n>>", e)
        return None 
    
    return melspectrogram_db

# DCASE2024

In [25]:
root_path = "F:/DATAS/DCASE2024Task2ASD/"
mts = ["bearing", "fan", "gearbox", "slider", "ToyCar", "ToyTrain", "valve"]
l2m = {0:"bearing", 1:"fan", 2:"gearbox", 3:"slider", 4:"ToyCar", 5:"ToyTrain", 6:"valve"}
m2l = {"bearing":0, "fan":1, "gearbox":2, "slider":3, "ToyCar":4, "ToyTrain":5, "valve":6}
subpath = [root_path+f"dev_{mt}/{mt}/train/" for mt in mts]
with open("./dcase2024cls.csv", 'w') as fout:
    fout.write(",slice_file_name,fold,classID\n")
    idx = 0
    for j,filepath in enumerate(subpath):
        for item in os.listdir(filepath):
            fout.write(f"{idx},{item},{np.random.randint(1, 11)},{j}\n")
            idx+=1
print("end")

end


In [26]:
# load the csv metadata file into a Pandas DataFrame structure
DCASE2024_METADATA_PATH = "./dcase2024cls.csv"
dcase2024_metadata_df = pd.read_csv(DCASE2024_METADATA_PATH,
                               usecols=["slice_file_name", "fold", "classID"],
                               dtype={"fold": "str", "classID" : "uint8"})

dcase2024_metadata_df

Unnamed: 0,slice_file_name,fold,classID
0,section_00_source_train_normal_0001_pro_A_vel_...,1,0
1,section_00_source_train_normal_0002_pro_A_vel_...,1,0
2,section_00_source_train_normal_0003_pro_A_vel_...,5,0
3,section_00_source_train_normal_0004_pro_A_vel_...,7,0
4,section_00_source_train_normal_0005_pro_A_vel_...,9,0
...,...,...,...
6995,section_00_target_train_normal_0006_v1pat_B_v2...,5,6
6996,section_00_target_train_normal_0007_v1pat_B_v2...,6,6
6997,section_00_target_train_normal_0008_v1pat_B_v2...,8,6
6998,section_00_target_train_normal_0009_v1pat_A_v2...,7,6


# COUGHVID

In [27]:
metadata_path = "C:/Program Files (zk)/PythonFiles/AClassification/SoundDL-CoughVID/datasets/waveinfo.csv"
metadata = pd.read_csv(metadata_path, delimiter=',', header=0, index_col=0)
metadata = metadata.where(metadata["cough_detected"]>0.65).where(metadata["duration"] < 13).where(metadata["duration"] > 0.86).dropna(axis=0)
# metadata = metadata.iloc[:, [0, 2, 9, 10]]

# datadf1.where(datadf1["cough_detected"]>0.65).where(datadf1["duration"] < 13).where(datadf1["duration"] > 0.86).dropna(axis=0)
print(metadata.groupby("status")["filename"].count())
# metadata

status
0.0    6062
1.0     551
2.0     322
Name: filename, dtype: int64


In [4]:
root_path = "C:/Program Files (zk)/PythonFiles/AClassification/SoundDL-CoughVID/datasets/"
df = pd.read_csv(root_path+"waveinfo_labedfine_forcls.csv", delimiter=',', header=0, index_col=0)
print(df.groupby("status_full")["filename"].count())
cough_metadata_df = df.iloc[:, [0, 6]]
# df = pd.read_csv(root_path+"waveinfo_labedfine_staaSSL.csv", delimiter=',', header=0, index_col=0)
# df.groupby("status_full")["filename"].count()
# iterate through all dataset examples and compute log-mel spectrograms
foldcol = []
N = len(cough_metadata_df)
fold_per = N // 10
for i in range(1, 10):
    foldcol.extend([i]*fold_per)
foldcol.extend([10]*(N-len(foldcol)))

random.shuffle(foldcol)
cough_metadata_df["fold"] = foldcol
cough_metadata_df.reset_index(drop=True)
cough_metadata_df = cough_metadata_df.iloc[:, [0, 2, 1]]
cough_metadata_df.rename(columns={"status_full": "classID"}, inplace=True)
cough_metadata_df

status_full
0    2114
1    3288
2     939
Name: filename, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cough_metadata_df["fold"] = foldcol


Unnamed: 0,filename,fold,classID
15871,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,3,0
321,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,8,0
660,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,10,0
16744,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,1,0
2249,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,2,0
...,...,...,...
17850,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,8,2
17890,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,8,2
17897,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,8,2
17920,F:/DATAS/COUGHVID-public_dataset_v3/coughvid_2...,10,2


In [5]:
import sys
sys.path.append(r'C:/Program Files (zk)/PythonFiles/AClassification/AudioClassification-Pytorch-KZhao/')
from ackit.data_utils.audio import AudioSegment

In [7]:
SOUND_DURATION = 2.95   # fixed duration of an audio excerpt in seconds
features = []
root_path = "F:/DATAS/COUGHVID-public_dataset_v3/"


# iterate through all dataset examples and compute log-mel spectrograms
for index, row in tqdm(cough_metadata_df.iterrows(), total=len(cough_metadata_df)):
    file_path = row["filename"]
    sample_rate = 22050
    
    audioseg = AudioSegment.from_file(file_path)
    audioseg.vad()
    audioseg.resample(target_sample_rate=sample_rate)
    audioseg.crop(duration=2.95, mode="eval")
    
    # audio, sample_rate = librosa.load(file_path, duration=SOUND_DURATION, res_type='kaiser_fast')
    melspectrogram = compute_melspectrogram_with_fixed_length(audioseg.samples, sample_rate)
    label = row["classID"]
    fold = row["fold"]
    
    features.append([melspectrogram, label, fold])

# convert into a Pandas DataFrame 
us8k_df = pd.DataFrame(features, columns=["melspectrogram", "label", "fold"])
us8k_df.to_pickle(root_path+"coughvid_df.pkl")

  samples, sample_rate = librosa.core.load(file)  # , dtype='float32')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  samples, sample_rate = librosa.core.load(file)  # , dtype='float32')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  samples, sample_rate = librosa.core.load(file)  # , dtype='float32')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 6341/6341 [11:42<00:00,  9.02it/s]


In [None]:
import pandas as pd
data_df = pd.read_pickle("F:/DATAS/COUGHVID-public_dataset_v3/coughvid_df.pkl")

In [10]:
data_df

Unnamed: 0,melspectrogram,label,fold
0,"[[-52.615319624048965, -23.322014074735876, -2...",0,3
1,"[[-80.0, -80.0, -44.78798640958335, -40.054080...",0,8
2,"[[-62.32242690271187, -48.21961690240145, -58....",0,10
3,"[[-49.213589557069795, -59.35612308674955, -49...",0,1
4,"[[-80.0, -54.18822816762162, -26.4072946875931...",0,2
...,...,...,...
6336,"[[-80.0, -58.633532093013265, -42.125659267305...",2,8
6337,"[[-80.0, -63.66613437373536, -27.8700516191618...",2,8
6338,"[[-80.0, -67.6875122024679, -40.49516677310231...",2,8
6339,"[[-73.49425746511902, -52.76433224441048, -41....",2,10


In [14]:
for i in range(1, 11):
    print(data_df[data_df["fold"]==i].groupby("label")["fold"].count())

label
0    198
1    352
2     84
Name: fold, dtype: int64
label
0    211
1    328
2     95
Name: fold, dtype: int64
label
0    201
1    342
2     91
Name: fold, dtype: int64
label
0    234
1    303
2     97
Name: fold, dtype: int64
label
0    222
1    317
2     95
Name: fold, dtype: int64
label
0    213
1    319
2    102
Name: fold, dtype: int64
label
0    209
1    322
2    103
Name: fold, dtype: int64
label
0    224
1    317
2     93
Name: fold, dtype: int64
label
0    191
1    352
2     91
Name: fold, dtype: int64
label
0    211
1    336
2     88
Name: fold, dtype: int64


# DCASE2020

In [30]:
SOUND_DURATION = 2.95   # fixed duration of an audio excerpt in seconds

features = []

root_path = "F:/DATAS/DCASE2024Task2ASD/"


# iterate through all dataset examples and compute log-mel spectrograms
for index, row in tqdm(dcase2024_metadata_df.iterrows(), total=len(dcase2024_metadata_df)):
    subpath = f"/dev_{l2m[row['classID']]}/{l2m[row['classID']]}/train"
    file_path = f"{root_path}"+subpath+"/{row["slice_file_name"]}"
    audio, sample_rate = librosa.load(file_path, duration=SOUND_DURATION, res_type='kaiser_fast')
    
    melspectrogram = compute_melspectrogram_with_fixed_length(audio, sample_rate)
    label = row["classID"]
    fold = row["fold"]
    
    features.append([melspectrogram, label, fold])

# convert into a Pandas DataFrame 
us8k_df = pd.DataFrame(features, columns=["melspectrogram", "label", "fold"])
us8k_df.to_pickle(root_path+"us8k_df.pkl")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 7000/7000 [01:35<00:00, 72.92it/s]
