In [2]:
import pandas as pd
import os

In [None]:
# Paths to processed audio folders
dataset_path = "../data/proccessed/processed_dataset"
glass_brake = f"{dataset_path}/glass_break"
gunshot = f"{dataset_path}/gunshot"
scream = f"{dataset_path}/scream"
neutral = f"{dataset_path}/neutral"

annotations_path = "../data/annotations/annotations.csv"
os.makedirs(annotations_path, exist_ok=True)

In [4]:
def create_wav_dataframe(folder_path):
    """
    Creates a DataFrame with labels (folder name) and file paths (.wav files) for a given folder.

    Args:
        folder_path (str): Path to the folder containing .wav files.

    Returns:
        pd.DataFrame: A DataFrame with 'label' and 'path' columns.
    """
    if not os.path.isdir(folder_path):
        raise ValueError(f"Provided path '{folder_path}' is not a valid directory.")
    
    # Extract the folder name as the label
    label = os.path.basename(folder_path.rstrip(os.sep)) 
    # Collect all .wav file paths
    wav_files = [
        os.path.join(folder_path, file) 
        for file in os.listdir(folder_path) 
        if file.lower().endswith('.wav')
    ]
    
    # Create the DataFrame
    data = {
        "label": [label] * len(wav_files),
        "path": wav_files
    }
    return pd.DataFrame(data)

In [5]:
glass_df = create_wav_dataframe(glass_brake)
glass_df

Unnamed: 0,label,path
0,glass_break,../data/proccessed/processed_dataset/glass_bre...
1,glass_break,../data/proccessed/processed_dataset/glass_bre...
2,glass_break,../data/proccessed/processed_dataset/glass_bre...
3,glass_break,../data/proccessed/processed_dataset/glass_bre...
4,glass_break,../data/proccessed/processed_dataset/glass_bre...
...,...,...
264,glass_break,../data/proccessed/processed_dataset/glass_bre...
265,glass_break,../data/proccessed/processed_dataset/glass_bre...
266,glass_break,../data/proccessed/processed_dataset/glass_bre...
267,glass_break,../data/proccessed/processed_dataset/glass_bre...


In [6]:
gunshot_df = create_wav_dataframe(gunshot)
gunshot_df

Unnamed: 0,label,path
0,gunshot,../data/proccessed/processed_dataset/gunshot\1...
1,gunshot,../data/proccessed/processed_dataset/gunshot\1...
2,gunshot,../data/proccessed/processed_dataset/gunshot\1...
3,gunshot,../data/proccessed/processed_dataset/gunshot\1...
4,gunshot,../data/proccessed/processed_dataset/gunshot\1...
...,...,...
1929,gunshot,../data/proccessed/processed_dataset/gunshot\8...
1930,gunshot,../data/proccessed/processed_dataset/gunshot\8...
1931,gunshot,../data/proccessed/processed_dataset/gunshot\8...
1932,gunshot,../data/proccessed/processed_dataset/gunshot\8...


In [7]:
scream_df = create_wav_dataframe(scream)
scream_df

Unnamed: 0,label,path
0,scream,../data/proccessed/processed_dataset/scream\0-...
1,scream,../data/proccessed/processed_dataset/scream\0-...
2,scream,../data/proccessed/processed_dataset/scream\0-...
3,scream,../data/proccessed/processed_dataset/scream\0-...
4,scream,../data/proccessed/processed_dataset/scream\0-...
...,...,...
2106,scream,../data/proccessed/processed_dataset/scream\ZQ...
2107,scream,../data/proccessed/processed_dataset/scream\ZQ...
2108,scream,../data/proccessed/processed_dataset/scream\ZQ...
2109,scream,../data/proccessed/processed_dataset/scream\ZQ...


In [8]:
neutral_df = create_wav_dataframe(neutral)
neutral_df

Unnamed: 0,label,path
0,neutral,../data/proccessed/processed_dataset/neutral\0...
1,neutral,../data/proccessed/processed_dataset/neutral\0...
2,neutral,../data/proccessed/processed_dataset/neutral\0...
3,neutral,../data/proccessed/processed_dataset/neutral\0...
4,neutral,../data/proccessed/processed_dataset/neutral\0...
...,...,...
3309,neutral,../data/proccessed/processed_dataset/neutral\Y...
3310,neutral,../data/proccessed/processed_dataset/neutral\Y...
3311,neutral,../data/proccessed/processed_dataset/neutral\Y...
3312,neutral,../data/proccessed/processed_dataset/neutral\Y...


In [9]:
#creating combined dataframe
data_path = pd.concat([glass_df, gunshot_df, scream_df, neutral_df], axis = 0)

In [10]:
# Function to check if files exist and remove rows with missing files
def check_files_exist_and_remove(data_path):
    # Create a list to store the indices of the missing files
    missing_files = []
    
    # Iterate over each row in the dataframe
    for index, row in data_path.iterrows():
        # Check if the file exists at the given path
        if not os.path.exists(row['path']):
            missing_files.append(index)
    
    # Remove the rows with missing files
    if missing_files:
        data_path = data_path.drop(missing_files)
        print(f"Removed {len(missing_files)} rows with missing files.")
    else:
        print("All files are present!")
    
    return data_path

# Assuming 'data_path' is your dataframe
data_path = check_files_exist_and_remove(data_path)

All files are present!


In [None]:
data_path.to_csv(annotations_path,index=False)
data_path

Unnamed: 0,label,path
0,glass_break,../data/proccessed/processed_dataset/glass_bre...
1,glass_break,../data/proccessed/processed_dataset/glass_bre...
2,glass_break,../data/proccessed/processed_dataset/glass_bre...
3,glass_break,../data/proccessed/processed_dataset/glass_bre...
4,glass_break,../data/proccessed/processed_dataset/glass_bre...
...,...,...
3309,neutral,../data/proccessed/processed_dataset/neutral\Y...
3310,neutral,../data/proccessed/processed_dataset/neutral\Y...
3311,neutral,../data/proccessed/processed_dataset/neutral\Y...
3312,neutral,../data/proccessed/processed_dataset/neutral\Y...
