In [1]:
import os
import pandas as pd

# Map each category name to its metadata record. Class IDs are assigned in
# list order starting at 1, and every record repeats its own name under
# 'class_name'.
categories = {
    name: {'class_id': class_id, 'class_name': name}
    for class_id, name in enumerate(
        [
            'Cargo',
            'Passenger',
            'Tanker',
            'Tug',
            # Add other categories here
        ],
        start=1,
    )
}

# Define the root directory of your dataset.
# NOTE: this is a machine-specific absolute path; point it at your local copy
# of the dataset before running the notebook.
root_dir = 'D:/ocean-vue/Augmented Dataset/'

# Parallel lists, one entry per .wav file found. Each list becomes one column
# of metadata_df below.
recording_ids = []
file_names = []
folder_ids = []
class_ids = []
class_names = []
file_paths = []

# Walk <root_dir>/<category>/<folder>/<file>.wav and record one row per file.
for category, category_info in categories.items():
    category_path = os.path.join(root_dir, category)
    class_id = category_info['class_id']
    class_name = category_info['class_name']

    for folder_name in os.listdir(category_path):
        folder_path = os.path.join(category_path, folder_name)

        # Skip plain files sitting next to the recording folders; calling
        # os.listdir() on a non-directory would raise and abort the scan.
        if not os.path.isdir(folder_path):
            continue

        for file_name in os.listdir(folder_path):
            if not file_name.endswith('.wav'):
                continue

            # The recording ID is the file name without its extension.
            recording_ids.append(os.path.splitext(file_name)[0])
            file_names.append(file_name)
            folder_ids.append(folder_name)  # Use the folder name as folder ID
            class_ids.append(class_id)
            class_names.append(class_name)
            # Let os.path.join pick the platform's separator instead of
            # hard-coding a backslash, so the stored path is valid on any OS.
            file_paths.append(os.path.join(folder_path, file_name))

# Create a DataFrame from the collected metadata (one row per .wav file).
metadata_df = pd.DataFrame({
    'Recording ID': recording_ids,
    'File_name': file_names,
    'Folder_ID': folder_ids,
    'Class_ID': class_ids,
    'Class': class_names,
    'File_path': file_paths
})

In [2]:
# Cast 'Recording ID' to int so the sort compares numbers rather than strings
# (the IDs were taken from file names), then order the rows by Class and,
# within each class, by Recording ID.
# The result is bound to a new frame instead of being mutated in place; the
# original row index labels are kept, exactly as before.
metadata_df = (
    metadata_df
    .astype({'Recording ID': int})
    .sort_values(by=['Class', 'Recording ID'])
)

In [3]:
# Show the metadata table as this cell's output.
metadata_df

Unnamed: 0,Recording ID,File_name,Folder_ID,Class_ID,Class,File_path
0,1,1.wav,1,1,Cargo,D:/ocean-vue/Augmented Dataset/Cargo\1\1.wav
50,2,2.wav,2,1,Cargo,D:/ocean-vue/Augmented Dataset/Cargo\2\2.wav
61,3,3.wav,3,1,Cargo,D:/ocean-vue/Augmented Dataset/Cargo\3\3.wav
72,4,4.wav,4,1,Cargo,D:/ocean-vue/Augmented Dataset/Cargo\4\4.wav
83,5,5.wav,5,1,Cargo,D:/ocean-vue/Augmented Dataset/Cargo\5\5.wav
...,...,...,...,...,...,...
453,134,134.wav,134,4,Tug,D:/ocean-vue/Augmented Dataset/Tug\134\134.wav
454,135,135.wav,135,4,Tug,D:/ocean-vue/Augmented Dataset/Tug\135\135.wav
455,136,136.wav,136,4,Tug,D:/ocean-vue/Augmented Dataset/Tug\136\136.wav
456,137,137.wav,137,4,Tug,D:/ocean-vue/Augmented Dataset/Tug\137\137.wav


In [4]:
# Write the metadata table to 'metadata.csv' (relative to the working
# directory), leaving the row index out of the file.
metadata_df.to_csv(path_or_buf='metadata.csv', index=False)