This notebook takes the files of the rtbc-begging database from their original paths, which are divided into folders by individual, year, and file, and moves them directly into the rtbc-begging folder, renaming each file to "specie_year_[n].wav", where "specie" is the name of the individual's folder (e.g. 32PC1), "year" is the name of the year folder (e.g. 2021-22), and [n] is an integer representing the song.

In [1]:
from pathlib import Path
import shutil

# Get the current working directory
cwd = Path.cwd()

# Move two levels up to reach the root of the project
project_root = cwd.parents[1]

# Build the path to the base folder and search folder
base_folder = project_root / 'Original_datasets' / 'rtbc-begging'
search_folder = base_folder / 'begging'

# Safety switch: while True, the cell only reports what it would do and leaves
# every file where it is. Set it to False to actually move the files.
DRY_RUN = True


def flattened_name(wav_path: Path, root: Path):
    """Return the flat "<specie>_<year>_<n><suffix>" file name for *wav_path*.

    The name is built from the last three components of the path relative to
    *root*: the individual's folder, the year folder and the file itself
    (e.g. ``32PC1/2021-22/3.wav`` becomes ``32PC1_2021-22_3.wav``).

    Args:
        wav_path: Path of the audio file; it must be located under *root*.
        root: Folder the search started from.

    Returns:
        The new file name as a string, or ``None`` when the file sits fewer
        than two folders below *root*, so no individual/year can be read.

    Raises:
        ValueError: If *wav_path* is not located under *root*.
    """
    rel_parts = wav_path.relative_to(root).parts
    if len(rel_parts) < 3:
        # Not enough folder levels to identify both the individual and the year.
        return None
    specie, year = rel_parts[-3], rel_parts[-2]   # E.g.: '32PC1', '2021-22'
    return f"{specie}_{year}_{wav_path.stem}{wav_path.suffix}"


# Traverse all subdirectories and find .wav files.
# The listing is materialised (and sorted, for a reproducible log) before the
# loop so that moving files does not happen while the tree is still being walked.
for full_path in sorted(search_folder.rglob('*.wav')):
    new_filename = flattened_name(full_path, search_folder)
    if new_filename is None:
        print(f"Could not process: {full_path}")
        continue

    new_path = base_folder / new_filename

    if new_path.exists():
        # Never overwrite a file that is already in the destination folder.
        print(f"Skipped (already exists): {new_path}")
    elif DRY_RUN:
        print(f"Would move: {full_path} → {new_path}")
    else:
        shutil.move(str(full_path), str(new_path))
        print(f"Moved: {full_path} → {new_path}")

In [2]:
import pandas as pd

# Location of the original rtbc metadata CSV inside the project
Original_metada_folder = project_root / 'Original_metadata' / 'rtbc_metadata'
file_path = Original_metada_folder / 'metadata.csv'

# Load the original metadata CSV
df = pd.read_csv(file_path)
print(df['file_name'].head())

# Modify the 'file_name' column to change the format
# From: 32PC1/2022-23/57.wav  → To: 32PC1_2022-23_57.wav
# Vectorised literal replacement (regex=False): '/' is swapped for '_' in every
# row, and missing values are passed through instead of raising.
df['file_name'] = df['file_name'].str.replace('/', '_', regex=False)
print(df['file_name'].head())


0    32PC1/2022-23/57.wav
1     32PC1/2022-23/0.wav
2    32PC1/2022-23/74.wav
3    32PC1/2022-23/20.wav
4    32PC1/2022-23/60.wav
Name: file_name, dtype: object
0    32PC1_2022-23_57.wav
1     32PC1_2022-23_0.wav
2    32PC1_2022-23_74.wav
3    32PC1_2022-23_20.wav
4    32PC1_2022-23_60.wav
Name: file_name, dtype: object


In [3]:
# Destination of the converted metadata CSV inside the project
Output_metada_folder = project_root / 'Output_metadata' / 'rtbc_metadata'
file_path = Output_metada_folder / 'rtbc_metadata.csv'

# Create the output folder on first use; to_csv does not create missing
# parent directories and would otherwise fail.
Output_metada_folder.mkdir(parents=True, exist_ok=True)

# Write the metadata without the DataFrame index column
df.to_csv(file_path, index=False)