In [15]:
"""
Functions to preprocess the .mp3 files into mel-spectrograms.
"""

# At the top of src/preprocess.py

import pandas as pd
import numpy as np
import librosa
import os
import re  # For parsing the multi_hot_label strings
import logging
from tqdm import tqdm # For progress bar: pip install tqdm

import sys

# The project root is taken to be two directory levels above the current
# working directory.
# NOTE: update this if the directory structure changes.
cwd = os.getcwd()
PROJECT_ROOT = os.path.abspath(os.path.join(cwd, os.pardir, os.pardir))

# Make the project root importable so that `src` and `config` can be found.
sys.path.append(PROJECT_ROOT)
import src.utils as utils  
from src.utils import get_audio_path as get_audio_path 
import config # NOTE: need to check config to make sure all paths and parameters are correct


# --- Setup Logging ---
# Root logger: INFO and above, formatted as "timestamp - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# --- Define Helper Function (NOTE: this local definition shadows the get_audio_path imported from src.utils above) ---
def get_audio_path(audio_dir, track_id):
    """Construct the path to an FMA audio file.

    The track ID is zero-padded to six digits; the first three digits name
    the sub-directory and all six digits name the ``.mp3`` file.

    Args:
        audio_dir: Directory that contains the three-digit sub-directories.
        track_id: Track identifier, either an integer or a string holding an
            integer (for example an ID that was read back from a CSV as text).

    Returns:
        ``<audio_dir>/<first three digits>/<six-digit id>.mp3``.

    Raises:
        ValueError: If ``track_id`` is a string that is not an integer, or a
            non-integer number such as a float.
        TypeError: If ``track_id`` is of a type that cannot be formatted as an
            integer (for example ``None``).
    """
    # Only strings are coerced: converting floats with int() would silently
    # truncate a malformed ID, so those keep raising as before.
    if isinstance(track_id, str):
        track_id = int(track_id)
    tid_str = '{:06d}'.format(track_id)
    return os.path.join(audio_dir, tid_str[:3], tid_str + '.mp3')

def parse_numpy_array_string(array_str):
    """Parse a stringified label vector into a list of numbers.

    Two textual forms are understood:

    * NumPy-scalar reprs such as ``'[np.float32(1.0), np.float32(0.0)]'``,
      which ``ast.literal_eval`` cannot handle. Values written without a
      fractional part (``np.float32(1)``, ``np.float32(1.)``), signed values,
      exponent notation and other ``np.float*`` / ``np.int*`` / ``np.uint*``
      wrappers are accepted as well.
    * Plain bracketed lists such as ``'[1.0, 0.0]'`` or ``'[1. 0.]'``
      (elements separated by commas and/or whitespace). This form is only
      tried when the string contains no NumPy-scalar wrappers.

    Args:
        array_str: The string to parse. Anything that is not a ``str``
            (for example ``NaN`` from an empty CSV cell) yields ``[]``.

    Returns:
        A list with one entry per parsed value. Values equal to ``0.0`` or
        ``1.0`` are returned as the integers ``0`` and ``1``; every other
        value is kept as a ``float``. An empty list is returned when nothing
        can be parsed.
    """
    if not isinstance(array_str, str):
        return []

    # Capture the numeric literal inside each NumPy scalar wrapper.
    wrapped_values = re.findall(
        r'(?:np|numpy)\.(?:float|int|uint)\d*\(\s*'
        r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*\)',
        array_str,
    )

    if wrapped_values:
        tokens = wrapped_values
    else:
        # No wrappers found: fall back to a plain "[a, b, c]" / "[a b c]" list.
        stripped = array_str.strip()
        if not (stripped.startswith('[') and stripped.endswith(']')):
            return []
        tokens = [tok for tok in re.split(r'[,\s]+', stripped[1:-1]) if tok]

    try:
        numbers = [float(tok) for tok in tokens]
    except ValueError as e:
        logging.warning(f"Error parsing array string: {e}")
        return []

    # Multi-hot entries (0.0 / 1.0) become ints; anything else stays a float.
    return [int(num) if num in (0.0, 1.0) else num for num in numbers]

def extract_mel_spectrogram(audio_path, params):
    """Compute a log-scaled Mel spectrogram for a single audio file.

    Args:
        audio_path: Path of the audio file to load.
        params: Mapping that provides ``sample_rate``,
            ``segment_duration_seconds``, ``samples_per_segment``, ``n_fft``,
            ``hop_length`` and ``n_mels``.

    Returns:
        The result of ``librosa.power_to_db`` applied to the Mel spectrogram
        with the clip's own maximum as the reference, or ``None`` when any
        step fails (missing parameter, unreadable file, decode error, ...).
        Failures are logged as warnings rather than raised so that callers
        can skip the track.
    """
    try:
        # Read every setting up front so a missing key fails before any I/O.
        target_sr = params["sample_rate"]
        clip_seconds = params["segment_duration_seconds"]
        expected_len = params["samples_per_segment"]
        mel_kwargs = {
            "n_fft": params["n_fft"],
            "hop_length": params["hop_length"],
            "n_mels": params["n_mels"],
        }

        # Resample to the target rate and read at most `clip_seconds` seconds.
        signal, _ = librosa.load(audio_path, sr=target_sr, duration=clip_seconds)

        # Force the waveform to exactly `expected_len` samples:
        # zero-pad the tail of short clips, cut the tail of long ones.
        n_samples = len(signal)
        if n_samples < expected_len:
            signal = np.pad(signal, (0, expected_len - n_samples))
        elif n_samples > expected_len:
            signal = signal[:expected_len]

        mel_power = librosa.feature.melspectrogram(y=signal, sr=target_sr, **mel_kwargs)

        # Power -> decibels, relative to the loudest bin of this clip.
        return librosa.power_to_db(mel_power, ref=np.max)

    except Exception as e:
        logging.warning(
            f"Could not process file {os.path.basename(audio_path)}: {e}"
        )
        return None

# Read the multi-hot metadata CSV from the processed-data directory and turn
# its stringified label column back into Python lists.
metadata_path = os.path.join(
    config.PATHS['PROCESSED_DATA_DIR'],
    'small_subset_multihot.csv',
)
logging.info(f"Loading metadata from {metadata_path}")

try:
    # Rows are indexed by track ID so later cells can look tracks up directly.
    metadata_df = pd.read_csv(metadata_path, index_col='track_id')
    print("CSV loaded successfully.")
    print(f"DataFrame shape: {metadata_df.shape}")
    print(f"Columns: {metadata_df.columns.tolist()}")

    # The label column is optional; it is only parsed when it is present.
    if 'multi_hot_label' in metadata_df.columns:
        print("\nParsing multi_hot_label column...")
        parsed_labels = metadata_df['multi_hot_label'].apply(parse_numpy_array_string)
        metadata_df['multi_hot_label'] = parsed_labels

        # Preview a few parsed rows as a sanity check.
        print("\nFirst 3 parsed values:")
        for position, parsed in enumerate(metadata_df['multi_hot_label'].head(3), start=1):
            print(f"{position}: {parsed} (type: {type(parsed)})")

        # Sum of the parsed list lengths over all rows (i.e. vector entries).
        total_labels = sum(map(len, metadata_df['multi_hot_label']))
        print(f"\nTotal number of labels across all tracks: {total_labels}")
        print(f"Average labels per track: {total_labels / len(metadata_df):.2f}")

    print("\nMetadata processing completed successfully.")

except Exception as e:
    # Any failure is logged with its traceback and reported; it is not re-raised.
    logging.error(f"Failed to load or parse metadata CSV '{metadata_path}': {e}", exc_info=True)
    print(f"ERROR: Failed to load or parse {metadata_path}")

CSV loaded successfully.
DataFrame shape: (8000, 3)
Columns: ['audio_path', 'multi_hot_label', 'split']

Parsing multi_hot_label column...

First 3 parsed values:
1: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] (type: <class 'list'>)
2: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] (type: <class 'list'>)
3: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] (type: <class 'list'>)

Total number of labels across all tracks: 176000
Average labels per track: 22.00

Metadata processing completed successfully.


In [8]:
# Recompute the working directory in this cell: the original bare
# `os.getcwd()` call discarded its result, so the cell depended on a `cwd`
# variable left behind by another cell.
cwd = os.getcwd()
# The project root is two directory levels above the working directory.
PROJECT_ROOT = os.path.abspath(os.path.join(cwd, '../..'))
print(PROJECT_ROOT)

/home/zhuoyuan/CSprojects/musicClaGen


In [10]:
# Show the FMA_FEATURES_DIR attribute of the config module.
# NOTE(review): Cell 2 reads config.PATHS['FMA_FEATURES_DIR'] rather than this
# attribute -- confirm the two settings are meant to point at the same place.
print(config.FMA_FEATURES_DIR)

/home/zhuoyuan/CSprojects/musicClaGen/data/processed/fma_features


In [16]:
# Cell 2: Choose a Track ID and Extract its Info
#
# End-to-end check for a single track: look the track up in the metadata,
# extract its log-Mel spectrogram, save it as a .npy file, print the manifest
# entry that would describe it, and reload the file to verify the round trip.

# --- !!! CHOOSE A TRACK ID TO TEST !!! ---
# Pick an ID known to be in your small_subset_multihot.csv (e.g., 2, 5, 10, 140, 141)
test_track_id = 141
# --- !!! --- !!! --- !!! --- !!! --- !!! ---

if test_track_id not in metadata_df.index:
    print(f"ERROR: Test track ID {test_track_id} not found in the loaded metadata.")
else:
    # Get the row for the test track
    test_row = metadata_df.loc[test_track_id]

    # Extract the necessary info
    audio_path = test_row['audio_path']
    multi_hot_label = test_row['multi_hot_label']  # Already parsed into a list by the metadata cell
    split = test_row['split']

    print(f"--- Processing Test Track ID: {test_track_id} ---")
    print(f"Audio Path: {audio_path}")
    print(f"Label Vector: {multi_hot_label}")
    print(f"Split: {split}")

    # Verify the audio file exists before proceeding
    if not os.path.exists(audio_path):
        print(f"\nERROR: Audio file does not exist at path: {audio_path}")
    else:
        print("\nAudio file found. Proceeding with feature extraction...")

        # --- Create output directory if needed ---
        features_output_dir = config.PATHS['FMA_FEATURES_DIR']
        os.makedirs(features_output_dir, exist_ok=True)
        print(f"Ensured features directory exists: {features_output_dir}")

        # --- Call the extraction function (returns None on failure) ---
        log_melspec = extract_mel_spectrogram(audio_path, config.PREPROCESSING_PARAMS)

        # --- Check result, save feature, and show manifest entry ---
        if log_melspec is None:
            print(f"\nFeature extraction failed for track {test_track_id}.")
        else:
            print(f"\nSuccessfully extracted Mel Spectrogram. Shape: {log_melspec.shape}")

            # Define paths for saving
            feature_filename = f"{test_track_id}.npy"
            absolute_feature_path = os.path.join(features_output_dir, feature_filename)
            # The manifest stores the path relative to config.PROJECT_ROOT,
            # always with forward slashes (also on Windows) for consistency.
            relative_feature_path = os.path.join(
                os.path.relpath(features_output_dir, config.PROJECT_ROOT),
                feature_filename,
            ).replace('\\', '/')

            # Save the feature array. Only the write itself is guarded here so
            # that a later reload/verification problem is not misreported as a
            # save failure.
            try:
                np.save(absolute_feature_path, log_melspec)
            except Exception as e:
                print(f"\nERROR: Failed to save feature for track {test_track_id}: {e}")
            else:
                print(f"\nSaved feature array to: {absolute_feature_path}")

                # Prepare the manifest entry dictionary (what would go in the final CSV)
                manifest_entry = {
                    'track_id': test_track_id,
                    'feature_path': relative_feature_path,
                    'label_vector': multi_hot_label,
                    'split': split,
                }

                print("\nManifest Entry for this track:")
                print(manifest_entry)

                # Optional: load the saved file back to verify the round trip.
                try:
                    loaded_spec = np.load(absolute_feature_path)
                except Exception as e:
                    print(f"\nERROR: Failed to reload saved feature for track {test_track_id}: {e}")
                else:
                    print(f"\nVerification: Loaded saved spectrogram. Shape: {loaded_spec.shape}")
                    # Explicit comparison instead of `assert`, which is
                    # stripped when Python runs with -O.
                    if np.array_equal(log_melspec, loaded_spec):
                        print("Verification successful.")
                    else:
                        print(
                            f"\nERROR: Verification failed for track {test_track_id}: "
                            "reloaded array differs from the extracted spectrogram."
                        )

--- Processing Test Track ID: 141 ---
Audio Path: /home/zhuoyuan/CSprojects/musicClaGen/data/raw/fma_audio/fma_small/000/000141.mp3
Label Vector: [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Split: training

Audio file found. Proceeding with feature extraction...
Ensured features directory exists: /home/zhuoyuan/CSprojects/musicClaGen/data/processed/fma_features/fma_small

Successfully extracted Mel Spectrogram. Shape: (128, 1876)

Saved feature array to: /home/zhuoyuan/CSprojects/musicClaGen/data/processed/fma_features/fma_small/141.npy

Manifest Entry for this track:
{'track_id': 141, 'feature_path': 'data/processed/fma_features/fma_small/141.npy', 'label_vector': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'split': 'training'}

Verification: Loaded saved spectrogram. Shape: (128, 1876)
Verification successful.
