In [1]:
# Cell 1

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
# Use AutoProcessor for Wav2Vec2-BERT - it bundles feature_extractor and tokenizer (if needed)
from transformers import AutoModelForAudioClassification, AutoProcessor

from torch.optim import AdamW
import pandas as pd
import numpy as np
import os
import sys
import ast # For parsing string representations of lists/arrays
import logging
import time
from sklearn.metrics import hamming_loss, jaccard_score, f1_score # Add more as needed
from tqdm.notebook import tqdm # Use notebook version of tqdm
import librosa # Needed for loading raw audio now



# --- Project Setup ---
# The project root is taken to be two directory levels above the current
# working directory; nothing here distinguishes notebook from script execution.
# NOTE: update the '../../' hop below if the directory structure changes.
cwd = os.getcwd()
PROJECT_ROOT = os.path.abspath(os.path.join(cwd, '../../'))

print(f"PROJECT_ROOT detected as: {PROJECT_ROOT}")
# Put the project root on sys.path so that `import config` below can resolve.
if PROJECT_ROOT not in sys.path:
    print(f"Adding {PROJECT_ROOT} to sys.path")
    sys.path.append(PROJECT_ROOT)

# --- Config and Utils ---
try:
    import config  # Project configuration module
    # Optionally import utils if needed, e.g., for get_audio_path if not defined here
    # import src.utils as utils
except ModuleNotFoundError:
    print("ERROR: Cannot import config or utils. Make sure PROJECT_ROOT is correct and src is importable.")
    # Possible workaround: sys.path.insert(0, os.path.join(PROJECT_ROOT, 'src')) and retry the import.
    # Re-raise instead of continuing: later cells read `config`, so carrying on
    # would only defer the failure to a less helpful NameError, after this cell
    # has already printed its "setup complete" message.
    raise

# --- Setup Logging ---
# logging.basicConfig() does nothing when the root logger already has handlers
# (e.g. after re-running this cell), so remove any existing ones first.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)

print("Imports and basic setup complete.")

  from .autonotebook import tqdm as notebook_tqdm


PROJECT_ROOT detected as: /workspace/musicClaGen
Adding /workspace/musicClaGen to sys.path
/workspace/musicClaGen
Imports and basic setup complete.


In [2]:
# Cell 2 

In [3]:
# Cell 2
# --- Load Config ---
def _path_from_config(key, default_filename):
    """Return config.PATHS[key] if present, else a file under PROCESSED_DATA_DIR.

    The fallback path is only built when `key` is absent, so a config that
    provides `key` does not also need a 'PROCESSED_DATA_DIR' entry. (Passing
    the fallback as the default argument of `.get()` would evaluate it
    eagerly and raise KeyError in that situation.)

    Args:
        key: Name of the entry to look up in config.PATHS.
        default_filename: File name joined onto config.PATHS['PROCESSED_DATA_DIR']
            when `key` is missing.

    Raises:
        KeyError: If both `key` and 'PROCESSED_DATA_DIR' are missing.
    """
    if key in config.PATHS:
        return config.PATHS[key]
    return os.path.join(config.PATHS['PROCESSED_DATA_DIR'], default_filename)


manifest_path = _path_from_config('SMALL_MULTILABEL_PATH', 'small_subset_multihot.csv')
genre_list_path = _path_from_config('GENRE_LIST_PATH', 'unified_genres.txt')
model_save_dir = config.PATHS['MODELS_DIR']

# Training settings are read from the MODEL_PARAMS dict in config.py.
model_checkpoint = config.MODEL_PARAMS['model_checkpoint']  # e.g., "facebook/w2v-bert-2.0" - VERIFY!
learning_rate = config.MODEL_PARAMS['learning_rate']
batch_size = config.MODEL_PARAMS['batch_size']  # Use the small BS for notebook test
num_epochs_debug = 1  # <<<--- RUN ONLY 1 EPOCH FOR DEBUGGING ---<<<
weight_decay = config.MODEL_PARAMS['weight_decay']
gradient_accumulation_steps = config.MODEL_PARAMS['gradient_accumulation_steps']

# --- Load unified genre list ---
try:
    with open(genre_list_path, 'r') as f:
        # One genre per line; blank lines are ignored.
        unified_genres = [line.strip() for line in f if line.strip()]
    # Number of labels = number of non-empty lines in the genre list file.
    num_labels = len(unified_genres)
    logging.info(f"Loaded {num_labels} unified genres from {genre_list_path}")
    if num_labels == 0:
        raise ValueError("Genre list is empty!")
except Exception as e:
    logging.error(f"Failed to load or process unified genre list: {e}", exc_info=True)
    # Chain the original exception so the root cause stays visible in the traceback.
    raise SystemExit("Cannot proceed without genre list.") from e

# --- Setup Device ---
# Use the device named in config only when CUDA is available; otherwise CPU.
device = torch.device(config.DEVICE if torch.cuda.is_available() else "cpu")
logging.info(f"Using device: {device}")
if not torch.cuda.is_available() and config.DEVICE == "cuda":
    logging.warning("CUDA selected but not available, falling back to CPU.")

# --- Create Save Directory ---
os.makedirs(model_save_dir, exist_ok=True)

2025-05-04 04:27:10,712 - INFO - Loaded 22 unified genres from /workspace/musicClaGen/data/processed/unified_genres.txt
2025-05-04 04:27:10,715 - INFO - Using device: cuda


In [4]:
# # Cell 2: Load Config & Define Constants (Modified for CPU Debugging)

# import torch # Ensure torch is imported
# import os
# import logging
# import config # Your config file

# # --- Load Config ---
# # Ensure config.py has the correct paths in the PATHS dict
# # Use the key pointing to your manifest with raw audio paths
# manifest_path = config.PATHS.get('SMALL_MULTILABEL_PATH', os.path.join(config.PATHS['PROCESSED_DATA_DIR'], 'small_subset_multihot.csv'))
# genre_list_path = config.PATHS.get('GENRE_LIST_PATH', os.path.join(config.PATHS['PROCESSED_DATA_DIR'], 'unified_genres.txt'))
# model_save_dir = config.PATHS['MODELS_DIR']

# # Ensure config.py has MODEL_PARAMS dict with model_checkpoint
# model_checkpoint = config.MODEL_PARAMS['model_checkpoint'] # e.g., "facebook/w2v-bert-2.0"
# learning_rate = config.MODEL_PARAMS['learning_rate']
# batch_size = config.MODEL_PARAMS['batch_size']
# num_epochs_debug = 1 # Keep as 1 for debug run
# weight_decay = config.MODEL_PARAMS['weight_decay']
# gradient_accumulation_steps = config.MODEL_PARAMS['gradient_accumulation_steps']

# # --- Load unified genre list ---
# try:
#     with open(genre_list_path, 'r') as f:
#         unified_genres = [line.strip() for line in f if line.strip()]
#     num_labels = len(unified_genres)
#     logging.info(f"Loaded {num_labels} unified genres from {genre_list_path}")
#     if num_labels == 0: raise ValueError("Genre list is empty!")
# except Exception as e:
#     logging.error(f"Failed to load or process unified genre list: {e}", exc_info=True)
#     raise SystemExit("Cannot proceed without genre list.")

# # --- Setup Device (FORCED TO CPU FOR DEBUGGING) ---
# # Original line: device = torch.device(config.DEVICE if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu") # <<<--- TEMPORARY CHANGE FOR DEBUGGING CUDA ERROR
# logging.info(f"Using device: {device} (Forced CPU for debugging)")
# # --- End Temporary Change ---

# # --- Create Save Directory ---
# os.makedirs(model_save_dir, exist_ok=True)

# print("Cell 2 setup complete (Device forced to CPU for debugging).")

In [5]:
# Display the resolved manifest CSV path as a quick sanity check.
print(manifest_path)

/workspace/musicClaGen/data/processed/small_subset_multihot.csv


# Cell 3

In [6]:
# # Cell 3: Dataset Class Definition (Raw Audio Version) This cell uses the regex parser to parse the multi_hot_label string back into a list of integers.



# # Re-define the regex parser from preprocess.py here if it is needed;
# # otherwise use ast.literal_eval.
# # NOTE: After the usage.ipynb change on 05/03/2025, this should fall back to ast.literal_eval now. TODO: clean up this code later.

# import re

# def parse_numpy_array_string(array_str):
#     """
#     Parse strings like '[np.float32(1.0), np.float32(0.0), ...]' into a list of integers.
#     This is needed because ast.literal_eval cannot handle 'np.float32()' in the string.
#     """
#     if not isinstance(array_str, str):
#         return []
    
#     try:
#         # Extract all the float values using regular expressions
#         float_matches = re.findall(r'np\.float32\((\d+\.\d+)\)', array_str)
        
#         # Convert matches to integers (1.0 -> 1, 0.0 -> 0)
#         values = []
#         for match in float_matches:
#             value = float(match)
#             # Convert to integer if it's 0.0 or 1.0
#             if value == 1.0:
#                 values.append(1)
#             elif value == 0.0:
#                 values.append(0)
#             else:
#                 values.append(value)  # Keep as float if not 0 or 1
                
#         return values
#     except Exception as e:
#         logging.warning(f"Error parsing array string: {e}")
#         return []

# class FMARawAudioDataset(Dataset):
#     """
#     Loads raw audio waveforms and labels from manifest, uses Hugging Face
#     feature extractor (like ASTFeatureExtractor or Wav2Vec2Processor) on the fly.
#     """
#     def __init__(self, manifest_path, feature_extractor):
#         """
#         Args:
#             manifest_path (str): Path to the final manifest CSV file.
#             feature_extractor: Initialized Hugging Face AutoFeatureExtractor or AutoProcessor.
#         """
#         logging.info(f"Initializing FMARawAudioDataset from: {manifest_path}")
#         if feature_extractor is None:
#              raise ValueError("FMARawAudioDataset requires a feature_extractor/processor instance.")

#         self.feature_extractor = feature_extractor
#         # Get target sampling rate directly from the extractor/processor
#         try:
#              # Works for Wav2Vec2Processor, ASTFeatureExtractor, etc.
#              self.target_sr = self.feature_extractor.sampling_rate
#              logging.info(f"Target sampling rate set from feature extractor: {self.target_sr} Hz")
#         except AttributeError:
#              logging.warning("Could not get sampling_rate from feature_extractor, using config.")
#              # Fallback to config if needed, but ensuring match is crucial
#              self.target_sr = config.PREPROCESSING_PARAMS['sample_rate']


#         logging.info(f"Loading manifest from: {manifest_path}")
#         try:
#             self.manifest = pd.read_csv(manifest_path)
#             # Ensure index is set if needed elsewhere, or use default range index
#             if 'track_id' in self.manifest.columns:
#                  self.manifest = self.manifest.set_index('track_id', drop=False)

#             # --- Parse the 'multi_hot_label' string back into a list ---
#             # Here: if we decide to use raw audio, we use regex parser; 
#             #       if we decide to use mel spectrogram, we use ast.literal_eval

#             # Choose the correct parser based on how labels were saved in the CSV
#             # If saved as '[1.0, 0.0,...]' use ast.literal_eval
#             # label_parser = ast.literal_eval
#             # If saved as '[np.float32(1.0)...]' uncomment and use regex parser
#             label_parser = parse_numpy_array_string

#             self.manifest['multi_hot_label'] = self.manifest['multi_hot_label'].apply(label_parser)
#             logging.info(f"Loaded and parsed manifest with {len(self.manifest)} entries.")
#             # Check the first parsed label
#             logging.info(f"Example parsed label (first entry): {self.manifest['multi_hot_label'].iloc[0]}")

#         except Exception as e:
#             logging.error(f"Error loading or parsing manifest {manifest_path}: {e}", exc_info=True)
#             raise

#     def __len__(self):
#         """Returns the total number of samples in the dataset."""
#         return len(self.manifest)

#     def __getitem__(self, idx):
#         """
#         Loads raw audio for index idx, processes it with the feature extractor,
#         and returns the processed inputs and labels.
#         """
#         if torch.is_tensor(idx): idx = idx.tolist() # Handle tensor indices

#         # Get the row data from the manifest
#         row = self.manifest.iloc[idx]
#         track_id = row.get('track_id', self.manifest.index[idx]) # Get track_id safely
#         label_vector = row['multi_hot_label'] # Already parsed list/array

#         # Construct absolute audio path if necessary
#         audio_path = row['audio_path']

#         # NOTE: originally the mel-spectrogram paths were relative but the raw-audio paths were absolute, so we must make sure audio_path is absolute.
#         # Check whether audio_path is absolute or relative so that we do not load the wrong data; if it is relative, join it with PROJECT_ROOT.
#         if not os.path.isabs(audio_path):
#              # Assumes path in manifest is relative to PROJECT_ROOT
#              audio_path = os.path.join(config.PROJECT_ROOT, audio_path)

#         try:
#             # --- 1. Load RAW Audio Waveform ---
#             # Load full 30s clip at the TARGET sample rate required by the processor
#             waveform, loaded_sr = librosa.load(
#                 audio_path,
#                 sr=self.target_sr, # Use processor's sampling rate
#                 duration=30.0     # Load the full 30 seconds
#             )
#             # Ensure minimum length if needed (though duration should handle it)
#             min_samples = int(0.1 * self.target_sr) # Example: require at least 0.1s
#             if len(waveform) < min_samples:
#                  raise ValueError(f"Audio signal for track {track_id} too short after loading.")

#             # --- 2. Apply Feature Extractor ---
#             # Pass the raw waveform numpy array
#             # The extractor handles normalization, padding/truncation, tensor conversion
            
#             max_length = 5000

#             inputs = self.feature_extractor(
#                 waveform,
#                 sampling_rate=self.target_sr,
#                 return_tensors="pt",
#                 return_attention_mask=True # Request attention mask
#             )

#             # --- 3. Prepare Outputs ---
#             # Squeeze unnecessary batch dimension added by the extractor
#             # Key name ('input_values', 'input_features') depends on the specific extractor
#             feature_tensor = inputs.get('input_values', inputs.get('input_features'))
#             if feature_tensor is None:
#                 raise KeyError("Expected 'input_values' or 'input_features' key from feature_extractor output.")
#             feature_tensor = feature_tensor.squeeze(0) # Remove batch dim -> [Channels?, Freq?, Time] or [SeqLen, Dim]

#             attention_mask = inputs.get('attention_mask', None)
#             if attention_mask is not None:
#                  attention_mask = attention_mask.squeeze(0)

#             # Convert label list/array to float tensor for BCE loss
#             label_tensor = torch.tensor(label_vector, dtype=torch.float32)

#             # Return dictionary matching model's expected input names
#             model_input_dict = {"labels": label_tensor}
#             # Use the key the feature extractor provided
#             if 'input_values' in inputs:
#                  model_input_dict['input_values'] = feature_tensor
#             elif 'input_features' in inputs:
#                  model_input_dict['input_features'] = feature_tensor

#             if attention_mask is not None:
#                  model_input_dict['attention_mask'] = attention_mask

#             return model_input_dict

#         except FileNotFoundError:
#              logging.error(f"Audio file not found for track {track_id} at {audio_path}")
#              raise # Or implement skipping logic with collate_fn
#         except Exception as e:
#             logging.error(f"Error loading/processing track {track_id} at {audio_path}: {e}", exc_info=True)
#             raise # Or implement skipping logic


# print("FMARawAudioDataset class defined.")

In [7]:
# # # Cell 3: Dataset Class Definition (Raw Audio Version) This cell uses the ast.literal_eval parser to parse the multi_hot_label string back into a list of integers.


# import re
# import ast 



# # # Re-define the regex parser from preprocess.py here if it is needed;
# # # otherwise use ast.literal_eval.
# # # NOTE: After the usage.ipynb change on 05/03/2025, this should fall back to ast.literal_eval now. TODO: clean up this code later.
# # def parse_numpy_array_string(array_str):
# #     """
# #     Parse strings like '[np.float32(1.0), np.float32(0.0), ...]' into a list of integers.
# #     This is needed because ast.literal_eval cannot handle 'np.float32()' in the string.
# #     """
# #     if not isinstance(array_str, str):
# #         return []
# #     try:
# #         # Extract all the float values using regular expressions
# #         # Refined regex to handle numbers with or without decimal points
# #         float_matches = re.findall(r'np\.float32\(([\d\.]+)\)', array_str)
# #         # Convert matches to floats then maybe int (use float for BCE loss)
# #         values = []
# #         for match in float_matches:
# #             value = float(match)
# #             values.append(1.0 if value == 1.0 else 0.0) # Store as float 0.0 or 1.0
# #         return values
# #     except Exception as e:
# #         logging.warning(f"Error parsing array string: {e}")
# #         return []
# # --- End commented out parser ---


# class FMARawAudioDataset(Dataset):
#     """
#     Loads raw audio waveforms and labels from manifest, uses Hugging Face
#     feature extractor (like ASTFeatureExtractor or Wav2Vec2Processor/AutoFeatureExtractor) on the fly.
#     Assumes padding/truncation will be handled by a collate function.
#     """
#     def __init__(self, manifest_path, feature_extractor):
#         """
#         Args:
#             manifest_path (str): Path to the final manifest CSV file (e.g., small_subset_multihot.csv).
#             feature_extractor: Initialized Hugging Face AutoFeatureExtractor instance.
#         """
#         logging.info(f"Initializing FMARawAudioDataset from: {manifest_path}")
#         if feature_extractor is None:
#              raise ValueError("FMARawAudioDataset requires a feature_extractor instance.")

#         self.feature_extractor = feature_extractor
#         try:
#              self.target_sr = self.feature_extractor.sampling_rate
#              logging.info(f"Target sampling rate set from feature extractor: {self.target_sr} Hz")
#         except AttributeError:
#              logging.error("Could not get sampling_rate from feature_extractor.", exc_info=True)
#              raise

#         logging.info(f"Loading manifest from: {manifest_path}")
#         try:
#             self.manifest = pd.read_csv(manifest_path)
#             if 'track_id' in self.manifest.columns:
#                  self.manifest = self.manifest.set_index('track_id', drop=False)

#             # --- Parse the 'multi_hot_label' string back into a list ---
#             # Use ast.literal_eval assuming labels were saved as standard list strings '[1.0, 0.0,...]'
#             logging.info("Attempting to parse 'multi_hot_label' column using ast.literal_eval...")
#             label_parser = ast.literal_eval # <<<--- Using ast.literal_eval
#             # label_parser = parse_numpy_array_string # Keep commented out

#             # Ensure the column name matches your CSV ('multi_hot_label' based on your previous code)
#             label_col_name = 'multi_hot_label'
#             if label_col_name not in self.manifest.columns:
#                  raise KeyError(f"Column '{label_col_name}' not found in manifest CSV at {manifest_path}")

#             self.manifest[label_col_name] = self.manifest[label_col_name].apply(label_parser)

#             # Verification step
#             first_label = self.manifest[label_col_name].iloc[0]
#             if not isinstance(first_label, list):
#                  raise TypeError(f"Parsed label is not a list, check parser/CSV format. Got type: {type(first_label)}")
#             global num_labels # Make sure num_labels is defined/loaded in Cell 2
#             if len(first_label) != num_labels:
#                 logging.warning(f"Parsed label length ({len(first_label)}) does not match expected num_labels ({num_labels}). Check parsing or unified_genres.txt.")
#             logging.info(f"Example parsed label (type {type(first_label)}, length {len(first_label)}): {str(first_label)[:100]}...")

#             logging.info(f"Loaded and parsed manifest with {len(self.manifest)} entries.")

#         except FileNotFoundError:
#              logging.error(f"Manifest file not found: {manifest_path}", exc_info=True)
#              raise
#         except Exception as e:
#             logging.error(f"Error loading or parsing manifest {manifest_path}: {e}", exc_info=True)
#             raise

#     def __len__(self):
#         """Returns the total number of samples in the dataset."""
#         return len(self.manifest)

#     def __getitem__(self, idx):
#         """
#         Loads raw audio for index idx, processes it with the feature extractor,
#         and returns the processed inputs and labels.
#         """
#         if torch.is_tensor(idx): idx = idx.tolist() # Handle tensor indices

#         try:
#             # Get the row data from the manifest
#             row = self.manifest.iloc[idx]
#             track_id = row.get('track_id', self.manifest.index[idx]) # Get track_id safely
#             multi_hot_label = row['multi_hot_label'] # Use the correct column name
#             audio_path = row['audio_path']

#             # Construct absolute audio path if necessary
#             # NOTE: originally the mel-spectrogram paths were relative but the raw-audio paths were absolute, so we must make sure audio_path is absolute.
#             # Check whether audio_path is absolute or relative so that we do not load the wrong data; if it is relative, join it with PROJECT_ROOT.
#             if not os.path.isabs(audio_path):
#                 # Assumes path in manifest is relative to PROJECT_ROOT defined in config
#                 audio_path = os.path.join(config.PROJECT_ROOT, audio_path)

#             # --- 1. Load RAW Audio Waveform ---
#             waveform, loaded_sr = librosa.load(
#                 audio_path,
#                 sr=self.target_sr, # Use extractor's sampling rate
#                 duration=30.0      # Load the full 30 seconds
#             )
#             min_samples = int(0.1 * self.target_sr)
#             if len(waveform) < min_samples:
#                  raise ValueError(f"Audio signal for track {track_id} too short after loading.")

#             # --- 2. Apply Feature Extractor ---
#             # Let the Data Collator handle padding/truncation later
#             inputs = self.feature_extractor(
#                 waveform,
#                 sampling_rate=self.target_sr,
#                 return_tensors="pt",
#                 # REMOVED padding/truncation args
#                 return_attention_mask=True # Keep requesting mask
#             )

#             # --- 3. Prepare Outputs ---
#             feature_tensor = inputs.get('input_values', inputs.get('input_features'))
#             if feature_tensor is None:
#                 raise KeyError(f"Expected 'input_values' or 'input_features' key from feature_extractor output for track {track_id}. Got keys: {inputs.keys()}")
#             feature_tensor = feature_tensor.squeeze(0)

#             attention_mask = inputs.get('attention_mask', None)
#             if attention_mask is not None:
#                 attention_mask = attention_mask.squeeze(0)

#             # Convert label list/array to float tensor
#             label_tensor = torch.tensor(multi_hot_label, dtype=torch.float32)

#             # Return dictionary
#             model_input_dict = {"labels": label_tensor}
#             input_key = 'input_values' if 'input_values' in inputs else 'input_features'
#             model_input_dict[input_key] = feature_tensor
#             if attention_mask is not None:
#                 model_input_dict['attention_mask'] = attention_mask

#             return model_input_dict

#         except FileNotFoundError:
#             logging.error(f"Audio file not found for track {track_id} at {audio_path}")
#             # Return None ONLY IF collate_fn handles it, otherwise raise
#             raise
#         except Exception as e:
#             logging.error(f"Error in __getitem__ for track {track_id}: {e}", exc_info=True)
#             # Return None ONLY IF collate_fn handles it, otherwise raise
#             raise

# print("FMARawAudioDataset class defined (using raw audio, feature extractor, ast.literal_eval for labels).")


In [8]:
# # # Cell 3: Dataset Class Definition (Raw Audio Version) This cell uses the ast.literal_eval parser to parse the multi_hot_label string back into a list of integers.


# import re
# import ast 



# # # Re-define the regex parser from preprocess.py here if it is needed;
# # # otherwise use ast.literal_eval.
# # # NOTE: After the usage.ipynb change on 05/03/2025, this should fall back to ast.literal_eval now. TODO: clean up this code later.
# # def parse_numpy_array_string(array_str):
# #     """
# #     Parse strings like '[np.float32(1.0), np.float32(0.0), ...]' into a list of integers.
# #     This is needed because ast.literal_eval cannot handle 'np.float32()' in the string.
# #     """
# #     if not isinstance(array_str, str):
# #         return []
# #     try:
# #         # Extract all the float values using regular expressions
# #         # Refined regex to handle numbers with or without decimal points
# #         float_matches = re.findall(r'np\.float32\(([\d\.]+)\)', array_str)
# #         # Convert matches to floats then maybe int (use float for BCE loss)
# #         values = []
# #         for match in float_matches:
# #             value = float(match)
# #             values.append(1.0 if value == 1.0 else 0.0) # Store as float 0.0 or 1.0
# #         return values
# #     except Exception as e:
# #         logging.warning(f"Error parsing array string: {e}")
# #         return []
# # --- End commented out parser ---


# class FMARawAudioDataset(Dataset):
#     """
#     Loads raw audio waveforms and labels from manifest, uses Hugging Face
#     feature extractor (like ASTFeatureExtractor or Wav2Vec2Processor/AutoFeatureExtractor) on the fly.
#     Assumes padding/truncation will be handled by a collate function.
#     """
#     def __init__(self, manifest_path, feature_extractor):
#         """
#         Args:
#             manifest_path (str): Path to the final manifest CSV file (e.g., small_subset_multihot.csv).
#             feature_extractor: Initialized Hugging Face AutoFeatureExtractor instance.
#         """
#         logging.info(f"Initializing FMARawAudioDataset from: {manifest_path}")
#         if feature_extractor is None:
#              raise ValueError("FMARawAudioDataset requires a feature_extractor instance.")

#         self.feature_extractor = feature_extractor
#         try:
#              self.target_sr = self.feature_extractor.sampling_rate
#              logging.info(f"Target sampling rate set from feature extractor: {self.target_sr} Hz")
#         except AttributeError:
#              logging.error("Could not get sampling_rate from feature_extractor.", exc_info=True)
#              raise

#         logging.info(f"Loading manifest from: {manifest_path}")
#         try:
#             self.manifest = pd.read_csv(manifest_path)
#             if 'track_id' in self.manifest.columns:
#                  self.manifest = self.manifest.set_index('track_id', drop=False)

#             # --- Parse the 'multi_hot_label' string back into a list ---
#             # Use ast.literal_eval assuming labels were saved as standard list strings '[1.0, 0.0,...]'
#             logging.info("Attempting to parse 'multi_hot_label' column using ast.literal_eval...")
#             label_parser = ast.literal_eval # <<<--- Using ast.literal_eval
#             # label_parser = parse_numpy_array_string # Keep commented out

#             # Ensure the column name matches your CSV ('multi_hot_label' based on your previous code)
#             label_col_name = 'multi_hot_label'
#             if label_col_name not in self.manifest.columns:
#                  raise KeyError(f"Column '{label_col_name}' not found in manifest CSV at {manifest_path}")

#             self.manifest[label_col_name] = self.manifest[label_col_name].apply(label_parser)

#             # Verification step
#             first_label = self.manifest[label_col_name].iloc[0]
#             if not isinstance(first_label, list):
#                  raise TypeError(f"Parsed label is not a list, check parser/CSV format. Got type: {type(first_label)}")
#             global num_labels # Make sure num_labels is defined/loaded in Cell 2
#             if len(first_label) != num_labels:
#                 logging.warning(f"Parsed label length ({len(first_label)}) does not match expected num_labels ({num_labels}). Check parsing or unified_genres.txt.")
#             logging.info(f"Example parsed label (type {type(first_label)}, length {len(first_label)}): {str(first_label)[:100]}...")

#             logging.info(f"Loaded and parsed manifest with {len(self.manifest)} entries.")

#         except FileNotFoundError:
#              logging.error(f"Manifest file not found: {manifest_path}", exc_info=True)
#              raise
#         except Exception as e:
#             logging.error(f"Error loading or parsing manifest {manifest_path}: {e}", exc_info=True)
#             raise

#     def __len__(self):
#         """Returns the total number of samples in the dataset."""
#         return len(self.manifest)

#     def __getitem__(self, idx):
#         """
#         Loads raw audio for index idx, processes it with the feature extractor,
#         and returns the processed inputs and labels.
#         """
#         if torch.is_tensor(idx): idx = idx.tolist() # Handle tensor indices

#         try:
#             # Get the row data from the manifest
#             row = self.manifest.loc[idx]
#             track_id = row.get('track_id', self.manifest.index[idx]) # Get track_id safely
#             multi_hot_label = row['multi_hot_label'] # Use the correct column name
#             audio_path = row['audio_path']

#             # Construct absolute audio path if necessary
#             # NOTE: originally the mel-spectrogram paths were relative but the raw-audio paths were absolute, so we must make sure audio_path is absolute.
#             # Check whether audio_path is absolute or relative so that we do not load the wrong data; if it is relative, join it with PROJECT_ROOT.
#             if not os.path.isabs(audio_path):
#                 # Assumes path in manifest is relative to PROJECT_ROOT defined in config
#                 audio_path = os.path.join(config.PROJECT_ROOT, audio_path)

#             # --- 1. Load RAW Audio Waveform ---
#             waveform, loaded_sr = librosa.load(
#                 audio_path,
#                 sr=self.target_sr, # Use extractor's sampling rate
#                 duration=30.0      # Load the full 30 seconds
#             )
#             min_samples = int(0.1 * self.target_sr)
#             if len(waveform) < min_samples:
#                  raise ValueError(f"Audio signal for track {track_id} too short after loading.")

#             # --- 2. Apply Feature Extractor ---
#             # Let the Data Collator handle padding/truncation later
#             inputs = self.feature_extractor(
#                 waveform,
#                 sampling_rate=self.target_sr,
#                 return_tensors="pt",
#                 # REMOVED padding/truncation args
#                 return_attention_mask=True # Keep requesting mask
#             )

#             # --- 3. Prepare Outputs ---
#             feature_tensor = inputs.get('input_values', inputs.get('input_features'))
#             if feature_tensor is None:
#                 raise KeyError(f"Expected 'input_values' or 'input_features' key from feature_extractor output for track {track_id}. Got keys: {inputs.keys()}")
#             feature_tensor = feature_tensor.squeeze(0)

#             attention_mask = inputs.get('attention_mask', None)
#             if attention_mask is not None:
#                 attention_mask = attention_mask.squeeze(0)

#             # Convert label list/array to float tensor
#             label_tensor = torch.tensor(multi_hot_label, dtype=torch.float32)

#             # Return dictionary
#             model_input_dict = {"labels": label_tensor}
#             input_key = 'input_values' if 'input_values' in inputs else 'input_features'
#             model_input_dict[input_key] = feature_tensor
#             if attention_mask is not None:
#                 model_input_dict['attention_mask'] = attention_mask

#             return model_input_dict

#         except FileNotFoundError:
#             logging.error(f"Audio file not found for track {track_id} at {audio_path}")
#             # Return None ONLY IF collate_fn handles it, otherwise raise
#             raise
#         except Exception as e:
#             logging.error(f"Error in __getitem__ for track {track_id}: {e}", exc_info=True)
#             # Return None ONLY IF collate_fn handles it, otherwise raise
#             raise

# print("FMARawAudioDataset class defined (using raw audio, feature extractor, ast.literal_eval for labels).")


In [9]:
# Cell 3: Dataset Class Definition (Raw Audio Version - Corrected .loc access)

import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
import os
import ast # For parsing label string '[1.0, 0.0,...]'
import re  # Keep import for the commented out function below
import logging
import librosa
# Ensure config is imported from a previous cell or uncomment:
# import config

# --- Optional: Keep custom parser commented out for reference ---
# # Regex-based label parser carried over from preprocess.py, kept for reference only;
# # the dataset class below uses ast.literal_eval instead.
# # NOTE: After the usage.ipynb change on 05/03/2025, labels should be plain list strings, so ast.literal_eval is sufficient. TODO: remove this parser.
# def parse_numpy_array_string(array_str):
#     """
#     Parse strings like '[np.float32(1.0), np.float32(0.0), ...]' into a list of floats (each 0.0 or 1.0).
#     This is needed because ast.literal_eval cannot handle 'np.float32()' in the string.
#     """
#     if not isinstance(array_str, str): return []
#     try:
#         # Match digits, optionally followed by a decimal and more digits
#         float_matches = re.findall(r'np\.float32\(([\d\.]+)\)', array_str)
#         values = []
#         for match_str in float_matches:
#             value = float(match_str) # Convert string match to float
#             values.append(1.0 if value == 1.0 else 0.0) # Store as float 0.0 or 1.0
#         return values
#     except Exception as e:
#         logging.warning(f"Error parsing array string: {e}")
#         return []
# --- End commented out parser ---


class FMARawAudioDataset(Dataset):
    """
    Loads raw audio waveforms and multi-hot labels listed in a manifest CSV and
    runs a Hugging Face feature extractor on each clip on the fly.

    Items are looked up by manifest index *label* via ``.loc`` (the track_id when
    the CSV has a 'track_id' column), not by 0..len-1 position. Wrap the dataset
    in a ``Subset``/sampler that yields those labels.

    A sample that cannot be loaded or processed is returned as ``None``; the
    collate function must filter those out. Padding/truncation is also left to
    the collate function.
    """

    # Seconds of audio read per clip.
    MAX_DURATION_S = 30.0
    # Clips that decode to less than this many seconds are rejected (returned as None).
    MIN_DURATION_S = 0.1

    def __init__(self, manifest_path, feature_extractor, expected_num_labels=None):
        """
        Args:
            manifest_path (str): Path to the final manifest CSV file (e.g., small_subset_multihot.csv).
                Must contain a 'multi_hot_label' column holding list literals such as '[1.0, 0.0, ...]'.
            feature_extractor: Initialized Hugging Face feature extractor exposing ``sampling_rate``.
            expected_num_labels (int, optional): Length every multi-hot label should have.
                When omitted, a module-level ``num_labels`` variable is used if one exists;
                if neither is available the length check is skipped with a warning.

        Raises:
            ValueError: ``feature_extractor`` is None, or the first parsed label has the wrong length.
            AttributeError: the feature extractor has no ``sampling_rate``.
            FileNotFoundError: the manifest file does not exist.
            KeyError: the manifest has no 'multi_hot_label' column.
            TypeError: the parsed label is not a list.
        """
        # Resolve the expected label width once, up front, instead of relying on a
        # notebook global being present at the moment the comparison runs.
        if expected_num_labels is None:
            expected_num_labels = globals().get('num_labels')
        if expected_num_labels is None:
            logging.warning(
                "No expected label count available (pass expected_num_labels or define a global "
                "'num_labels'); skipping the label-length check."
            )

        logging.info(f"Initializing FMARawAudioDataset from: {manifest_path}")
        if feature_extractor is None:
            raise ValueError("FMARawAudioDataset requires a feature_extractor instance.")

        self.feature_extractor = feature_extractor
        try:
            # Audio is resampled to whatever rate the extractor declares.
            self.target_sr = self.feature_extractor.sampling_rate
            logging.info(f"Target sampling rate set from feature extractor: {self.target_sr} Hz")
        except AttributeError:
            logging.error("Could not get sampling_rate from feature_extractor.", exc_info=True)
            raise

        logging.info(f"Loading manifest from: {manifest_path}")
        try:
            self.manifest = pd.read_csv(manifest_path)
            # Index by track_id so __getitem__ can use .loc; keep the column as well.
            if 'track_id' in self.manifest.columns:
                self.manifest = self.manifest.set_index('track_id', drop=False)
            else:
                logging.warning("Manifest CSV does not contain 'track_id' column. Using DataFrame index.")
                if not pd.api.types.is_integer_dtype(self.manifest.index):
                    logging.warning("Manifest index is not integer type. Ensure it matches track IDs.")

            # --- Parse the 'multi_hot_label' string back into a list ---
            # Labels are assumed to be saved as standard list strings '[1.0, 0.0,...]'.
            logging.info("Attempting to parse 'multi_hot_label' column using ast.literal_eval...")
            label_parser = ast.literal_eval

            label_col_name = 'multi_hot_label'
            if label_col_name not in self.manifest.columns:
                raise KeyError(f"Column '{label_col_name}' not found in manifest CSV at {manifest_path}")

            self.manifest[label_col_name] = self.manifest[label_col_name].apply(label_parser)

            # --- Verification step (first row only) ---
            first_label = self.manifest[label_col_name].iloc[0]
            if not isinstance(first_label, list):
                raise TypeError(f"Parsed label is not a list, check parser/CSV format. Got type: {type(first_label)}")
            if expected_num_labels is not None and len(first_label) != expected_num_labels:
                logging.error(f"FATAL: Parsed label length ({len(first_label)}) does not match expected num_labels ({expected_num_labels}). Check parsing or unified_genres.txt.")
                raise ValueError("Parsed label length mismatch.")
            logging.info(f"Example parsed label verified (type {type(first_label)}, length {len(first_label)}): {str(first_label)[:100]}...")
            # --- End Verification ---

            logging.info(f"Loaded and parsed manifest with {len(self.manifest)} entries.")

        except FileNotFoundError:
            logging.error(f"Manifest file not found: {manifest_path}", exc_info=True)
            raise
        except Exception as e:
            logging.error(f"Error loading or parsing manifest {manifest_path}: {e}", exc_info=True)
            raise

    def __len__(self):
        """Returns the total number of rows in the manifest."""
        return len(self.manifest)

    def __getitem__(self, idx):
        """
        Loads and featurizes the clip whose manifest index label is ``idx``.

        Args:
            idx: Manifest index label (track_id). A tensor is converted with ``.tolist()``.

        Returns:
            dict with 'labels' (float32 tensor), the extractor output under
            'input_values' or 'input_features' (batch dimension squeezed), and
            'attention_mask' when the extractor provides one; or ``None`` when the
            row is missing, the audio cannot be read, the clip is too short, or any
            other error occurs (the error is logged).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Bound before the try block so every handler below can reference them safely.
        track_id = idx
        audio_path = None

        try:
            # Label-based lookup: track_id must be an index label, not a position.
            row = self.manifest.loc[track_id]

            multi_hot_label = row['multi_hot_label']
            audio_path = row['audio_path']

            # Manifest paths may be relative (as they were for the mel-spectrogram files);
            # resolve those against the project root so the right file is loaded.
            if not os.path.isabs(audio_path):
                audio_path = os.path.join(config.PROJECT_ROOT, audio_path)

            # --- 1. Load RAW Audio Waveform ---
            waveform, loaded_sr = librosa.load(
                audio_path,
                sr=self.target_sr,             # resample to the extractor's rate
                duration=self.MAX_DURATION_S   # read at most this many seconds
            )
            min_samples = int(self.MIN_DURATION_S * self.target_sr)
            if len(waveform) < min_samples:
                logging.warning(f"Audio signal for track {track_id} too short, returning None.")
                return None  # Requires collate_fn to handle None

            # --- 2. Apply Feature Extractor ---
            # No padding/truncation/max_length arguments: the collator pads per batch.
            inputs = self.feature_extractor(
                waveform,
                sampling_rate=self.target_sr,
                return_tensors="pt",
                return_attention_mask=True
            )

            # --- 3. Prepare Outputs ---
            # Different extractors name their main output differently.
            feature_tensor = inputs.get('input_values', inputs.get('input_features'))
            if feature_tensor is None:
                raise KeyError(f"Expected 'input_values' or 'input_features' key from feature_extractor output. Got keys: {inputs.keys()}")
            feature_tensor = feature_tensor.squeeze(0)  # drop the batch dim added by return_tensors="pt"

            attention_mask = inputs.get('attention_mask', None)
            if attention_mask is not None:
                attention_mask = attention_mask.squeeze(0)

            label_tensor = torch.tensor(multi_hot_label, dtype=torch.float32)

            model_input_dict = {"labels": label_tensor}
            input_key = 'input_values' if 'input_values' in inputs else 'input_features'
            model_input_dict[input_key] = feature_tensor
            if attention_mask is not None:
                model_input_dict['attention_mask'] = attention_mask

            return model_input_dict

        except KeyError:
            # track_id not present in the index, a required column is missing,
            # or the extractor returned neither expected key.
            logging.error(f"KeyError accessing data for track {track_id}. Check manifest columns.", exc_info=True)
            return None
        except FileNotFoundError:
            logging.error(f"Audio file not found for track {track_id} at {audio_path}")
            return None
        except Exception as e:
            logging.error(f"Error loading/processing track {track_id}: {e}", exc_info=True)
            return None  # Best effort: the collator drops None samples

print("FMARawAudioDataset class defined (using raw audio, feature extractor, ast.literal_eval for labels, .loc access).")

FMARawAudioDataset class defined (using raw audio, feature extractor, ast.literal_eval for labels, .loc access).


In [10]:
print(model_checkpoint)

facebook/w2v-bert-2.0


In [11]:
# # Cell 3.5: Define Data Collator for Padding (Corrected Padding Logic)

# import torch
# from dataclasses import dataclass
# from typing import Dict, List, Optional, Union
# # from transformers.feature_extraction_utils import BatchFeature # Not strictly needed here

# @dataclass
# class DataCollatorAudio:
#     """
#     Data collator that dynamically pads the inputs received Feature Extractor.
#     Correctly handles padding for [SequenceLength, FeatureDim] tensors.
#     """
#     padding_value: float = 0.0 # Standard padding for features/audio

#     def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
#         # features is a list of dicts like [{'input_values': tensor1, 'labels': label1, 'attention_mask': mask1}, ...]

#         # --- Pad 'input_values' (or 'input_features') ---
#         input_key = 'input_values' if 'input_values' in features[0] else 'input_features'
#         input_features = [d[input_key] for d in features]

#         # Determine max sequence length *in this batch* (assuming shape [SeqLen, FeatureDim])
#         # Add check for empty list
#         if not input_features:
#              return {}
#         max_len = max(feat.shape[0] for feat in input_features) # <<<--- Get length of FIRST dimension

#         # Pad each feature tensor to max_len along the sequence dimension (first dim)
#         padded_features = []
#         for feat in input_features:
#             # feat shape is [SeqLen, FeatureDim]
#             num_frames = feat.shape[0]
#             num_features = feat.shape[1] # Should be consistent (e.g., 160)
#             pad_width = max_len - num_frames

#             # Pad argument format for 2D tensor: (pad_left_dim1, pad_right_dim1, pad_left_dim0, pad_right_dim0)
#             # We only want to pad the end of the sequence dimension (dim 0)
#             # (0, 0) means no padding on left/right of feature dim (dim 1)
#             # (0, pad_width) means 0 padding before seq dim (dim 0), pad_width padding after
#             padded_feat = torch.nn.functional.pad(feat, (0, 0, 0, pad_width), mode='constant', value=self.padding_value)
#             # Verify shape after padding
#             # print(f"Original shape: {feat.shape}, Padded shape: {padded_feat.shape}, Target max_len: {max_len}")
#             padded_features.append(padded_feat)

#         # Stack the padded features into a batch tensor
#         # Now all tensors in padded_features should have shape [max_len, FeatureDim]
#         try:
#              batch_input_features = torch.stack(padded_features) # Shape: [BatchSize, max_len, FeatureDim]
#         except RuntimeError as e:
#              logging.error(f"RuntimeError during torch.stack. Shapes in batch might still differ or be incompatible.")
#              # Print shapes for debugging
#              for i, p_feat in enumerate(padded_features): logging.error(f" Padded shape {i}: {p_feat.shape}")
#              raise e


#         # --- Prepare Batch Dictionary ---
#         batch = {"input_values": batch_input_features}

#         # --- Pad 'attention_mask' if present ---
#         # Attention mask usually has shape [SeqLen]
#         if "attention_mask" in features[0] and features[0]["attention_mask"] is not None:
#             attention_masks = [d["attention_mask"] for d in features]
#             padded_masks = []
#             for mask in attention_masks:
#                  pad_width = max_len - mask.shape[-1] # Pad last dimension (the sequence length)
#                  # Pad argument format for 1D tensor: (pad_left, pad_right)
#                  padded_mask = torch.nn.functional.pad(mask, (0, pad_width), mode='constant', value=0) # Pad attention mask with 0
#                  padded_masks.append(padded_mask)
#             batch["attention_mask"] = torch.stack(padded_masks) # Shape: [BatchSize, max_len]

#         # --- Stack Labels ---
#         labels = [d["labels"] for d in features]
#         batch["labels"] = torch.stack(labels) # Shape: [BatchSize, num_labels]

#         return batch

# # Create an instance of the collator (do this in Cell 4)
# # data_collator = DataCollatorAudio()
# # print("DataCollatorAudio defined.")

In [12]:
# Cell 3.5: Define Data Collator for Padding (Handles None values)

import torch
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
import logging # Add logging

@dataclass
class DataCollatorAudio:
    """
    Collate function that pads per-sample feature-extractor outputs to a common
    length and stacks them into a batch.

    ``None`` entries (samples the Dataset failed to load) are dropped first.
    Feature tensors may be 1-D ([SeqLen]) or 2-D (e.g. [SeqLen, FeatureDim]);
    for 2-D input the padded axis is the one whose size differs across the batch.
    """
    padding_value: float = 0.0  # Fill value used when padding the feature tensors

    @staticmethod
    def _infer_seq_len_dim(tensors: List[torch.Tensor]) -> int:
        """Return the axis (0 or -1) along which the batch's feature tensors must be padded."""
        first = tensors[0]
        if first.dim() == 1:
            return 0
        if first.dim() != 2:
            logging.warning(f"Unexpected tensor shape {first.shape}, assuming seq len is last dim.")
            return -1

        # The sequence axis is the one that changes from sample to sample; the
        # feature axis is constant. Looking at the whole batch avoids misreading
        # a short first clip (fewer frames than feature bins) as [FeatureDim, SeqLen].
        rows_vary = len({t.shape[0] for t in tensors}) > 1
        cols_vary = len({t.shape[1] for t in tensors}) > 1
        if rows_vary != cols_vary:
            return 0 if rows_vary else -1

        # All shapes equal (no padding needed) or both axes vary (ambiguous):
        # fall back to treating the longer axis of the first tensor as the sequence.
        return 0 if first.shape[0] > first.shape[1] else -1

    def __call__(self, features: List[Optional[Dict[str, Union[List[int], torch.Tensor]]]]) -> Dict[str, torch.Tensor]:
        """
        Args:
            features: Per-sample dicts from the Dataset (or None for failed samples).
                Each dict holds 'labels', the features under 'input_values' or
                'input_features', and optionally 'attention_mask'.

        Returns:
            Dict with the stacked, padded features (under the same key the samples
            used), 'attention_mask' when the first valid sample has one, and
            'labels'. An empty dict is returned when every sample was None, so the
            training loop must be prepared to skip such a batch.
        """
        # --- Filter out None entries ---
        valid_features = [f for f in features if f is not None]
        if not valid_features:
            logging.warning("Collate function received empty batch after filtering Nones.")
            return {}

        # --- Pad the main feature tensors ---
        input_key = 'input_values' if 'input_values' in valid_features[0] else 'input_features'
        input_features = [d[input_key] for d in valid_features]

        seq_len_dim = self._infer_seq_len_dim(input_features)
        max_len = max(feat.shape[seq_len_dim] for feat in input_features)

        padded_features = []
        for feat in input_features:
            pad_width = max_len - feat.shape[seq_len_dim]
            # F.pad takes (left, right) pairs starting from the LAST dimension.
            if seq_len_dim == 0 and feat.dim() == 2:
                padding = (0, 0, 0, pad_width)  # leave feature dim alone, pad end of dim 0
            else:
                padding = (0, pad_width)        # pad end of the last dim
            padded_features.append(
                torch.nn.functional.pad(feat, padding, mode='constant', value=self.padding_value)
            )

        batch = {input_key: torch.stack(padded_features)}

        # --- Pad 'attention_mask' if present (padded positions are marked 0) ---
        if "attention_mask" in valid_features[0] and valid_features[0]["attention_mask"] is not None:
            attention_masks = [d["attention_mask"] for d in valid_features]
            # The mask's last dimension is taken to be the sequence length.
            max_mask_len = max(m.shape[-1] for m in attention_masks)
            padded_masks = []
            for mask in attention_masks:
                pad_width = max_mask_len - mask.shape[-1]
                padded_masks.append(
                    torch.nn.functional.pad(mask, (0, pad_width), mode='constant', value=0)
                )
            batch["attention_mask"] = torch.stack(padded_masks)

        # --- Stack Labels ---
        batch["labels"] = torch.stack([d["labels"] for d in valid_features])

        return batch

# Create an instance of the collator (do this in Cell 4)
# data_collator = DataCollatorAudio()
# print("DataCollatorAudio defined.")

# Cell 4

In [13]:
# Cell 4: Load Feature Extractor, Create DataLoaders with Custom Collator

from transformers import AutoFeatureExtractor # Use the correct class

# Ensure FMARawAudioDataset and DataCollatorAudio are defined in previous cells

# --- Load Feature Extractor ---
# Purpose of this cell: load the feature extractor for `model_checkpoint`, build the full
# FMARawAudioDataset from `manifest_path`, and create small debug train/val DataLoaders
# that use DataCollatorAudio for per-batch padding.
# `model_checkpoint`, `manifest_path` and `batch_size` are not defined in this cell;
# they must already exist in the kernel (model_checkpoint: Cell 2).
logging.info(f"Loading feature extractor for: {model_checkpoint}")
try:
    # Load the feature extractor associated with Wav2Vec2-BERT
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_checkpoint)
    logging.info("Feature extractor loaded successfully.")
    # Report the sample rate the extractor expects
    processor_sr = feature_extractor.sampling_rate
    print(f"Feature extractor expects sample rate: {processor_sr}")
    # Only warns on a mismatch: the Dataset resamples to the extractor's own
    # sampling_rate, so config is not modified here.
    if config.PREPROCESSING_PARAMS['sample_rate'] != processor_sr:
         logging.warning(f"Config sample rate ({config.PREPROCESSING_PARAMS['sample_rate']}) differs from feature extractor ({processor_sr}). Ensure audio loading uses {processor_sr} Hz.")
         # Update config value if necessary, or ensure Dataset uses processor_sr
         # config.PREPROCESSING_PARAMS['sample_rate'] = processor_sr # Be careful modifying config dynamically

except Exception as e:
    logging.error(f"Could not load feature extractor for {model_checkpoint}. Cannot proceed. Error: {e}", exc_info=True)
    raise SystemExit # Stop execution if extractor fails

# --- Create Full Dataset ---
# FMARawAudioDataset parses the manifest in __init__; any failure there aborts the cell.
try:
    # Pass the loaded feature_extractor instance
    full_dataset = FMARawAudioDataset(manifest_path, feature_extractor=feature_extractor)
    # Reuse the dataset's parsed DataFrame (indexed by track_id when that column exists)
    manifest_df = full_dataset.manifest
except Exception as e:
     logging.error("Failed to instantiate FMARawAudioDataset.", exc_info=True)
     raise SystemExit

# --- Create SMALLER DEBUG Datasets ---
logging.info("Creating DEBUG DataLoaders with small subsets and custom collator...")
try:
    # Take the first 16 training / 8 validation rows. These are index LABELS
    # (track IDs), which is what FMARawAudioDataset.__getitem__ looks up with .loc.
    train_indices = manifest_df[manifest_df['split'] == 'training'].index[:16].tolist() # Small subset for debug
    val_indices = manifest_df[manifest_df['split'] == 'validation'].index[:8].tolist()  # Small subset for debug

    # Subset forwards each stored label to full_dataset[label]
    debug_train_dataset = Subset(full_dataset, train_indices)
    debug_val_dataset = Subset(full_dataset, val_indices)

    # --- Create Data Collator Instance ---
    # (Assumes DataCollatorAudio class is defined in Cell 3.5)
    data_collator = DataCollatorAudio()
    print("DataCollatorAudio instance created.")

    # --- Create DataLoaders using the custom collate_fn ---
    debug_train_dataloader = DataLoader(
        debug_train_dataset,
        batch_size=batch_size, # Use small batch_size from config
        shuffle=True,
        collate_fn=data_collator # Apply custom padding at batch level
        # num_workers=4, # Optional: Add workers later for performance
        # pin_memory=True # Optional: Add if using GPU
    )
    debug_val_dataloader = DataLoader(
        debug_val_dataset,
        batch_size=batch_size, # Use small batch_size from config
        shuffle=False, # No need to shuffle validation data
        collate_fn=data_collator # Apply custom padding at batch level
        # num_workers=4,
        # pin_memory=True
    )
    logging.info(f"DEBUG Dataset sizes: Train={len(debug_train_dataset)}, Val={len(debug_val_dataset)}")
    logging.info("DEBUG DataLoaders with custom collator created.")
except Exception as e:
    logging.error(f"Failed to create DEBUG datasets/dataloaders: {e}", exc_info=True)
    raise SystemExit

2025-05-04 04:27:11,087 - INFO - Loading feature extractor for: facebook/w2v-bert-2.0
2025-05-04 04:27:11,212 - INFO - Feature extractor loaded successfully.
Feature extractor expects sample rate: 16000
2025-05-04 04:27:11,214 - INFO - Initializing FMARawAudioDataset from: /workspace/musicClaGen/data/processed/small_subset_multihot.csv
2025-05-04 04:27:11,215 - INFO - Target sampling rate set from feature extractor: 16000 Hz
2025-05-04 04:27:11,217 - INFO - Loading manifest from: /workspace/musicClaGen/data/processed/small_subset_multihot.csv
2025-05-04 04:27:11,255 - INFO - Attempting to parse 'multi_hot_label' column using ast.literal_eval...
2025-05-04 04:27:11,576 - INFO - Example parsed label verified (type <class 'list'>, length 22): [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...
2025-05-04 04:27:11,578 - INFO - Loaded and parsed manifest with 8000 entries.
2025-05-04 04:27:11,579 - INFO - Creating DEBUG DataLoaders with sm

In [14]:
# Cell 5: Load Wav2Vec2-BERT Model and Modify Head

import torch.nn as nn # Ensure nn is imported
from transformers import AutoModelForAudioClassification

# Purpose of this cell: load the pre-trained checkpoint with a classification head sized to
# `num_labels`, re-create that head explicitly, and move the model to `device`.
# `model_checkpoint`, `num_labels` and `device` must already exist in the kernel.
logging.info(f"Loading pre-trained Wav2Vec2-BERT model: {model_checkpoint}")
try:
    # Load the model configured for audio classification
    model = AutoModelForAudioClassification.from_pretrained(
        model_checkpoint,
        num_labels=num_labels,
        ignore_mismatched_sizes=True # Essential for replacing the head
    )
    logging.info("Model loaded initially.")

    # --- Explicit Head Replacement ---
    # Passing num_labels to from_pretrained above already sizes the head (the recorded run
    # logs "Original out: 22, New out: 22"), so the swap below only re-initializes a layer
    # of the same shape. It is kept as an explicit safeguard, even though it is redundant
    # when the loading step worked as intended.
    #
    # The attribute name of the classifier head MUST be verified for Wav2Vec2-BERT.
    # Common names include 'classifier', 'projector', 'classification_head'. Use print(model) after loading to check.
    classifier_attr = 'classifier' # <<<--- VERIFY THIS ATTRIBUTE NAME ---<<<

    if hasattr(model, classifier_attr):
        original_classifier = getattr(model, classifier_attr)
        logging.info(f"Found classifier attribute '{classifier_attr}' of type {type(original_classifier)}")

        # Only a plain nn.Linear head is replaced; any other head type is left untouched.
        if isinstance(original_classifier, nn.Linear):
            in_features = original_classifier.in_features
            logging.info(f"Replacing classifier head '{classifier_attr}'. Original out: {original_classifier.out_features}, New out: {num_labels}")
            # New layer uses nn.Linear's default initialization.
            setattr(model, classifier_attr, nn.Linear(in_features, num_labels))
            print(f"Successfully replaced classifier head '{classifier_attr}'.")
        # Add checks here if Wav2Vec2-BERT uses a different common head structure
        # elif isinstance(original_classifier, nn.Sequential): ... etc.
        else:
             logging.warning(f"Classifier head '{classifier_attr}' is not nn.Linear ({type(original_classifier)}). Attempting replacement might fail or need adjustment.")
             # If you know the structure (e.g., model.projector + model.classifier), adjust accordingly.
             # No replacement happens in this branch; the head created by from_pretrained is kept.

    else:
         logging.warning(f"Could not automatically find classifier attribute '{classifier_attr}'. Ensure head size ({num_labels}) was correctly set via 'num_labels' argument during loading or modify manually.")

    # Move to the target device only after the head swap so the new layer moves too.
    model.to(device)
    logging.info("Wav2Vec2-BERT Model loaded and moved to device.")
    # print(model) # Uncomment this line and run to inspect the model structure and find the classifier name

except Exception as e:
    logging.error(f"Failed to load model '{model_checkpoint}': {e}", exc_info=True)
    raise SystemExit # Stop if model loading fails

2025-05-04 04:27:11,604 - INFO - Loading pre-trained Wav2Vec2-BERT model: facebook/w2v-bert-2.0


Some weights of Wav2Vec2BertForSequenceClassification were not initialized from the model checkpoint at facebook/w2v-bert-2.0 and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-05-04 04:27:13,932 - INFO - Model loaded initially.
2025-05-04 04:27:13,934 - INFO - Found classifier attribute 'classifier' of type <class 'torch.nn.modules.linear.Linear'>
2025-05-04 04:27:13,935 - INFO - Replacing classifier head 'classifier'. Original out: 22, New out: 22
Successfully replaced classifier head 'classifier'.
2025-05-04 04:27:14,875 - INFO - Wav2Vec2-BERT Model loaded and moved to device.


In [15]:
# verify the correct attribute name for the classifier head for Wav2Vec2-BERT.
# print(model)


In [16]:
# Optimizer, Loss, Metrics Function

In [17]:
# Cell 6: Define Optimizer, Loss Function, and Metrics Calculation

import torch.optim as optim
from sklearn.metrics import hamming_loss, jaccard_score, f1_score # Make sure these are imported

# --- Optimizer ---
# AdamW over ALL model parameters, with one shared learning rate and weight decay.
# `model`, `learning_rate` and `weight_decay` must already exist in the kernel
# (the recorded run logs LR=5e-05, Weight Decay=0.01).
optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay
)
logging.info(f"Optimizer AdamW defined with LR={learning_rate}, Weight Decay={weight_decay}")

# --- Loss Function ---
# Use BCEWithLogitsLoss for multi-label classification (includes Sigmoid),
# so it must be fed raw logits, not probabilities.
criterion = nn.BCEWithLogitsLoss().to(device)
logging.info("Loss function BCEWithLogitsLoss defined.")

# --- Metrics Function ---
def compute_metrics(eval_preds, threshold=0.5):
    """Calculates multi-label metrics from logits and labels.

    Args:
        eval_preds: ``(logits, labels)`` pair. Each element may be a
            ``torch.Tensor`` (any device, any floating dtype including
            float16/bfloat16) or an array-like. Both must have the same shape
            (presumably ``(num_samples, num_labels)`` -- confirm with caller).
        threshold: Probability strictly above which a label is predicted
            positive. Defaults to 0.5, the previously hard-coded value.

    Returns:
        dict with keys ``'hamming_loss'``, ``'jaccard_samples'``,
        ``'f1_micro'`` and ``'f1_macro'``. If the shapes disagree or a metric
        cannot be computed, the error is logged and worst-case values
        (hamming 1.0, everything else 0.0) are returned instead of raising.
    """
    logits, labels = eval_preds

    # Move to CPU float32 *before* leaving torch: numpy cannot represent
    # bfloat16 tensors, and half-precision logits (as produced under autocast)
    # overflow easily in a hand-written sigmoid.
    logits_t = torch.as_tensor(logits).detach().to(device='cpu', dtype=torch.float32)
    # Integer cast truncates toward zero, like the former ``astype(int)``.
    labels_np = torch.as_tensor(labels).detach().cpu().to(torch.int64).numpy()

    # torch.sigmoid is numerically stable for large |logit|, unlike
    # 1 / (1 + exp(-x)), which overflows (RuntimeWarning) for very negative x.
    probs = torch.sigmoid(logits_t).numpy()
    preds = (probs > threshold).astype(int)

    failure_metrics = {'hamming_loss': 1.0, 'jaccard_samples': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0}

    if labels_np.shape != preds.shape:
        logging.error(f"Shape mismatch in compute_metrics! Labels: {labels_np.shape}, Preds: {preds.shape}")
        # Return default metrics indicating failure
        return failure_metrics

    metrics = {}
    try:
        metrics['hamming_loss'] = hamming_loss(labels_np, preds)
        # average='samples' scores each example's label set, then averages.
        metrics['jaccard_samples'] = jaccard_score(labels_np, preds, average='samples', zero_division=0)
        metrics['f1_micro'] = f1_score(labels_np, preds, average='micro', zero_division=0)
        metrics['f1_macro'] = f1_score(labels_np, preds, average='macro', zero_division=0)
    except Exception as e:
        # Best-effort: a metric failure must not abort the surrounding run.
        logging.error(f"Error calculating metrics: {e}")
        metrics = failure_metrics

    # Logging of the values is left to the caller, which has more context.
    return metrics

print("Optimizer, Loss, and compute_metrics function defined.")

2025-05-04 04:27:14,934 - INFO - Optimizer AdamW defined with LR=5e-05, Weight Decay=0.01
2025-05-04 04:27:14,936 - INFO - Loss function BCEWithLogitsLoss defined.
Optimizer, Loss, and compute_metrics function defined.


In [18]:
# # Cell 7: Define Training Function for One Epoch 

# def train_epoch(model, dataloader, criterion, optimizer, device, gradient_accumulation_steps):
#     model.train()
#     total_loss = 0
#     num_samples = 0
#     optimizer.zero_grad()

#     progress_bar = tqdm(dataloader, desc="Training", leave=False)
#     for step, batch in enumerate(progress_bar):
#         try:
#             # --- CORRECTED INPUT PREPARATION ---
#             expected_model_input_key = "input_features"  

#             if 'input_values' not in batch: # Check if extractor output key is different
#                  raise KeyError("Batch dictionary missing 'input_values' from Dataset/Extractor.")

#             # Build the dictionary for the model's forward pass
#             model_inputs = {
#                 expected_model_input_key: batch['input_values'].to(device) # Map dataset output key to model input key
#             }
#             if 'attention_mask' in batch and batch['attention_mask'] is not None:
#                  model_inputs['attention_mask'] = batch['attention_mask'].to(device)
#             # --- END CORRECTION ---

#             labels = batch['labels'].to(device)

#             # Forward pass
#             outputs = model(**model_inputs) # Pass the correctly named arguments
#             logits = outputs.logits

#             # Calculate loss
#             loss = criterion(logits, labels)

#             # ... (rest of loss scaling, backward, optimizer step remains the same) ...
#             if torch.isnan(loss):
#                 logging.warning(f"NaN loss detected at step {step}. Skipping batch.")
#                 if (step + 1) % gradient_accumulation_steps != 0: model.zero_grad()
#                 continue
#             scaled_loss = loss / gradient_accumulation_steps
#             scaled_loss.backward()
#             batch_size_actual = labels.size(0)
#             total_loss += loss.item() * batch_size_actual
#             num_samples += batch_size_actual
#             if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(dataloader):
#                 optimizer.step()
#                 optimizer.zero_grad()
#             progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})

#         except Exception as e:
#              logging.error(f"Error during training step {step}, batch keys: {batch.keys()}. Error: {e}", exc_info=True)
#              continue

#     if (step + 1) % gradient_accumulation_steps != 0 and num_samples > 0: # Ensure step was defined
#          optimizer.step(); optimizer.zero_grad()

#     avg_loss = total_loss / num_samples if num_samples > 0 else 0
#     print(f"\nAverage Training Loss for Epoch: {avg_loss:.4f}")
#     return avg_loss

# print("train_epoch function updated.")

In [19]:
# # Cell 7: Define Training Function for One Epoch (with AMP)

# from torch.cuda.amp import autocast, GradScaler # Import AMP utilities


# # def train_epoch(model, dataloader, criterion, optimizer, device, gradient_accumulation_steps, scaler): # Add scaler argument
# #     model.train() # Set model to training mode
# #     total_loss = 0
# #     num_samples = 0
# #     optimizer.zero_grad() # Zero gradients once before the epoch loop

# #     progress_bar = tqdm(dataloader, desc="Training", leave=False)
# #     for step, batch in enumerate(progress_bar):
# #         if batch is None or not batch: continue # Skip potentially None batches if Dataset has errors

# #         try:
# #             # Uses 'input_features' as the model's expected key based on previous debugging
# #             expected_model_input_key = "input_features"
# #             input_data_key = 'input_values' if 'input_values' in batch else 'input_features' # Key from feature extractor output

# #             model_inputs = {}
# #             if input_data_key in batch:
# #                  model_inputs[expected_model_input_key] = batch[input_data_key].to(device)
# #             else:
# #                  raise KeyError(f"Neither 'input_values' nor 'input_features' found in batch.")

# #             if 'attention_mask' in batch and batch['attention_mask'] is not None:
# #                  model_inputs['attention_mask'] = batch['attention_mask'].to(device)

# #             labels = batch['labels'].to(device) # Keep labels separate

# #             # --- Automatic Mixed Precision ---
# #             with autocast(device_type=device.type): # Runs forward pass and loss in mixed precision
# #                 outputs = model(**model_inputs)
# #                 logits = outputs.logits
# #                 loss = criterion(logits, labels) # BCEWithLogitsLoss expects float labels

# #                 # Check for NaN loss immediately after calculation
# #                 if torch.isnan(loss):
# #                     logging.warning(f"NaN loss detected at step {step} *inside autocast*. Skipping batch.")
# #                     # Need to zero grad if skipping before optimizer step in accumulation cycle
# #                     if (step + 1) % gradient_accumulation_steps != 0:
# #                         optimizer.zero_grad() # Zero grad to prevent NaN propagation
# #                     continue # Skip backward and optimizer step

# #                 # Scale loss for gradient accumulation BEFORE scaler.scale()
# #                 scaled_loss = loss / gradient_accumulation_steps
# #             # --- End Autocast ---

# #             # --- Scaler Scales the loss and Calls backward() ---
# #             scaler.scale(scaled_loss).backward()
# #             # ---------------------------------------------

# #             # Accumulate total loss (use the original non-scaled loss for tracking)
# #             batch_size_actual = labels.size(0)
# #             total_loss += loss.item() * batch_size_actual
# #             num_samples += batch_size_actual

# #             # --- Optimizer Step (with Scaler) ---
# #             if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(dataloader):
# #                 # Optional: Unscale gradients before clipping (if clipping)
# #                 # scaler.unscale_(optimizer)
# #                 # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

# #                 scaler.step(optimizer) # Unscales gradients, steps optimizer, checks for inf/NaN
# #                 scaler.update() # Update scaler for next iteration
# #                 optimizer.zero_grad() # Zero gradients *after* stepping or skipping step
# #             # -----------------------------------

# #             # Update progress bar description with non-scaled loss
# #             progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})

# #         except Exception as e:
# #              logging.error(f"Error during training step {step}, batch keys: {batch.keys()}. Error: {e}", exc_info=True)
# #              # Ensure gradients are zeroed if an error occurs mid-accumulation cycle
# #              optimizer.zero_grad()
# #              continue # Skip this batch on error

# #     # Final calculation should use accumulated totals
# #     avg_loss = total_loss / num_samples if num_samples > 0 else 0
# #     print(f"\nAverage Training Loss for Epoch: {avg_loss:.4f}")
# #     return avg_loss


# def train_epoch(model, dataloader, criterion, optimizer, device, gradient_accumulation_steps, scaler): # Add scaler argument
#     model.train() # Set model to training mode
#     total_loss = 0
#     num_samples = 0
#     optimizer.zero_grad() # Zero gradients once before the epoch loop
    
#     # Add counters for debugging
#     successful_batches = 0
#     print(f"Starting training with {len(dataloader)} batches")

#     progress_bar = tqdm(dataloader, desc="Training", leave=False)
#     for step, batch in enumerate(progress_bar):
#         if batch is None or not batch: 
#             print(f"Skipping empty batch at step {step}")
#             continue # Skip potentially None batches if Dataset has errors

#         try:
#             # Print batch shape information for debugging
#             print(f"Batch {step}: input shape = {batch['input_values'].shape}, label shape = {batch['labels'].shape}")
            
#             # Uses 'input_features' as the model's expected key based on previous debugging
#             expected_model_input_key = "input_features"
#             input_data_key = 'input_values' if 'input_values' in batch else 'input_features' # Key from feature extractor output

#             model_inputs = {}
#             if input_data_key in batch:
#                  model_inputs[expected_model_input_key] = batch[input_data_key].to(device)
#             else:
#                  raise KeyError(f"Neither 'input_values' nor 'input_features' found in batch.")

#             if 'attention_mask' in batch and batch['attention_mask'] is not None:
#                  model_inputs['attention_mask'] = batch['attention_mask'].to(device)

#             labels = batch['labels'].to(device) # Keep labels separate

#             # --- Automatic Mixed Precision ---
#             with torch.autocast(device_type=device.type): # Runs forward pass and loss in mixed precision
#                 outputs = model(**model_inputs)
#                 logits = outputs.logits
#                 loss = criterion(logits, labels) # BCEWithLogitsLoss expects float labels

#                 # Check for NaN loss immediately after calculation
#                 if torch.isnan(loss):
#                     logging.warning(f"NaN loss detected at step {step} *inside autocast*. Skipping batch.")
#                     print(f"NaN loss detected at step {step}. Skipping batch.")
#                     # Need to zero grad if skipping before optimizer step in accumulation cycle
#                     if (step + 1) % gradient_accumulation_steps != 0:
#                         optimizer.zero_grad() # Zero grad to prevent NaN propagation
#                     continue # Skip backward and optimizer step

#                 # Scale loss for gradient accumulation BEFORE scaler.scale()
#                 scaled_loss = loss / gradient_accumulation_steps
#             # --- End Autocast ---

#             # --- Scaler Scales the loss and Calls backward() ---
#             scaler.scale(scaled_loss).backward()
#             # ---------------------------------------------

#             # Accumulate total loss (use the original non-scaled loss for tracking)
#             batch_size_actual = labels.size(0)
#             total_loss += loss.item() * batch_size_actual
#             num_samples += batch_size_actual

#             # --- Optimizer Step (with Scaler) ---
#             if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(dataloader):
#                 # Optional: Unscale gradients before clipping (if clipping)
#                 # scaler.unscale_(optimizer)
#                 # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

#                 scaler.step(optimizer) # Unscales gradients, steps optimizer, checks for inf/NaN
#                 scaler.update() # Update scaler for next iteration
#                 optimizer.zero_grad() # Zero gradients *after* stepping or skipping step
#             # -----------------------------------

#             # Update progress bar description with non-scaled loss
#             progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})
#             progress_bar.update(1)  # Explicitly update the progress bar
            
#             # Count successful batches
#             successful_batches += 1

#         except Exception as e:
#              logging.error(f"Error during training step {step}, batch keys: {batch.keys()}. Error: {e}", exc_info=True)
#              print(f"Error during training step {step}: {e}")
#              # Ensure gradients are zeroed if an error occurs mid-accumulation cycle
#              optimizer.zero_grad()
#              continue # Skip this batch on error

#     print(f"Completed training with {successful_batches}/{len(dataloader)} successful batches")
    
#     # Final calculation should use accumulated totals
#     avg_loss = total_loss / num_samples if num_samples > 0 else 0
#     print(f"\nAverage Training Loss for Epoch: {avg_loss:.4f}")
#     return avg_loss


# print("train_epoch function defined with AMP.")

In [20]:
# # Cell 7: Define Training Function for One Epoch (with AMP)

# from torch.cuda.amp import autocast, GradScaler # Import AMP utilities

# def train_epoch(model, dataloader, criterion, optimizer, device, gradient_accumulation_steps, scaler): # Add scaler argument
#     model.train() # Set model to training mode
#     total_loss = 0
#     num_samples = 0
#     optimizer.zero_grad() # Zero gradients once before the epoch loop
    
#     # Add counters for debugging
#     successful_batches = 0
#     print(f"Starting training with {len(dataloader)} batches")

#     progress_bar = tqdm(dataloader, desc="Training", leave=False)
#     for step, batch in enumerate(progress_bar):
#         if batch is None or not batch: 
#             print(f"Skipping empty batch at step {step}")
#             continue # Skip potentially None batches if Dataset has errors

#         try:
#             # Print batch shape information for debugging
#             print(f"Batch {step}: input shape = {batch['input_values'].shape}, label shape = {batch['labels'].shape}")
            
#             # Uses 'input_features' as the model's expected key based on previous debugging
#             expected_model_input_key = "input_features"
#             input_data_key = 'input_values' if 'input_values' in batch else 'input_features' # Key from feature extractor output

#             model_inputs = {}
#             if input_data_key in batch:
#                  model_inputs[expected_model_input_key] = batch[input_data_key].to(device)
#             else:
#                  raise KeyError(f"Neither 'input_values' nor 'input_features' found in batch.")

#             if 'attention_mask' in batch and batch['attention_mask'] is not None:
#                  model_inputs['attention_mask'] = batch['attention_mask'].to(device)

#             labels = batch['labels'].to(device) # Keep labels separate

#             # --- Automatic Mixed Precision ---
#             with torch.autocast(device_type=device.type): # Runs forward pass and loss in mixed precision
#                 outputs = model(**model_inputs)
#                 logits = outputs.logits
#                 loss = criterion(logits, labels) # BCEWithLogitsLoss expects float labels

#                 # Check for NaN loss immediately after calculation
#                 if torch.isnan(loss):
#                     logging.warning(f"NaN loss detected at step {step} *inside autocast*. Skipping batch.")
#                     print(f"NaN loss detected at step {step}. Skipping batch.")
#                     # Need to zero grad if skipping before optimizer step in accumulation cycle
#                     if (step + 1) % gradient_accumulation_steps != 0:
#                         optimizer.zero_grad() # Zero grad to prevent NaN propagation
#                     continue # Skip backward and optimizer step

#                 # Scale loss for gradient accumulation BEFORE scaler.scale()
#                 scaled_loss = loss / gradient_accumulation_steps
#             # --- End Autocast ---

#             # --- Scaler Scales the loss and Calls backward() ---
#             scaler.scale(scaled_loss).backward()
#             # ---------------------------------------------

#             # Accumulate total loss (use the original non-scaled loss for tracking)
#             batch_size_actual = labels.size(0)
#             total_loss += loss.item() * batch_size_actual
#             num_samples += batch_size_actual

#             # --- Optimizer Step (with Scaler) ---
#             if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(dataloader):
#                 # Optional: Unscale gradients before clipping (if clipping)
#                 # scaler.unscale_(optimizer)
#                 # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

#                 scaler.step(optimizer) # Unscales gradients, steps optimizer, checks for inf/NaN
#                 scaler.update() # Update scaler for next iteration
#                 optimizer.zero_grad() # Zero gradients *after* stepping or skipping step
#             # -----------------------------------

#             # Update progress bar description with non-scaled loss
#             progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})
#             progress_bar.update(1)  # Explicitly update the progress bar
            
#             # Count successful batches
#             successful_batches += 1

#         except Exception as e:
#              logging.error(f"Error during training step {step}, batch keys: {batch.keys()}. Error: {e}", exc_info=True)
#              print(f"Error during training step {step}: {e}")
#              # Ensure gradients are zeroed if an error occurs mid-accumulation cycle
#              optimizer.zero_grad()
#              continue # Skip this batch on error

#     print(f"Completed training with {successful_batches}/{len(dataloader)} successful batches")
    
#     # Final calculation should use accumulated totals
#     avg_loss = total_loss / num_samples if num_samples > 0 else 0
#     print(f"\nAverage Training Loss for Epoch: {avg_loss:.4f}")
#     return avg_loss


# print("train_epoch function defined with AMP.")

In [21]:
# Cell 7: Define Training Function for One Epoch (with AMP and Scheduler)

from torch.cuda.amp import autocast, GradScaler # Or from torch.amp import ...

# Ensure compute_metrics, torch, logging, tqdm etc. are imported

def _amp_optimizer_step(optimizer, scaler, scheduler=None):
    """Apply one GradScaler-aware optimizer update and clear the gradients.

    Args:
        optimizer: Optimizer holding the (scaled) accumulated gradients.
        scaler: GradScaler used for the backward passes.
        scheduler: Optional LR scheduler, advanced only if the update was applied.

    Returns:
        bool: True if the optimizer update was applied, False if the scaler
        skipped it because the gradients contained inf/NaN.
    """
    scale_before = scaler.get_scale()
    scaler.step(optimizer)  # Unscales gradients; skips the update on inf/NaN
    scaler.update()
    optimizer.zero_grad()

    # The scaler only lowers its scale after a skipped update, so a decrease
    # means the weights were not touched and the LR schedule must not advance.
    # (A disabled scaler always reports 1.0, i.e. "applied".)
    step_applied = scaler.get_scale() >= scale_before
    if step_applied and scheduler is not None:
        scheduler.step()
    return step_applied


def train_epoch(model, dataloader, criterion, optimizer, device, gradient_accumulation_steps, scaler, scheduler=None):
    """Train ``model`` for one pass over ``dataloader`` using AMP and gradient accumulation.

    Each batch is expected to be a mapping with the model input under
    ``'input_values'`` or ``'input_features'``, an optional
    ``'attention_mask'`` and the targets under ``'labels'``. The input is
    always passed to the model as the ``input_features`` keyword.

    Batches that are empty/None, produce a non-finite loss, or raise are
    skipped. The optimizer is stepped after every
    ``gradient_accumulation_steps`` *successfully back-propagated* batches
    (not loader indices), plus once at the end for any remainder, so skipped
    batches can neither drop nor double an update.

    Args:
        model: Model whose forward output exposes ``.logits``.
        dataloader: Iterable of batches (see above).
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer updating ``model``.
        device: ``torch.device`` to run on; autocast is enabled only for CUDA.
        gradient_accumulation_steps: Number of batches per optimizer update (>= 1).
        scaler: GradScaler used to scale the loss before ``backward()``.
        scheduler: Optional LR scheduler stepped once per applied optimizer update.

    Returns:
        float: Sample-weighted mean loss over the batches that were trained on
        (0 if none were).

    Raises:
        ValueError: If ``gradient_accumulation_steps`` is smaller than 1.
    """
    if gradient_accumulation_steps < 1:
        raise ValueError(f"gradient_accumulation_steps must be >= 1, got {gradient_accumulation_steps}")

    model.train()
    total_loss = 0.0
    num_samples = 0
    successful_steps = 0  # Optimizer updates that were actually applied
    pending_micro_batches = 0  # Backward passes accumulated since the last update
    optimizer.zero_grad()

    use_amp = device.type == 'cuda'
    progress_bar = tqdm(dataloader, desc="Training", leave=False)

    for step, batch in enumerate(progress_bar):
        if batch is None or not batch:
            continue

        try:
            expected_model_input_key = "input_features"  # VERIFY THIS KEY NAME
            input_data_key = 'input_values' if 'input_values' in batch else 'input_features'
            model_inputs = {expected_model_input_key: batch[input_data_key].to(device)}
            if 'attention_mask' in batch and batch['attention_mask'] is not None:
                model_inputs['attention_mask'] = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # torch.autocast is referenced explicitly so the function does not
            # depend on which `autocast` name happens to be imported globally
            # (torch.cuda.amp.autocast does not accept `device_type`).
            with torch.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(**model_inputs)
                logits = outputs.logits
                loss = criterion(logits, labels)

            if not torch.isfinite(loss):
                # Nothing was back-propagated, so gradients already accumulated
                # from earlier batches are still valid and are kept.
                logging.warning(f"Non-finite loss detected at step {step}. Skipping batch.")
                continue

            # Divide so the accumulated gradient is the mean over the cycle.
            scaler.scale(loss / gradient_accumulation_steps).backward()
            pending_micro_batches += 1

            # Track the unscaled loss, weighted by the real batch size.
            batch_size_actual = labels.size(0)
            total_loss += loss.item() * batch_size_actual
            num_samples += batch_size_actual

            if pending_micro_batches >= gradient_accumulation_steps:
                pending_micro_batches = 0
                if _amp_optimizer_step(optimizer, scaler, scheduler):
                    successful_steps += 1

            progress_bar.set_postfix({'loss': f'{loss.item():.4f}', 'lr': f'{optimizer.param_groups[0]["lr"]:.2e}'})

        except Exception as e:
            logging.error(f"Error during training step {step}: {e}", exc_info=True)
            # A failure may leave partially written gradients; discard the
            # current accumulation cycle and start a fresh one.
            optimizer.zero_grad()
            pending_micro_batches = 0
            continue

    # Flush the trailing, incomplete accumulation cycle (if any).
    if pending_micro_batches > 0:
        try:
            if _amp_optimizer_step(optimizer, scaler, scheduler):
                successful_steps += 1
        except Exception as e:
            logging.error(f"Error during final optimizer step: {e}", exc_info=True)
            optimizer.zero_grad()

    avg_loss = total_loss / num_samples if num_samples > 0 else 0
    print(f"\nCompleted training epoch. Successful optimizer steps: {successful_steps}")
    print(f"Average Training Loss for Epoch: {avg_loss:.4f}")
    return avg_loss

print("train_epoch function updated to accept scheduler.")

train_epoch function updated to accept scheduler.


In [22]:
# # Cell 8: Define Evaluation Function (Corrected Model Input)

# def evaluate(model, dataloader, criterion, device):
#     model.eval()
#     total_loss = 0
#     all_logits = []
#     all_labels = []
#     num_samples = 0

#     with torch.no_grad():
#         for step, batch in enumerate(tqdm(dataloader, desc="Evaluating", leave=False)):
#             try:
#                 # --- CORRECTED INPUT PREPARATION ---
#                 expected_model_input_key = "input_features" # <<<--- VERIFY THIS KEY NAME

#                 if 'input_values' not in batch:
#                      raise KeyError("Batch dictionary missing 'input_values' from Dataset/Extractor.")

#                 model_inputs = {
#                     expected_model_input_key: batch['input_values'].to(device)
#                 }
#                 if 'attention_mask' in batch and batch['attention_mask'] is not None:
#                      model_inputs['attention_mask'] = batch['attention_mask'].to(device)
#                 # --- END CORRECTION ---

#                 labels = batch['labels'].to(device)

#                 # Forward pass
#                 outputs = model(**model_inputs) # Pass the correctly named arguments
#                 logits = outputs.logits

#                 # Calculate loss
#                 loss = criterion(logits, labels)
#                 total_loss += loss.item() * labels.size(0)
#                 num_samples += labels.size(0)

#                 all_logits.append(logits.cpu())
#                 all_labels.append(labels.cpu())
#             except Exception as e:
#                  logging.error(f"Error during evaluation step {step}, batch keys: {batch.keys()}. Error: {e}", exc_info=True)
#                  continue # Skip batch

#     if not all_logits or not all_labels or num_samples == 0:
#         logging.warning("Evaluation yielded no results (all batches failed or empty dataloader?).")
#         return {}

#     avg_loss = total_loss / num_samples

#     all_logits_cat = torch.cat(all_logits, dim=0)
#     all_labels_cat = torch.cat(all_labels, dim=0)

#     eval_preds = (all_logits_cat, all_labels_cat)
#     metrics = compute_metrics(eval_preds)
#     metrics['eval_loss'] = avg_loss

#     print(f"\nValidation Loss: {avg_loss:.4f}")
#     for name, value in metrics.items():
#          if name != 'eval_loss': print(f"  Validation {name.replace('_', ' ').title()}: {value:.4f}")

#     return metrics

# print("evaluate function updated.")

In [23]:
# Cell 8: Define Evaluation Function (with AMP)

# Ensure compute_metrics function is defined in a previous cell
# Ensure torch, logging, tqdm, np are imported

def evaluate(model, dataloader, criterion, device):
    """Run ``model`` over ``dataloader`` without gradients and compute loss and metrics.

    Each batch is expected to be a mapping with the model input under
    ``'input_values'`` or ``'input_features'``, an optional
    ``'attention_mask'`` and the targets under ``'labels'``. Batches that are
    empty/None or raise are skipped (the error is logged).

    Args:
        model: Model whose forward output exposes ``.logits``; left in eval mode.
        dataloader: Iterable of batches (see above).
        criterion: Loss called as ``criterion(logits, labels)``.
        device: ``torch.device`` to run on; autocast is enabled only for CUDA.

    Returns:
        dict: The result of ``compute_metrics`` on all collected logits/labels
        plus ``'eval_loss'`` (sample-weighted mean loss). An empty dict if no
        batch could be evaluated.
    """
    model.eval()  # Set model to evaluation mode
    total_loss = 0
    all_logits = []
    all_labels = []
    num_samples = 0

    # Same policy as train_epoch: mixed precision only on CUDA. On CPU,
    # autocast would default to bfloat16, which numpy cannot represent.
    use_amp = device.type == 'cuda'

    with torch.no_grad():  # Disable gradient calculations
        for step, batch in enumerate(tqdm(dataloader, desc="Evaluating", leave=False)):
            if batch is None or not batch:
                continue
            try:
                # Prepare inputs
                expected_model_input_key = "input_features"  # VERIFY THIS KEY NAME
                input_data_key = 'input_values' if 'input_values' in batch else 'input_features'

                if input_data_key not in batch:
                    raise KeyError("Required input key not found in batch during evaluation.")
                model_inputs = {expected_model_input_key: batch[input_data_key].to(device)}

                if 'attention_mask' in batch and batch['attention_mask'] is not None:
                    model_inputs['attention_mask'] = batch['attention_mask'].to(device)

                labels = batch['labels'].to(device)

                # torch.autocast is referenced explicitly so the function does
                # not depend on which `autocast` name is imported globally
                # (torch.cuda.amp.autocast does not accept `device_type`).
                with torch.autocast(device_type=device.type, enabled=use_amp):
                    outputs = model(**model_inputs)
                    logits = outputs.logits
                    loss = criterion(logits, labels)

                total_loss += loss.item() * labels.size(0)
                num_samples += labels.size(0)

                # Up-cast before storing so metrics are computed from float32
                # values rather than half-precision autocast output.
                all_logits.append(logits.float().cpu())
                all_labels.append(labels.cpu())
            except Exception as e:
                logging.error(f"Error during evaluation step {step}: {e}", exc_info=True)
                continue  # Skip batch on error

    if not all_logits or not all_labels or num_samples == 0:
        logging.warning("Evaluation yielded no results.")
        return {}

    # Calculate average loss over processed samples
    avg_loss = total_loss / num_samples

    # Concatenate results from all batches
    all_logits_cat = torch.cat(all_logits, dim=0)
    all_labels_cat = torch.cat(all_labels, dim=0)

    # Calculate metrics using the helper function
    eval_preds = (all_logits_cat, all_labels_cat)  # Pass tensors directly
    metrics = compute_metrics(eval_preds)
    metrics['eval_loss'] = avg_loss

    # Report metrics
    print(f"\nValidation Loss: {avg_loss:.4f}")
    for name, value in metrics.items():
        if name != 'eval_loss':
            print(f"  Validation {name.replace('_', ' ').title()}: {value:.4f}")

    return metrics  # Return dictionary of all metrics

print("evaluate function defined with AMP (autocast only).")

evaluate function defined with AMP (autocast only).


In [24]:
# # Cell 9: Run ONE Epoch for Debugging

# from tqdm import tqdm # Ensure tqdm is imported
# from torch.cuda.amp import GradScaler



# # Ensure model, criterion, optimizer, dataloaders etc. are defined from previous cells
# print(f"\n--- Starting Debug Training Run for {num_epochs_debug} epoch ---")
# start_time = time.time()


# # --- Initialize GradScaler ---
# scaler = GradScaler() # <<<--- ADD THIS INITIALIZATION
# # ---------------------------



# # Make sure model and criterion are on the correct device
# model.to(device)
# criterion.to(device)

# for epoch in range(num_epochs_debug): # num_epochs_debug was set to 1 in Cell 2
#     print(f"\n--- Debug Epoch {epoch+1}/{num_epochs_debug} ---")

#     # Run training step for one epoch on the debug training data
#     train_loss = train_epoch(
#         model,
#         debug_train_dataloader, # Use the SMALL debug dataloader
#         criterion,
#         optimizer,
#         device,
#         gradient_accumulation_steps # Pass grad accum steps
#     )

#     # Run evaluation step on the debug validation data
#     eval_metrics = evaluate(
#         model,
#         debug_val_dataloader, # Use the SMALL debug dataloader
#         criterion,
#         device
#     )

#     print(f"\nDebug Epoch {epoch+1} finished.")
#     print(f"  Avg Train Loss: {train_loss:.4f}")
#     if eval_metrics:
#         # Print all collected metrics
#         for name, value in eval_metrics.items():
#             print(f"  Validation {name.replace('_', ' ').title()}: {value:.4f}")
#     else:
#         print("  Validation failed to produce metrics.")

#     # Optional: Save model after this 1 epoch for inspection
#     save_path = os.path.join(model_save_dir, f"wav2vec2bert_debug_epoch_{epoch+1}.pth") # <<<--- Corrected filename
#     try:
#          torch.save(model.state_dict(), save_path)
#          logging.info(f"Saved debug model checkpoint to {save_path}")
#     except Exception as e:
#          logging.error(f"Failed to save debug model checkpoint: {e}", exc_info=True)

# end_time = time.time()
# print(f"\n--- Debug Run Finished in {end_time - start_time:.2f} seconds ---")

In [25]:
# Cell 9: Run ONE Epoch for Debugging (with Updated AMP API)
#
# Smoke test of the training pipeline: runs `num_epochs_debug` epoch(s) of
# train_epoch/evaluate on the small debug dataloaders with a GradScaler, prints
# the resulting metrics and saves a checkpoint for inspection.
#
# Names expected from earlier cells: model, criterion, optimizer, device,
# debug_train_dataloader, debug_val_dataloader, gradient_accumulation_steps,
# num_epochs_debug, model_save_dir, train_epoch, evaluate.

# --- Ensure necessary imports are present ---

# Plain-text tqdm; this rebinds the `tqdm` name that Cell 1 imported from tqdm.notebook.
# NOTE(review): train_epoch/evaluate presumably pick this name up from the notebook
# globals at call time — confirm before removing the import.
from tqdm import tqdm

# --- Import AMP components from the device-agnostic torch.amp namespace ---
# `autocast` is not used directly in this cell; it is kept in scope for other cells.
from torch.amp import autocast, GradScaler

# Print memory status
print(f"GPU memory before dataloader setup: {torch.cuda.memory_allocated() / 1e9:.2f} GB")


# Ensure model, criterion, optimizer, dataloaders etc. are defined from previous cells
print(f"\n--- Starting Debug Training Run for {num_epochs_debug} epoch (with AMP) ---")
start_time = time.time()

# --- Initialize GradScaler using the torch.amp API ---
# Pass the device type explicitly (the scaler otherwise defaults to "cuda") and
# enable loss scaling only when the device really is CUDA.
scaler = GradScaler(device=device.type, enabled=(device.type == 'cuda'))

# Make sure model and criterion are on the correct device
model.to(device)
criterion.to(device)

for epoch in range(num_epochs_debug):  # num_epochs_debug was set to 1 in Cell 2
    print(f"\n--- Debug Epoch {epoch+1}/{num_epochs_debug} ---")

    # Training pass over the small debug training loader
    train_loss = train_epoch(
        model,
        debug_train_dataloader,
        criterion,
        optimizer,
        device,
        gradient_accumulation_steps,
        scaler,
    )

    # Evaluation pass over the small debug validation loader (no scaler needed)
    eval_metrics = evaluate(
        model,
        debug_val_dataloader,
        criterion,
        device,
    )

    print(f"\nDebug Epoch {epoch+1} finished.")
    print(f"  Avg Train Loss: {train_loss:.4f}")
    if eval_metrics:
        for name, value in eval_metrics.items():
            print(f"  Validation {name.replace('_', ' ').title()}: {value:.4f}")
    else:
        print("  Validation failed to produce metrics.")

    # Optional: save the model after this epoch for inspection. Saving is
    # best-effort: a failure is logged and the debug run continues.
    save_path = os.path.join(model_save_dir, f"wav2vec2bert_debug_AMP_epoch_{epoch+1}.pth")
    try:
        # Create the target directory first so a missing folder is not the
        # reason the checkpoint is lost.
        os.makedirs(model_save_dir, exist_ok=True)
        torch.save(model.state_dict(), save_path)
        logging.info(f"Saved debug model checkpoint to {save_path}")
    except Exception as e:
        logging.error(f"Failed to save debug model checkpoint: {e}", exc_info=True)

end_time = time.time()
print(f"\n--- Debug Run Finished in {end_time - start_time:.2f} seconds ---")

GPU memory before dataloader setup: 2.33 GB

--- Starting Debug Training Run for 1 epoch (with AMP) ---

--- Debug Epoch 1/1 ---


Training:   0%|          | 0/8 [00:00<?, ?it/s]

                                                                                 


Completed training epoch. Successful optimizer steps: 2
Average Training Loss for Epoch: 0.6900


                                                         


Validation Loss: 0.6657
  Validation Hamming Loss: 0.0966
  Validation Jaccard Samples: 0.2500
  Validation F1 Micro: 0.1905
  Validation F1 Macro: 0.0182

Debug Epoch 1 finished.
  Avg Train Loss: 0.6900
  Validation Hamming Loss: 0.0966
  Validation Jaccard Samples: 0.2500
  Validation F1 Micro: 0.1905
  Validation F1 Macro: 0.0182
  Validation Eval Loss: 0.6657
2025-05-04 04:27:37,387 - INFO - Saved debug model checkpoint to /workspace/musicClaGen/models/wav2vec2bert_debug_AMP_epoch_1.pth

--- Debug Run Finished in 22.28 seconds ---


# 2. The trial debug training run worked! Now let's try the full training run.

In [26]:
# Cell 10: Setup DataLoaders for FULL Splits & LR Scheduler
#
# Builds the full train/validation/test DataLoaders from the manifest's `split`
# column and creates a linear-decay LR scheduler sized to the number of
# optimizer steps of the full run.
#
# Names expected from earlier cells: model_checkpoint, manifest_path, device,
# optimizer, config, FMARawAudioDataset, DataCollatorAudio.

# AutoFeatureExtractor is imported here as well: Cell 1 only imports
# AutoModelForAudioClassification/AutoProcessor, and this cell is meant to be
# runnable on its own after a kernel restart.
from transformers import AutoFeatureExtractor, get_linear_schedule_with_warmup

# --- Ensure Feature Extractor is Loaded ---
# (Code from previous Cell 4 - necessary if kernel restarted)
logging.info(f"Loading feature extractor for: {model_checkpoint}")
try:
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_checkpoint)
    logging.info("Feature extractor loaded successfully.")
    target_sr = feature_extractor.sampling_rate
    print(f"Feature extractor expects sample rate: {target_sr}")
except Exception as e:
    logging.error(f"Could not load feature extractor. Error: {e}", exc_info=True)
    raise SystemExit

# --- Create Full Dataset instance ---
try:
    full_dataset = FMARawAudioDataset(manifest_path, feature_extractor=feature_extractor)
    manifest_df = full_dataset.manifest
except Exception:
    logging.error("Failed to instantiate FMARawAudioDataset.", exc_info=True)
    raise SystemExit

# --- Create FULL Datasets for Train/Val/Test ---
logging.info("Creating DataLoaders with FULL splits and custom collator...")
try:
    # Get indices for the splits from the manifest.
    # NOTE(review): Subset expects positional indices, while `.index` yields index
    # labels; the two coincide only for a default RangeIndex — confirm that
    # FMARawAudioDataset does not set a custom index on the manifest.
    train_indices = manifest_df[manifest_df['split'] == 'training'].index.tolist()
    val_indices = manifest_df[manifest_df['split'] == 'validation'].index.tolist()
    test_indices = manifest_df[manifest_df['split'] == 'test'].index.tolist()

    # An empty evaluation split would otherwise only show up later as missing metrics.
    for split_name, split_indices in (("validation", val_indices), ("test", test_indices)):
        if not split_indices:
            logging.warning(f"Manifest contains no rows for split '{split_name}'.")

    # Create Subset instances using the FULL index lists
    train_dataset = Subset(full_dataset, train_indices)
    val_dataset = Subset(full_dataset, val_indices)
    test_dataset = Subset(full_dataset, test_indices)

    # --- Create Data Collator Instance ---
    data_collator = DataCollatorAudio()
    print("DataCollatorAudio instance created.")

    # --- Create DataLoaders ---
    # Use actual batch size / accumulation steps from config
    full_batch_size = config.MODEL_PARAMS["batch_size"]
    full_grad_accum_steps = config.MODEL_PARAMS["gradient_accumulation_steps"]
    effective_batch_size = full_batch_size * full_grad_accum_steps
    logging.info(f"Batch size: {full_batch_size}, Grad Accum Steps: {full_grad_accum_steps}, Effective BS: {effective_batch_size}")

    # Use num_workers for faster loading (adjust based on instance cores)
    num_workers = 4 if os.name == 'posix' else 0
    pin_memory = device.type == 'cuda'

    # Settings shared by all three loaders; only the dataset and shuffling differ.
    loader_kwargs = dict(
        batch_size=full_batch_size,
        collate_fn=data_collator,
        num_workers=num_workers,
        pin_memory=pin_memory,
        # persistent_workers is only valid when worker processes are used
        persistent_workers=(num_workers > 0),
    )
    train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
    test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)
    logging.info(f"FULL Dataset sizes: Train={len(train_dataset)}, Val={len(val_dataset)}, Test={len(test_dataset)}")
    logging.info("FULL DataLoaders with custom collator created.")

except Exception as e:
    logging.error(f"Failed to create datasets/dataloaders: {e}", exc_info=True)
    raise SystemExit

# --- Setup LR Scheduler ---
# Kept in its own try block so a scheduler problem is not reported as a
# dataloader problem.
try:
    num_epochs = config.MODEL_PARAMS["epochs"]
    # One scheduler step per optimizer step: batches per epoch divided by the
    # accumulation steps (floor), times the number of epochs.
    num_training_steps = (len(train_dataloader) // full_grad_accum_steps) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,  # Optimizer defined in Cell 6
        num_warmup_steps=0,  # You can add warmup steps if desired (e.g., 10% of total steps)
        num_training_steps=num_training_steps,
    )
    logging.info(f"LR Scheduler created. Total optimization steps: {num_training_steps}")
except Exception as e:
    logging.error(f"Failed to create LR scheduler: {e}", exc_info=True)
    raise SystemExit

print("\nSetup for full training run complete.")

2025-05-04 04:27:37,443 - INFO - Loading feature extractor for: facebook/w2v-bert-2.0


2025-05-04 04:27:37,534 - INFO - Feature extractor loaded successfully.
Feature extractor expects sample rate: 16000
2025-05-04 04:27:37,536 - INFO - Initializing FMARawAudioDataset from: /workspace/musicClaGen/data/processed/small_subset_multihot.csv
2025-05-04 04:27:37,538 - INFO - Target sampling rate set from feature extractor: 16000 Hz
2025-05-04 04:27:37,539 - INFO - Loading manifest from: /workspace/musicClaGen/data/processed/small_subset_multihot.csv
2025-05-04 04:27:37,572 - INFO - Attempting to parse 'multi_hot_label' column using ast.literal_eval...
2025-05-04 04:27:37,893 - INFO - Example parsed label verified (type <class 'list'>, length 22): [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...
2025-05-04 04:27:37,895 - INFO - Loaded and parsed manifest with 8000 entries.
2025-05-04 04:27:37,897 - INFO - Creating DataLoaders with FULL splits and custom collator...
DataCollatorAudio instance created.
2025-05-04 04:27:37,904

In [27]:
# Cell 11: Run Full Training Loop
#
# Trains for config.MODEL_PARAMS["epochs"] epochs on the full training loader,
# evaluates on the validation loader after every epoch and saves the model
# state dict whenever the monitored validation metric improves.
#
# Names expected from earlier cells: model, criterion, optimizer, scheduler,
# device, train_dataloader, val_dataloader, model_save_dir, model_checkpoint,
# config, train_epoch, evaluate.
# NOTE(review): model and optimizer are used as they come from earlier cells, so
# any updates applied by the debug run carry over unless they were re-created in
# between — confirm this is intended.

import gc
import torch
# Imported here as well so this cell does not depend on the debug cell having run.
from torch.amp import GradScaler

# Collect unreachable Python objects first so the tensors they hold are freed,
# then release the now-unused cached blocks back to the GPU.
gc.collect()
torch.cuda.empty_cache()

# Check memory usage before training
print(f"GPU memory allocated before training: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"GPU memory reserved before training: {torch.cuda.memory_reserved() / 1e9:.2f} GB")


# Make sure model, criterion, optimizer, scheduler, dataloaders defined from previous cells
num_epochs = config.MODEL_PARAMS["epochs"]  # Get actual epochs from config
gradient_accumulation_steps = config.MODEL_PARAMS["gradient_accumulation_steps"]
# Metric to decide best model (lower is better).
# NOTE(review): hamming loss can stay flat while the model predicts no positive
# label at all (F1 = 0); consider monitoring 'eval_loss' or an F1 score instead.
metric_to_monitor = 'hamming_loss'
best_val_metric = float('inf')

# --- Initialize GradScaler for AMP ---
# Device type is passed explicitly (the scaler otherwise defaults to "cuda");
# loss scaling is enabled only on CUDA.
scaler = GradScaler(device=device.type, enabled=(device.type == 'cuda'))

logging.info(f"--- Starting FULL Training for {num_epochs} epochs ---")
start_time = time.time()

# Make sure model and criterion are on the correct device
model.to(device)
criterion.to(device)

# Use a consistent name for the best model checkpoint (overwritten on improvement)
best_model_path = os.path.join(model_save_dir, f"{model_checkpoint.replace('/', '_')}_finetuned_best.pth")

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    logging.info(f"\n--- Epoch {epoch+1}/{num_epochs} ---")

    # Run training for one epoch
    train_loss = train_epoch(
        model, train_dataloader, criterion, optimizer, device,
        gradient_accumulation_steps, scaler, scheduler  # Pass scaler and scheduler
    )

    # Run evaluation on validation set
    eval_metrics = evaluate(model, val_dataloader, criterion, device)

    print(f"\nEpoch {epoch+1} finished.")
    print(f"  Avg Train Loss: {train_loss:.4f}")

    if not eval_metrics:
        logging.warning(f"Epoch {epoch+1}: Evaluation failed, skipping checkpoint.")
    else:
        # Log all validation metrics
        for name, value in eval_metrics.items():
            print(f"  Validation {name.replace('_', ' ').title()}: {value:.4f}")

        if metric_to_monitor not in eval_metrics:
            # Without this check a misspelled/missing metric would silently
            # never trigger a checkpoint.
            logging.warning(f"Epoch {epoch+1}: metric '{metric_to_monitor}' missing from evaluation results, skipping checkpoint.")
        else:
            current_val_metric = eval_metrics[metric_to_monitor]
            # Save model checkpoint only on a strict improvement (lower is better)
            if current_val_metric < best_val_metric:
                best_val_metric = current_val_metric
                save_path = best_model_path
                try:
                    # Make sure the target directory exists before writing.
                    os.makedirs(model_save_dir, exist_ok=True)
                    torch.save(model.state_dict(), save_path)
                    logging.info(f"Validation metric improved ({metric_to_monitor}={current_val_metric:.4f}). Saved best model to {save_path}")
                except Exception as e:
                    logging.error(f"Failed to save model checkpoint: {e}", exc_info=True)
            else:
                logging.info(f"Validation metric did not improve ({metric_to_monitor}={current_val_metric:.4f}). Best: {best_val_metric:.4f}")

    # Logged for every epoch, including those whose evaluation failed
    epoch_duration = time.time() - epoch_start_time
    logging.info(f"Epoch {epoch+1} finished in {epoch_duration / 60:.2f} minutes.")

total_training_time = time.time() - start_time
logging.info(f"--- Training Finished in {total_training_time / 60:.2f} minutes ---")

GPU memory allocated before training: 7.00 GB
GPU memory reserved before training: 15.53 GB
2025-05-04 04:27:38,273 - INFO - --- Starting FULL Training for 8 epochs ---
2025-05-04 04:27:38,283 - INFO - 
--- Epoch 1/8 ---


Training:   0%|          | 0/3200 [00:00<?, ?it/s][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
  waveform, loaded_sr = librosa.load(
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


2025-05-04 04:28:22,951 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  16%|█▋        | 521/3200 [07:11<37:45,  1.18it/s, loss=0.2326, lr=4.90e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
  waveform, loaded_sr = librosa.load(
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


2025-05-04 04:34:50,558 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

  waveform, loaded_sr = librosa.load(
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


2025-05-04 04:35:41,972 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

Training:  36%|███▌      | 1158/3200 [15:54<27:51,  1.22it/s, loss=0.2289, lr=4.77e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  37%|███▋      | 1192/3200 [16:22<27:17,  1.23it/s, loss=0.2381, lr=4.77e-05][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!
Training:  44%|████▍     | 1418/3200 [19:28<24:06,  1.23it/s, loss=0.1624, lr=4.72e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
  waveform, loaded_sr = librosa.load(
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


2025-05-04 04:50:48,731 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  67%|██████▋   | 2129/3200 [29:11<13:30,  1.32it/s, loss=0.2103, lr=4.58e-05]

2025-05-04 04:56:49,404 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  81%|████████  | 2598/3200 [35:36<08:20,  1.20it/s, loss=0.1553, lr=4.49e-05]

2025-05-04 05:03:14,598 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  86%|████████▋ | 2762/3200 [37:50<05:56,  1.23it/s, loss=0.3741, lr=4.46e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)
                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2407


                                                             


Validation Loss: 0.2316
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 1 finished.
  Avg Train Loss: 0.2407
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2316
2025-05-04 05:15:07,159 - INFO - Validation metric improved (hamming_loss=0.0691). Saved best model to /workspace/musicClaGen/models/facebook_w2v-bert-2.0_finetuned_best.pth
2025-05-04 05:15:07,162 - INFO - Epoch 1 finished in 47.48 minutes.
2025-05-04 05:15:07,163 - INFO - 
--- Epoch 2/8 ---


Training:  15%|█▌        | 481/3200 [06:37<37:07,  1.22it/s, loss=0.2863, lr=4.28e-05]

2025-05-04 05:21:44,646 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  22%|██▏       | 709/3200 [09:43<33:44,  1.23it/s, loss=0.1500, lr=4.24e-05][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!


2025-05-04 05:25:19,220 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  31%|███       | 987/3200 [13:30<31:08,  1.18it/s, loss=0.3848, lr=4.18e-05]

2025-05-04 05:28:38,364 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

Training:  32%|███▏      | 1018/3200 [13:56<30:44,  1.18it/s, loss=0.2118, lr=4.18e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  39%|███▉      | 1244/3200 [17:03<27:38,  1.18it/s, loss=0.3634, lr=4.13e-05]

2025-05-04 05:32:10,991 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  39%|███▉      | 1259/3200 [17:15<24:35,  1.32it/s, loss=0.2439, lr=4.13e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)
Training:  46%|████▌     | 1477/3200 [20:14<23:00,  1.25it/s, loss=0.1586, lr=4.09e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:  62%|██████▏   | 1983/3200 [27:10<16:03,  1.26it/s, loss=0.1480, lr=3.99e-05]

2025-05-04 05:42:17,552 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

[src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  70%|██████▉   | 2234/3200 [30:36<13:13,  1.22it/s, loss=0.1583, lr=3.94e-05]

2025-05-04 05:45:44,355 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  71%|███████   | 2269/3200 [31:05<13:08,  1.18it/s, loss=0.2320, lr=3.93e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2206


                                                             


Validation Loss: 0.2310
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 2 finished.
  Avg Train Loss: 0.2206
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2310
2025-05-04 06:02:28,475 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 06:02:28,477 - INFO - Epoch 2 finished in 47.36 minutes.
2025-05-04 06:02:28,478 - INFO - 
--- Epoch 3/8 ---


Training:  12%|█▏        | 375/3200 [05:11<37:18,  1.26it/s, loss=0.2326, lr=3.68e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  15%|█▍        | 475/3200 [06:33<37:10,  1.22it/s, loss=0.2397, lr=3.66e-05]

2025-05-04 06:09:01,880 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

Training:  19%|█▊        | 599/3200 [08:15<35:14,  1.23it/s, loss=0.1557, lr=3.63e-05][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!
Training:  26%|██▌       | 834/3200 [11:29<32:50,  1.20it/s, loss=0.1603, lr=3.59e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  27%|██▋       | 860/3200 [11:50<31:47,  1.23it/s, loss=0.2215, lr=3.58e-05]

2025-05-04 06:14:19,855 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  40%|███▉      | 1274/3200 [17:30<26:21,  1.22it/s, loss=0.2033, lr=3.50e-05]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 06:19:59,162 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:  50%|█████     | 1609/3200 [22:05<20:26,  1.30it/s, loss=0.3160, lr=3.44e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:  64%|██████▎   | 2032/3200 [27:53<16:30,  1.18it/s, loss=0.1567, lr=3.35e-05]

2025-05-04 06:30:21,633 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  72%|███████▏  | 2307/3200 [31:35<12:04,  1.23it/s, loss=0.2294, lr=3.30e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)


2025-05-04 06:36:34,721 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  98%|█████████▊| 3128/3200 [42:48<00:59,  1.21it/s, loss=0.2387, lr=3.14e-05]

2025-05-04 06:45:16,761 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

[src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2209


                                                             


Validation Loss: 0.2308
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 3 finished.
  Avg Train Loss: 0.2209
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2308
2025-05-04 06:49:48,748 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 06:49:48,749 - INFO - Epoch 3 finished in 47.34 minutes.
2025-05-04 06:49:48,750 - INFO - 
--- Epoch 4/8 ---


Training:   0%|          | 0/3200 [00:00<?, ?it/s]

2025-05-04 06:49:49,667 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:   6%|▌         | 196/3200 [02:43<42:15,  1.18it/s, loss=0.2225, lr=3.09e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:   7%|▋         | 239/3200 [03:18<41:50,  1.18it/s, loss=0.3324, lr=3.08e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:   9%|▉         | 297/3200 [04:06<40:37,  1.19it/s, loss=0.1605, lr=3.07e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)
Training:  32%|███▏      | 1039/3200 [14:17<28:37,  1.26it/s, loss=0.1627, lr=2.92e-05][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequanti

2025-05-04 07:10:54,959 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

[src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  52%|█████▏    | 1663/3200 [22:47<21:14,  1.21it/s, loss=0.2967, lr=2.80e-05]

2025-05-04 07:12:36,857 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

Training:  75%|███████▍  | 2398/3200 [32:51<10:44,  1.24it/s, loss=0.1600, lr=2.66e-05]

2025-05-04 07:22:40,000 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  91%|█████████ | 2917/3200 [39:56<03:49,  1.23it/s, loss=0.2289, lr=2.56e-05]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 07:29:45,862 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda



2025-05-04 07:32:35,082 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2202


                                                             


Validation Loss: 0.2313
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 4 finished.
  Avg Train Loss: 0.2202
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2313
2025-05-04 07:37:11,034 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 07:37:11,035 - INFO - Epoch 4 finished in 47.37 minutes.
2025-05-04 07:37:11,036 - INFO - 
--- Epoch 5/8 ---


Training:   0%|          | 14/3200 [00:14<45:12,  1.17it/s, loss=0.1508, lr=2.50e-05] [src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  17%|█▋        | 534/3200 [07:19<36:44,  1.21it/s, loss=0.2463, lr=2.40e-05]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 07:44:30,756 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:  21%|██▏       | 683/3200 [09:21<35:10,  1.19it/s, loss=0.2091, lr=2.37e-05][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!
Training:  32%|███▏      | 1019/3200 [13:57<30:01,  1.21it/s, loss=0.2863, lr=2.30e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!


2025-05-04 07:53:49,758 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  43%|████▎     | 1381/3200 [18:54<26:09,  1.16it/s, loss=0.1452, lr=2.23e-05]

2025-05-04 07:56:05,359 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  44%|████▍     | 1410/3200 [19:17<24:37,  1.21it/s, loss=0.3236, lr=2.23e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:  58%|█████▊    | 1845/3200 [25:13<18:10,  1.24it/s, loss=0.1544, lr=2.14e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)


2025-05-04 08:05:16,761 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  66%|██████▌   | 2105/3200 [28:47<15:14,  1.20it/s, loss=0.1551, lr=2.09e-05]

2025-05-04 08:05:58,421 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

[src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 08:06:58,002 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2201


                                                             


Validation Loss: 0.2311
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 5 finished.
  Avg Train Loss: 0.2201
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2311
2025-05-04 08:24:27,722 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 08:24:27,724 - INFO - Epoch 5 finished in 47.28 minutes.
2025-05-04 08:24:27,725 - INFO - 
--- Epoch 6/8 ---


Training:   6%|▌         | 184/3200 [02:34<40:31,  1.24it/s, loss=0.2857, lr=1.84e-05]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 08:27:02,802 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:  11%|█▏        | 363/3200 [05:02<39:31,  1.20it/s, loss=0.1584, lr=1.80e-05]

2025-05-04 08:29:29,918 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  14%|█▍        | 464/3200 [06:25<38:06,  1.20it/s, loss=0.2868, lr=1.78e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 08:30:53,035 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:  26%|██▌       | 817/3200 [11:15<33:28,  1.19it/s, loss=0.1579, lr=1.72e-05]

2025-05-04 08:35:42,835 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  27%|██▋       | 862/3200 [11:50<29:22,  1.33it/s, loss=0.1579, lr=1.71e-05]

2025-05-04 08:36:18,664 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

Training:  61%|██████    | 1957/3200 [26:49<16:17,  1.27it/s, loss=0.2259, lr=1.49e-05]

2025-05-04 08:51:16,807 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  62%|██████▏   | 1970/3200 [26:59<17:05,  1.20it/s, loss=0.2360, lr=1.49e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)
Training:  62%|██████▏   | 1977/3200 [27:05<17:14,  1.18it/s, loss=0.2200, lr=1.49e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:  73%|███████▎  | 2325/3200 [31:49<12:15,  1.19it/s, loss=0.3179, lr=1.42e-05][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!
Training:  73%|███████▎  | 2347/3200 [32:07<11:48,  1.20it/s, loss=0.3290, lr=1.42e-05][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequa


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2199


                                                             


Validation Loss: 0.2310
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 6 finished.
  Avg Train Loss: 0.2199
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2310
2025-05-04 09:11:48,896 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 09:11:48,897 - INFO - Epoch 6 finished in 47.35 minutes.
2025-05-04 09:11:48,898 - INFO - 
--- Epoch 7/8 ---


Training:   1%|          | 37/3200 [00:32<40:42,  1.30it/s, loss=0.1552, lr=1.24e-05] 

2025-05-04 09:12:21,565 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha

Training:  37%|███▋      | 1174/3200 [16:02<27:05,  1.25it/s, loss=0.1572, lr=1.02e-05][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:  42%|████▏     | 1358/3200 [18:34<25:09,  1.22it/s, loss=0.1615, lr=9.85e-06]

2025-05-04 09:30:23,397 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  50%|████▉     | 1586/3200 [21:41<21:25,  1.26it/s, loss=0.1603, lr=9.41e-06][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  64%|██████▍   | 2043/3200 [27:58<16:40,  1.16it/s, loss=0.2278, lr=8.52e-06][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)
Training:  66%|██████▋   | 2127/3200 [29:07<14:12,  1.26it/s, loss=0.1567, lr=8.35e-06][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Training:  75%|███████▍  | 2393/3200 [32:46<10:37,  1.27it/s, loss=0.2323, lr=7.83e-06]

2025-05-04 09:44:35,300 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

[src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  78%|███████▊  | 2480/3200 [33:58<10:58,  1.09it/s, loss=0.2661, lr=7.66e-06][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!
Training:  88%|████████▊ | 2811/3200 [38:32<05:19,  1.22it/s, loss=0.1565, lr=7.02e-06]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 09:50:21,371 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:  93%|█████████▎| 2985/3200 [40:55<03:08,  1.14it/s, loss=0.1544, lr=6.67e-06]

2025-05-04 09:52:44,369 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  97%|█████████▋| 3119/3200 [42:46<01:07,  1.21it/s, loss=0.1569, lr=6.41e-06]

2025-05-04 09:54:36,056 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2198


                                                             


Validation Loss: 0.2309
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 7 finished.
  Avg Train Loss: 0.2198
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2309
2025-05-04 09:59:13,633 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 09:59:13,635 - INFO - Epoch 7 finished in 47.41 minutes.
2025-05-04 09:59:13,636 - INFO - 
--- Epoch 8/8 ---


Training:   3%|▎         | 106/3200 [01:28<41:24,  1.25it/s, loss=0.3084, lr=6.05e-06]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 10:00:42,766 - ERROR - Error loading/processing track 98567: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:   9%|▊         | 276/3200 [03:45<40:06,  1.21it/s, loss=0.2274, lr=5.71e-06][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!


2025-05-04 10:12:37,409 - ERROR - Error loading/processing track 99134: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/099/099134.mp3': File does not exist or is not a regular file (possibly a pipe?).

During ha



2025-05-04 10:23:55,461 - ERROR - Error loading/processing track 133297: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/133/133297.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  58%|█████▊    | 1859/3200 [25:25<17:40,  1.26it/s, loss=0.2259, lr=2.62e-06]

2025-05-04 10:24:38,891 - ERROR - Error loading/processing track 98569: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

[src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
Training:  73%|███████▎  | 2335/3200 [31:53<11:14,  1.28it/s, loss=0.2276, lr=1.70e-06][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3328) too large for available bit count (3240)
Training:  77%|███████▋  | 2472/3200 [33:44<09:45,  1.24it/s, loss=0.2252, lr=1.42e-06][src/libmpg123/layer3.c:INT123_do_layer3():1774] error: part2_3_length (3360) too large for available bit count (3240)
Training:  82%|████████▏ | 2608/3200 [35:36<08:15,  1.20it/s, loss=0.1569, lr=1.16e-06][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!


2025-05-04 10:36:00,401 - ERROR - Error loading/processing track 108925: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 209, in __soundfile_load
    context = sf.SoundFile(path)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 690, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1265, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening '/workspace/musicClaGen/data/raw/fma_audio/fma_small/108/108925.mp3': File does not exist or is not a regular file (possibly a pipe?).

During h

Training:  92%|█████████▏| 2940/3200 [40:09<03:47,  1.14it/s, loss=0.1569, lr=5.08e-07]Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1349] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2025-05-04 10:39:23,756 - ERROR - Error loading/processing track 98565: 
Traceback (most recent call last):
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 176, in load
    y, sr_native = __soundfile_load(path, offset, duration, dtype)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/librosa/core/audio.py", line 222, in __soundfile_load
    y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 942, in read
    frames = self._array_io('read', out, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1394, in _array_io
    return self._cdata_io(action, cdata, ctype, frames)
  File "/root/miniconda/envs/musicClaGen_env22/lib/python3.10/site-packages/soundfile.py", line 1404, in _cdata_io
    _error_check(self._errorcode)
  File "/root/miniconda

Training:  95%|█████████▌| 3046/3200 [41:36<02:04,  1.23it/s, loss=0.1575, lr=3.05e-07][src/libmpg123/layer3.c:INT123_do_layer3():1844] error: dequantization failed!
                                                                                       


Completed training epoch. Successful optimizer steps: 800
Average Training Loss for Epoch: 0.2196


                                                             


Validation Loss: 0.2310
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000

Epoch 8 finished.
  Avg Train Loss: 0.2196
  Validation Hamming Loss: 0.0691
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
  Validation Eval Loss: 0.2310
2025-05-04 10:46:34,901 - INFO - Validation metric did not improve (hamming_loss=0.0691). Best: 0.0691
2025-05-04 10:46:34,903 - INFO - Epoch 8 finished in 47.35 minutes.
2025-05-04 10:46:34,904 - INFO - --- Training Finished in 378.94 minutes ---





• Time: Took ~47 minutes for 1 epoch on the full `fma_small` training set (~6400 samples). This is a realistic time given the model size, 30s inputs, data loading, and AMP.

• Errors During Training: The log shows several errors during the training loop:

  • `ERROR - Error loading/processing track ...`
  
  `audioread.exceptions.NoBackendError`: This error occurred multiple times (tracks 133297, 99134, 98569, 98567, 98565, 108925). It indicates `librosa.load` failed. It first tries `soundfile` (which fails often with MP3s, sometimes due to file existence/permissions or internal errors), then falls back to `audioread`, which then fails because no suitable backend (like `ffmpeg`) was found or successfully used by `audioread`. This is despite installing `ffmpeg` earlier. It suggests `librosa`'s fallback mechanism isn't working reliably in this environment.

  • `[src/libmpg123/...]: warning: Cannot read next header...`, `error: dequantization failed!`, `error: part2_3_length ... too large...`, `error: Giving up resync...`: These are lower-level MP3 decoding errors from the `mpg123` library, likely called by `audioread` or another backend. They indicate corrupted or non-standard MP3 files.


According to the FMA wiki on GitHub (https://github.com/mdeff/fma/wiki), these track IDs are indeed known to be flawed, so these audio loading errors are expected and can be ignored.

In [28]:
# Cell 12: Evaluate Best Model on Test Set (Robust Version)
#
# Purpose: reload the checkpoint stored at
# `<model_save_dir>/<model_checkpoint with '/' -> '_'>_finetuned_best.pth` and
# score it once on `test_dataset` with the shared `evaluate` helper.
#
# Depends on names defined in earlier cells: `model_save_dir`, `model_checkpoint`,
# `num_labels`, `device`, `test_dataset`, `data_collator`, `criterion`,
# `evaluate`, and the `config` module.
#
# Leaves behind: `best_model_path`, and (when the checkpoint exists and loads)
# `model_reloaded`, `safe_test_dataloader`, `test_metrics`, `eval_time`.

import torch.nn as nn
import os
from transformers import AutoModelForAudioClassification
import logging
from torch.utils.data import DataLoader
import time

logging.info("\n--- Evaluating on Test Set using Best Model ---")

# Construct path to the best saved model
best_model_path = os.path.join(model_save_dir, f"{model_checkpoint.replace('/', '_')}_finetuned_best.pth")

if os.path.exists(best_model_path):
    try:
        logging.info(f"Loading best model from {best_model_path}")

        # Re-create the architecture from the base checkpoint. The classification
        # head comes back freshly initialised here; every weight is then
        # overwritten by the fine-tuned state dict loaded just below.
        model_reloaded = AutoModelForAudioClassification.from_pretrained(
            model_checkpoint,
            num_labels=num_labels,
            ignore_mismatched_sizes=True
        )

        # Load the saved state dict.
        # weights_only=True restricts unpickling to tensors and plain containers,
        # so a tampered checkpoint file cannot execute arbitrary code on load.
        # This is sufficient because the result is passed directly to
        # load_state_dict (i.e. the file is expected to hold only a state dict).
        model_reloaded.load_state_dict(
            torch.load(best_model_path, map_location=device, weights_only=True)
        )
        model_reloaded.to(device)
        model_reloaded.eval()
        logging.info("Model successfully loaded and moved to device")

        # Create a safer test dataloader with no workers (avoid multiprocessing issues)
        safe_test_dataloader = DataLoader(
            test_dataset,
            batch_size=config.MODEL_PARAMS["batch_size"],
            shuffle=False,  # keep evaluation order deterministic
            collate_fn=data_collator,
            num_workers=0,  # Use main process only - no worker processes
            pin_memory=False  # Disable pinned memory to reduce memory usage
        )
        logging.info("Created safer test dataloader without worker processes")

        # Run evaluation with extra error handling
        logging.info("Starting evaluation on test set...")
        try:
            start_time = time.time()
            test_metrics = evaluate(model_reloaded, safe_test_dataloader, criterion, device)
            eval_time = time.time() - start_time

            # Log test results
            logging.info(f"\n--- Final Test Set Results (completed in {eval_time:.2f}s) ---")
            if test_metrics:
                for metric_name, metric_value in test_metrics.items():
                    logging.info(f"Test {metric_name.replace('_', ' ').title()}: {metric_value:.4f}")
            else:
                # `evaluate` returned an empty/None result.
                logging.info("Test evaluation failed to produce metrics.")

        except RuntimeError as e:
            # RuntimeError is split out so the two well-known failure modes get an
            # actionable hint; anything else keeps its traceback for debugging.
            if "CUDA out of memory" in str(e):
                logging.error("CUDA out of memory during evaluation. Try reducing batch size.")
            elif "DataLoader worker" in str(e):
                logging.error(f"DataLoader worker error (should not happen with num_workers=0): {e}")
            else:
                logging.error(f"Runtime error during evaluation: {e}", exc_info=True)
        except Exception as e:
            logging.error(f"Error during evaluation: {e}", exc_info=True)

    except Exception as e:
        # Reached for failures before evaluation starts (model construction,
        # checkpoint loading, or dataloader creation).
        logging.error(f"Failed to load model: {e}", exc_info=True)
else:
    logging.warning(f"Best model checkpoint not found at {best_model_path}. Skipping final test evaluation.")

2025-05-04 10:46:34,932 - INFO - 
--- Evaluating on Test Set using Best Model ---
2025-05-04 10:46:34,936 - INFO - Loading best model from /workspace/musicClaGen/models/facebook_w2v-bert-2.0_finetuned_best.pth


Some weights of Wav2Vec2BertForSequenceClassification were not initialized from the model checkpoint at facebook/w2v-bert-2.0 and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-05-04 10:46:41,758 - INFO - Model successfully loaded and moved to device
2025-05-04 10:46:41,761 - INFO - Created safer test dataloader without worker processes
2025-05-04 10:46:41,762 - INFO - Starting evaluation on test set...


                                                             


Validation Loss: 0.1972
  Validation Hamming Loss: 0.0574
  Validation Jaccard Samples: 0.0000
  Validation F1 Micro: 0.0000
  Validation F1 Macro: 0.0000
2025-05-04 10:55:42,077 - INFO - 
--- Final Test Set Results (completed in 540.31s) ---
2025-05-04 10:55:42,078 - INFO - Test Hamming Loss: 0.0574
2025-05-04 10:55:42,080 - INFO - Test Jaccard Samples: 0.0000
2025-05-04 10:55:42,081 - INFO - Test F1 Micro: 0.0000
2025-05-04 10:55:42,082 - INFO - Test F1 Macro: 0.0000
2025-05-04 10:55:42,083 - INFO - Test Eval Loss: 0.1972




In [29]:
# # Cell 12: High-CPU-Utilization Aware Evaluation

# import torch.nn as nn
# import os
# import psutil
# import time
# from transformers import AutoModelForAudioClassification
# import logging
# from torch.utils.data import DataLoader, Subset
# import random

# logging.info("\n--- Evaluating on Test Set using Best Model ---")

# # Get CPU utilization
# cpu_percent = psutil.cpu_percent(interval=1)
# logging.info(f"Current CPU utilization: {cpu_percent}%")

# # Determine safe worker count based on CPU utilization
# if cpu_percent > 90:
#     worker_count = 0  # No workers if CPU is nearly maxed out
#     logging.warning("High CPU utilization detected. Using 0 workers for stability.")
# elif cpu_percent > 70:
#     worker_count = 1  # Minimal workers if CPU is heavily used
# else:
#     worker_count = min(2, os.cpu_count() // 4)  # Conservative: 1/4 of cores with max of 2

# # Construct path to the best saved model
# best_model_path = os.path.join(model_save_dir, f"{model_checkpoint.replace('/', '_')}_finetuned_best.pth")

# if os.path.exists(best_model_path):
#     try:
#         logging.info(f"Loading best model from {best_model_path}")
        
#         # Re-initialize model with correct structure
#         model_reloaded = AutoModelForAudioClassification.from_pretrained(
#             model_checkpoint,
#             num_labels=num_labels,
#             ignore_mismatched_sizes=True
#         )
        
#         # Load the saved state dict
#         model_reloaded.load_state_dict(torch.load(best_model_path, map_location=device))
#         model_reloaded.to(device)
#         model_reloaded.eval()
#         logging.info("Model successfully loaded and moved to device")
        
#         # Create optimized test dataloader
#         safe_test_dataloader = DataLoader(
#             test_dataset, 
#             batch_size=config.MODEL_PARAMS["batch_size"],
#             shuffle=False,
#             collate_fn=data_collator,
#             num_workers=worker_count,
#             pin_memory=(device.type=='cuda' and worker_count > 0)
#         )
#         logging.info(f"Created test dataloader with {worker_count} workers")
        
#         # Run evaluation with error handling
#         logging.info("Starting evaluation on test set...")
#         try:
#             start_time = time.time()
#             test_metrics = evaluate(model_reloaded, safe_test_dataloader, criterion, device)
#             eval_time = time.time() - start_time
            
#             # Log test results
#             logging.info(f"\n--- Final Test Set Results (completed in {eval_time:.2f}s) ---")
#             if test_metrics:
#                 for metric_name, metric_value in test_metrics.items():
#                     logging.info(f"Test {metric_name.replace('_', ' ').title()}: {metric_value:.4f}")
#             else:
#                 logging.info("Test evaluation failed to produce metrics.")
        
#         except Exception as e:
#             logging.error(f"Error during evaluation: {e}")
#             logging.info("Falling back to zero workers and smaller batch size...")
            
#             # Fallback DataLoader with zero workers and smaller batch size
#             fallback_loader = DataLoader(
#                 test_dataset,
#                 batch_size=max(1, config.MODEL_PARAMS["batch_size"] // 2),
#                 shuffle=False,
#                 collate_fn=data_collator,
#                 num_workers=0
#             )
            
#             try:
#                 test_metrics = evaluate(model_reloaded, fallback_loader, criterion, device)
#                 if test_metrics:
#                     for metric_name, metric_value in test_metrics.items():
#                         logging.info(f"Test {metric_name.replace('_', ' ').title()}: {metric_value:.4f}")
#             except Exception as inner_e:
#                 logging.error(f"Fallback evaluation also failed: {inner_e}")
            
#     except Exception as e:
#         logging.error(f"Failed to load model: {e}", exc_info=True)
# else:
#     logging.warning(f"Best model checkpoint not found at {best_model_path}. Skipping final test evaluation.")

In [None]:
# import os
# import psutil
# import torch

# def get_optimal_workers():
#     # Check number of CPU cores
#     cpu_count = os.cpu_count()
    
#     # Check available memory (GB)
#     available_memory = psutil.virtual_memory().available / (1024 ** 3)
    
#     # Check if CUDA is available
#     cuda_available = torch.cuda.is_available()
    
#     # Get GPU memory if available (GB)
#     gpu_memory = 0
#     if cuda_available:
#         gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
        
#     # Print system info
#     print(f"Available CPU cores: {cpu_count}")
#     print(f"Available system memory: {available_memory:.2f} GB")
#     if cuda_available:
#         print(f"GPU memory: {gpu_memory:.2f} GB")
    
#     # Conservative recommendation: half the cores is usually safe
#     recommended = max(1, cpu_count // 2)
    
#     # Account for memory-intensive operations (each worker might need ~2-4GB)
#     # Adjust this estimate based on your dataset's memory footprint
#     memory_limited = max(1, int(available_memory / 4))
    
#     final_recommendation = min(recommended, memory_limited)
#     print(f"\nRecommended num_workers: {final_recommendation}")
    
#     return final_recommendation

# # Get recommendation
# optimal_workers = get_optimal_workers()