In [1]:
import os
import librosa
import soundfile as sf
import numpy as np
import torch
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
from transformers import Wav2Vec2FeatureExtractor, HubertForSequenceClassification, TrainingArguments, Trainer
from transformers import HubertModel, AutoFeatureExtractor
from transformers import pipeline
from datasets import Dataset, load_dataset
import gc
import xgboost as xgb

2024-07-18 05:50:36.999734: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-18 05:50:37.082680: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-18 05:50:37.083053: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-18 05:50:37.211688: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
train_path = "/teamspace/studios/this_studio/dataset/train"
test_path = "/teamspace/studios/this_studio/dataset/test"
checkpoint_dir = "/teamspace/studios/this_studio/checkpoint"

In [3]:
test2_dir = "/teamspace/studios/this_studio/chord_examples/base de datos/test"
train2_dir = "/teamspace/studios/this_studio/chord_examples/base de datos/train"
test3_dir = "/teamspace/studios/this_studio/chord_examples/base de datos 3/test"
train3_dir = "/teamspace/studios/this_studio/chord_examples/base de datos 3/train"

In [4]:
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

In [5]:
# Function to load audio files and their labels in batches
def load_audio_files_in_batches(path, batch_size=1000):
    audio_files = []
    labels = []
    for label in os.listdir(path):
        class_path = os.path.join(path, label)
        for i, file_name in enumerate(os.listdir(class_path)):
            file_path = os.path.join(class_path, file_name)
            audio_files.append(file_path)
            labels.append(label)
            if len(audio_files) == batch_size:
                yield audio_files, labels
                audio_files, labels = [], []
    if audio_files:
        yield audio_files, labels

In [6]:
# Load the HuBERT feature extractor and model
feature_extractor = AutoFeatureExtractor.from_pretrained("ntu-spml/distilhubert")
hubert_model = HubertModel.from_pretrained("ntu-spml/distilhubert")

Some weights of the model checkpoint at ntu-spml/distilhubert were not used when initializing HubertModel: ['encoder.pos_conv_embed.conv.weight_g', 'encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing HubertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing HubertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of HubertModel were not initialized from the model checkpoint at ntu-spml/distilhubert and are newly initialized: ['encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [7]:
# Function to extract features using HuBERT
def extract_features(file_path):
    audio_input, _ = sf.read(file_path)
    inputs = feature_extractor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        features = hubert_model(**inputs).last_hidden_state.mean(dim=1).squeeze().numpy()
    return features

In [8]:
# Function to process batches of audio files and extract features
def process_batches(path, batch_size=1000, checkpoint_file=None):
    if checkpoint_file and os.path.exists(checkpoint_file):
        with open(checkpoint_file, 'rb') as f:
            features, labels = pickle.load(f)
    else:
        features, labels = [], []
    
    for audio_files, batch_labels in load_audio_files_in_batches(path, batch_size):
        batch_features = [extract_features(f) for f in audio_files]
        features.extend(batch_features)
        labels.extend(batch_labels)
        gc.collect()  # Explicitly free memory
        
        if checkpoint_file:
            with open(checkpoint_file, 'wb') as f:
                pickle.dump((features, labels), f)

    return features, labels

In [10]:
# Process training and test data
train_checkpoint = os.path.join(checkpoint_dir, "train_features.pkl")
test_checkpoint = os.path.join(checkpoint_dir, "test_features.pkl")

In [11]:
train_features, train_labels = process_batches(train_path, checkpoint_file=train_checkpoint)
test_features, test_labels = process_batches(test_path, checkpoint_file=test_checkpoint)

In [12]:
# Convert labels to integers
label_to_int = {label: i for i, label in enumerate(set(train_labels))}
train_labels = [label_to_int[label] for label in train_labels]
test_labels = [label_to_int[label] for label in test_labels]

In [9]:
# Datos de prueba base de datos
train2_checkpoint = os.path.join(checkpoint_dir, "train2_features.pkl")
test2_checkpoint = os.path.join(checkpoint_dir, "test2_features.pkl")

# Process train and test data
train_features2, train_labels2 = process_batches(train2_dir, checkpoint_file=train2_checkpoint)
test_features2, test_labels2 = process_batches(test2_dir, checkpoint_file=test2_checkpoint)

# Convert lists to numpy arrays
train_features2 = np.array(train_features2)
test_features2 = np.array(test_features2)

In [18]:
# Convert labels to integers
label_to_int2 = {label: i for i, label in enumerate(set(train_labels2))}
train_labels2 = [label_to_int2[label] for label in train_labels2]
test_labels2 = [label_to_int2[label] for label in test_labels2]

In [25]:
# Datos de prueba base de datos 3
train3_checkpoint = os.path.join(checkpoint_dir, "train3_features.pkl")
test3_checkpoint = os.path.join(checkpoint_dir, "test3_features.pkl")

# Process train and test data
train_features3, train_labels3 = process_batches(train3_dir, checkpoint_file=train3_checkpoint)
test_features3, test_labels3 = process_batches(test3_dir, checkpoint_file=test3_checkpoint)

# Convert lists to numpy arrays
train_features3 = np.array(train_features3)
test_features3 = np.array(test_features3)

# Convert labels to integers
label_to_int3 = {label: i for i, label in enumerate(set(train_labels3))}
train_labels3 = [label_to_int3[label] for label in train_labels3]
test_labels3 = [label_to_int3[label] for label in test_labels3]

In [13]:
# Save label mapping for later use
with open(os.path.join(checkpoint_dir, "label_mapping.pkl"), 'wb') as f:
    pickle.dump(label_to_int, f)

In [14]:
# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(train_features, train_labels, test_size=0.2, random_state=42)

In [15]:
# Function to save model checkpoints
def save_model_checkpoint(model, checkpoint_path):
    with open(checkpoint_path, 'wb') as f:
        pickle.dump(model, f)

# Load model checkpoint if exists
def load_model_checkpoint(checkpoint_path):
    if os.path.exists(checkpoint_path):
        with open(checkpoint_path, 'rb') as f:
            return pickle.load(f)
    return None

In [16]:
# Grid search for SVM
svm_checkpoint = os.path.join(checkpoint_dir, "svm_model.pkl")
svm_best = load_model_checkpoint(svm_checkpoint)
if not svm_best:
    svm_params = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf', 'poly']}
    svm = SVC()
    svm_grid = GridSearchCV(svm, svm_params, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
    svm_grid.fit(X_train, y_train)
    svm_best = svm_grid.best_estimator_
    save_model_checkpoint(svm_best, svm_checkpoint)
svm_pred = svm_best.predict(X_val)
print("SVM Classification Report")
print(classification_report(y_val, svm_pred))

Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] END ...............................C=0.1, kernel=linear; total time=10.0min
[CV] END ...............................C=0.1, kernel=linear; total time=10.3min
[CV] END ...............................C=0.1, kernel=linear; total time=10.5min
[CV] END .................................C=0.1, kernel=poly; total time=23.2min
[CV] END ..................................C=0.1, kernel=rbf; total time=34.1min
[CV] END ..................................C=0.1, kernel=rbf; total time=32.2min
[CV] END ..................................C=0.1, kernel=rbf; total time=32.8min
[CV] END .................................C=1, kernel=linear; total time= 4.7min
[CV] END .................................C=1, kernel=linear; total time= 4.9min
[CV] END .................................C=1, kernel=linear; total time= 4.9min
[CV] END .................................C=0.1, kernel=poly; total time=22.5min
[CV] END .................................C=0.1, 

In [21]:
print(classification_report(y_val, svm_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       235
           1       1.00      1.00      1.00       280
           2       1.00      1.00      1.00       241
           3       1.00      1.00      1.00       245
           4       1.00      1.00      1.00       262
           5       1.00      1.00      1.00       258
           6       1.00      1.00      1.00       273
           7       1.00      1.00      1.00       241
           8       1.00      1.00      1.00       274
           9       1.00      1.00      1.00       259
          10       1.00      1.00      1.00       236
          11       1.00      1.00      1.00       242
          12       1.00      1.00      1.00       237
          13       1.00      1.00      1.00       280
          14       1.00      1.00      1.00       257
          15       1.00      1.00      1.00       234
          16       1.00      1.00      1.00       275
          17       1.00    

In [17]:
# Grid search for Random Forest
rf_checkpoint = os.path.join(checkpoint_dir, "rf_model.pkl")
rf_best = load_model_checkpoint(rf_checkpoint)
if not rf_best:
    rf_params = {'n_estimators': [100, 200, 300], 'max_depth': [10, 20, 30]}
    rf = RandomForestClassifier()
    rf_grid = GridSearchCV(rf, rf_params, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
    rf_grid.fit(X_train, y_train)
    rf_best = rf_grid.best_estimator_
    save_model_checkpoint(rf_best, rf_checkpoint)
rf_pred = rf_best.predict(X_val)
print("Random Forest Classification Report")
print(classification_report(y_val, rf_pred))

Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] END .....................max_depth=10, n_estimators=100; total time= 3.0min
[CV] END .....................max_depth=10, n_estimators=100; total time= 3.0min
[CV] END .....................max_depth=10, n_estimators=100; total time= 3.0min
[CV] END .....................max_depth=10, n_estimators=200; total time= 5.9min
[CV] END .....................max_depth=10, n_estimators=200; total time= 6.0min
[CV] END .....................max_depth=10, n_estimators=200; total time= 6.0min
[CV] END .....................max_depth=10, n_estimators=300; total time= 8.9min
[CV] END .....................max_depth=20, n_estimators=100; total time= 4.4min
[CV] END .....................max_depth=10, n_estimators=300; total time= 9.0min
[CV] END .....................max_depth=20, n_estimators=100; total time= 4.5min
[CV] END .....................max_depth=20, n_estimators=100; total time= 4.5min
[CV] END .....................max_depth=10, n_est



[CV] END .....................max_depth=30, n_estimators=100; total time= 4.7min
[CV] END .....................max_depth=30, n_estimators=100; total time= 4.6min
[CV] END .....................max_depth=30, n_estimators=100; total time= 4.7min
[CV] END .....................max_depth=20, n_estimators=300; total time=13.3min
[CV] END .....................max_depth=20, n_estimators=300; total time=13.5min
[CV] END .....................max_depth=30, n_estimators=200; total time= 9.4min
[CV] END .....................max_depth=30, n_estimators=200; total time= 9.4min
[CV] END .....................max_depth=30, n_estimators=200; total time= 9.4min
[CV] END .....................max_depth=30, n_estimators=300; total time=13.3min
[CV] END .....................max_depth=30, n_estimators=300; total time=11.1min
[CV] END .....................max_depth=30, n_estimators=300; total time=11.2min
Random Forest Classification Report
              precision    recall  f1-score   support

           0      

In [19]:
# Grid search for XGBoost
xgb_checkpoint = os.path.join(checkpoint_dir, "xgb_model.pkl")
xgb_best = load_model_checkpoint(xgb_checkpoint)
if not xgb_best:
    xgb_params = {'n_estimators': [100, 200, 300], 'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 6, 9]}
    xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    xgb_grid = GridSearchCV(xgb_model, xgb_params, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
    xgb_grid.fit(X_train, y_train)
    xgb_best = xgb_grid.best_estimator_
    save_model_checkpoint(xgb_best, xgb_checkpoint)
xgb_pred = xgb_best.predict(X_val)
print("XGBoost Classification Report")
print(classification_report(y_val, xgb_pred))

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=14.7min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=14.9min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=14.9min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=32.5min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=31.0min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=31.1min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=300; total time=48.5min
[CV] END ..learning_rate=0.01, max_depth=6, n_estimators=100; total time=26.0min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=300; total time=50.7min
[CV] END ..learning_rate=0.01, max_depth=6, n_estimators=100; total time=26.0min
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=300; total time=50.6min
[CV] END ..learning_rate=0.01, max_depth=6, n_es

In [20]:
print(classification_report(y_val, xgb_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       235
           1       1.00      1.00      1.00       280
           2       1.00      1.00      1.00       241
           3       1.00      1.00      1.00       245
           4       1.00      1.00      1.00       262
           5       1.00      1.00      1.00       258
           6       1.00      1.00      1.00       273
           7       1.00      1.00      1.00       241
           8       1.00      1.00      1.00       274
           9       1.00      1.00      1.00       259
          10       1.00      1.00      1.00       236
          11       1.00      1.00      1.00       242
          12       1.00      1.00      1.00       237
          13       1.00      1.00      1.00       280
          14       1.00      1.00      1.00       257
          15       1.00      1.00      1.00       234
          16       1.00      1.00      1.00       275
          17       1.00    

In [7]:
checkpoint_dir = "/teamspace/studios/this_studio/checkpoint"
svm_checkpoint = os.path.join(checkpoint_dir, "svm_model.pkl")
rf_checkpoint = os.path.join(checkpoint_dir, "rf_model.pkl")
xgb_checkpoint = os.path.join(checkpoint_dir, "xgb_model.pkl")
label_mapping_path = os.path.join(checkpoint_dir, "label_mapping.pkl")

In [10]:
# Load the trained models
def load_model_checkpoint(checkpoint_path):
    with open(checkpoint_path, 'rb') as f:
        return pickle.load(f)

In [12]:
svm_checkpoint = os.path.join(checkpoint_dir, "svm_model.pkl")
rf_checkpoint = os.path.join(checkpoint_dir, "rf_model.pkl")
xgb_checkpoint = os.path.join(checkpoint_dir, "xgb_model.pkl")

In [13]:
svm_model = load_model_checkpoint(svm_checkpoint)
rf_model = load_model_checkpoint(rf_checkpoint)
xgb_model = load_model_checkpoint(xgb_checkpoint)

In [21]:
svm_pred = svm_model.predict(train_features2)
rf_pred = rf_model.predict(train_features2)
xgb_pred = xgb_model.predict(train_features2)

In [22]:
print("SVM Test Classification Report")
print(classification_report(train_labels2, svm_pred))

SVM Test Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.00      0.00      0.00        16
           2       0.00      0.00      0.00        16
           3       0.00      0.00      0.00        16
           4       0.00      0.00      0.00        16
           5       0.00      0.00      0.00        16
           6       0.00      0.00      0.00        16
           7       0.00      0.00      0.00        16
           8       0.00      0.00      0.00        16
           9       0.00      0.00      0.00        16
          10       0.00      0.00      0.00        16
          11       0.05      0.12      0.07        16
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        16
          14       0.04      0.50      0.07        16
          15       0.00      0.00      0.00        16
          16       0.00      0.00      0.00       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
print("Random Forest Test Classification Report")
print(classification_report(train_labels2, rf_pred))

Random Forest Test Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.00      0.00      0.00        16
           2       0.00      0.00      0.00        16
           3       0.00      0.00      0.00        16
           4       0.00      0.00      0.00        16
           5       0.00      0.00      0.00        16
           6       0.00      0.00      0.00        16
           7       0.00      0.00      0.00        16
           8       0.07      0.38      0.12        16
           9       0.00      0.00      0.00        16
          10       0.00      0.00      0.00        16
          11       0.00      0.00      0.00        16
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        16
          14       0.00      0.00      0.00        16
          15       0.00      0.00      0.00        16
          16       0.00      0.00      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
print("XGBoost Test Classification Report")
print(classification_report(train_labels2, xgb_pred))

XGBoost Test Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.00      0.00      0.00        16
           2       0.00      0.00      0.00        16
           3       0.00      0.00      0.00        16
           4       0.00      0.00      0.00        16
           5       0.00      0.00      0.00        16
           6       0.00      0.00      0.00        16
           7       0.00      0.00      0.00        16
           8       0.08      0.06      0.07        16
           9       0.00      0.00      0.00        16
          10       0.00      0.00      0.00        16
          11       0.00      0.00      0.00        16
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        16
          14       0.00      0.00      0.00        16
          15       0.00      0.00      0.00        16
          16       0.00      0.00      0.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
svm_pred = svm_model.predict(train_features3)
rf_pred = rf_model.predict(train_features3)
xgb_pred = xgb_model.predict(train_features3)

In [27]:
print("SVM Test Classification Report")
print(classification_report(train_labels3, svm_pred))

SVM Test Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00         8
           2       0.00      0.00      0.00         8
           3       0.00      0.00      0.00         8
           4       0.00      0.00      0.00         8
           5       0.17      0.12      0.14         8
           6       0.00      0.00      0.00         8
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00         8
           9       0.00      0.00      0.00         8
          10       0.05      0.25      0.09         8
          11       0.00      0.00      0.00         8
          12       0.00      0.00      0.00         8
          13       0.00      0.00      0.00         8
          14       0.00      0.00      0.00         8
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
print("Random Forest Test Classification Report")
print(classification_report(train_labels3, rf_pred))

Random Forest Test Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00         8
           2       0.00      0.00      0.00         8
           3       0.00      0.00      0.00         8
           4       0.00      0.00      0.00         8
           5       0.00      0.00      0.00         8
           6       0.13      0.25      0.17         8
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00         8
           9       0.00      0.00      0.00         8
          10       0.08      0.12      0.10         8
          11       0.00      0.00      0.00         8
          12       0.11      0.38      0.17         8
          13       0.00      0.00      0.00         8
          14       0.00      0.00      0.00         8
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
print("XGBoost Test Classification Report")
print(classification_report(train_labels3, xgb_pred))

XGBoost Test Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00         8
           2       0.00      0.00      0.00         8
           3       0.00      0.00      0.00         8
           4       0.17      0.12      0.14         8
           5       0.00      0.00      0.00         8
           6       0.11      0.12      0.12         8
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00         8
           9       0.00      0.00      0.00         8
          10       0.29      0.25      0.27         8
          11       0.00      0.00      0.00         8
          12       0.00      0.00      0.00         8
          13       0.00      0.00      0.00         8
          14       0.04      0.12      0.06         8
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
# Configurar el pipeline
pipe = pipeline(
    "audio-classification", model="alejogil35/distilhubert-finetuned-chorddetection2"
)

# Ruta a la carpeta base
base_dir = "/teamspace/studios/this_studio/chord_examples/base de datos 3"

def load_audio_files(base_dir, dataset_type):
    audio_files = []
    labels = []
    
    dataset_path = os.path.join(base_dir, dataset_type)
    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Dataset path does not exist: {dataset_path}")
    
    for class_folder in os.listdir(dataset_path):
        class_path = os.path.join(dataset_path, class_folder)
        if os.path.isdir(class_path):
            for file in os.listdir(class_path):
                if file.endswith(".wav"):
                    file_path = os.path.join(class_path, file)
                    audio_files.append(file_path)
                    labels.append(class_folder)
    
    return audio_files, labels

train_audio_files, train_labels = load_audio_files(base_dir, "train")
test_audio_files, test_labels = load_audio_files(base_dir, "test")

In [47]:
def predict_audio_files(audio_files, pipe, sr=16000):
    predictions = []
    for audio_path in audio_files:
        audio, _ = librosa.load(audio_path, sr=sr)
        pred = pipe(audio)
        pred_label = pred[0]['label']
        predictions.append(pred_label)
    return predictions

train_predictions = predict_audio_files(train_audio_files, pipe)
test_predictions = predict_audio_files(test_audio_files, pipe)

In [48]:
def evaluate_model(true_labels, predicted_labels):
    accuracy = accuracy_score(true_labels, predicted_labels)
    report = classification_report(true_labels, predicted_labels)
    return accuracy, report

train_accuracy, train_report = evaluate_model(train_labels, train_predictions)
test_accuracy, test_report = evaluate_model(test_labels, test_predictions)

print("Train Accuracy:", train_accuracy)
print("Train Classification Report:\n", train_report)
print("Test Accuracy:", test_accuracy)
print("Test Classification Report:\n", test_report)

Train Accuracy: 0.11458333333333333
Train Classification Report:
                    precision    recall  f1-score   support

      DoAumentada       0.00      0.00      0.00         0
     DoDisminuida       0.00      0.00      0.00         0
          DoMayor       0.12      0.38      0.19         8
          DoMenor       0.00      0.00      0.00         8
      FaAumentada       0.00      0.00      0.00         0
     FaDisminuida       0.00      0.00      0.00         0
          FaMayor       0.06      0.12      0.08         8
          FaMenor       0.33      0.12      0.18         8
 LaBemolAumentada       0.00      0.00      0.00         0
     LaBemolMayor       0.00      0.00      0.00         8
     LaBemolMenor       0.00      0.00      0.00         8
          LaMayor       0.12      0.12      0.12         8
          LaMenor       0.18      0.25      0.21         8
      MiAumentada       0.00      0.00      0.00         0
     MiBemolMayor       0.08      0.12      0.10

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
