In [1]:
import os
import json
import warnings
# NOTE(review): called with only the action argument, this installs an
# 'ignore' filter that matches every warning category and module, so all
# warnings raised after this cell runs are suppressed for the kernel session.
warnings.filterwarnings('ignore')

In [2]:
# Location of the UrbanSound8K dataset, relative to the working directory.
_URBANSOUND_ROOT = '../UrbanSound8K'

# Metadata CSV shipped with the dataset.
METADATA_PATH = f'{_URBANSOUND_ROOT}/metadata/UrbanSound8K.csv'
# Base directory of the dataset's audio files.
AUDIO_BASE_PATH = f'{_URBANSOUND_ROOT}/audio'

In [None]:
import torch
import torch.nn as nn
import numpy as np
import os
import json
from datetime import datetime


from torch.utils.data import Dataset, DataLoader
from torchaudio.transforms import Spectrogram, MelSpectrogram, TimeStretch, AmplitudeToDB
from torch.distributions import Uniform

from helper_classes import _num_stft_bins, RandomTimeStretch, SpecNormalization, MelspectrogramStretch

from torch.utils.data import Dataset, DataLoader
from utils import load_fold_paths
from helper_classes import AudioDataset, TrainingHistory


In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report
from graph_utils import print_metrics, plot_confusion_matrix, plot_fold_accuracies, plot_all_metrics_across_folds, print_cross_validation_results
from utils import compute_metrics, cleanup
from RNN import AudioRNN, LazyAudioRNNDataset, collate_fn_rnn

In [None]:
def train_audio_rnn_cross_validation(
    data_cache_dir,
    config,
    epochs=20,
    batch_size=32,
    lr=0.001,
    device=None,
    num_classes=10
):
    """
    10-Fold Cross-Validation Pipeline for UrbanSound8K

    For every test fold k in 1..10 a fresh ``AudioRNN`` is trained on eight
    folds, monitored on validation fold ``(k % 10) + 1`` and finally evaluated
    on fold k. The run configuration is written to
    ``../saved_cv/<MMDD_HHMMSS>/config.json``; each fold writes its metrics,
    classification report, training history and plots to the ``fold_<k>``
    sub-directory of that run directory.

    Args:
        data_cache_dir: Cache location passed straight to ``load_fold_paths``.
        config: Dict-like, JSON-serializable configuration. It is dumped to
            ``config.json``, handed to ``AudioRNN`` and
            ``AudioRNN.trainModel``, and queried for the optional Adam
            settings ``betas`` (two-element sequence), ``eps`` and
            ``weight_decay``. Settings that are missing or ``None`` fall back
            to the ``torch.optim.Adam`` defaults.
        epochs: Number of epochs, forwarded to ``AudioRNN.trainModel``.
        batch_size: Batch size shared by the train/val/test DataLoaders.
        lr: Learning rate for Adam.
        device: ``torch.device`` or device string. Defaults to CUDA when
            available, otherwise CPU.
        num_classes: Number of target classes. Class labels are treated as
            the integers ``0 .. num_classes - 1`` when building the confusion
            matrix and the classification report.

    Returns:
        Whatever ``print_cross_validation_results(fold_results, save_dir)``
        returns.
    """
    # Normalise the device so that strings such as 'cuda:0' are accepted and
    # the GPU-only branches below follow the device that is actually used.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)
    print(f"Using device: {device}")
    on_gpu = device.type == 'cuda'

    if on_gpu:
        torch.cuda.empty_cache()
        print(f"Initial GPU memory allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")

    # Four mappings indexed by fold number (1..10).
    train_paths, train_labels, test_paths, test_labels = load_fold_paths(data_cache_dir=data_cache_dir)

    timestamp = datetime.now().strftime("%m%d_%H%M%S")
    save_dir = f"../saved_cv/{timestamp}"
    os.makedirs(save_dir, exist_ok=True)
    print(f"Save directory: {save_dir}")

    with open(os.path.join(save_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)

    # Loop-invariant optimizer settings: forward only what the config really
    # provides so a missing key does not end up as ``None`` inside Adam.
    adam_kwargs = {}
    betas = config.get('betas')
    if betas is not None:
        adam_kwargs['betas'] = (betas[0], betas[1])
    for option in ('eps', 'weight_decay'):
        value = config.get(option)
        if value is not None:
            adam_kwargs[option] = value

    # Explicit label list keeps the confusion matrix and the classification
    # report at a fixed size even if a fold happens to miss a class.
    class_labels = list(range(num_classes))
    class_names = [f'Class_{i}' for i in class_labels]

    fold_results = []

    print("\n" + "="*60)
    print("10-FOLD CROSS-VALIDATION")
    print("="*60)
    print("Scheme: 1 fold test, 1 fold validation, 8 folds training")
    print("="*60)

    for test_fold in range(1, 11):
        # Validation fold is the next fold, wrapping from 10 back to 1.
        val_fold = (test_fold % 10) + 1
        train_folds = [f for f in range(1, 11) if f != test_fold and f != val_fold]

        print(f"\n{'='*60}")
        print(f"FOLD {test_fold}/10")
        print(f"{'='*60}")
        print(f"Test fold: {test_fold}")
        print(f"Validation fold: {val_fold}")
        print(f"Training folds: {train_folds}")

        fold_dir = os.path.join(save_dir, f'fold_{test_fold}')
        os.makedirs(fold_dir, exist_ok=True)

        print("\n" + "="*60)
        print("STEP 1: Loading Data from Cache")
        print("="*60)

        # Training samples come from the "train" mappings of the eight
        # training folds; validation and test samples come from the "test"
        # mappings of their respective folds.
        X_train = [path for fold in train_folds for path in train_paths[fold]]
        y_train = [label for fold in train_folds for label in train_labels[fold]]

        X_val = test_paths[val_fold]
        y_val = test_labels[val_fold]

        X_test = test_paths[test_fold]
        y_test = test_labels[test_fold]

        train_dataset = LazyAudioRNNDataset(X_train, y_train)
        val_dataset   = LazyAudioRNNDataset(X_val, y_val)
        test_dataset  = LazyAudioRNNDataset(X_test, y_test)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_fn_rnn)
        val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn_rnn)
        test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn_rnn)

        print("\n" + "="*60)
        print("STEP 2: Creating Model")
        print("="*60)

        # A new model per fold so no weights leak between folds.
        model = AudioRNN(num_classes=num_classes, config=config)
        model = model.to(device)

        total_params = sum(p.numel() for p in model.parameters())
        print(f"Total parameters: {total_params:,}")

        if on_gpu:
            print(f"GPU memory after model creation: {torch.cuda.memory_allocated()/1024**2:.2f} MB")

        print("\n" + "="*60)
        print("STEP 3: Training Model")
        print("="*60)

        criterion = nn.NLLLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, **adam_kwargs)

        history = TrainingHistory()

        best_val_loss, best_val_acc = AudioRNN.trainModel(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            test_fold=test_fold,
            history=history,
            config=config,
            fold_dir=fold_dir,
            epochs=epochs,
            optimizer=optimizer,
            criterion=criterion,
            device=device
        )

        print("\n" + "="*60)
        print("STEP 4: Testing on Held-Out Fold")
        print("="*60)

        # 'best_model.pth' is expected to have been written into fold_dir
        # during training (AudioRNN.trainModel is not visible here).
        # map_location places the tensors on the device chosen for this run.
        checkpoint = torch.load(os.path.join(fold_dir, 'best_model.pth'), map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])

        model.eval()

        avg_test_loss, test_acc, all_predictions, all_targets = AudioRNN.test(model, criterion, test_loader, device)

        print(f"\nTest Loss: {avg_test_loss:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        test_metrics = compute_metrics(all_predictions, all_targets, num_classes)
        print_metrics(test_metrics, prefix="Test ")

        fold_confusion_matrix = confusion_matrix(all_targets, all_predictions, labels=class_labels)

        fold_result = {
            'fold': test_fold,
            'test_fold': test_fold,
            'val_fold': val_fold,
            'train_folds': train_folds,
            'test_loss': avg_test_loss,
            'test_accuracy': test_metrics['accuracy'],
            'test_precision_macro': test_metrics['precision_macro'],
            'test_precision_weighted': test_metrics['precision_weighted'],
            'test_recall_macro': test_metrics['recall_macro'],
            'test_recall_weighted': test_metrics['recall_weighted'],
            'test_f1_macro': test_metrics['f1_macro'],
            'test_f1_weighted': test_metrics['f1_weighted'],
            'test_precision_per_class': test_metrics['precision_per_class'],
            'test_recall_per_class': test_metrics['recall_per_class'],
            'test_f1_per_class': test_metrics['f1_per_class'],
            'best_val_accuracy': best_val_acc,
            'best_val_loss': best_val_loss,
            'confusion_matrix': fold_confusion_matrix.tolist()
        }
        fold_results.append(fold_result)

        with open(os.path.join(fold_dir, 'fold_results.json'), 'w') as f:
            json.dump(fold_result, f, indent=2)

        # labels= is required here: without it scikit-learn raises a
        # ValueError when the classes present in this fold do not match the
        # length of target_names.
        report = classification_report(all_targets, all_predictions,
                                      labels=class_labels,
                                      target_names=class_names,
                                      digits=4)
        with open(os.path.join(fold_dir, 'classification_report.txt'), 'w') as f:
            f.write(f"Classification Report - Fold {test_fold}\n")
            f.write("="*60 + "\n")
            f.write(report)

        history.save(os.path.join(fold_dir, 'history.json'))
        history.plot(save_path=os.path.join(fold_dir, 'training_curves.png'))

        plot_confusion_matrix(
            fold_confusion_matrix,
            save_path=os.path.join(fold_dir, 'confusion_matrix.png'),
            title=f'Confusion Matrix - Fold {test_fold}'
        )

        print(f"\nFold {test_fold} completed!")
        print(f"Results saved to: {fold_dir}")

        # Hand every per-fold object to the project's cleanup helper before
        # the next fold starts.
        cleanup(train_loader, val_loader, test_loader,
            train_dataset, val_dataset, test_dataset,
            model, optimizer, criterion, history,
            all_predictions, all_targets,
            checkpoint)

    print("\n" + "="*60)
    print("CROSS-VALIDATION RESULTS")
    print("="*60)

    return print_cross_validation_results(fold_results, save_dir)


In [11]:
import os
from datetime import datetime

from utils import read_config

config = read_config("../config/rnn.json")

# Select the newest cache directory below base_dir. Cache folders are expected
# to be named with a zero-padded "%m%d_%H%M%S" timestamp.
# NOTE(review): the names carry no year, so "newest" is only reliable for
# caches created within the same calendar year.
base_dir = '../data_cache'
folders = [f for f in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, f))]

folder_dates = []
for folder in folders:
    try:
        # Prefix a fixed leap year before parsing: strptime's default year
        # (1900) is not a leap year, so a cache created on Feb 29 would
        # otherwise be rejected. The relative ordering is unaffected.
        folder_date = datetime.strptime(f"2000{folder}", '%Y%m%d_%H%M%S')
    except ValueError:
        print(f"Folder name '{folder}' does not match expected format.")
    else:
        folder_dates.append((folder_date, folder))

if not folder_dates:
    # Fail fast: later cells need data_cache_dir, and continuing would either
    # raise a NameError there or silently reuse a value from an earlier run.
    raise FileNotFoundError(f"No valid folders found in '{base_dir}'.")

most_recent_folder = max(folder_dates, key=lambda x: x[0])[1]
data_cache_dir = os.path.join(base_dir, most_recent_folder)
print(f"Most recent folder: {data_cache_dir}")

Most recent folder: ../data_cache/1128_230122


In [12]:
# Run the complete cross-validation with the configuration and cache directory
# prepared in the previous cell; the compute device is left to the function's
# default selection.
results = train_audio_rnn_cross_validation(
    data_cache_dir,
    config,
    epochs=20, batch_size=16, lr=0.001,
    num_classes=10,
)

print(f"Model saved to: {results['save_dir']}")

Using device: cuda
Initial GPU memory allocated: 0.00 MB


/opt/amdgpu/share/libdrm/amdgpu.ids: No such file or directory
/opt/amdgpu/share/libdrm/amdgpu.ids: No such file or directory


FileNotFoundError: [Errno 2] No such file or directory: '../data_cache/1128_230122/fold_1/train/labels.json'