# 🎵 Treinamento de Modelos Deep Learning - Urban Sound Classification

Este notebook implementa o treinamento completo de vários modelos de deep learning para classificação de sons urbanos:
- **CNN** (Convolutional Neural Network)
- **RNN** (Recurrent Neural Network)
- **GRU** (Gated Recurrent Unit)
- **BiRNN** (Bidirectional RNN)
- **LSTM** (Long Short-Term Memory)
- **LSTM com Attention**

## Dataset: Urban Sound 8K
- 10 classes de sons urbanos
- ~8732 samples de áudio
- 4 segundos cada

## 1️⃣ Imports e Configurações

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
import librosa
import librosa.display
import time
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# Imports dos nossos módulos
import sys
sys.path.append('.')
from dataloader import Dataloader
from models.CNN import SoundCNN
from models.RNN import SoundRNN, SoundGRU, SoundBiRNN
from models.LSTM import SoundLSTM, SoundLSTMAttention
from config import DEVICE, TRAINING, DATASET, MODELS_DIR, PLOTS_DIR, LOGS_DIR

# Confirm the imports succeeded, then echo the device and the main
# training settings read from the project config.
print(
    f"‚úÖ Imports completados",
    f"üì± Device dispon√≠vel: {DEVICE}",
    f"üéØ Configura√ß√µes:",
    sep="\n",
)
print(
    f"   - Epochs: {TRAINING['epochs']}",
    f"   - Batch size: {TRAINING['batch_size']}",
    f"   - Learning rate: {TRAINING['learning_rate']}",
    sep="\n",
)

ModuleNotFoundError: No module named 'soundata'

## 2️⃣ Função de Preprocessamento de Áudio

In [None]:
def preprocess_audio(clip, target_length=174, n_mels=40, sr=22050,
                     n_fft=2048, hop_length=512, pad_value=None):
    """
    Convert an audio clip into a fixed-size log-mel spectrogram.

    Args:
        clip: object whose ``audio`` attribute unpacks to
            ``(samples, sample_rate)`` (the original docstring names
            ``soundata.core.Clip``).
        target_length: number of time frames in the returned array.
        n_mels: number of mel bins.
        sr: sample rate the audio is resampled to before analysis.
        n_fft: FFT window size passed to librosa.
        hop_length: hop between successive frames passed to librosa.
        pad_value: dB value used to right-pad clips shorter than
            ``target_length``. ``None`` (default) pads with the quietest
            value found in the clip's own spectrogram. Pass ``0`` to get
            the previous zero-padding behaviour.

    Returns:
        numpy array of shape ``[n_mels, target_length]`` in dB, measured
        relative to the clip's own peak (``ref=np.max``).
    """
    audio_data, original_sr = clip.audio

    # Downmix before resampling so only one channel has to be resampled.
    # NOTE(review): assumes multi-channel audio is (channels, samples) - confirm.
    if np.ndim(audio_data) > 1:
        audio_data = np.mean(audio_data, axis=0)

    if original_sr != sr:
        audio_data = librosa.resample(audio_data, orig_sr=original_sr, target_sr=sr)

    mel_spec = librosa.feature.melspectrogram(
        y=audio_data,
        sr=sr,
        n_mels=n_mels,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    # With ref=np.max the loudest bin maps to 0 dB and everything else is negative.
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    n_frames = mel_spec_db.shape[1]
    if n_frames < target_length:
        # Padding with 0 would insert peak-level energy after the clip;
        # default to the quietest observed value so padding reads as silence.
        fill = mel_spec_db.min() if pad_value is None else pad_value
        mel_spec_db = np.pad(
            mel_spec_db,
            ((0, 0), (0, target_length - n_frames)),
            mode='constant',
            constant_values=fill,
        )
    else:
        mel_spec_db = mel_spec_db[:, :target_length]

    return mel_spec_db

print("‚úÖ Fun√ß√£o de preprocessamento definida")

## 3️⃣ Dataset PyTorch Personalizado

In [None]:
class UrbanSoundDataset(Dataset):
    """
    Map-style PyTorch dataset over an indexable source of (clip, label) pairs.

    Every item is preprocessed on access: the clip goes through
    ``preprocess_audio`` and is returned with a leading channel axis,
    together with its label as a long tensor.
    """

    def __init__(self, dataloader, indices=None):
        # `dataloader` only needs len() and integer indexing.
        self.dataloader = dataloader
        if indices is None:
            # No subset requested: expose every item of the source.
            indices = list(range(len(dataloader)))
        self.indices = indices

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        # Translate the position in this dataset to the source index.
        clip, label = self.dataloader[self.indices[idx]]

        spectrogram = preprocess_audio(clip)

        # Prepend a channel axis to the spectrogram tensor.
        features = torch.unsqueeze(torch.FloatTensor(spectrogram), 0)
        target = torch.LongTensor([label])[0]

        return features, target

print("‚úÖ Dataset PyTorch definido")

## 4️⃣ Carregar Dataset

In [None]:
# Dataset location - adjust to match the local machine
DATASET_PATH = r"C:\Users\diogo\OneDrive\Documents\UrbanSound8K\UrbanSound8K"

print("üîÑ Carregando dataset...")
dataloader = Dataloader(DATASET_PATH, verbose=False)

# Wrap the project loader in the PyTorch dataset
full_dataset = UrbanSoundDataset(dataloader)

# 70/15/15 train/validation/test split; test absorbs the rounding remainder
total_size = len(full_dataset)
train_size, val_size = (int(fraction * total_size) for fraction in (0.7, 0.15))
test_size = total_size - (train_size + val_size)

# Seeded generator so the split is identical on every run.
# NOTE(review): this is a purely random split; if the dataset's predefined
# folds are meant to be respected, this needs revisiting - confirm.
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

print(
    f"‚úÖ Dataset carregado:",
    f"   - Total: {total_size} samples",
    f"   - Train: {train_size} samples",
    f"   - Validation: {val_size} samples",
    f"   - Test: {test_size} samples",
    sep="\n",
)

# One DataLoader per split; only the training split is shuffled
train_loader, val_loader, test_loader = (
    DataLoader(subset, batch_size=TRAINING['batch_size'], shuffle=reshuffle, num_workers=0)
    for subset, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

print(f"‚úÖ DataLoaders criados")

## 5️⃣ Visualizar Exemplos do Dataset

In [None]:
# Show the first six training spectrograms on a 2x3 grid
fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.ravel()

class_mapping = dataloader.get_label_mapping()

for i, panel in enumerate(axes):
    mel_spec, label = train_dataset[i]

    # Drop the channel axis before handing the array to imshow
    panel.imshow(
        mel_spec.squeeze().numpy(),
        aspect='auto',
        origin='lower',
        cmap='viridis',
    )
    panel.set_title(f"Class: {class_mapping[label.item()]}")
    panel.set_xlabel('Time Frames')
    panel.set_ylabel('Mel Bins')

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'dataset_examples.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ Exemplos visualizados")

## 6️⃣ Funções de Treino e Avaliação

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """
    Train the model for one pass over ``dataloader``.

    Args:
        model: module to optimise; switched to train mode here.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the loss averaged per sample and the
        accuracy in percent. Both are ``0.0`` when no samples were seen.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    pbar = tqdm(dataloader, desc='Training', leave=False)
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        batch_loss = loss.item()
        # Weight each batch by its size so a short final batch does not
        # count as much as a full one.
        # NOTE(review): assumes `criterion` returns the batch mean - confirm.
        loss_sum += batch_loss * batch_size
        _, predicted = torch.max(outputs, 1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        pbar.set_postfix({'loss': f'{batch_loss:.4f}', 'acc': f'{100*correct/total:.2f}%'})

    if total == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0

    epoch_loss = loss_sum / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc


def validate(model, dataloader, criterion, device):
    """
    Evaluate the model on ``dataloader`` without updating its weights.

    Args:
        model: module to evaluate; switched to eval mode here.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``.
        device: device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the loss averaged per sample and the
        accuracy in percent. Both are ``0.0`` when no samples were seen.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc='Validating', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight each batch by its size so a short final batch does not
            # count as much as a full one.
            # NOTE(review): assumes `criterion` returns the batch mean - confirm.
            loss_sum += loss.item() * batch_size
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0

    epoch_loss = loss_sum / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc


def train_model(model, train_loader, val_loader, criterion, optimizer, 
                num_epochs, device, model_name, patience=10):
    """
    Train ``model`` with early stopping on validation accuracy.

    After training, the weights from the epoch with the highest
    validation accuracy are loaded back into ``model``.

    Args:
        model: module to train (modified in place).
        train_loader: batches used by ``train_epoch``.
        val_loader: batches used by ``validate``.
        criterion: loss callable.
        optimizer: optimizer bound to ``model``'s parameters.
        num_epochs: maximum number of epochs.
        device: device passed through to the epoch helpers.
        model_name: label used in the progress output.
        patience: stop after this many consecutive epochs without a new
            best validation accuracy.

    Returns:
        ``(history, best_val_acc)``: ``history`` maps ``train_loss``,
        ``train_acc``, ``val_loss`` and ``val_acc`` to per-epoch lists.
    """
    import copy  # local import: only needed for the best-weights snapshot

    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }
    
    best_val_acc = 0.0
    best_model_state = None
    epochs_no_improve = 0
    
    print(f"\n{'='*80}")
    print(f"üéØ Treinando modelo: {model_name}")
    print(f"{'='*80}")
    
    start_time = time.time()
    
    for epoch in range(num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}]")
        
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.2f}%")
        
        # Early-stopping bookkeeping
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Deep copy is required: state_dict() hands back the live
            # parameter tensors, so a shallow dict copy would keep changing
            # as training continues and the "best" snapshot would silently
            # become the last-epoch weights.
            best_model_state = copy.deepcopy(model.state_dict())
            epochs_no_improve = 0
            print(f"‚úÖ Novo melhor modelo! Val Acc: {best_val_acc:.2f}%")
        else:
            epochs_no_improve += 1
            print(f"‚ö†Ô∏è Sem melhoria por {epochs_no_improve} √©pocas")
        
        if epochs_no_improve >= patience:
            print(f"\nüõë Early stopping ap√≥s {epoch+1} √©pocas")
            break
    
    # Restore the best snapshot (None if validation accuracy never rose above 0)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    elapsed_time = time.time() - start_time
    print(f"\n‚è±Ô∏è Tempo total de treino: {elapsed_time/60:.2f} minutos")
    print(f"üèÜ Melhor Val Acc: {best_val_acc:.2f}%")
    
    return history, best_val_acc

print("‚úÖ Fun√ß√µes de treino definidas")

## 7️⃣ Função para Avaliar no Test Set

In [None]:
def evaluate_model(model, test_loader, device, model_name, class_mapping):
    """
    Evaluate the model on the test set, print metrics and save a confusion matrix.

    Args:
        model: trained module; switched to eval mode here.
        test_loader: iterable yielding ``(inputs, labels)`` batches.
        device: device the batches are moved to.
        model_name: label used in the output and in the saved file name.
        class_mapping: mapping indexed by ``0 .. len(class_mapping) - 1``
            giving the display name of each class.

    Returns:
        ``(test_acc, test_f1, cm)``: accuracy as a fraction, the weighted
        F1 score, and the confusion matrix with one row/column per entry
        of ``class_mapping``.
    """
    model.eval()
    all_preds = []
    all_labels = []
    
    print(f"\nüîç Avaliando {model_name} no test set...")
    
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing'):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    test_acc = accuracy_score(all_labels, all_preds)
    test_f1 = f1_score(all_labels, all_preds, average='weighted')
    
    print(f"\nüìä M√©tricas do Test Set:")
    print(f"   - Accuracy: {test_acc*100:.2f}%")
    print(f"   - F1-Score (weighted): {test_f1:.4f}")
    
    # Pin the label set explicitly. Otherwise a class that is absent from
    # both labels and predictions makes classification_report reject
    # `target_names` and shrinks the confusion matrix so it no longer lines
    # up with the tick labels.
    class_ids = list(range(len(class_mapping)))
    target_names = [class_mapping[i] for i in class_ids]

    print(f"\nüìã Classification Report:")
    print(classification_report(all_labels, all_preds,
                                labels=class_ids, target_names=target_names))
    
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=target_names, yticklabels=target_names)
    plt.title(f'Confusion Matrix - {model_name}\nAccuracy: {test_acc*100:.2f}%')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(PLOTS_DIR / f'confusion_matrix_{model_name}.png', dpi=150, bbox_inches='tight')
    plt.show()
    
    return test_acc, test_f1, cm

print("‚úÖ Fun√ß√£o de avalia√ß√£o definida")

## 8️⃣ Função para Plotar Histórico de Treino

In [None]:
def plot_training_history(history, model_name):
    """
    Plot per-epoch loss and accuracy curves side by side and save the figure.

    ``history`` must provide the lists ``train_loss``, ``val_loss``,
    ``train_acc`` and ``val_acc``; the image is written under ``PLOTS_DIR``.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # (axis, train key, train label, val key, val label, y label, title)
    panels = (
        (ax1, 'train_loss', 'Train Loss', 'val_loss', 'Val Loss',
         'Loss', f'{model_name} - Loss'),
        (ax2, 'train_acc', 'Train Acc', 'val_acc', 'Val Acc',
         'Accuracy (%)', f'{model_name} - Accuracy'),
    )

    for axis, train_key, train_label, val_key, val_label, y_label, title in panels:
        axis.plot(history[train_key], label=train_label, marker='o')
        axis.plot(history[val_key], label=val_label, marker='s')
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.set_title(title)
        axis.legend()
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(PLOTS_DIR / f'training_history_{model_name}.png', dpi=150, bbox_inches='tight')
    plt.show()

print("‚úÖ Fun√ß√£o de plot definida")

## 9️⃣ Treinar Todos os Modelos

### 9.1 Configurar Modelos

In [None]:
# Instantiate every architecture, keyed by its display name.
# The CNN takes only the class count; the recurrent models also take the
# spectrogram height and width.
models_config = {'SoundCNN': SoundCNN(num_classes=10)}
models_config.update(
    (label, builder(num_classes=10, input_height=40, input_width=174))
    for label, builder in (
        ('SoundRNN', SoundRNN),
        ('SoundGRU', SoundGRU),
        ('SoundBiRNN', SoundBiRNN),
        ('SoundLSTM', SoundLSTM),
        ('SoundLSTMAttention', SoundLSTMAttention),
    )
)

# Report the parameter count of each model
print("\nüìä Modelos configurados:")
print("=" * 60)
for name, model in models_config.items():
    num_params = sum(weights.numel() for weights in model.parameters())
    print(f"{name:20s} - {num_params:>12,} par√¢metros")
print("=" * 60)

### 9.2 Loop de Treinamento

In [None]:
# Per-model outcomes, keyed by model name
results = {}

# Train, plot, evaluate and persist each configured model in turn
for model_name, model in models_config.items():
    print(
        f"\n\n{'#'*80}",
        f"# TREINANDO: {model_name}",
        f"{'#'*80}\n",
        sep="\n",
    )

    model = model.to(DEVICE)

    # Fresh loss and optimizer for every model
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        lr=TRAINING['learning_rate'],
        weight_decay=TRAINING['weight_decay'],
    )

    history, best_val_acc = train_model(
        model=model,
        model_name=model_name,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=DEVICE,
        num_epochs=TRAINING['epochs'],
        patience=TRAINING['early_stopping_patience'],
    )

    plot_training_history(history, model_name)

    # Held-out test metrics
    test_acc, test_f1, cm = evaluate_model(
        model=model,
        model_name=model_name,
        test_loader=test_loader,
        device=DEVICE,
        class_mapping=dataloader.get_label_mapping(),
    )

    # Persist the trained weights
    model_path = MODELS_DIR / f"{model_name}.pt"
    torch.save(model.state_dict(), model_path)
    print(f"\nüíæ Modelo salvo em: {model_path}")

    results[model_name] = {
        'history': history,
        'best_val_acc': best_val_acc,
        'test_acc': test_acc,
        'test_f1': test_f1,
        'confusion_matrix': cm,
    }

    print(
        f"\n{'='*80}",
        f"‚úÖ {model_name} conclu√≠do!",
        f"{'='*80}",
        sep="\n",
    )

print("\n\nüéâ TODOS OS MODELOS TREINADOS!")

## 🔟 Comparação Final dos Modelos

In [None]:
# Build the model comparison table
import pandas as pd

# Rank on the numeric test accuracy BEFORE formatting. The table stores the
# accuracies as formatted strings, and sorting those would be lexicographic
# (e.g. "9.50" would rank above "85.00").
ranked_results = sorted(
    results.items(),
    key=lambda item: item[1]['test_acc'],
    reverse=True,
)

comparison_data = []
for name, result in ranked_results:
    num_params = sum(p.numel() for p in models_config[name].parameters())
    comparison_data.append({
        'Model': name,
        'Parameters': f"{num_params:,}",
        'Best Val Acc (%)': f"{result['best_val_acc']:.2f}",
        'Test Acc (%)': f"{result['test_acc']*100:.2f}",
        'Test F1-Score': f"{result['test_f1']:.4f}"
    })

# Explicit columns keep the table well-formed even when there are no results
comparison_df = pd.DataFrame(
    comparison_data,
    columns=['Model', 'Parameters', 'Best Val Acc (%)', 'Test Acc (%)', 'Test F1-Score'],
)

print("\n" + "="*80)
print("üìä COMPARA√á√ÉO FINAL DOS MODELOS")
print("="*80)
print(comparison_df.to_string(index=False))
print("="*80)

# Save the table
comparison_df.to_csv(LOGS_DIR / 'model_comparison.csv', index=False)
print(f"\nüíæ Tabela salva em: {LOGS_DIR / 'model_comparison.csv'}")

### Gráfico de Barras - Comparação de Accuracy

In [None]:
# Gather the series to plot (test accuracy is stored as a fraction)
model_names = list(results)
test_accs = [entry['test_acc'] * 100 for entry in results.values()]
val_accs = [entry['best_val_acc'] for entry in results.values()]

# Grouped bar chart: validation and test bars side by side per model
fig, ax = plt.subplots(figsize=(14, 7))

x = np.arange(len(model_names))
width = 0.35

bars1 = ax.bar(x - width/2, val_accs, width, label='Best Validation Accuracy', color='steelblue')
bars2 = ax.bar(x + width/2, test_accs, width, label='Test Accuracy', color='coral')

# Axis labels, title and ticks
ax.set_title('Model Comparison - Validation vs Test Accuracy', fontsize=14, fontweight='bold')
ax.set_xlabel('Model', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy (%)', fontsize=12, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(model_names, rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')


def add_values_on_bars(bars):
    """Write each bar's height, as a percentage, just above the bar."""
    for rect in bars:
        top = rect.get_height()
        centre = rect.get_x() + rect.get_width()/2.
        ax.text(centre, top, f'{top:.1f}%',
                ha='center', va='bottom', fontsize=9)


add_values_on_bars(bars1)
add_values_on_bars(bars2)

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'model_comparison_accuracy.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico de compara√ß√£o salvo")

### Gráfico - Comparação de Parâmetros vs Accuracy

In [None]:
# Parameter count per model, in the same order as model_names
params_list = [
    sum(weights.numel() for weights in models_config[name].parameters())
    for name in model_names
]

# Scatter plot: one point per model, size on x and test accuracy on y
fig, ax = plt.subplots(figsize=(12, 8))

colors = plt.cm.viridis(np.linspace(0, 1, len(model_names)))

for name, n_params, accuracy, colour in zip(model_names, params_list, test_accs, colors):
    ax.scatter(
        n_params,
        accuracy,
        s=200,
        c=[colour],
        alpha=0.7,
        edgecolors='black',
        linewidth=2,
        label=name,
    )

ax.set_title('Model Complexity vs Performance', fontsize=14, fontweight='bold')
ax.set_xlabel('Number of Parameters', fontsize=12, fontweight='bold')
ax.set_ylabel('Test Accuracy (%)', fontsize=12, fontweight='bold')
ax.legend(loc='best', fontsize=10)
ax.grid(True, alpha=0.3)

# Show x ticks in thousands
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{int(x/1000)}K'))

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'model_complexity_vs_performance.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ Gr√°fico de complexidade salvo")

## 1️⃣1️⃣ Análise por Classe

In [None]:
# Per-class performance of the model with the highest test accuracy
best_model_name = max(results, key=lambda x: results[x]['test_acc'])
best_model = models_config[best_model_name].to(DEVICE)
# map_location lets the checkpoint load even if it was saved from another device
best_model.load_state_dict(
    torch.load(MODELS_DIR / f"{best_model_name}.pt", map_location=DEVICE)
)

print(f"\nüèÜ Melhor modelo: {best_model_name}")
print(f"   Test Accuracy: {results[best_model_name]['test_acc']*100:.2f}%")

# Take the class count from the label mapping instead of hard-coding 10,
# the same way evaluate_model derives its class names.
class_mapping = dataloader.get_label_mapping()
num_classes = len(class_mapping)

# Count correct and total predictions per class
best_model.eval()
class_correct = [0] * num_classes
class_total = [0] * num_classes

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        outputs = best_model(inputs)
        _, predicted = torch.max(outputs, 1)

        # Convert once per batch so the loop compares plain Python ints
        for label, guess in zip(labels.tolist(), predicted.tolist()):
            class_total[label] += 1
            if guess == label:
                class_correct[label] += 1

# Accuracy per class (0 is recorded for classes with no test samples)
class_accuracies = []

print("\nüìä Accuracy por Classe:")
print("="*60)
for i in range(num_classes):
    if class_total[i] > 0:
        acc = 100 * class_correct[i] / class_total[i]
        class_accuracies.append(acc)
        print(f"{class_mapping[i]:20s} - {acc:6.2f}% ({class_correct[i]}/{class_total[i]})")
    else:
        class_accuracies.append(0)
        print(f"{class_mapping[i]:20s} - N/A (no samples)")
print("="*60)

# Horizontal bar chart of the per-class accuracy
fig, ax = plt.subplots(figsize=(12, 6))
class_names = [class_mapping[i] for i in range(num_classes)]
bars = ax.barh(class_names, class_accuracies, color='teal', alpha=0.7)

# Colour by performance band: >= 80 green, >= 60 orange, otherwise red
for i, bar in enumerate(bars):
    if class_accuracies[i] >= 80:
        bar.set_color('green')
    elif class_accuracies[i] >= 60:
        bar.set_color('orange')
    else:
        bar.set_color('red')

ax.set_xlabel('Accuracy (%)', fontsize=12, fontweight='bold')
ax.set_ylabel('Class', fontsize=12, fontweight='bold')
ax.set_title(f'Per-Class Accuracy - {best_model_name}', fontsize=14, fontweight='bold')
ax.set_xlim([0, 100])
ax.grid(True, alpha=0.3, axis='x')

# Value labels just to the right of each bar
for i, v in enumerate(class_accuracies):
    ax.text(v + 1, i, f'{v:.1f}%', va='center', fontweight='bold')

plt.tight_layout()
plt.savefig(PLOTS_DIR / f'per_class_accuracy_{best_model_name}.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ An√°lise por classe conclu√≠da")

## 1️⃣2️⃣ Resumo Final

In [None]:
# Closing banner and the locations of everything that was written to disk
print("\n" + "#"*80)
print("# üéâ TREINAMENTO COMPLETO!")
print("#"*80)
print(
    "\nüìÅ Arquivos salvos:",
    f"   - Modelos treinados: {MODELS_DIR}",
    f"   - Gr√°ficos: {PLOTS_DIR}",
    f"   - Logs: {LOGS_DIR}",
    sep="\n",
)

# Leaderboard, best test accuracy first
print("\nüèÜ Ranking dos Modelos (por Test Accuracy):")
print("="*80)
sorted_results = list(results.items())
sorted_results.sort(key=lambda x: x[1]['test_acc'], reverse=True)
for i, (name, result) in enumerate(sorted_results, 1):
    print(f"{i}. {name:20s} - {result['test_acc']*100:.2f}% (F1: {result['test_f1']:.4f})")
print("="*80)

# Sign-off and suggested follow-ups
print("\n‚ú® Experimento conclu√≠do com sucesso!")
print(
    "\nPr√≥ximos passos sugeridos:",
    "   1. Analisar confusion matrices para identificar classes problem√°ticas",
    "   2. Experimentar com data augmentation",
    "   3. Ajustar hiperpar√¢metros do melhor modelo",
    "   4. Considerar ensemble de modelos",
    "   5. Testar com novos dados de √°udio",
    sep="\n",
)