# Guitar Harmonics CNN Classifier Training

This notebook trains a CNN model for classifying guitar harmonics from audio clips using mel spectrograms.

**Classes:**
- `harmonic`: Natural or artificial harmonics
- `dead_note`: Muted or deadened notes
- `general_note`: Regular notes

**Model Architecture:**
- 4 convolutional blocks (Conv → BatchNorm → ReLU); the first three are each followed by MaxPool and Dropout2d
- Global average pooling
- Fully connected classification head

## 1. Import Libraries and Set Random Seeds

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import librosa
from tqdm import tqdm
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import matplotlib.pyplot as plt
import seaborn as sns

# Seed both NumPy's and PyTorch's global RNGs with the same value so that
# repeated runs of the notebook draw the same random numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Short environment report: library version and whether CUDA is usable.
accelerator = 'CUDA' if torch.cuda.is_available() else 'CPU'
print("All libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"Device available: {accelerator}")

## 2. Define HarmonicsDataset Class

In [None]:
class HarmonicsDataset(Dataset):
    """Dataset that turns rows of a clip-metadata table into mel spectrograms.

    Each row of ``metadata_df`` is read through the columns ``source_audio``
    (path handed to ``librosa.load``), ``onset_sec`` (load offset),
    ``duration_sec`` (clip length, capped at ``duration``) and
    ``label_category`` (one of the keys of ``label_map``).

    Args:
        metadata_df: DataFrame with the columns listed above.
        sr: Sample rate the audio is resampled to on load.
        duration: Fixed clip length in seconds; shorter clips are
            zero-padded, longer ones are truncated.
        n_mels: Number of mel bands.
        n_fft: FFT window size for the spectrogram.
        hop_length: Hop between spectrogram frames, in samples.

    Raises:
        KeyError: If ``label_category`` contains a value missing from
            ``label_map`` (raised while building ``labels``).
    """

    def __init__(self, metadata_df, sr=22050, duration=3.0, n_mels=128, n_fft=2048, hop_length=512):
        self.metadata = metadata_df.reset_index(drop=True)
        self.sr = sr
        self.duration = duration
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length

        # Label mapping
        self.label_map = {'harmonic': 0, 'dead_note': 1, 'general_note': 2}
        self.labels = [self.label_map[label] for label in self.metadata['label_category']]

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        """Return ``(mel_tensor, label)`` for sample ``idx``.

        ``mel_tensor`` is a float tensor with a leading channel axis holding
        the min-max normalised log-mel spectrogram; ``label`` is a 0-dim long
        tensor. If anything fails while loading or transforming the clip, the
        error is printed and an all-zero tensor is returned instead.
        """
        row = self.metadata.iloc[idx]
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        # Every clip is padded/trimmed to this many samples.
        target_length = int(self.sr * self.duration)

        try:
            # Load audio
            audio, _ = librosa.load(
                row['source_audio'],
                sr=self.sr,
                offset=row['onset_sec'],
                duration=min(row['duration_sec'], self.duration)
            )

            # Pad or trim to fixed length
            if len(audio) < target_length:
                audio = np.pad(audio, (0, target_length - len(audio)))
            else:
                audio = audio[:target_length]

            # Compute mel spectrogram
            mel_spec = librosa.feature.melspectrogram(
                y=audio,
                sr=self.sr,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
                n_mels=self.n_mels,
                fmin=80,
                fmax=8000
            )

            # Convert to log scale (dB)
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

            # Normalize to [0, 1]; the epsilon avoids 0/0 on constant clips
            mel_spec_norm = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min() + 1e-8)

            # Convert to tensor (add channel dimension)
            mel_tensor = torch.FloatTensor(mel_spec_norm).unsqueeze(0)
            return mel_tensor, label

        except Exception as e:
            # Deliberate best-effort: one unreadable clip must not abort a
            # whole epoch, so report it and substitute silence.
            print(f"Error loading sample {idx}: {e}")
            # The fallback must match the shape of real samples so batches
            # can be collated. Frame count derived from the clip geometry
            # instead of a hard-coded 130 (assumes librosa's default centred
            # framing: 1 + n_samples // hop_length).
            n_frames = 1 + target_length // self.hop_length
            mel_tensor = torch.zeros((1, self.n_mels, n_frames))
            return mel_tensor, label

print("HarmonicsDataset class defined successfully!")

## 3. Define HarmonicsCNN Model

In [None]:
class HarmonicsCNN(nn.Module):
    """Small 2-D CNN that maps a single-channel spectrogram to class logits.

    Three pooled convolution stages (1→32→64→128 channels) are followed by a
    fourth convolution stage (128→256) that ends in global average pooling,
    and then a two-layer fully connected head.

    Args:
        num_classes: Size of the output logit vector.
        dropout: Dropout probability used in the fully connected head.
    """

    def __init__(self, num_classes=3, dropout=0.5):
        super().__init__()

        def pooled_stage(in_channels, out_channels):
            # Conv -> BatchNorm -> ReLU -> 2x2 max-pool -> channel dropout.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Dropout2d(0.25),
            )

        # Feature extractor: attribute names and layer order are kept so that
        # state_dict keys stay the same.
        self.conv1 = pooled_stage(1, 32)
        self.conv2 = pooled_stage(32, 64)
        self.conv3 = pooled_stage(64, 128)

        # Last stage collapses the spatial axes to 1x1 instead of max-pooling.
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
        )

        # Classification head on the 256 pooled features.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Run ``x`` through the four conv stages and the head; return logits."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.fc):
            x = stage(x)
        return x

print("HarmonicsCNN model defined successfully!")

## 4. Define Helper Functions

In [None]:
def compute_class_weights(labels, num_classes=None):
    """Compute inverse-frequency ("balanced") class weights.

    Each present class gets ``total / (n_present_classes * count)``.

    Args:
        labels: Sequence of integer class ids.
        num_classes: Optional total number of classes. When given, the
            result has exactly ``num_classes`` entries indexed by class id,
            and classes that never occur in ``labels`` get weight 0.0. When
            omitted, the result has one entry per *distinct* label in sorted
            order; note that this is only aligned with class ids if every
            class 0..K-1 occurs at least once.

    Returns:
        A 1-D ``torch.float32`` tensor of weights.
    """
    labels = np.asarray(labels)
    if num_classes is None:
        _, counts = np.unique(labels, return_counts=True)
    else:
        # bincount keeps a slot for every class id, including absent ones.
        counts = np.bincount(labels.astype(np.int64), minlength=num_classes)

    present = counts > 0
    n_present = int(present.sum())
    weights = np.zeros(len(counts), dtype=np.float64)
    if n_present:
        # Only divide for classes that occur, so absent ones stay at 0.0.
        weights[present] = len(labels) / (n_present * counts[present])
    return torch.tensor(weights, dtype=torch.float32)


def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; switched to train mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` for the epoch.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(loader, desc='Training')
    # Count batches ourselves: tqdm only refreshes ``pbar.n`` at display
    # intervals, so using it as the denominator misreports the running loss.
    for batch_idx, (inputs, labels) in enumerate(pbar, start=1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        pbar.set_postfix({'loss': running_loss / batch_idx, 'acc': 100. * correct / total})

    return running_loss / len(loader), 100. * correct / total


def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Returns:
        ``(mean_batch_loss, accuracy_percent, predictions, targets)`` where
        the last two are flat lists collected over all batches.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch_x, batch_y in tqdm(loader, desc='Validation'):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()

            # Index of the largest logit per row is the predicted class.
            predicted = logits.max(1)[1]
            n_seen += batch_y.size(0)
            n_correct += predicted.eq(batch_y).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(batch_y.cpu().numpy())

    mean_loss = loss_sum / len(loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy, all_preds, all_labels


def plot_training_history(history, output_dir):
    """Save loss and accuracy curves to ``output_dir / 'training_history.png'``.

    ``history`` is indexed with the keys ``train_loss``, ``val_loss``,
    ``train_acc`` and ``val_acc``. The figure is closed after saving.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # One entry per panel: (train key, val key, train label, val label,
    # y-axis label, title).
    panels = (
        ('train_loss', 'val_loss', 'Train Loss', 'Val Loss',
         'Loss', 'Training and Validation Loss'),
        ('train_acc', 'val_acc', 'Train Acc', 'Val Acc',
         'Accuracy (%)', 'Training and Validation Accuracy'),
    )
    for ax, (train_key, val_key, train_label, val_label, y_label, title) in zip(axes, panels):
        ax.plot(history[train_key], label=train_label, linewidth=2)
        ax.plot(history[val_key], label=val_label, linewidth=2)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    target = output_dir / 'training_history.png'
    plt.savefig(target, dpi=150, bbox_inches='tight')
    print(f"Saved: {target}")
    plt.close()


def plot_confusion_matrix(y_true, y_pred, output_dir):
    """Save a confusion-matrix heatmap to ``output_dir / 'confusion_matrix.png'``.

    Args:
        y_true: Ground-truth integer class ids (0=harmonic, 1=dead_note,
            2=general_note).
        y_pred: Predicted integer class ids, same encoding.
        output_dir: ``pathlib.Path``-like directory the PNG is written to.

    The figure is closed after saving.
    """
    class_names = ['harmonic', 'dead_note', 'general_note']
    # Pin the label set: without it, a class missing from both y_true and
    # y_pred shrinks the matrix and the fixed tick labels no longer line up.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix - Test Set')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig(output_dir / 'confusion_matrix.png', dpi=150, bbox_inches='tight')
    print(f"Saved: {output_dir / 'confusion_matrix.png'}")
    plt.close()

print("Helper functions defined successfully!")

## 5. Configure Training Parameters

In [None]:
# --- Paths -----------------------------------------------------------------
METADATA_PATH = 'processed_dataset/metadata.csv'
OUTPUT_DIR = Path('models/')

# --- Hyperparameters -------------------------------------------------------
BATCH_SIZE = 32
EPOCHS = 50
LEARNING_RATE = 0.001
DROPOUT = 0.5
N_SAMPLES_PER_CLASS = None  # Set to an integer to limit samples per class, or None for all

# Make sure the artefact directory exists before anything is written to it.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Prefer the GPU when PyTorch can see one.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Echo the effective settings, one per line.
print(
    f"Using device: {device}",
    f"\nConfiguration:",
    f"  Metadata: {METADATA_PATH}",
    f"  Output: {OUTPUT_DIR}",
    f"  Batch Size: {BATCH_SIZE}",
    f"  Epochs: {EPOCHS}",
    f"  Learning Rate: {LEARNING_RATE}",
    f"  Dropout: {DROPOUT}",
    f"  Samples per class: {N_SAMPLES_PER_CLASS if N_SAMPLES_PER_CLASS else 'All'}",
    sep="\n",
)

## 6. Load and Prepare Metadata

In [None]:
# Read the clip-level metadata table from disk
print("Loading metadata...")
df = pd.read_csv(METADATA_PATH)
print(f"Loaded {len(df)} samples")

# Quick look at the table: first rows, columns, and label counts
print("\nDataset overview:")
print(df.head())
print("\nColumn names:", df.columns.tolist())
print("\nClass distribution (before limiting):")
print(df['label_category'].value_counts())

# Optional down-sampling: keep at most N_SAMPLES_PER_CLASS rows for each of
# the three known classes (rows with any other label are dropped here).
if N_SAMPLES_PER_CLASS:
    print(f"\nLimiting to {N_SAMPLES_PER_CLASS} samples per class...")
    per_class_rows = (
        df[df['label_category'] == class_name]
        for class_name in ('harmonic', 'dead_note', 'general_note')
    )
    df_balanced = [
        rows.sample(min(N_SAMPLES_PER_CLASS, len(rows)), random_state=42)
        for rows in per_class_rows
    ]
    df = pd.concat(df_balanced, ignore_index=True)
    print(f"Dataset size after limiting: {len(df)} samples")
    print("\nClass distribution (after limiting):")
    print(df['label_category'].value_counts())

## 7. Split Dataset by Audio Files

Split by audio files (not individual samples) to prevent data leakage between train/val/test sets.

In [None]:
# Split by audio files to prevent data leakage: every clip cut from the same
# recording ends up in the same split.
print("Splitting dataset by audio files...")
audio_files = df['source_audio'].unique()
print(f"Total unique audio files: {len(audio_files)}")

# Create file-to-dominant-class mapping for stratification.
# A single groupby pass replaces re-filtering the whole frame once per file.
dominant_by_file = (
    df.groupby('source_audio')['label_category']
    .agg(lambda labels: labels.mode()[0])
    .to_dict()
)
# Files the groupby did not produce a key for fall back to 'unknown'.
file_labels = {
    audio_file: dominant_by_file.get(audio_file, 'unknown')
    for audio_file in audio_files
}

# Split files (70% train, 15% val, 15% test)
try:
    train_files, test_files = train_test_split(
        audio_files, test_size=0.15, stratify=[file_labels[f] for f in audio_files], random_state=42
    )
    # 0.15 / 0.85 of the remaining files is 15% of the original total.
    train_files, val_files = train_test_split(
        train_files, test_size=0.15/0.85, stratify=[file_labels[f] for f in train_files], random_state=42
    )
except ValueError:
    # Stratification needs at least two files per class; fall back to an
    # unstratified split of the full file list otherwise.
    print("Warning: Stratified split failed, using random split")
    train_files, test_files = train_test_split(audio_files, test_size=0.15, random_state=42)
    train_files, val_files = train_test_split(train_files, test_size=0.15/0.85, random_state=42)

# Create dataset splits
train_df = df[df['source_audio'].isin(train_files)]
val_df = df[df['source_audio'].isin(val_files)]
test_df = df[df['source_audio'].isin(test_files)]

print(f"\nSplit statistics:")
print(f"  Train: {len(train_df)} samples from {len(train_files)} files")
print(f"  Val: {len(val_df)} samples from {len(val_files)} files")
print(f"  Test: {len(test_df)} samples from {len(test_files)} files")

# Print class distribution
print("\nClass distribution per split:")
for split_name, split_df in [('Train', train_df), ('Val', val_df), ('Test', test_df)]:
    print(f"\n  {split_name}:")
    n_rows = len(split_df)
    for label in ['harmonic', 'dead_note', 'general_note']:
        count = (split_df['label_category'] == label).sum()
        # An empty split would otherwise divide by zero.
        pct = 100 * count / n_rows if n_rows else 0.0
        print(f"    {label}: {count} ({pct:.1f}%)")

## 8. Create PyTorch Datasets and DataLoaders

In [None]:
# Wrap each split's metadata frame in a HarmonicsDataset (default settings)
print("Creating PyTorch datasets...")
train_dataset, val_dataset, test_dataset = (
    HarmonicsDataset(split_frame) for split_frame in (train_df, val_df, test_df)
)

print(
    f"Train dataset: {len(train_dataset)} samples",
    f"Val dataset: {len(val_dataset)} samples",
    f"Test dataset: {len(test_dataset)} samples",
    sep="\n",
)

# Batch the datasets; only the training loader shuffles, and loading runs in
# the main process (num_workers=0).
loader_kwargs = {'batch_size': BATCH_SIZE, 'num_workers': 0}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

print(
    f"\nDataLoaders created:",
    f"  Train batches: {len(train_loader)}",
    f"  Val batches: {len(val_loader)}",
    f"  Test batches: {len(test_loader)}",
    sep="\n",
)

## 9. Initialize Model, Loss, and Optimizer

In [None]:
# Compute class weights for imbalanced dataset (from the training labels only)
class_weights = compute_class_weights(train_dataset.labels).to(device)
print(f"Class weights: {class_weights.cpu().numpy()}")

# Create model
print("\nCreating model...")
model = HarmonicsCNN(num_classes=3, dropout=DROPOUT).to(device)
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

# Print model architecture
print("\nModel Architecture:")
print(model)

# Loss function with class weights
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Learning rate scheduler: halve the LR once the monitored value (passed to
# scheduler.step) has not improved for 5 epochs.
# NOTE: the `verbose` argument is intentionally not passed. It is deprecated
# in recent PyTorch releases and no longer accepted by newer ones; read the
# current LR from optimizer.param_groups[0]['lr'] if it needs to be logged.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5
)

print("\nModel, loss, and optimizer initialized successfully!")

## 10. Training Loop

Train the model for the specified number of epochs, tracking metrics and saving the best model.

In [None]:
# Training loop: one train pass and one validation pass per epoch, with the
# best-scoring weights (by validation accuracy) checkpointed to OUTPUT_DIR.
print("="*60)
print("TRAINING")
print("="*60)

history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': []
}

best_val_acc = 0.0
best_epoch = 0  # 1-based; 0 means "no checkpoint written yet"

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")

    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_loss, val_acc, val_preds, val_labels = validate(model, val_loader, criterion, device)

    # Update scheduler and report any learning-rate reduction explicitly
    # rather than depending on the scheduler's own logging.
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after < lr_before:
        print(f"Learning rate reduced: {lr_before:.2e} -> {lr_after:.2e}")

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Compute F1 score
    val_f1 = f1_score(val_labels, val_preds, average='macro')

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%, Val F1: {val_f1:.4f}")

    # Save best model. The first epoch is always saved (best_epoch == 0) so
    # that a checkpoint exists even if validation accuracy never rises
    # above 0%.
    if best_epoch == 0 or val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch + 1
        torch.save({
            'epoch': epoch,  # 0-based; readers add 1 for display
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Plain Python floats keep the checkpoint free of NumPy scalar
            # objects, which restricted (weights-only) loading may reject.
            'val_acc': float(val_acc),
            'val_f1': float(val_f1),
        }, OUTPUT_DIR / 'best_model.pt')
        print(f"✓ Saved best model (val_acc: {val_acc:.2f}%)")

print(f"\n{'='*60}")
print(f"Training Complete!")
print(f"Best validation accuracy: {best_val_acc:.2f}% at epoch {best_epoch}")
print(f"{'='*60}")

## 11. Plot Training History

In [None]:
# Plot training history (the helper writes training_history.png to OUTPUT_DIR)
print("Plotting training history...")
plot_training_history(history, OUTPUT_DIR)

# plot_training_history() closes its figure after saving, so a bare
# plt.show() would have nothing to draw. Display the saved image instead.
history_img = plt.imread(str(OUTPUT_DIR / 'training_history.png'))
plt.figure(figsize=(14, 5))
plt.imshow(history_img)
plt.axis('off')
plt.show()

## 12. Evaluate on Test Set

Load the best model and evaluate on the held-out test set.

In [None]:
# Test on best model
print("="*60)
print("TESTING")
print("="*60)

# Load best model. map_location moves the stored tensors onto the device in
# use now, so a checkpoint written on a GPU can still be read on a CPU-only
# session (and vice versa).
checkpoint = torch.load(OUTPUT_DIR / 'best_model.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# 'epoch' is stored 0-based; add 1 for display.
print(f"Loaded best model from epoch {checkpoint['epoch'] + 1}")

# Evaluate on test set (validate() is reused; it only reads the loader)
test_loss, test_acc, test_preds, test_labels = validate(model, test_loader, criterion, device)
test_f1 = f1_score(test_labels, test_preds, average='macro')

print(f"\n{'='*60}")
print(f"Test Results:")
print(f"  Test Loss: {test_loss:.4f}")
print(f"  Test Accuracy: {test_acc:.2f}%")
print(f"  Test F1 Score (macro): {test_f1:.4f}")
print(f"{'='*60}")

## 13. Generate Classification Report and Confusion Matrix

In [None]:
# Classification report
print("Classification Report:")
print("="*60)
# labels pins the class ids to report on: without it, a class absent from
# both test_labels and test_preds makes the three target_names mismatch the
# detected classes and the call fails.
print(classification_report(test_labels, test_preds,
                          labels=[0, 1, 2],
                          target_names=['harmonic', 'dead_note', 'general_note']))

# Plot confusion matrix (the helper writes confusion_matrix.png to OUTPUT_DIR)
print("\nPlotting confusion matrix...")
plot_confusion_matrix(test_labels, test_preds, OUTPUT_DIR)

# plot_confusion_matrix() closes its figure after saving, so a bare
# plt.show() would have nothing to draw. Display the saved image instead.
confusion_img = plt.imread(str(OUTPUT_DIR / 'confusion_matrix.png'))
plt.figure(figsize=(8, 6))
plt.imshow(confusion_img)
plt.axis('off')
plt.show()

## 14. Save Results

Save all training results and configuration to a JSON file for future reference.

In [None]:
# Collect the headline metrics and the run configuration in one record
results = dict(
    test_accuracy=float(test_acc),
    test_f1_macro=float(test_f1),
    test_loss=float(test_loss),
    best_val_accuracy=float(best_val_acc),
    best_epoch=int(best_epoch),
    total_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    dropout=DROPOUT,
    n_samples_per_class=N_SAMPLES_PER_CLASS,
)

# Serialise once, then both write the text to disk and echo it
results_json = json.dumps(results, indent=2)
results_path = OUTPUT_DIR / 'results.json'
results_path.write_text(results_json)

print(f"✓ Results saved to {results_path}")
print("\nFinal Results Summary:")
print(results_json)
print("\n" + "="*60)
print("TRAINING COMPLETE!")
print("="*60)