# Training Notebook

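Train the CatMeowCNN model on preprocessed data, evaluate it on the held-out test set, run k-fold cross-validation, and save the results for the diagnostics notebook.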


In [None]:
import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path("..").resolve()))
sys.path.insert(0, str(Path("../src").resolve()))

from src.data_loader import load_train_data, load_test_data, get_train_val_loaders, get_test_loader
from src.train import train, cross_validate
from src.test import test
from src.transforms import SpecAugment, Compose, RandomApply, AddNoise
from models import CatMeowCNN
import matplotlib.pyplot as plt
import torch



## 1. Load Data


In [None]:
# Where the preprocessed data is read from, and where the trained weights
# are written to later in this notebook.
DATA_DIR = Path("../data/interim")
MODEL_PATH = Path("../results/cat_meow.pt")

# Create the directory that will hold the saved model (no-op if it exists).
results_parent = MODEL_PATH.parent
results_parent.mkdir(parents=True, exist_ok=True)

# The train and test splits come from two separate loader functions.
X_train, y_train = load_train_data(DATA_DIR)
X_test, y_test = load_test_data(DATA_DIR)

# Summarize what was loaded: shapes of both splits and the label count.
print(f"Train: X={X_train.shape}, y={y_train.shape}")
print(f"Test:  X={X_test.shape}, y={y_test.shape}")
# NOTE(review): set() only counts distinct labels if the elements hash by
# value (lists, NumPy arrays). If y_train is a torch.Tensor, its elements
# hash by identity and this would report the sample count instead --
# confirm the return type of load_train_data.
print(f"Classes: {len(set(y_train))}")


In [None]:
# Training-time augmentation: a SpecAugment step followed by AddNoise
# wrapped in RandomApply with p=0.3.
spec_augment = SpecAugment(freq_mask_param=15, time_mask_param=25)
occasional_noise = RandomApply(AddNoise(noise_level=0.005), p=0.3)
train_transform = Compose([spec_augment, occasional_noise])

# Carve a validation split out of the training data; only the training
# loader receives the augmentation transform.
train_loader, val_loader = get_train_val_loaders(
    X_train,
    y_train,
    batch_size=16,
    train_transform=train_transform,
)

# Report the size of each split.
print(f"Train: {len(train_loader.dataset)} samples (with augmentation)")
print(f"Val: {len(val_loader.dataset)} samples")
print(f"Test: {len(X_test)} samples (held out)")


## 2. Create Model


In [None]:
# Size the model's output from the number of distinct training labels.
# NOTE(review): relies on the elements of y_train hashing by value --
# confirm y_train is not a torch.Tensor.
n_classes = len(set(y_train))
model = CatMeowCNN(n_classes=n_classes)

# Total number of scalar parameters across all of the model's tensors.
n_params = sum(weight.numel() for weight in model.parameters())
print("Model: CatMeowCNN")
print(f"Parameters: {n_params:,}")


## 3. Train


In [None]:
# Training is done in the "5. Evaluate" cell below.


## 5. Evaluate


In [None]:
# Start from a freshly initialized model so this cell can be re-run on its own.
model = CatMeowCNN(n_classes=n_classes)

# Rebuild the train/val loaders with the same settings as before.
train_loader, val_loader = get_train_val_loaders(
    X_train,
    y_train,
    batch_size=16,
    train_transform=train_transform,
)

# Fit the model; train() is given MODEL_PATH as the location to save weights.
history = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=100,
    learning_rate=0.001,
    save_path=str(MODEL_PATH),
    patience=40,
)

# Restore the weights stored at MODEL_PATH (presumably the best epoch --
# confirm in src.train), then score the model on the held-out test split.
saved_state = torch.load(MODEL_PATH, weights_only=True)
model.load_state_dict(saved_state)
test_loader = get_test_loader(X_test, y_test)
results = test(model, test_loader)


## 6. Cross-Validation

Run k-fold cross-validation for a more reliable accuracy estimate with limited data.


In [None]:
# 5-fold cross-validation over the training data only; the test split is
# not passed in. Hyperparameters mirror the final-model training cell
# (100 epochs, lr 0.001, batch size 16, patience 40).
cv_results = cross_validate(
    model_class=CatMeowCNN,
    X=X_train, y=y_train,
    n_splits=5,
    epochs=100, learning_rate=0.001, batch_size=16, patience=40,
    # Extra keyword forwarded to the CatMeowCNN constructor.
    n_classes=n_classes,
)


In [None]:
# Visualize CV results: one bar per fold, with a dashed line at the mean.
fig, ax = plt.subplots(figsize=(8, 4))
folds = [r["fold"] for r in cv_results["fold_results"]]
accs = cv_results["accuracies"]

ax.bar(folds, accs, color='steelblue', edgecolor='white')
ax.axhline(cv_results["mean_acc"], color='red', linestyle='--',
           label=f'Mean: {cv_results["mean_acc"]:.3f} ± {cv_results["std_acc"]:.3f}')
ax.set_xlabel("Fold")
ax.set_ylabel("Validation Accuracy")
# Derive the fold count from the results instead of hard-coding "5", so the
# title stays correct if n_splits is changed in the cross_validate call.
ax.set_title(f"{len(folds)}-Fold Cross-Validation Results")
# Assumes accuracies are fractions in [0, 1]; they are printed with the
# percent format below.
ax.set_ylim(0, 1)
ax.legend()
plt.tight_layout()
plt.show()

print(f"\nCV Accuracy: {cv_results['mean_acc']:.1%} ± {cv_results['std_acc']:.1%}")


In [None]:
# Plot training curves for each fold: one row per fold, with loss in the
# left column and accuracy in the right column.
fold_results = cv_results["fold_results"]
n_folds = len(fold_results)

# squeeze=False keeps `axes` two-dimensional even when there is a single
# fold. By default plt.subplots collapses a 1xN grid to a 1-D array, which
# would break row/column indexing for n_folds == 1.
fig, axes = plt.subplots(n_folds, 2, figsize=(12, 3 * n_folds), squeeze=False)

for (loss_ax, acc_ax), fold_result in zip(axes, fold_results):
    fold_history = fold_result["history"]
    fold_num = fold_result["fold"]

    # Loss
    loss_ax.plot(fold_history["train_loss"], label="Train")
    loss_ax.plot(fold_history["val_loss"], label="Val")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.set_title(f"Fold {fold_num} - Loss")
    loss_ax.legend()

    # Accuracy
    acc_ax.plot(fold_history["train_acc"], label="Train")
    acc_ax.plot(fold_history["val_acc"], label="Val")
    acc_ax.set_xlabel("Epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.set_title(f"Fold {fold_num} - Accuracy (Best: {fold_result['best_val_acc']:.3f})")
    acc_ax.legend()

plt.tight_layout()
plt.show()


## 8. Save Results for Diagnostics


In [None]:
# Persist everything produced above so the diagnostics notebook can load it.
import pickle

RESULTS_DIR = Path("../results")
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
results_file = RESULTS_DIR / "training_results.pkl"

# One bundle holding the final-model history, the cross-validation output,
# the test-set results, and the class count.
training_results = dict(
    history=history,
    cv_results=cv_results,
    test_results=results,
    n_classes=n_classes,
)

with results_file.open("wb") as sink:
    pickle.dump(training_results, sink)

# Short confirmation of what was written.
print(f"Saved training results to {RESULTS_DIR / 'training_results.pkl'}")
print(f"  - Training history: {len(history['train_loss'])} epochs")
print(f"  - CV results: {len(cv_results['fold_results'])} folds")
print(f"  - Test accuracy: {results['accuracy']:.1%}")
