In [12]:
# Model Trainer
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from typing import Dict, List, Tuple, Any

# Utils
import os
import json
from datetime import datetime
import numpy as np

In [2]:
%pip install pandas

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting pandas
  Downloading pandas-2.2.3-cp39-cp39-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp39-cp39-win_amd64.whl (11.6 MB)
   ---------------------------------------- 0.0/11.6 MB ? eta -:--:--
    --------------------------------------- 0.3/11.6 MB ? eta -:--:--
   ----- ---------------------------------- 1.6/11.6 MB 5.6 MB/s eta 0:00:02
   --------- ------------------------------ 2.9/11.6 MB 5.8 MB/s eta 0:00:02
   -------------- ------------------------- 4.2/11.6 MB 6.0 MB/s eta 0:00:02
   ------------------ --------------------- 5.5/11.6 MB 6.1 MB/s eta 0:00:02
   ----------------------- ---------------- 6.8/11.6 MB 6.2 MB/s eta 0:00:01
   ------------------------- -------------- 7.3/

In [3]:
%pip install seaborn

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.comNote: you may need to restart the kernel to use updated packages.

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2


In [13]:
class ModelTrainer:
    def __init__(
        self,
        models_dict: Dict[str, nn.Module],
        train_loader: DataLoader,
        test_loader: DataLoader,
        criterion: nn.Module,
        num_epochs: int = 10,
        device: str = None
    ):
        """
        Initialize the model trainer with multiple models and dataset loaders.

        Args:
            models_dict: Dictionary of model names and their instances
            train_loader: Training data loader
            test_loader: Test data loader
            criterion: Loss function
            num_epochs: Number of training epochs
            device: Device to run training on (will auto-detect if None)
        """
        self.device = device or ('cpu')
        self.models = {name: model.to(self.device) for name, model in models_dict.items()}
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.criterion = criterion
        self.num_epochs = num_epochs
        self.results = {}

    def _validate_input_batch(self, inputs: torch.Tensor, labels: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Validate and prepare input batch for training/testing."""
        if not isinstance(inputs, torch.Tensor):
            raise TypeError(f"Expected inputs to be torch.Tensor, got {type(inputs)}")
        if not isinstance(labels, torch.Tensor):
            raise TypeError(f"Expected labels to be torch.Tensor, got {type(labels)}")

        return inputs.to(self.device), labels.to(self.device)

    def train_model(self, model_name: str, learning_rate: float = 0.001) -> Dict[str, Any]:
        """
        Train a single model and track its performance metrics.

        Args:
            model_name: Name of the model to train
            learning_rate: Learning rate for optimization

        Returns:
            Dictionary containing training history
        """
        if model_name not in self.models:
            raise ValueError(f"Model {model_name} not found in initialized models")

        model = self.models[model_name]
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        history = {
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': [],
            'test_metrics': None
        }

        try:
            for epoch in range(self.num_epochs):
                # Training phase
                model.train()
                train_loss = 0
                correct = 0
                total = 0

                for batch_idx, (inputs, labels) in enumerate(self.train_loader):
                    try:
                        inputs, labels = self._validate_input_batch(inputs, labels)

                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = self.criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()

                        train_loss += loss.item()
                        _, predicted = outputs.max(1)
                        total += labels.size(0)
                        correct += predicted.eq(labels).sum().item()

                    except RuntimeError as e:
                        print(f"Error in batch {batch_idx}: {str(e)}")
                        continue

                train_acc = 100. * correct / total
                train_loss = train_loss / len(self.train_loader)

                # Validation phase
                val_loss, val_acc = self._validate_epoch(model)

                # Save metrics
                history['train_loss'].append(train_loss)
                history['train_acc'].append(train_acc)
                history['val_loss'].append(val_loss)
                history['val_acc'].append(val_acc)

                print(f'Epoch [{epoch+1}/{self.num_epochs}] - '
                      f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, '
                      f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        except Exception as e:
            print(f"Training interrupted for {model_name}: {str(e)}")
            raise

        # Final test phase
        history['test_metrics'] = self.test(model)
        self.results[model_name] = history
        return history

    def _validate_epoch(self, model: nn.Module) -> Tuple[float, float]:
        """Run validation for one epoch."""
        model.eval()
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in self.test_loader:
                inputs, labels = self._validate_input_batch(inputs, labels)
                outputs = model(inputs)
                loss = self.criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        return val_loss / len(self.test_loader), 100. * correct / total

    def test(self, model: nn.Module) -> Dict[str, Any]:
        """
        Test model and return metrics

        Returns:
            Dictionary containing test metrics
        """
        model.eval()
        test_loss = 0
        correct = 0
        total = 0
        predictions = []
        targets = []

        with torch.no_grad():
            for batch_idx, (inputs, labels) in enumerate(self.test_loader):
                try:
                    inputs, labels = self._validate_input_batch(inputs, labels)

                    outputs = model(inputs)
                    loss = self.criterion(outputs, labels)
                    test_loss += loss.item()

                    _, predicted = outputs.max(1)  # Multi-class case
                    predictions.append(predicted.cpu().numpy())
                    targets.append(labels.cpu().numpy())

                    total += labels.size(0)
                    correct += predicted.eq(labels).sum().item()

                except RuntimeError as e:
                    print(f"Error in batch {batch_idx}: {str(e)}")
                    continue

        # Flatten predictions and targets for metrics
        all_predictions = np.concatenate(predictions)
        all_targets = np.concatenate(targets)

        test_acc = 100. * correct / total
        return {
            'test_loss': test_loss / len(self.test_loader),
            'test_accuracy': test_acc,
            'predictions': all_predictions,
            'targets': all_targets
        }

    def train_all_models(self) -> None:
        """Train all models and save results"""
        for model_name in self.models:
            try:
                self.train_model(model_name)
            except Exception as e:
                print(f"Failed to train {model_name}: {str(e)}")
                continue

        self.save_results()
        self.generate_reports()

    def save_results(self) -> None:
        """Save training results to CSV"""
        results_df = pd.DataFrame()

        for model_name, history in self.results.items():
            model_df = pd.DataFrame({
                'epoch': range(1, self.num_epochs + 1),
                'model': model_name,
                'train_loss': history['train_loss'],
                'train_acc': history['train_acc'],
                'val_loss': history['val_loss'],
                'val_acc': history['val_acc']
            })
            results_df = pd.concat([results_df, model_df])

        os.makedirs('results', exist_ok=True)
        results_df.to_csv('results/training_results.csv', index=False)

    def generate_reports(self) -> None:
        """Generate and save visualization plots"""
        os.makedirs('plots', exist_ok=True)

        self._plot_training_curves()
        self._plot_confusion_matrices()
        self._plot_model_comparison()
        self._analyze_fitting()

    def _plot_training_curves(self) -> None:
        """Plot training and validation curves for all models"""
        plt.figure(figsize=(15, 10))

        for model_name, history in self.results.items():
            epochs = range(1, self.num_epochs + 1)

            # Loss subplot
            plt.subplot(2, 1, 1)
            plt.plot(epochs, history['train_loss'], '-o', label=f'{model_name} (train)')
            plt.plot(epochs, history['val_loss'], '--o', label=f'{model_name} (val)')
            plt.title('Model Loss')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.legend()

            # Accuracy subplot
            plt.subplot(2, 1, 2)
            plt.plot(epochs, history['train_acc'], '-o', label=f'{model_name} (train)')
            plt.plot(epochs, history['val_acc'], '--o', label=f'{model_name} (val)')
            plt.title('Model Accuracy')
            plt.xlabel('Epoch')
            plt.ylabel('Accuracy (%)')
            plt.legend()

        plt.tight_layout()
        plt.savefig('plots/training_curves.png')
        plt.close()

    def _plot_confusion_matrices(self) -> None:
        """Plot confusion matrices for all models"""
        for model_name, history in self.results.items():
            metrics = history['test_metrics']
            cm = confusion_matrix(metrics['targets'], metrics['predictions'])

            plt.figure(figsize=(10, 8))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title(f'Confusion Matrix - {model_name}')
            plt.xlabel('Predicted')
            plt.ylabel('True')
            plt.savefig(f'plots/confusion_matrix_{model_name}.png')
            plt.close()

    def _plot_model_comparison(self) -> None:
        """Plot final test accuracy comparison"""
        model_names = list(self.results.keys())
        test_accuracies = [self.results[model]['test_metrics']['test_accuracy']
                          for model in model_names]

        plt.figure(figsize=(12, 6))
        bars = plt.bar(model_names, test_accuracies)
        plt.title('Model Comparison - Test Accuracy')
        plt.xlabel('Model')
        plt.ylabel('Test Accuracy (%)')
        plt.xticks(rotation=45)

        # Add value labels on top of each bar
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:.1f}%',
                    ha='center', va='bottom')

        plt.tight_layout()
        plt.savefig('plots/model_comparison.png')
        plt.close()

    def _analyze_fitting(self) -> None:
        """Analyze and report fitting status for each model"""
        fitting_analysis = {}

        for model_name, history in self.results.items():
            train_loss = history['train_loss']
            val_loss = history['val_loss']

            # Calculate metrics for fitting analysis
            final_train_loss = train_loss[-1]
            final_val_loss = val_loss[-1]
            loss_gap = final_val_loss - final_train_loss

            # Determine fitting status
            if final_train_loss > 0.1 and final_val_loss > 0.1:
                status = "Underfitting"
            elif loss_gap > 0.1:
                status = "Overfitting"
            else:
                status =  "Fit"

            fitting_analysis[model_name] = {
                'status': status,
                'final_train_loss': final_train_loss,
                'final_val_loss': final_val_loss,
                'loss_gap': loss_gap,
                'recommended_action': self._get_fitting_recommendation(status)
            }

        with open('results/fitting_analysis.json', 'w') as f:
            json.dump(fitting_analysis, f, indent=4)

    def _get_fitting_recommendation(self, status: str) -> str:
        """Get recommendation based on fitting status"""
        recommendations = {
            "Underfitting": "Consider increasing model capacity or training longer",
            "Overfitting": "Consider adding regularization or reducing model capacity",
            "Fit": "Model is well-balanced, continue monitoring performance"
        }
        return recommendations.get(status, "Unknown fitting status")

In [14]:
from torchvision import transforms, models, datasets

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load train dataset
train_dataset = datasets.ImageFolder('train', transform=transform)

# Load test dataset
test_dataset = datasets.ImageFolder('test', transform=transform)


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'train'

In [10]:
import torch
import torch.nn as nn
import torchvision.models as models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_labels = len(train_dataset.classes)
print(num_labels)


def EfficientNet():
    model = models.efficientnet_b0(pretrained=True)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_labels)
    return model.to(device)

def ResNet50():
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_labels)
    return model.to(device)

def VGG16():
    model = models.vgg16(pretrained=True)
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_labels)
    return model.to(device)

def MobileNet():
    model = models.mobilenet_v2(pretrained=True)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_labels)
    return model.to(device)

NameError: name 'train_dataset' is not defined

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
models = {
    "EfficientNet": EfficientNet(),
    "ResNet50": ResNet50(),
    "VGG16": VGG16(),
    "MobileNet": MobileNet()
}

criterion = nn.CrossEntropyLoss()

In [None]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

trainer = ModelTrainer(models, train_loader, test_loader, criterion, num_epochs=3)
trainer.train_all_models()

In [None]:
trainer.test(models['EfficientNet'])

In [None]:
trainer.test(models['ResNet50'])

In [None]:
trainer.test(models['VGG16'])

In [None]:
trainer.test(models['MobileNet'])

In [None]:
trainer._plot_training_curves()

In [None]:
trainer.save_results()

In [None]:
trainer.generate_reports()