In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, confusion_matrix
import time

In [None]:


# ----------------------------
# Model Architecture Components
# ----------------------------

class GlobalFeatureExtractor(nn.Module):
    """
    Global branch: two conv/BN/ReLU stages reduced to one vector per sample.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Width of the first conv stage; the second stage (and the
            returned vector) is twice this wide.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        wide_channels = out_channels * 2
        first_stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),  # keeps H x W
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # halves H and W
        ]
        second_stage = [
            nn.Conv2d(out_channels, wide_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(wide_channels),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),  # collapses the spatial grid to 1 x 1
        ]
        # One flat Sequential so the sub-module indices (0..7) stay stable.
        self.global_layers = nn.Sequential(*first_stage, *second_stage)

    def forward(self, x):
        """Returns a [batch_size, out_channels*2] tensor of pooled features."""
        pooled = self.global_layers(x)  # [B, out_channels*2, 1, 1]
        batch = pooled.size(0)
        return pooled.view(batch, -1)

class LocalFeatureExtractor(nn.Module):
    """
    Local branch: two conv/BN/ReLU stages followed by a single 2x2 max-pool,
    so the output keeps a spatial grid instead of a pooled vector.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Width of the first conv stage; the output has twice as
            many channels.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        wide_channels = out_channels * 2
        stack = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),  # keeps H x W
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, wide_channels, kernel_size=3, padding=1),  # keeps H x W
            nn.BatchNorm2d(wide_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # halves H and W
        ]
        self.local_layers = nn.Sequential(*stack)

    def forward(self, x):
        """Returns a [batch_size, out_channels*2, H/2, W/2] feature map."""
        return self.local_layers(x)

class AdaptiveAttentionModule(nn.Module):
    """
    Produces two per-sample softmax weights, one for the global branch and one
    for the local branch.

    Args:
        global_dim: Length of the global feature vector.
        local_dim: Channel count of the local feature map.
    """
    def __init__(self, global_dim, local_dim):
        super().__init__()
        hidden_units = 128
        self.attention = nn.Sequential(
            nn.Linear(global_dim + local_dim, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, 2),  # one logit per branch
            nn.Softmax(dim=1),           # the two weights sum to 1 per sample
        )

    def forward(self, global_feat, local_feat):
        """
        Args:
            global_feat: [B, global_dim] tensor.
            local_feat: [B, local_dim, H, W] tensor.

        Returns:
            (global_weight, local_weight), each of shape [B, 1].
        """
        # Summarise the local map by its spatial mean so both inputs are vectors.
        local_summary = local_feat.mean(dim=(2, 3))               # [B, local_dim]
        joint = torch.cat([global_feat, local_summary], dim=1)    # [B, global_dim + local_dim]
        branch_weights = self.attention(joint)                    # [B, 2]
        # Width-1 slices keep the trailing dimension, giving [B, 1] columns.
        return branch_weights[:, 0:1], branch_weights[:, 1:2]

class DynamicHolisticPerceptionNetwork(nn.Module):
    """
    Classifier that fuses a global and a local feature branch, each scaled by
    a learned per-sample attention weight.

    Args:
        num_classes: Number of output logits (default 10).

    Shape comments assume 3x32x32 inputs such as CIFAR-10.
    """
    def __init__(self, num_classes=10):
        super().__init__()
        stem_width = 64
        branch_width = 128
        feat_dim = branch_width * 2  # each branch emits 256 channels

        # Shared stem: two conv/BN/ReLU/max-pool stages (32x32 -> 16x16 -> 8x8).
        self.initial_conv = nn.Sequential(
            nn.Conv2d(3, stem_width, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(stem_width, stem_width, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Two parallel branches fed by the same stem output.
        self.global_extractor = GlobalFeatureExtractor(in_channels=stem_width, out_channels=branch_width)  # [B, 256]
        self.local_extractor = LocalFeatureExtractor(in_channels=stem_width, out_channels=branch_width)    # [B, 256, 4, 4]

        # Per-sample weighting of the two branches.
        self.adaptive_attention = AdaptiveAttentionModule(global_dim=feat_dim, local_dim=feat_dim)

        # Classifier head over the concatenated, weighted branch vectors.
        self.fusion_fc = nn.Sequential(
            nn.Linear(feat_dim * 2, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Maps an image batch to [B, num_classes] logits."""
        stem_out = self.initial_conv(x)                  # [B, 64, 8, 8]
        global_feat = self.global_extractor(stem_out)    # [B, 256]
        local_feat = self.local_extractor(stem_out)      # [B, 256, 4, 4]
        global_weight, local_weight = self.adaptive_attention(global_feat, local_feat)  # each [B, 1]

        # The local map is averaged to a vector before being weighted.
        batch = local_feat.size(0)
        local_vec = F.adaptive_avg_pool2d(local_feat, (1, 1)).view(batch, -1)  # [B, 256]

        fused = torch.cat(
            [global_feat * global_weight, local_vec * local_weight],
            dim=1,
        )  # [B, 512]
        return self.fusion_fc(fused)

# ----------------------------
# Evaluation Function
# ----------------------------

def evaluate_model(model, dataloader, device):
    """
    Runs the model over every batch of `dataloader` without gradients.

    The model is switched to eval mode and left in that mode on return.

    Returns:
        (acc, cm): accuracy as a percentage and the confusion matrix of
        true labels versus predicted labels.
    """
    predicted, expected = [], []
    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)               # [B, num_classes]
            batch_preds = logits.max(dim=1).indices    # [B], index of the largest logit
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())
    accuracy_pct = accuracy_score(expected, predicted) * 100
    matrix = confusion_matrix(expected, predicted)
    return accuracy_pct, matrix

# ----------------------------
# Main Training and Evaluation Pipeline
# ----------------------------

def main():
    """
    Trains DynamicHolisticPerceptionNetwork on CIFAR-10 and evaluates it.

    Steps: pick the device, build the train/test dataloaders (downloading
    CIFAR-10 into ./data when needed), train with Adam and a StepLR schedule,
    write the weights to 'best_model.pth' whenever the per-epoch accuracy
    improves, then reload that checkpoint and print the final accuracy and
    confusion matrix.

    NOTE(review): the "Val Acc" used for checkpoint selection is computed on
    the CIFAR-10 test split, the same split used for the final report, so the
    final test accuracy is optimistically biased. Consider holding out part
    of the training split for model selection.
    """
    # ----------------------------
    # Device Configuration
    # ----------------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # ----------------------------
    # Hyperparameters
    # ----------------------------
    num_epochs = 20
    batch_size = 128
    learning_rate = 1e-3
    num_classes = 10  # For CIFAR-10
    checkpoint_path = 'best_model.pth'

    # ----------------------------
    # Data Transformations
    # ----------------------------
    # Per-channel normalization statistics shared by both pipelines.
    cifar_mean = (0.4914, 0.4822, 0.4465)
    cifar_std = (0.2023, 0.1994, 0.2010)

    # Training pipeline adds random crop + horizontal flip augmentation.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    # ----------------------------
    # Load Datasets
    # ----------------------------
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=2)

    # ----------------------------
    # Initialize Model, Loss, Optimizer
    # ----------------------------
    model = DynamicHolisticPerceptionNetwork(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Multiply the learning rate by 0.1 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # ----------------------------
    # Training Loop
    # ----------------------------
    best_val_acc = 0.0
    for epoch in range(num_epochs):
        # evaluate_model() leaves the model in eval mode, so re-enable
        # training behaviour (dropout, batch-norm updates) every epoch.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time()

        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)  # [B, num_classes]
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Statistics: weight the batch-mean loss by the batch size so the
            # epoch loss is a true per-sample average.
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total
        elapsed_time = time.time() - start_time  # training time only

        # Validation (the per-epoch confusion matrix is not needed here)
        val_acc, _ = evaluate_model(model, testloader, device)

        # Scheduler step
        scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}] - "
              f"Loss: {epoch_loss:.4f} - "
              f"Train Acc: {epoch_acc:.2f}% - "
              f"Val Acc: {val_acc:.2f}% - "
              f"Time: {elapsed_time:.2f}s")

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Best model saved with Val Acc: {best_val_acc:.2f}%")

    # ----------------------------
    # Final Evaluation
    # ----------------------------
    print("\nTraining Completed!")
    print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

    # Load the best model for final evaluation.
    # map_location keeps the load working when the checkpoint was written on a
    # different device than the current one; weights_only=True restricts
    # unpickling to tensors and plain containers, which is all a state_dict
    # needs.
    best_state = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(best_state)
    final_acc, final_cm = evaluate_model(model, testloader, device)
    print(f"\nFinal Test Accuracy: {final_acc:.2f}%")
    print("Confusion Matrix:")
    print(final_cm)

# Entry point: run the pipeline only when this module is executed directly,
# not when it is imported.
if __name__ == '__main__':
    main()


Using device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:05<00:00, 30.8MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch [1/20] - Loss: 1.4444 - Train Acc: 46.85% - Val Acc: 58.94% - Time: 30.37s
Best model saved with Val Acc: 58.94%
Epoch [2/20] - Loss: 1.0599 - Train Acc: 62.18% - Val Acc: 62.29% - Time: 24.17s
Best model saved with Val Acc: 62.29%
Epoch [3/20] - Loss: 0.8979 - Train Acc: 68.34% - Val Acc: 67.59% - Time: 21.73s
Best model saved with Val Acc: 67.59%
Epoch [4/20] - Loss: 0.7947 - Train Acc: 72.15% - Val Acc: 67.43% - Time: 21.18s
Epoch [5/20] - Loss: 0.7330 - Train Acc: 74.32% - Val Acc: 70.59% - Time: 23.07s
Best model saved with Val Acc: 70.59%
Epoch [6/20] - Loss: 0.6797 - Train Acc: 76.48% - Val Acc: 74.75% - Time: 21.17s
Best model saved with Val Acc: 74.75%
Epoch [7/20] - Loss: 0.6388 - Train Acc: 77.88% - Val Acc: 74.43% - Time: 21.11s
Epoch [8/20] - Loss: 0.6095 - Train Acc: 79.06% - Val Acc: 75.38% - Time: 22.56s
Best model saved with Val Acc: 75.38%
Epoch [9/20] - Loss: 0.5775 - Train

  model.load_state_dict(torch.load('best_model.pth'))



Final Test Accuracy: 84.69%
Confusion Matrix:
[[872  14  27  13  13   1   5   9  35  11]
 [  6 949   1   2   1   0   1   3   8  29]
 [ 33   1 807  25  58  23  31  13   5   4]
 [ 14   2  64 650  61 122  37  29  15   6]
 [  9   2  42  21 868  13  20  22   2   1]
 [  9   1  40 122  38 739  10  38   1   2]
 [  6   1  49  28  13   9 889   2   2   1]
 [ 10   2  18  21  38  34   3 871   1   2]
 [ 33  10   2   6   3   0   3   4 931   8]
 [ 18  58   5   4   0   0   4   2  16 893]]


In [None]:


# ----------------------------
# Model Architecture Components
# ----------------------------

class GlobalFeatureExtractor(nn.Module):
    """
    Global branch: two conv/BN/ReLU stages reduced to one vector per sample.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Width of the first conv stage; the second stage (and the
            returned vector) is twice this wide.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        wide_channels = out_channels * 2
        first_stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),  # keeps H x W
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # halves H and W
        ]
        second_stage = [
            nn.Conv2d(out_channels, wide_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(wide_channels),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),  # collapses the spatial grid to 1 x 1
        ]
        # One flat Sequential so the sub-module indices (0..7) stay stable.
        self.global_layers = nn.Sequential(*first_stage, *second_stage)

    def forward(self, x):
        """Returns a [batch_size, out_channels*2] tensor of pooled features."""
        pooled = self.global_layers(x)  # [B, out_channels*2, 1, 1]
        batch = pooled.size(0)
        return pooled.view(batch, -1)

class LocalFeatureExtractor(nn.Module):
    """
    Local branch: two conv/BN/ReLU stages followed by a single 2x2 max-pool,
    so the output keeps a spatial grid instead of a pooled vector.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Width of the first conv stage; the output has twice as
            many channels.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        wide_channels = out_channels * 2
        stack = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),  # keeps H x W
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, wide_channels, kernel_size=3, padding=1),  # keeps H x W
            nn.BatchNorm2d(wide_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # halves H and W
        ]
        self.local_layers = nn.Sequential(*stack)

    def forward(self, x):
        """Returns a [batch_size, out_channels*2, H/2, W/2] feature map."""
        return self.local_layers(x)

class AdaptiveAttentionModule(nn.Module):
    """
    Produces two per-sample softmax weights, one for the global branch and one
    for the local branch.

    Args:
        global_dim: Length of the global feature vector.
        local_dim: Channel count of the local feature map.
    """
    def __init__(self, global_dim, local_dim):
        super().__init__()
        hidden_units = 128
        self.attention = nn.Sequential(
            nn.Linear(global_dim + local_dim, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, 2),  # one logit per branch
            nn.Softmax(dim=1),           # the two weights sum to 1 per sample
        )

    def forward(self, global_feat, local_feat):
        """
        Args:
            global_feat: [B, global_dim] tensor.
            local_feat: [B, local_dim, H, W] tensor.

        Returns:
            (global_weight, local_weight), each of shape [B, 1].
        """
        # Summarise the local map by its spatial mean so both inputs are vectors.
        local_summary = local_feat.mean(dim=(2, 3))               # [B, local_dim]
        joint = torch.cat([global_feat, local_summary], dim=1)    # [B, global_dim + local_dim]
        branch_weights = self.attention(joint)                    # [B, 2]
        # Width-1 slices keep the trailing dimension, giving [B, 1] columns.
        return branch_weights[:, 0:1], branch_weights[:, 1:2]

class DynamicHolisticPerceptionNetwork(nn.Module):
    """
    Classifier that fuses a global and a local feature branch, each scaled by
    a learned per-sample attention weight.

    Args:
        num_classes: Number of output logits (default 10).

    Shape comments assume 3x32x32 inputs such as CIFAR-10.
    """
    def __init__(self, num_classes=10):
        super().__init__()
        stem_width = 64
        branch_width = 128
        feat_dim = branch_width * 2  # each branch emits 256 channels

        # Shared stem: two conv/BN/ReLU/max-pool stages (32x32 -> 16x16 -> 8x8).
        self.initial_conv = nn.Sequential(
            nn.Conv2d(3, stem_width, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(stem_width, stem_width, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Two parallel branches fed by the same stem output.
        self.global_extractor = GlobalFeatureExtractor(in_channels=stem_width, out_channels=branch_width)  # [B, 256]
        self.local_extractor = LocalFeatureExtractor(in_channels=stem_width, out_channels=branch_width)    # [B, 256, 4, 4]

        # Per-sample weighting of the two branches.
        self.adaptive_attention = AdaptiveAttentionModule(global_dim=feat_dim, local_dim=feat_dim)

        # Classifier head over the concatenated, weighted branch vectors.
        self.fusion_fc = nn.Sequential(
            nn.Linear(feat_dim * 2, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Maps an image batch to [B, num_classes] logits."""
        stem_out = self.initial_conv(x)                  # [B, 64, 8, 8]
        global_feat = self.global_extractor(stem_out)    # [B, 256]
        local_feat = self.local_extractor(stem_out)      # [B, 256, 4, 4]
        global_weight, local_weight = self.adaptive_attention(global_feat, local_feat)  # each [B, 1]

        # The local map is averaged to a vector before being weighted.
        batch = local_feat.size(0)
        local_vec = F.adaptive_avg_pool2d(local_feat, (1, 1)).view(batch, -1)  # [B, 256]

        fused = torch.cat(
            [global_feat * global_weight, local_vec * local_weight],
            dim=1,
        )  # [B, 512]
        return self.fusion_fc(fused)

# ----------------------------
# Evaluation Function
# ----------------------------

def evaluate_model(model, dataloader, device):
    """
    Runs the model over every batch of `dataloader` without gradients.

    The model is switched to eval mode and left in that mode on return.

    Returns:
        (acc, cm): accuracy as a percentage and the confusion matrix of
        true labels versus predicted labels.
    """
    predicted, expected = [], []
    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)               # [B, num_classes]
            batch_preds = logits.max(dim=1).indices    # [B], index of the largest logit
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())
    accuracy_pct = accuracy_score(expected, predicted) * 100
    matrix = confusion_matrix(expected, predicted)
    return accuracy_pct, matrix

# ----------------------------
# Main Training and Evaluation Pipeline
# ----------------------------

def main():
    """
    Trains DynamicHolisticPerceptionNetwork on CIFAR-10 and evaluates it.

    Steps: pick the device, build the train/test dataloaders (downloading
    CIFAR-10 into ./data when needed), train with Adam and a StepLR schedule,
    write the weights to 'best_model.pth' whenever the per-epoch accuracy
    improves, then reload that checkpoint and print the final accuracy and
    confusion matrix.

    NOTE(review): the "Val Acc" used for checkpoint selection is computed on
    the CIFAR-10 test split, the same split used for the final report, so the
    final test accuracy is optimistically biased. Consider holding out part
    of the training split for model selection.
    """
    # ----------------------------
    # Device Configuration
    # ----------------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # ----------------------------
    # Hyperparameters
    # ----------------------------
    num_epochs = 100
    batch_size = 128
    learning_rate = 1e-3
    num_classes = 10  # For CIFAR-10
    checkpoint_path = 'best_model.pth'

    # ----------------------------
    # Data Transformations
    # ----------------------------
    # Per-channel normalization statistics shared by both pipelines.
    cifar_mean = (0.4914, 0.4822, 0.4465)
    cifar_std = (0.2023, 0.1994, 0.2010)

    # Training pipeline adds random crop + horizontal flip augmentation.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    # ----------------------------
    # Load Datasets
    # ----------------------------
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=2)

    # ----------------------------
    # Initialize Model, Loss, Optimizer
    # ----------------------------
    model = DynamicHolisticPerceptionNetwork(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Multiply the learning rate by 0.1 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # ----------------------------
    # Training Loop
    # ----------------------------
    best_val_acc = 0.0
    for epoch in range(num_epochs):
        # evaluate_model() leaves the model in eval mode, so re-enable
        # training behaviour (dropout, batch-norm updates) every epoch.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time()

        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)  # [B, num_classes]
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Statistics: weight the batch-mean loss by the batch size so the
            # epoch loss is a true per-sample average.
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total
        elapsed_time = time.time() - start_time  # training time only

        # Validation (the per-epoch confusion matrix is not needed here)
        val_acc, _ = evaluate_model(model, testloader, device)

        # Scheduler step
        scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}] - "
              f"Loss: {epoch_loss:.4f} - "
              f"Train Acc: {epoch_acc:.2f}% - "
              f"Val Acc: {val_acc:.2f}% - "
              f"Time: {elapsed_time:.2f}s")

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Best model saved with Val Acc: {best_val_acc:.2f}%")

    # ----------------------------
    # Final Evaluation
    # ----------------------------
    print("\nTraining Completed!")
    print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

    # Load the best model for final evaluation.
    # map_location keeps the load working when the checkpoint was written on a
    # different device than the current one; weights_only=True restricts
    # unpickling to tensors and plain containers, which is all a state_dict
    # needs.
    best_state = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(best_state)
    final_acc, final_cm = evaluate_model(model, testloader, device)
    print(f"\nFinal Test Accuracy: {final_acc:.2f}%")
    print("Confusion Matrix:")
    print(final_cm)

# Entry point: run the pipeline only when this module is executed directly,
# not when it is imported.
if __name__ == '__main__':
    main()


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/100] - Loss: 1.4398 - Train Acc: 46.47% - Val Acc: 54.80% - Time: 20.36s
Best model saved with Val Acc: 54.80%
Epoch [2/100] - Loss: 1.0561 - Train Acc: 62.32% - Val Acc: 56.22% - Time: 20.11s
Best model saved with Val Acc: 56.22%
Epoch [3/100] - Loss: 0.9075 - Train Acc: 67.89% - Val Acc: 68.89% - Time: 22.26s
Best model saved with Val Acc: 68.89%
Epoch [4/100] - Loss: 0.8072 - Train Acc: 71.58% - Val Acc: 70.52% - Time: 20.13s
Best model saved with Val Acc: 70.52%
Epoch [5/100] - Loss: 0.7428 - Train Acc: 74.08% - Val Acc: 69.11% - Time: 20.27s
Epoch [6/100] - Loss: 0.6951 - Train Acc: 75.80% - Val Acc: 74.06% - Time: 21.99s
Best model saved with Val Acc: 74.06%
Epoch [7/100] - Loss: 0.6561 - Train Acc: 77.22% - Val Acc: 75.05% - Time: 20.21s
Best model saved with Val Acc: 75.05%
Epoch [8/100] - Loss: 0.6250 - Train Acc: 78.52% - Val Acc: 78.21% - Time: 21.34s
Best model saved with

  model.load_state_dict(torch.load('best_model.pth'))



Final Test Accuracy: 84.24%
Confusion Matrix:
[[866  10  25  10  13   0   5  11  37  23]
 [  7 932   0   4   0   1   2   1  11  42]
 [ 39   3 782  43  48  29  36  12   4   4]
 [ 16   3  41 680  56 117  46  24   8   9]
 [  6   2  34  27 848  22  30  26   3   2]
 [ 11   2  23 125  36 754  11  34   0   4]
 [  6   2  30  40  17  12 882   4   4   3]
 [ 10   1  17  30  29  38   3 864   1   7]
 [ 45  15   1   4   1   2   5   2 914  11]
 [ 17  48   3   8   1   0   2   5  14 902]]


# With ResidualBlock

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, confusion_matrix
import time

# ----------------------------
# Model Architecture Components with Residual Connections
# ----------------------------

class ResidualBlock(nn.Module):
    """
    Two 3x3 conv + batch-norm layers with a skip connection added before the
    final ReLU.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (default 1).
    """
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Main path. Convs are bias-free; each is followed by a batch-norm.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip path: an empty Sequential (identity) unless the main path
        # changes resolution or width, in which case a strided 1x1 conv + BN
        # projects the input to the matching shape.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            projection = [
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        else:
            projection = []
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Returns relu(main_path(x) + shortcut(x))."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        summed = main + self.shortcut(x)
        return self.relu(summed)

class GlobalFeatureExtractor(nn.Module):
    """
    Global branch built from two residual blocks and a global average pool.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Width of the first residual block; the second block and
            the returned vector are twice this wide.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        wide_channels = out_channels * 2
        self.layer1 = ResidualBlock(in_channels, out_channels, stride=1)    # keeps H x W
        self.layer2 = ResidualBlock(out_channels, wide_channels, stride=2)  # strided block
        self.pool = nn.AdaptiveAvgPool2d((1, 1))                            # spatial grid -> 1 x 1

    def forward(self, x):
        """Returns a [B, out_channels*2] tensor of pooled features."""
        pooled = self.pool(self.layer2(self.layer1(x)))  # [B, out_channels*2, 1, 1]
        batch = pooled.size(0)
        return pooled.view(batch, -1)

class LocalFeatureExtractor(nn.Module):
    """
    Local branch built from two residual blocks, pooled to a 2x2 grid so some
    spatial layout survives.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Width of the first residual block; the output has twice
            as many channels.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        wide_channels = out_channels * 2
        self.layer1 = ResidualBlock(in_channels, out_channels, stride=1)    # keeps H x W
        self.layer2 = ResidualBlock(out_channels, wide_channels, stride=2)  # strided block
        self.pool = nn.AdaptiveAvgPool2d((2, 2))                            # fixed 2 x 2 output grid

    def forward(self, x):
        """Returns a [B, out_channels*2, 2, 2] feature map."""
        blocks_out = self.layer2(self.layer1(x))
        return self.pool(blocks_out)

class AdaptiveAttentionModule(nn.Module):
    """
    Produces two per-sample softmax weights, one for the global branch and one
    for the local branch.

    Args:
        global_dim: Length of the global feature vector.
        local_dim: Channel count of the local feature map.
    """
    def __init__(self, global_dim, local_dim):
        super().__init__()
        hidden_units = 128
        self.attention = nn.Sequential(
            nn.Linear(global_dim + local_dim, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, 2),  # one logit per branch
            nn.Softmax(dim=1),           # the two weights sum to 1 per sample
        )

    def forward(self, global_feat, local_feat):
        """
        Args:
            global_feat: [B, global_dim] tensor.
            local_feat: [B, local_dim, H, W] tensor.

        Returns:
            (global_weight, local_weight), each of shape [B, 1].
        """
        # Summarise the local map by its spatial mean so both inputs are vectors.
        local_summary = local_feat.mean(dim=(2, 3))               # [B, local_dim]
        joint = torch.cat([global_feat, local_summary], dim=1)    # [B, global_dim + local_dim]
        branch_weights = self.attention(joint)                    # [B, 2]
        # Width-1 slices keep the trailing dimension, giving [B, 1] columns.
        return branch_weights[:, 0:1], branch_weights[:, 1:2]

class DynamicHolisticPerceptionNetwork(nn.Module):
    """
    Classifier that fuses a global and a local residual feature branch, each
    scaled by a learned per-sample attention weight.

    Args:
        num_classes: Number of output logits (default 10).

    Shape comments assume 3x32x32 inputs such as CIFAR-10.
    """
    def __init__(self, num_classes=10):
        super().__init__()
        stem_width = 64
        branch_width = 128
        feat_dim = branch_width * 2  # each branch emits 256 channels

        # Shared stem: two conv/BN/ReLU/max-pool stages (32x32 -> 16x16 -> 8x8).
        self.initial_conv = nn.Sequential(
            nn.Conv2d(3, stem_width, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(stem_width, stem_width, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_width),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Two parallel residual branches fed by the same stem output.
        self.global_extractor = GlobalFeatureExtractor(in_channels=stem_width, out_channels=branch_width)  # [B, 256]
        self.local_extractor = LocalFeatureExtractor(in_channels=stem_width, out_channels=branch_width)    # [B, 256, 2, 2]

        # Per-sample weighting of the two branches.
        self.adaptive_attention = AdaptiveAttentionModule(global_dim=feat_dim, local_dim=feat_dim)

        # Classifier head over the concatenated, weighted branch vectors.
        self.fusion_fc = nn.Sequential(
            nn.Linear(feat_dim * 2, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Maps an image batch to [B, num_classes] logits."""
        stem_out = self.initial_conv(x)                  # [B, 64, 8, 8]
        global_feat = self.global_extractor(stem_out)    # [B, 256]
        local_feat = self.local_extractor(stem_out)      # [B, 256, 2, 2]
        global_weight, local_weight = self.adaptive_attention(global_feat, local_feat)  # each [B, 1]

        # The local map is averaged to a vector before being weighted.
        batch = local_feat.size(0)
        local_vec = F.adaptive_avg_pool2d(local_feat, (1, 1)).view(batch, -1)  # [B, 256]

        fused = torch.cat(
            [global_feat * global_weight, local_vec * local_weight],
            dim=1,
        )  # [B, 512]
        return self.fusion_fc(fused)

# ----------------------------
# Evaluation Function
# ----------------------------

def evaluate_model(model, dataloader, device):
    """
    Runs the model over every batch of `dataloader` without gradients.

    The model is switched to eval mode and left in that mode on return.

    Returns:
        (acc, cm): accuracy as a percentage and the confusion matrix of
        true labels versus predicted labels.
    """
    predicted, expected = [], []
    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)               # [B, num_classes]
            batch_preds = logits.max(dim=1).indices    # [B], index of the largest logit
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())
    accuracy_pct = accuracy_score(expected, predicted) * 100
    matrix = confusion_matrix(expected, predicted)
    return accuracy_pct, matrix

# ----------------------------
# Main Training and Evaluation Pipeline
# ----------------------------

def main():
    """
    Trains DynamicHolisticPerceptionNetwork on CIFAR-10 and reports its metrics.

    Steps: select the device, build train/test loaders with normalization
    only (no augmentation), train with Adam and a StepLR schedule for a fixed
    number of epochs, save the weights to 'best_model.pth' whenever the
    per-epoch evaluation accuracy improves, then reload that checkpoint and
    print its accuracy and confusion matrix.
    """
    # ----------------------------
    # Device Configuration
    # ----------------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # ----------------------------
    # Hyperparameters
    # ----------------------------
    num_epochs = 50
    batch_size = 128
    learning_rate = 1e-3
    num_classes = 10  # For CIFAR-10
    checkpoint_path = 'best_model.pth'

    # ----------------------------
    # Data Transformations
    # ----------------------------
    # No augmentation: both splits get the same tensor conversion + normalization
    norm_mean = (0.4914, 0.4822, 0.4465)
    norm_std = (0.2023, 0.1994, 0.2010)

    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    # ----------------------------
    # Load Datasets
    # ----------------------------
    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=2)

    # ----------------------------
    # Initialize Model, Loss, Optimizer
    # ----------------------------
    model = DynamicHolisticPerceptionNetwork(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    # Weight decay acts as regularization
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    # Learning rate is multiplied by 0.1 every 20 epochs
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    # ----------------------------
    # Training Loop
    # ----------------------------
    best_val_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        loss_sum, n_correct, n_seen = 0.0, 0, 0
        epoch_start = time.time()

        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Clear gradients, forward, backward, update
            optimizer.zero_grad()
            outputs = model(inputs)  # [B, num_classes]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Statistics: weight the batch-mean loss by batch size so the
            # epoch figure is a per-sample average
            loss_sum += loss.item() * inputs.size(0)
            predicted = outputs.max(1)[1]
            n_seen += labels.size(0)
            n_correct += predicted.eq(labels).sum().item()

        epoch_loss = loss_sum / n_seen
        epoch_acc = 100. * n_correct / n_seen
        # Timing covers the training pass only, not the evaluation below
        elapsed_time = time.time() - epoch_start

        # Validation
        # NOTE: this pass runs on testloader, i.e. the same split used for
        # the final evaluation at the end of this function.
        val_acc, _ = evaluate_model(model, testloader, device)

        # Scheduler step
        scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}] - "
              f"Loss: {epoch_loss:.4f} - "
              f"Train Acc: {epoch_acc:.2f}% - "
              f"Val Acc: {val_acc:.2f}% - "
              f"Time: {elapsed_time:.2f}s")

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Best model saved with Val Acc: {best_val_acc:.2f}%")

    # ----------------------------
    # Final Evaluation
    # ----------------------------
    print("\nTraining Completed!")
    print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

    torch_version = torch.__version__
    print(f"PyTorch Version: {torch_version}")

    # Prefer weights_only=True when loading; versions of torch.load that do
    # not accept the keyword raise TypeError, in which case load the default way
    try:
        state_dict = torch.load(checkpoint_path, weights_only=True)
    except TypeError:
        state_dict = torch.load(checkpoint_path)
        print("weights_only parameter not supported in this PyTorch version. "
              "Consider upgrading PyTorch for enhanced security.")

    model.load_state_dict(state_dict)

    final_acc, final_cm = evaluate_model(model, testloader, device)
    print(f"\nFinal Test Accuracy: {final_acc:.2f}%")
    print("Confusion Matrix:")
    print(final_cm)

# Entry point: run the training pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/50] - Loss: 1.3251 - Train Acc: 51.45% - Val Acc: 62.81% - Time: 25.09s
Best model saved with Val Acc: 62.81%
Epoch [2/50] - Loss: 0.9114 - Train Acc: 67.81% - Val Acc: 69.06% - Time: 16.00s
Best model saved with Val Acc: 69.06%
Epoch [3/50] - Loss: 0.7323 - Train Acc: 74.54% - Val Acc: 73.65% - Time: 16.32s
Best model saved with Val Acc: 73.65%
Epoch [4/50] - Loss: 0.6154 - Train Acc: 78.69% - Val Acc: 75.33% - Time: 16.12s
Best model saved with Val Acc: 75.33%
Epoch [5/50] - Loss: 0.5231 - Train Acc: 81.93% - Val Acc: 76.34% - Time: 17.66s
Best model saved with Val Acc: 76.34%
Epoch [6/50] - Loss: 0.4506 - Train Acc: 84.45% - Val Acc: 77.25% - Time: 16.13s
Best model saved with Val Acc: 77.25%
Epoch [7/50] - Loss: 0.3815 - Train Acc: 86.83% - Val Acc: 78.21% - Time: 16.23s
Best model saved with Val Acc: 78.21%
Epoch [8/50] - Loss: 0.3224 - Train Acc: 88.93% - Val Acc: 78.40% - Time