<a href="https://colab.research.google.com/github/eshal26/PCA-CNN/blob/main/resnet_PCA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#importing necessary libraries
import os
import shutil

import numpy as np
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models


In [3]:
# Root folders of the three splits, each read through torchvision's ImageFolder.
train_dir = 'train_dataset'
val_dir = 'validation_dataset'
test_dir = 'test_dataset'


def _make_transform(augment):
    """Build the preprocessing pipeline; random flips are added only when augmenting."""
    steps = [transforms.Resize((224, 224))]
    if augment:
        steps += [
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
        ]
    steps += [
        transforms.ToTensor(),
        # Per-channel statistics; presumably measured on this dataset -- TODO confirm.
        transforms.Normalize(
            mean=[0.8245, 0.8547, 0.9387],
            std=[0.1323, 0.1431, 0.0530],
        ),
    ]
    return transforms.Compose(steps)


# Training images are augmented; validation and test images are only resized
# and normalized.
train_transform = _make_transform(augment=True)
val_test_transform = _make_transform(augment=False)

# One ImageFolder dataset per split.
train_dataset, val_dataset, test_dataset = (
    datasets.ImageFolder(root=split_root, transform=split_transform)
    for split_root, split_transform in (
        (train_dir, train_transform),
        (val_dir, val_test_transform),
        (test_dir, val_test_transform),
    )
)

# Batches of 32; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=32, shuffle=False)
    for split_dataset in (val_dataset, test_dataset)
)

In [None]:
class SimpleResNetWithPCA(nn.Module):
    """Two residual blocks, a PCA projection, and a two-layer classifier head.

    The feature extractor (``layer1`` -> ``layer2`` -> global average pool)
    produces one flat feature vector per image. That vector is projected with
    a fitted PCA and classified by ``fc1`` -> ReLU -> dropout -> ``fc2``.

    Args:
        num_classes: Number of output logits produced by ``fc2``.
        pca_components: Number of PCA components; this is the input width of
            ``fc1``, so the fitted PCA must produce exactly this many.
        pca: Optional already-fitted transformer. When ``None``,
            :meth:`fit_pca` must be called before the first forward pass.
    """

    def __init__(self, num_classes=2, pca_components=32, pca=None):
        super().__init__()

        # Residual feature extractor. BasicBlock is not defined in this part
        # of the file and must already be in scope; its two arguments are
        # presumably (in_channels, out_channels) -- TODO confirm.
        self.layer1 = BasicBlock(3, 64)
        self.layer2 = BasicBlock(64, 128)

        self.relu = nn.ReLU(inplace=True)
        # dropout1 is never used in forward(); kept so the module's attributes
        # and printed structure stay unchanged.
        self.dropout1 = nn.Dropout(p=0.5)
        self.dropout2 = nn.Dropout(p=0.5)

        # PCA state: either injected by the caller or set later by fit_pca().
        self.pca = pca
        self.pca_components = pca_components

        # Pooling and classifier head; fc1 consumes the PCA-reduced features.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(pca_components, 32)
        self.fc2 = nn.Linear(32, num_classes)

    def extract_features(self, x):
        """Return the flattened, globally pooled features that feed the PCA."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # (batch, features) layout expected by PCA
        return x

    def _apply_pca(self, x):
        """Project ``x`` with the fitted PCA, keeping the autograd graph intact.

        A fitted linear PCA is the affine map ``(x - mean_) @ components_.T``
        (optionally divided by ``sqrt(explained_variance_)`` when whitening).
        Doing it with torch ops lets gradients reach ``layer1``/``layer2`` and
        avoids a device-to-host copy on every batch.
        """
        pca = self.pca
        components = getattr(pca, "components_", None)
        if components is None:
            # Transformer without a linear component matrix: fall back to its
            # own transform(). This path is NOT differentiable.
            reduced = pca.transform(x.detach().cpu().numpy())
            return torch.from_numpy(reduced).to(x.device, dtype=torch.float32)

        components = torch.as_tensor(components, dtype=x.dtype, device=x.device)
        mean = getattr(pca, "mean_", None)
        if mean is not None:
            x = x - torch.as_tensor(mean, dtype=x.dtype, device=x.device)
        x = x @ components.T
        if getattr(pca, "whiten", False):
            variance = torch.as_tensor(
                pca.explained_variance_, dtype=x.dtype, device=x.device
            )
            x = x / torch.sqrt(variance)
        return x

    def forward(self, x):
        """Return class logits for a batch of images.

        Raises:
            RuntimeError: If no PCA has been supplied or fitted yet.
        """
        if self.pca is None:
            raise RuntimeError("PCA must be fitted before the forward pass")

        x = self.extract_features(x)
        x = self._apply_pca(x)

        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

    def fit_pca(self, dataloader, num_components=32):
        """Fit PCA on the features this model extracts from ``dataloader``.

        The model is switched to eval mode and left there. Features are
        computed without gradients on whatever device the parameters live on.

        Args:
            dataloader: Iterable yielding ``(inputs, labels)`` batches; the
                labels are ignored.
            num_components: ``n_components`` passed to the PCA.

        Raises:
            ValueError: If the dataloader yields no batches, or if the fitted
                PCA's component count differs from the input width of ``fc1``.
        """
        self.eval()
        device = next(self.parameters()).device
        features = []

        with torch.no_grad():
            for inputs, _ in dataloader:
                batch_features = self.extract_features(inputs.to(device))
                features.append(batch_features.cpu().numpy())

        if not features:
            raise ValueError("Cannot fit PCA: the dataloader yielded no batches")

        pca = PCA(n_components=num_components)
        pca.fit(np.vstack(features))

        # Catch a width mismatch here instead of as a shape error in forward().
        if pca.n_components_ != self.fc1.in_features:
            raise ValueError(
                f"PCA produced {pca.n_components_} components but fc1 expects "
                f"{self.fc1.in_features} input features"
            )
        self.pca = pca


# Build the two-class network; its classifier head expects 32 PCA components.
model = SimpleResNetWithPCA(pca_components=32, num_classes=2)

# Fit the PCA on features extracted from the training images so that the
# forward pass has a projection to apply.
model.fit_pca(dataloader=train_loader, num_components=32)

# Show the module structure.
print(model)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

# Training Loop
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and run under no-grad.
    Both metrics are ``nan`` when the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # Accumulate plain Python numbers so no tensors are kept alive
            # across batches. The loss is weighted by batch size so a smaller
            # final batch does not skew the epoch mean.
            _, preds = torch.max(outputs, 1)
            batch_size = inputs.size(0)
            loss_sum += loss.item() * batch_size
            correct += (preds == labels).sum().item()
            seen += batch_size

    if seen == 0:
        # Empty loader: report "no data" instead of dividing by zero.
        return float("nan"), float("nan")
    return loss_sum / seen, correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Module to train; moved to ``device`` in place.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device the model and every batch are moved to.

    Returns:
        The same ``model`` object, left in eval mode after the last
        validation pass (unchanged mode if ``num_epochs`` is 0).
    """
    model.to(device)

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 30)

        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        print(f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    return model

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_epochs = 10
learning_rate = 0.001

# Step 1: Define the model without a fitted PCA and move it to the target
# device first, so the PCA feature extraction below runs there too
# (fit_pca sends each batch to the device of the model's parameters).
model = SimpleResNetWithPCA(num_classes=2, pca_components=32)
model.to(device)

# Step 2: Fit PCA on features extracted from the training data
model.fit_pca(train_loader, num_components=32)

# Step 3: Loss and optimizer; the learning rate comes from the hyperparameter
# above instead of a second hard-coded literal.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Train the model
trained_model = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device=device)
