In [9]:
import os
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

In [10]:
# Set the path to your dataset
dataset_dir = 'dataset'

# Define transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


# Load the full dataset
full_dataset = ImageFolder(root=dataset_dir, transform=transform)

# Split the dataset into training and test sets (80% train, 20% test)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
import torch.nn as nn
import torch.optim as optim

class DiffusionModelWithClassification(nn.Module):
    def __init__(self, num_classes):
        super(DiffusionModelWithClassification, self).__init__()
        # Encoder with additional layers and batch normalization
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
        )

        # Decoder remains the same
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid(),
        )

        # Classifier with additional layers and dropout
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 16 * 16, 512),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(True),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.encoder(x)
        features = self.decoder(x)
        x = self.classifier(x)
        return features, x

# Number of classes (based on the number of subfolders)
num_classes = len(full_dataset.classes)
model = DiffusionModelWithClassification(num_classes=num_classes)
reconstruction_criterion = nn.MSELoss()
classification_criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:
from tqdm import tqdm

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        # Adding noise to the images
        noisy_images = images + 0.1 * torch.randn_like(images)
        noisy_images = torch.clamp(noisy_images, 0., 1.)

        # Forward pass
        features, class_outputs = model(noisy_images)

        # Calculate losses
        reconstruction_loss = reconstruction_criterion(features, images)
        classification_loss = classification_criterion(class_outputs, labels)
        loss = reconstruction_loss + classification_loss

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print average loss per epoch
    average_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}')
    
print("Training Completed.")


Epoch 1/10: 100%|██████████| 516/516 [06:45<00:00,  1.27it/s]


Epoch [1/10], Loss: 2.5729


Epoch 2/10: 100%|██████████| 516/516 [06:46<00:00,  1.27it/s]


Epoch [2/10], Loss: 2.2817


Epoch 3/10: 100%|██████████| 516/516 [06:39<00:00,  1.29it/s]


Epoch [3/10], Loss: 2.0820


Epoch 4/10: 100%|██████████| 516/516 [06:48<00:00,  1.26it/s]


Epoch [4/10], Loss: 1.9520


Epoch 5/10: 100%|██████████| 516/516 [13:30<00:00,  1.57s/it]


Epoch [5/10], Loss: 1.8568


Epoch 6/10: 100%|██████████| 516/516 [14:32<00:00,  1.69s/it]


Epoch [6/10], Loss: 1.7773


Epoch 7/10:  50%|█████     | 258/516 [09:08<09:08,  2.12s/it]


KeyboardInterrupt: 

In [None]:
from tqdm import tqdm

correct = 0
total = 0

model.eval()
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Evaluating"):
        # Forward pass
        _, outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate accuracy
accuracy = 100 * correct / total
print(f'Accuracy on the test images: {accuracy:.2f}%')


Evaluating: 100%|██████████| 129/129 [00:30<00:00,  4.26it/s]

Accuracy on the test images: 30.47%





In [25]:
model_path = 'diffusion_model_with_classification.pth'
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

Model saved to diffusion_model_with_classification.pth


In [None]:
model = DiffusionModelWithClassification(num_classes=num_classes)
model.load_state_dict(torch.load('diffusion_model_with_classification.pth'))
model.eval()  # Set the model to evaluation mode


In [14]:
import torch
from sklearn.metrics import f1_score
import numpy as np
from tqdm import tqdm

# Step 1: Define your model architecture
# Replace this with your actual model class definition
import torch.nn as nn
import torch.optim as optim

class DiffusionModelWithClassification(nn.Module):
    def __init__(self, num_classes):
        super(DiffusionModelWithClassification, self).__init__()
        # Encoder with additional layers and batch normalization
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
        )

        # Decoder remains the same
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid(),
        )

        # Classifier with additional layers and dropout
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 16 * 16, 512),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(True),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.encoder(x)
        features = self.decoder(x)
        x = self.classifier(x)
        return features, x

# Number of classes (based on the number of subfolders)
num_classes = len(full_dataset.classes)
model = DiffusionModelWithClassification(num_classes=num_classes)

# Step 2: Load the state dictionary
state_dict = torch.load('30%_accuracy.pth')
# Remove the mismatched layers from the state_dict
state_dict.pop('decoder.0.weight')
state_dict.pop('decoder.0.bias')
state_dict.pop('classifier.1.weight')
state_dict.pop('classifier.1.bias')

# Load the remaining layers
model.load_state_dict(state_dict, strict=False)

# Step 3: Set the model to evaluation mode
model.eval()

# Assuming your test_loader is already defined in the notebook
# Extract true labels and predictions
y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Evaluating the model"):
        # Get the classification output (the second element in the tuple)
        _, outputs = model(images)
        
        # Find the class with the maximum score
        _, predicted = torch.max(outputs, 1)
        
        # Append true labels and predictions
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Calculate the F1 score
f1 = f1_score(y_true, y_pred, average='weighted')

print(f"F1 Score: {f1}")

  state_dict = torch.load('30%_accuracy.pth')
Evaluating the model: 100%|██████████| 129/129 [00:46<00:00,  2.75it/s]

F1 Score: 0.020068148960429393



