In [None]:
import torch
import torch.optim as optim
from torchvision import models
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms
from google.colab import drive
from transformers import ViTForImageClassification
from torch import nn


In [None]:
pip install transformers




In [None]:

# Attach Google Drive to the Colab runtime so the image folder below is reachable.
drive.mount('/content/drive')

# Random augmentations applied to every image each time it is loaded.
augmentation_steps = [
    transforms.RandomResizedCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(15),
]

# Conversion to a tensor followed by per-channel normalisation.
# NOTE(review): these mean/std values look like the usual ImageNet statistics - confirm.
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Full preprocessing pipeline: augmentations first, tensor conversion last.
transform = transforms.Compose(augmentation_steps + tensor_steps)

# One sub-folder per class under the root; every image goes through `transform`.
dataset = datasets.ImageFolder(root='/content/drive/MyDrive/test2', transform=transform)


Mounted at /content/drive


In [None]:
# Load DenseNet121 with pretrained weights. `weights=` replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag resolved to.
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)

# Freeze every existing parameter; only the replacement head created below is trained
# in this first stage.
for param in model.parameters():
    param.requires_grad = False

# Swap the classifier for a fresh linear layer with one output per dataset class.
# A newly constructed layer has requires_grad=True, so it stays trainable.
num_features = model.classifier.in_features
model.classifier = torch.nn.Linear(num_features, len(dataset.classes))

# Move the model to the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss function, and an optimizer restricted to the new classifier's parameters.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# Single shuffled loader over the whole dataset. No validation split is created here,
# so any accuracy computed from this loader is a training-set figure.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 81.9MB/s]


In [None]:

def evaluate_model(model, loader):
    """Compute the classification accuracy of ``model`` over all batches of ``loader``.

    Args:
        model: Module invoked as ``model(inputs)``. The result may be a plain
            logits tensor or an output object that exposes a ``.logits``
            attribute (as ``ViTForImageClassification`` does).
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.

    Returns:
        The value of ``accuracy_score(all_labels, all_preds)`` for the
        predictions gathered over the whole loader.

    Notes:
        Inputs are moved to the module-level ``device``. The model is switched
        to eval mode and left there; callers that keep training must call
        ``model.train()`` again.
    """
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            # Run the model once and unwrap `.logits` when the output object has it,
            # instead of special-casing a single model class.
            outputs = model(inputs)
            logits = getattr(outputs, "logits", outputs)
            _, preds = torch.max(logits, 1)
            all_preds.extend(preds.cpu().numpy())
            # Labels are only compared on the host, so they are not sent to `device`.
            all_labels.extend(labels.cpu().numpy())
    return accuracy_score(all_labels, all_preds)

In [None]:
# Stage 1: train with the optimizer defined above for a fixed number of epochs.
epochs = 10
for epoch in range(epochs):
    model.train()
    epoch_loss_total = 0.0
    for batch_images, batch_targets in train_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_targets)
        batch_loss.backward()
        optimizer.step()

        epoch_loss_total += batch_loss.item()

    # Mean loss per batch for this epoch.
    mean_epoch_loss = epoch_loss_total / len(train_loader)

    # NOTE: this is measured on `train_loader`, i.e. on the training data itself,
    # not on a held-out set.
    dense_net_accuracy = evaluate_model(model, train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_epoch_loss:.4f}, Accuracy: {dense_net_accuracy:.4f}")

Epoch 1/10, Loss: 0.7076, Accuracy: 0.5818
Epoch 2/10, Loss: 0.6373, Accuracy: 0.6636
Epoch 3/10, Loss: 0.6174, Accuracy: 0.6909
Epoch 4/10, Loss: 0.5537, Accuracy: 0.7455
Epoch 5/10, Loss: 0.5821, Accuracy: 0.7091
Epoch 6/10, Loss: 0.5982, Accuracy: 0.7182
Epoch 7/10, Loss: 0.5611, Accuracy: 0.7591
Epoch 8/10, Loss: 0.5082, Accuracy: 0.7636
Epoch 9/10, Loss: 0.5078, Accuracy: 0.7818
Epoch 10/10, Loss: 0.5329, Accuracy: 0.7500


Step 2: Fine-Tune the Final Layers


In [None]:
# Unfreeze every module of `model.features` from index 6 onward.
# NOTE(review): the previous comment said "last two convolutional layers", but this
# slice takes everything from index 6 to the end - confirm that depth is intended.
for param in model.features[6:].parameters():
    param.requires_grad = True

# Build the new classifier (dropout + linear) BEFORE creating the optimizer.
# An optimizer only updates the parameters handed to it at construction, so creating
# it first would leave this new head out of training entirely.
model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.5),  # 50% dropout in front of the linear layer
    torch.nn.Linear(num_features, len(dataset.classes))
).to(device)  # the rest of the model is already on `device`

# Optimise exactly the parameters that are trainable now: the unfrozen feature
# modules plus the new classifier. Frozen parameters are left out explicitly.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.0001, weight_decay=1e-4)

# Train the model again with fine-tuning
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Evaluate after each epoch (on `train_loader`, so this is training-set accuracy).
    fine_tuned_densenet_accuracy = evaluate_model(model, train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {fine_tuned_densenet_accuracy:.4f}")


Epoch 1/10, Loss: 0.7283, Accuracy: 0.6955
Epoch 2/10, Loss: 0.6124, Accuracy: 0.6909
Epoch 3/10, Loss: 0.5564, Accuracy: 0.8091
Epoch 4/10, Loss: 0.5003, Accuracy: 0.8045
Epoch 5/10, Loss: 0.4222, Accuracy: 0.8182
Epoch 6/10, Loss: 0.4339, Accuracy: 0.8318
Epoch 7/10, Loss: 0.3378, Accuracy: 0.9091
Epoch 8/10, Loss: 0.3090, Accuracy: 0.9091
Epoch 9/10, Loss: 0.2863, Accuracy: 0.9000
Epoch 10/10, Loss: 0.2681, Accuracy: 0.9273


Step 3: Add One More Convolutional Layer and Fine-Tune


In [None]:
# Register an additional 3x3 convolution (1024 -> 512 channels) on `model.features`
# under the name 'new_conv', already placed on the target device.
extra_conv = torch.nn.Conv2d(1024, 512, kernel_size=3, padding=1).to(device)
model.features.add_module('new_conv', extra_conv)

# The classifier input width has to follow the channel count of the last feature
# module, so read it back from there and rebuild the classifier accordingly.
num_features = model.features[-1].out_channels
model.classifier = torch.nn.Linear(num_features, len(dataset.classes)).to(device)

# Make EVERY parameter of the model trainable (not just the newly added layers).
for weight in model.parameters():
    weight.requires_grad = True

# Fresh optimizer over the full parameter set.
optimizer = optim.Adam(model.parameters(), lr=0.00005)

# Train again, now including the extra layer.
for epoch in range(epochs):
    model.train()
    epoch_loss_total = 0.0
    for batch_images, batch_targets in train_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_targets)
        batch_loss.backward()
        optimizer.step()

        epoch_loss_total += batch_loss.item()

    mean_epoch_loss = epoch_loss_total / len(train_loader)

    # Accuracy is computed on `train_loader`, i.e. on the training data.
    new_layer_densenet_accuracy = evaluate_model(model, train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_epoch_loss:.4f}, Accuracy: {new_layer_densenet_accuracy:.4f}")


Epoch 1/10, Loss: 0.3428, Accuracy: 0.9045
Epoch 2/10, Loss: 0.2193, Accuracy: 0.9500
Epoch 3/10, Loss: 0.1685, Accuracy: 0.9182
Epoch 4/10, Loss: 0.1584, Accuracy: 0.9591
Epoch 5/10, Loss: 0.1838, Accuracy: 0.9136
Epoch 6/10, Loss: 0.2164, Accuracy: 0.9500
Epoch 7/10, Loss: 0.1348, Accuracy: 0.9318
Epoch 8/10, Loss: 0.1176, Accuracy: 0.9455
Epoch 9/10, Loss: 0.1706, Accuracy: 0.9364
Epoch 10/10, Loss: 0.1495, Accuracy: 0.9545


In [None]:
import os # Import the os module
from PIL import Image #

Step 4: Apply Pretrained Vision Transformer (ViT)


In [None]:
from transformers import ViTForImageClassification, ViTImageProcessor
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Load the pretrained ViT backbone with a classification head on top.
# `num_labels` is set explicitly so the head has one output per dataset class
# instead of relying on the library's default head size.
vit_model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',  # checkpoint name on the Hugging Face hub
    num_labels=len(dataset.classes),
    hidden_dropout_prob=0.1,  # configuration overrides passed straight through
    attention_probs_dropout_prob=0.1
)

# Image processor belonging to the same checkpoint.
vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

# Move the ViT model to the appropriate device
vit_model = vit_model.to(device)
class CustomDataset(Dataset):
    """Image-folder dataset: one sub-directory per class, images inside it.

    Class indices are assigned to the sub-directories in sorted name order, and
    only real directories are counted. This keeps labels contiguous and
    reproducible, independent of the order ``os.listdir`` happens to return and
    of any stray files in the root folder.

    Args:
        image_folder: Root directory containing one sub-directory per class.
        transform: Optional callable applied to each loaded RGB image.

    Attributes (set in ``__init__``):
        image_files: Paths of all images found, grouped by class, sorted by name.
        labels: Class index for each entry of ``image_files``.
        classes: Sorted class directory names; the position is the class index.
    """

    # File extensions (compared case-insensitively) accepted as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        self.image_files = []
        self.labels = []

        # Only directories are classes; sort them so the index mapping is stable.
        self.classes = sorted(
            entry for entry in os.listdir(image_folder)
            if os.path.isdir(os.path.join(image_folder, entry))
        )

        for class_idx, class_name in enumerate(self.classes):
            class_path = os.path.join(image_folder, class_name)
            # Sorted file order makes the sample order deterministic as well.
            for file_name in sorted(os.listdir(class_path)):
                if file_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_files.append(os.path.join(class_path, file_name))
                    self.labels.append(class_idx)

        # Print some information for debugging
        print(f"Found {len(self.image_files)} image files in {image_folder}")
        print(f"First 5 files: {self.image_files[:5]}")
        print(f"First 5 labels: {self.labels[:5]}")

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB and transformed if a transform was given."""
        # Open inside a context manager so the file handle is released right away;
        # `convert` returns an independent image that stays valid afterwards.
        with Image.open(self.image_files[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label
# Preprocessing for ViT: resize, convert to a tensor, then normalise with the
# mean/std published by the checkpoint's own image processor. Without this step
# the model receives raw [0, 1] pixel values and `vit_processor` is never used.
vit_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=vit_processor.image_mean, std=vit_processor.image_std),
])
vit_dataset = CustomDataset('/content/drive/MyDrive/test2', vit_transform)

# Create a DataLoader for ViT
vit_loader = DataLoader(vit_dataset, batch_size=32, shuffle=True)



Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Found 220 image files in /content/drive/MyDrive/test2
First 5 files: ['/content/drive/MyDrive/test2/Fake/fake_0.jpg', '/content/drive/MyDrive/test2/Fake/fake_100.jpg', '/content/drive/MyDrive/test2/Fake/fake_10.jpg', '/content/drive/MyDrive/test2/Fake/fake_1.jpg', '/content/drive/MyDrive/test2/Fake/fake_101.jpg']
First 5 labels: [0, 0, 0, 0, 0]


In [None]:

# Adam with weight decay over every ViT parameter (replaces the previous optimizer).
optimizer = optim.Adam(vit_model.parameters(), lr=0.0001, weight_decay=1e-4)

for epoch in range(epochs):
    vit_model.train()
    epoch_loss_total = 0.0
    for pixel_batch, target_batch in vit_loader:
        pixel_batch = pixel_batch.to(device)
        target_batch = target_batch.to(device)

        optimizer.zero_grad()
        # The ViT forward pass returns an output object; the scores live in `.logits`.
        batch_logits = vit_model(pixel_batch).logits
        batch_loss = criterion(batch_logits, target_batch)
        batch_loss.backward()
        optimizer.step()

        epoch_loss_total += batch_loss.item()

    mean_epoch_loss = epoch_loss_total / len(vit_loader)

    # Accuracy over `vit_loader`, the same data used for training.
    vit_model_accuracy = evaluate_model(vit_model, vit_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_epoch_loss:.4f}, Accuracy: {vit_model_accuracy:.4f}")


Epoch 1/10, Loss: 0.6605, Accuracy: 0.8409
Epoch 2/10, Loss: 0.4981, Accuracy: 0.8682
Epoch 3/10, Loss: 0.3580, Accuracy: 0.9364
Epoch 4/10, Loss: 0.2566, Accuracy: 0.9682
Epoch 5/10, Loss: 0.1680, Accuracy: 0.9636
Epoch 6/10, Loss: 0.1341, Accuracy: 0.9818
Epoch 7/10, Loss: 0.1313, Accuracy: 0.9818
Epoch 8/10, Loss: 0.0765, Accuracy: 0.9955
Epoch 9/10, Loss: 0.0438, Accuracy: 0.9955
Epoch 10/10, Loss: 0.0299, Accuracy: 1.0000


Step 5: Record Performance


In [None]:
import json

# Pair each experiment's display name with the accuracy from its last epoch,
# in the order the experiments were run.
experiment_names = (
    'DenseNet121',
    'Fine-tuned DenseNet121',
    'DenseNet121 with new layer',
    'Vision Transformer',
)
final_accuracies = (
    dense_net_accuracy,
    fine_tuned_densenet_accuracy,
    new_layer_densenet_accuracy,
    vit_model_accuracy,
)
results = dict(zip(experiment_names, final_accuracies))

# Persist the summary as JSON in the current working directory.
with open("performance_results.json", "w") as f:
    json.dump(results, f)
