In [1]:
import torch
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [1]:
import os

dataset_path = '/kaggle/input/pacs-dataset/dct2_images'
print(os.listdir(dataset_path))


['dct2_images']


In [10]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform_pacs = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

current_directory = os.getcwd()

print("Current Directory:", current_directory)

print("Contents of the current directory:", os.listdir(current_directory))

dataset_path = '/kaggle/input/pacs-dataset/dct2_images'

if os.path.exists(dataset_path):
    print(f"Dataset found at {dataset_path}")
else:
    print(f"Dataset not found at {dataset_path}. Please verify the path.")

pacs_dataset = datasets.ImageFolder(root=dataset_path, transform=transform_pacs)
pacs_loader = DataLoader(pacs_dataset, batch_size=32, shuffle=True)

print(f"Loaded {len(pacs_dataset)} images from PACS dataset.")


Current Directory: /kaggle/working
Contents of the current directory: ['.virtual_documents']
Dataset found at /kaggle/input/pacs-dataset/dct2_images
Loaded 9991 images from PACS dataset.


In [4]:
effnet_model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
effnet_model.classifier[1] = nn.Linear(effnet_model.classifier[1].in_features, len(pacs_dataset.classes))  

for param in effnet_model.features.parameters():
    param.requires_grad = False

effnet_model.to(device)

optimizer_effnet = optim.AdamW(effnet_model.classifier.parameters(), lr=1e-4)
criterion_effnet = nn.CrossEntropyLoss()

def train_effnet(model, loader, optimizer, criterion):
    model.train()
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

def evaluate_effnet(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'EfficientNet PACS Accuracy: {accuracy:.2f}%')

train_effnet(effnet_model, pacs_loader, optimizer_effnet, criterion_effnet)
evaluate_effnet(effnet_model, pacs_loader)

EfficientNet PACS Accuracy: 72.14%


In [5]:
vit_model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)
vit_model.heads.head = nn.Linear(vit_model.heads.head.in_features, len(pacs_dataset.classes))  

for param in vit_model.encoder.parameters():
    param.requires_grad = False

vit_model.to(device)

optimizer_vit = optim.AdamW(vit_model.heads.parameters(), lr=1e-4)
criterion_vit = nn.CrossEntropyLoss()

def train_vit(model, loader, optimizer, criterion):
    model.train()
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        images = transforms.Resize((384, 384))(images)
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

def evaluate_vit(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            images = transforms.Resize((384, 384))(images)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'ViT PACS Accuracy: {accuracy:.2f}%')

train_vit(vit_model, pacs_loader, optimizer_vit, criterion_vit)
evaluate_vit(vit_model, pacs_loader)

ViT PACS Accuracy: 75.36%


In [11]:
from PIL import Image
import torch
from transformers import CLIPModel, CLIPProcessor

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clip_model.to(device)

class_labels = ['dog', 'elephant', 'giraffe', 'guitar', 'horse', 'house', 'person']


def evaluate_clip(model, processor, loader, class_labels):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, targets in loader:
            pil_images = [Image.fromarray((image.permute(1, 2, 0).cpu().numpy() * 255).astype('uint8')) for image in images]
            
            image_inputs = processor(images=pil_images, return_tensors="pt", padding=True).to(device)
            text_inputs = processor(text=class_labels, return_tensors="pt", padding=True).to(device)
            
            outputs = model(**image_inputs, **text_inputs)
            
            logits_per_image = outputs.logits_per_image
            probs = logits_per_image.softmax(dim=1)
            
            _, predicted = torch.max(probs, 1)
            
            correct += (predicted.cpu() == targets).sum().item()
            total += images.size(0)
    
    accuracy = 100 * correct / total
    print(f'CLIP PACS Accuracy: {accuracy:.2f}%')

evaluate_clip(clip_model, clip_processor, pacs_loader, class_labels)


CLIP PACS Accuracy: 14.75%
