In [1]:
import torch
from torchvision import models

model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.DEFAULT)

model.eval()

weights = models.EfficientNet_B4_Weights.DEFAULT.get_state_dict(progress=True)

import torch
from torch import nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

efficientnet_b4_model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.DEFAULT)

efficientnet_b4_model.classifier[1] = nn.Linear(efficientnet_b4_model.classifier[1].in_features, 10) 

for param in efficientnet_b4_model.features.parameters():
    param.requires_grad = False

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

optimizer = optim.AdamW(efficientnet_b4_model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
efficientnet_b4_model.to(device)

epochs = 3
for epoch in range(epochs):
    efficientnet_b4_model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = efficientnet_b4_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

efficientnet_b4_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = efficientnet_b4_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy on CIFAR-10: {accuracy:.2f}%')

Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 199MB/s]


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 34851200.57it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1, Loss: 1.665076119275865
Epoch 2, Loss: 1.0600827824055044
Epoch 3, Loss: 0.8666328900873241
Test Accuracy on CIFAR-10: 79.99%


In [2]:
import torch
from torchvision import models

vit_b16_model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)

vit_b16_model.eval()

weights = models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1.get_state_dict(progress=True)

import torch
from torch import nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

vit_b16_model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)

vit_b16_model.heads.head = nn.Linear(vit_b16_model.heads.head.in_features, 10)  

for param in vit_b16_model.parameters():
    param.requires_grad = False

for param in vit_b16_model.heads.parameters():
    param.requires_grad = True

transform = transforms.Compose([
    transforms.Resize((384, 384)), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

optimizer = optim.AdamW(vit_b16_model.heads.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
vit_b16_model.to(device)

epochs = 3
for epoch in range(epochs):
    vit_b16_model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = vit_b16_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

vit_b16_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = vit_b16_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy on CIFAR-10: {accuracy:.2f}%')



Downloading: "https://download.pytorch.org/models/vit_b_16_swag-9ac1b537.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16_swag-9ac1b537.pth
100%|██████████| 331M/331M [00:06<00:00, 51.6MB/s] 


Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 0.36059003191155764
Epoch 2, Loss: 0.1526837245679162
Epoch 3, Loss: 0.13233575544791368
Test Accuracy on CIFAR-10: 95.27%


In [3]:
import os
import torch
from torchvision import datasets, transforms
import torchvision.transforms.functional as F
from PIL import Image
import cv2
import numpy as np
import random
from datasets import load_dataset

shape_dir = './data/shape'
texture_dir = './data/texture'
color_dir = './data/color'

os.makedirs(shape_dir, exist_ok=True)
os.makedirs(texture_dir, exist_ok=True)
os.makedirs(color_dir, exist_ok=True)

transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.Grayscale(num_output_channels=3),  
    transforms.ToTensor(),
])
cifar10_train = datasets.CIFAR10(root='./data/cifar10', train=True, download=True, transform=transform)
cifar10_test = datasets.CIFAR10(root='./data/cifar10', train=False, download=True, transform=transform)

cifar10_data = cifar10_train + cifar10_test

def edge_transform(img):
    img = np.array(img) 
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) 
    edges = cv2.Canny(img_gray, threshold1=100, threshold2=200)  
    
    edges_rgb = np.stack((edges,)*3, axis=-1) 
    return Image.fromarray(edges_rgb) 

data_augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10), 
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),  
])

shape_transform = transforms.Compose([
    data_augmentation, 
    transforms.Lambda(lambda img: edge_transform(img)), 
    transforms.Resize((384, 384)),
    transforms.ToTensor()
])
texture_dataset = load_dataset("/kaggle/input/texture", split="train")

def get_texture_images(dataset):
    textures = []
    for item in dataset:
        img = item['image'] if isinstance(item['image'], Image.Image) else Image.open(item['image']).convert('RGB')
        textures.append(transforms.Resize((384, 384))(img))  
    return textures

texture_images = get_texture_images(texture_dataset)

def blend_with_texture(img, texture):
    alpha = 0.5  
    img_tensor = transforms.ToTensor()(img) if not isinstance(img, torch.Tensor) else img
    texture_tensor = transforms.ToTensor()(texture) if not isinstance(texture, torch.Tensor) else texture
    
    if img_tensor.size() != texture_tensor.size():
        texture_tensor = F.resize(texture_tensor, img_tensor.size()[1:]) 

    blended_tensor = (alpha * img_tensor) + ((1 - alpha) * texture_tensor)
    
    blended_image = transforms.ToPILImage()(blended_tensor.clamp(0, 1)) 
    return blended_image.convert('RGB')  

def apply_texture_transform(img):
    texture_img = random.choice(texture_images) 
    blended_image = blend_with_texture(img, texture_img)  
    return blended_image  

texture_transform = transforms.Compose([
    data_augmentation,  
    transforms.Lambda(lambda x: apply_texture_transform(x)),  
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

color_transform = transforms.Compose([
    data_augmentation, 
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def save_images_limited(dataset, transform, save_dir, limit=1500):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    for i, (img, label) in enumerate(dataset):
        if i >= limit:
            break
        
        img_pil = transforms.ToPILImage()(img) 
        img_transformed = transform(img_pil.convert('RGB')) 
        img_pil_transformed = transforms.ToPILImage()(img_transformed) 
        
        class_dir = os.path.join(save_dir, str(label))
        if not os.path.exists(class_dir):
            os.makedirs(class_dir)
        
        img_pil_transformed.save(os.path.join(class_dir, f'image_{i}.png'))


print("Saving shape bias dataset with edge detection method...")
save_images_limited(cifar10_data, shape_transform, shape_dir)

print("Saving texture bias dataset with DTD textures...")
save_images_limited(cifar10_data, texture_transform, texture_dir)

print("Saving color bias dataset...")
save_images_limited(cifar10_data, color_transform, color_dir)

print("Datasets saved successfully!")

shape_dataset = datasets.ImageFolder(root='./data/shape', transform=shape_transform)
texture_dataset = datasets.ImageFolder(root='./data/texture', transform=texture_transform)
color_dataset = datasets.ImageFolder(root='./data/color', transform=color_transform)

batch_size = 32
shape_loader = DataLoader(shape_dataset, batch_size=batch_size, shuffle=True)
texture_loader = DataLoader(texture_dataset, batch_size=batch_size, shuffle=True)
color_loader = DataLoader(color_dataset, batch_size=batch_size, shuffle=True)

vit_b16_model = vit_b16_model.to(device)
efficientnet_b4_model = efficientnet_b4_model.to(device)

def evaluate_model(model, data_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)  
            outputs = model(images)  
            _, predicted = torch.max(outputs, 1) 
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

shape_bias_acc_vit = evaluate_model(vit_b16_model, shape_loader, device)
texture_bias_acc_vit = evaluate_model(vit_b16_model, texture_loader, device)
color_bias_acc_vit = evaluate_model(vit_b16_model, color_loader, device)

shape_bias_acc_effnet = evaluate_model(efficientnet_b4_model, shape_loader, device)
texture_bias_acc_effnet = evaluate_model(efficientnet_b4_model, texture_loader, device)
color_bias_acc_effnet = evaluate_model(efficientnet_b4_model, color_loader, device)

print(f'ViT_B_16 Shape Bias Accuracy: {shape_bias_acc_vit:.2f}%')
print(f'ViT_B_16 Texture Bias Accuracy: {texture_bias_acc_vit:.2f}%')
print(f'ViT_B_16 Color Bias Accuracy: {color_bias_acc_vit:.2f}%')

print(f'EfficientNet_B4 Shape Bias Accuracy: {shape_bias_acc_effnet:.2f}%')
print(f'EfficientNet_B4 Texture Bias Accuracy: {texture_bias_acc_effnet:.2f}%')
print(f'EfficientNet_B4 Color Bias Accuracy: {color_bias_acc_effnet:.2f}%')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar10/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 48456864.79it/s]


Extracting ./data/cifar10/cifar-10-python.tar.gz to ./data/cifar10
Files already downloaded and verified


Resolving data files:   0%|          | 0/240 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/240 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

Saving shape bias dataset with edge detection method...
Saving texture bias dataset with DTD textures...
Saving color bias dataset...
Datasets saved successfully!
ViT_B_16 Shape Bias Accuracy: 9.53%
ViT_B_16 Texture Bias Accuracy: 10.80%
ViT_B_16 Color Bias Accuracy: 16.00%
EfficientNet_B4 Shape Bias Accuracy: 10.13%
EfficientNet_B4 Texture Bias Accuracy: 9.00%
EfficientNet_B4 Color Bias Accuracy: 9.67%
