In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

class PatchEmbedding(nn.Module):
    """Turn an image batch into a sequence of patch embeddings.

    The projection is a convolution whose kernel size and stride both equal
    ``patch_size``, so every output location is computed from one
    non-overlapping patch of the input.

    Args:
        img_size: Side length used to compute ``num_patches``.
        patch_size: Side length of one patch (conv kernel size and stride).
        in_chans: Number of input channels for the convolution.
        embed_dim: Number of output channels, i.e. the embedding size.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        patches_per_side = img_size // patch_size
        self.num_patches = patches_per_side * patches_per_side
        self.proj = nn.Conv2d(
            in_chans,
            embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
        )

    def forward(self, x):
        """Project ``x`` and return it as (batch, patches, embed_dim)."""
        feature_map = self.proj(x)
        # Collapse the two spatial axes into one, then put that axis before
        # the channel axis so each row is one patch embedding.
        return feature_map.flatten(2).transpose(1, 2)

class Attention(nn.Module):
    """Multi-head self-attention over a 3-D input of shape (B, N, C).

    Queries, keys and values come from one bias-free linear layer. Each head
    computes softmax-normalised scaled dot-product weights, the per-head
    results are merged back into a ``C``-sized vector per token, and a final
    linear layer projects the result.

    Args:
        dim: Size ``C`` of the last input dimension.
        num_heads: Number of attention heads; must divide ``dim`` evenly.

    Raises:
        ValueError: If ``dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, dim, num_heads=8):
        super().__init__()
        # Fail at construction time: otherwise the mismatch only surfaces as
        # an opaque reshape error on the first forward pass.
        if dim % num_heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        # Scale the dot products by 1 / sqrt(head_dim).
        self.scale = (dim // num_heads) ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=False)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):
        """Apply self-attention to ``x`` and return a tensor of the same shape."""
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
        qkv = qkv.permute(2, 0, 3, 1, 4)  # (3, B, num_heads, N, head_dim)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # (B, num_heads, N, N) attention weights, normalised over the keys.
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)

        # Weighted sum of values, then merge the heads back into C channels.
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        return x

class MLP(nn.Module):
    """Two linear layers with a GELU in between.

    The same dropout module is applied after the activation and again after
    the second linear layer.

    Args:
        in_features: Input size of the first linear layer.
        hidden_features: Output size of the first / input size of the second.
        out_features: Output size of the second linear layer.
        drop: Dropout probability.
    """

    def __init__(self, in_features, hidden_features, out_features, drop=0.):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Return ``drop(fc2(drop(act(fc1(x)))))``."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))

class Block(nn.Module):
    """One encoder layer: normalised attention and normalised MLP, each
    added back onto its input as a residual.

    Args:
        dim: Embedding size passed to the norms, attention and MLP.
        num_heads: Number of heads for the attention sub-layer.
        mlp_ratio: The MLP hidden size is ``int(dim * mlp_ratio)``.
        drop: Dropout probability forwarded to the MLP.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., drop=0.):
        super().__init__()
        hidden_dim = int(dim * mlp_ratio)
        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(dim, num_heads)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = MLP(dim, hidden_dim, dim, drop)

    def forward(self, x):
        """Run both residual sub-layers and return the result."""
        attended = self.attn(self.norm1(x))
        x = x + attended
        transformed = self.mlp(self.norm2(x))
        return x + transformed

class VisionTransformer(nn.Module):
    """Image classifier built from patch embeddings and a stack of blocks.

    A learnable class token is prepended to the patch sequence, a learnable
    positional embedding is added, the sequence runs through ``depth``
    ``Block`` layers, and the final-normalised class token is fed to a linear
    classification head.

    Args:
        img_size: Input image side length, forwarded to ``PatchEmbedding``.
        patch_size: Patch side length, forwarded to ``PatchEmbedding``.
        in_chans: Number of image channels.
        num_classes: Output size of the classification head.
        embed_dim: Token embedding size used throughout the model.
        depth: Number of ``Block`` layers.
        num_heads: Attention heads per block.
        mlp_ratio: MLP hidden-size multiplier per block.
        drop_rate: Dropout probability for the positional dropout and blocks.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=2, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4., drop_rate=0.):
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_chans, embed_dim)
        # One extra position for the class token.
        seq_len = self.patch_embed.num_patches + 1

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, seq_len, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        encoder_layers = []
        for _ in range(depth):
            encoder_layers.append(Block(embed_dim, num_heads, mlp_ratio, drop_rate))
        self.blocks = nn.ModuleList(encoder_layers)
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images ``x``."""
        batch_size = x.shape[0]
        tokens = self.patch_embed(x)

        # Prepend one copy of the class token per sample.
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        tokens = torch.cat((cls_tokens, tokens), dim=1)
        tokens = self.pos_drop(tokens + self.pos_embed)

        for layer in self.blocks:
            tokens = layer(tokens)

        # Classify from the class token (sequence position 0) only.
        normed = self.norm(tokens)
        return self.head(normed[:, 0])

# Preprocessing for the training and validation images: resize to the
# 224x224 input size the model is built with, convert to a tensor, and
# normalise each channel with fixed mean/std constants.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the dataset (one sub-folder per class, as ImageFolder expects)
dataset = datasets.ImageFolder(root='C:/Users/nicla/Praktikum/CatsDogs/train_greensquare', transform=transform)

# 80 % of the samples go to training, the remainder to validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Split the dataset
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Initialize the Vision Transformer model
model = VisionTransformer(img_size=224, patch_size=16, num_classes=2, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.)

# Move the model to its final device before the optimizer is constructed, so
# the optimizer is built over the parameters exactly as they will be trained.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.99))

# Training loop
num_epochs = 20

train_losses = []
val_losses = []
best_val_accuracy = 0.0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    train_losses.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)

            _, preds = torch.max(outputs, 1)
            # Accumulate a plain Python int (same as the test loop below) so
            # the accuracy and the best-accuracy tracker are ordinary floats
            # rather than tensors living on the training device.
            correct += (preds == labels).sum().item()

    val_loss = val_loss / len(val_loader.dataset)
    val_losses.append(val_loss)
    val_accuracy = correct / len(val_loader.dataset)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Check if this is the best model so far and save it
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_transformerFULL_square_BATCH.pth')
        print("Model saved!")

# Define the transformation for the test set (same as training/validation set)
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the test dataset
test_dataset = datasets.ImageFolder(root='C:/Users/nicla/Praktikum/CatsDogs/test_untouched', transform=test_transform)

# Create the test data loader
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load the best model; map_location keeps the load working when the
# checkpoint's tensors were saved from a different device than `device`.
model.load_state_dict(torch.load('best_transformerFULL_square_BATCH.pth', map_location=device))
model.to(device)

# Evaluation on the test set
model.eval()
correct = 0
total = 0
test_loss = 0.0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * images.size(0)

        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_loss = test_loss / len(test_loader.dataset)
test_accuracy = correct / total

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


In [None]:
# This cell plots at the end; import pyplot here because the only other
# import of it in the notebook sits in a later cell.
import matplotlib.pyplot as plt

# Load the previously trained ViT model. map_location lets the checkpoint
# load even if its tensors were saved from a different device than `device`.
model = VisionTransformer(img_size=224, patch_size=16, num_classes=2, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.)
model.load_state_dict(torch.load('best_transformerFULL_normal.pth', map_location=device))
model.to(device)

# Define transformations for the new dataset
transform_new = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the new dataset
new_dataset = datasets.ImageFolder(root='C:/Users/nicla/Praktikum/CatsDogs/train', transform=transform_new)

# Split the new dataset into training (80 %) and validation (rest) sets
new_train_size = int(0.8 * len(new_dataset))
new_val_size = len(new_dataset) - new_train_size
new_train_dataset, new_val_dataset = random_split(new_dataset, [new_train_size, new_val_size])

# Create data loaders for the new dataset
new_train_loader = DataLoader(new_train_dataset, batch_size=1, shuffle=True)
new_val_loader = DataLoader(new_val_dataset, batch_size=1, shuffle=False)

# Fresh optimizer for the freshly loaded model's parameters
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.99))

# Training loop for the new dataset
num_epochs = 8
new_train_losses = []
new_val_losses = []
new_best_val_accuracy = 0.0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in new_train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size for a per-sample average.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(new_train_loader.dataset)
    new_train_losses.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for images, labels in new_val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)

            _, preds = torch.max(outputs, 1)
            # Keep the counter a Python int so the accuracy (and the best
            # accuracy tracked below) is a plain float, not a device tensor.
            correct += (preds == labels).sum().item()

    val_loss = val_loss / len(new_val_loader.dataset)
    new_val_losses.append(val_loss)
    val_accuracy = correct / len(new_val_loader.dataset)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Check if this is the best model so far and save it
    if val_accuracy > new_best_val_accuracy:
        new_best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_transformerFULL_normal_lightdark_BATCH.pth')
        print("New model saved!")

# Report the best validation accuracy seen during fine-tuning
print(f"Best Validation Accuracy on New Dataset: {new_best_val_accuracy:.4f}")

# Plotting training and validation losses for the new dataset
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs+1), new_train_losses, label='Training Loss')
plt.plot(range(1, num_epochs+1), new_val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss on lightdark (ViT)')
plt.legend()
plt.show()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Test-set preprocessing: the same resize / to-tensor / normalise steps that
# the training and validation transforms in this notebook use.
test_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Test images and their loader (fixed order, batches of 32)
test_dataset = datasets.ImageFolder(
    root='C:/Users/nicla/Praktikum/CatsDogs/test_untouched',
    transform=test_transform,
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Restore the best fine-tuned weights into the model already in memory
best_weights = torch.load('best_transformerFULL_normal_lightdark_BATCH.pth')
model.load_state_dict(best_weights)
model.to(device)
model.eval()

# Running totals for the whole test set
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        # Weight the batch-mean loss by the batch size for a per-sample average.
        test_loss += loss.item() * images.size(0)

        # Predicted class is the index of the largest logit.
        preds = torch.max(outputs, 1)[1]
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_loss /= len(test_loader.dataset)
test_accuracy = correct / total

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Load the previously trained ViT model. map_location lets the checkpoint
# load even if its tensors were saved from a different device than `device`.
model = VisionTransformer(img_size=224, patch_size=16, num_classes=2, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.)
model.load_state_dict(torch.load('best_transformerFULL_normal.pth', map_location=device))
model.to(device)

# Define transformations for the new dataset
transform_new = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the new dataset
new_dataset = datasets.ImageFolder(root='C:/Users/nicla/Praktikum/CatsDogs/train_greensquare', transform=transform_new)

# Split the new dataset into training (80 %) and validation (rest) sets
new_train_size = int(0.8 * len(new_dataset))
new_val_size = len(new_dataset) - new_train_size
new_train_dataset, new_val_dataset = random_split(new_dataset, [new_train_size, new_val_size])

# Create data loaders for the new dataset
new_train_loader = DataLoader(new_train_dataset, batch_size=1, shuffle=True)
new_val_loader = DataLoader(new_val_dataset, batch_size=1, shuffle=False)

# Fresh optimizer for the freshly loaded model's parameters
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.99))

# Training loop for the new dataset
num_epochs = 8
new_train_losses = []
new_val_losses = []
new_best_val_accuracy = 0.0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in new_train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size for a per-sample average.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(new_train_loader.dataset)
    new_train_losses.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for images, labels in new_val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)

            _, preds = torch.max(outputs, 1)
            # Keep the counter a Python int so the accuracy (and the best
            # accuracy tracked below) is a plain float, not a device tensor.
            correct += (preds == labels).sum().item()

    val_loss = val_loss / len(new_val_loader.dataset)
    new_val_losses.append(val_loss)
    val_accuracy = correct / len(new_val_loader.dataset)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Check if this is the best model so far and save it
    if val_accuracy > new_best_val_accuracy:
        new_best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_transformerFULL_normal_square_BATCH.pth')
        print("New model saved!")

# Report the best validation accuracy seen during fine-tuning
print(f"Best Validation Accuracy on New Dataset: {new_best_val_accuracy:.4f}")

# Plotting training and validation losses for the new dataset
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs+1), new_train_losses, label='Training Loss')
plt.plot(range(1, num_epochs+1), new_val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss on green square (ViT)')
plt.legend()
plt.show()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Test-set preprocessing: the same resize / to-tensor / normalise steps that
# the training and validation transforms in this notebook use.
test_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Test images and their loader (fixed order, batches of 32)
test_dataset = datasets.ImageFolder(
    root='C:/Users/nicla/Praktikum/CatsDogs/test_untouched',
    transform=test_transform,
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Restore the best fine-tuned weights into the model already in memory
best_weights = torch.load('best_transformerFULL_normal_square_BATCH.pth')
model.load_state_dict(best_weights)
model.to(device)
model.eval()

# Running totals for the whole test set
test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        # Weight the batch-mean loss by the batch size for a per-sample average.
        test_loss += loss.item() * images.size(0)

        # Predicted class is the index of the largest logit.
        preds = torch.max(outputs, 1)[1]
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_loss /= len(test_loader.dataset)
test_accuracy = correct / total

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")