In [None]:
!pip install timm
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, random_split
from tqdm.notebook import tqdm
import timm
import torch.optim.lr_scheduler as lr_scheduler
import math
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import pandas as pd
import torch
import numpy as np
# Pick the compute device: first CUDA GPU when one is visible, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

Collecting timm
  Downloading timm-0.9.8-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m77.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.18.0 safetensors-0.4.0 timm-0.9.8
cpu


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The dataset path used later (data_root) lives under this mount point,
# so this cell has to run before the data is loaded.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Pre-process the data

train_transform = transforms.Compose([
    # Geometric transformations
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),  # Flip the image with probability=0.5
    transforms.RandomVerticalFlip(p=0.5),  # Flip image vertically with probability=0.5
    transforms.RandomRotation(30),  # Rotate the image up to 30 degrees
    transforms.RandomRotation(60),  # Rotate the image up to 90 degrees
    transforms.RandomResizedCrop(32, scale=(0.8, 1.0)),  # Crop and resize

    # Color transformations
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),

    # Affine transformations
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),

    # Convert to tensor
    transforms.ToTensor(),

    # Cutout augmentation
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False),

    # Normalize (Note: These values are standard for ImageNet. Adjust if using a different dataset)
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # This may be optional if your images are already this size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for pretrained models on ImageNet
])

In [None]:
def set_seed(seed_value=42):
    """Set seed for reproducibility."""
    torch.manual_seed(seed_value)  # Set the seed for torch
    torch.cuda.manual_seed(seed_value)  # If you're using GPU
    torch.cuda.manual_seed_all(seed_value)  # If using multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed_value)

In [None]:
# Load all the data

import os
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

data_root = '/content/drive/MyDrive/mushroom_data_new'
batch_size = 32


# 1. Load all data using ImageFolder
full_dataset = ImageFolder(data_root, transform=train_transform)

total_size = len(full_dataset)
train_size = int(0.7 * total_size)  # 70% for training
val_size = int(0.2 * total_size)   # 20% for validation
test_size = total_size - train_size - val_size  # 10% for testing

# 3. Use random_split to split the datasets
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])

# Optional: You might want to apply different transformations to validation and test sets (e.g., no augmentations).
# To do this, create a function that modifies the transformations for a given subset of the dataset:
def set_transform(dataset_subset, transform):
    dataset_subset.dataset.transform = transform
    return dataset_subset

val_dataset = set_transform(val_dataset, val_transform)  # If you have a separate val_transform without augmentations
test_dataset = set_transform(test_dataset, val_transform)  # Use the same as validation for simplicity

# 4. Create DataLoaders for each set
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
testloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import timm

# 1. Encoder
class EncoderBlock(nn.Module):
    """Single post-norm Transformer encoder block.

    Applies multi-head self-attention followed by a two-layer GELU
    feed-forward network, each wrapped in a residual connection and a
    LayerNorm.

    Args:
        d_model: Size of the last (feature) dimension of the input tokens.
        nhead: Number of attention heads (``nn.MultiheadAttention`` requires
            ``d_model`` to be divisible by it).
        feedforward_dim: Hidden width of the feed-forward network.
        batch_first: When True (default) inputs are ``(batch, seq, d_model)``,
            which is the layout produced by the patch extractor in this
            notebook. Pass False for ``(seq, batch, d_model)`` inputs.
    """

    def __init__(self, d_model, nhead, feedforward_dim, batch_first=True):
        super(EncoderBlock, self).__init__()
        # nn.MultiheadAttention defaults to sequence-first tensors; without
        # batch_first=True a (batch, seq, dim) input would be attended over
        # the batch axis, mixing information between unrelated samples.
        self.multihead_attention = nn.MultiheadAttention(d_model, nhead, batch_first=batch_first)
        self.layer_norm1 = nn.LayerNorm(d_model)
        self.layer_norm2 = nn.LayerNorm(d_model)
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, feedforward_dim),
            nn.GELU(),
            nn.Linear(feedforward_dim, d_model)
        )

    def forward(self, x):
        """Return the encoded tokens; the output has the same shape as ``x``."""
        # Self-attention: queries, keys and values all come from x.
        attn_out, _ = self.multihead_attention(x, x, x)
        x = self.layer_norm1(x + attn_out)
        ff_out = self.feed_forward(x)
        x = self.layer_norm2(x + ff_out)
        return x

class ImagePatches(nn.Module):
    """Cut a 4-D image batch into non-overlapping square patches and flatten each patch.

    Args:
        patch_size: Side length of a patch; also used as the stride.
    """

    def __init__(self, patch_size):
        super().__init__()
        self.patch_size = patch_size

    def forward(self, x):
        """Return a ``(batch, num_patches, features_per_patch)`` tensor.

        ``x`` is expected to be laid out as (batch, channels, height, width).
        """
        side = self.patch_size

        # Tile dim 2 and then dim 3 with windows of `side` and stride `side`.
        row_tiles = x.unfold(2, side, side)
        tiles = row_tiles.unfold(3, side, side)

        # Move the two patch-grid axes in front of the channel axis.
        grid_first = tiles.permute(0, 2, 3, 1, 4, 5)

        # Merge the grid axes into one patch axis and flatten everything after it.
        batch, grid_rows, grid_cols = grid_first.shape[:3]
        return grid_first.reshape(batch, grid_rows * grid_cols, -1)

# Load the pretrained ViT model.
# pretrained=True is required to actually download the ImageNet weights;
# without it timm builds a randomly initialised network.
vit_model = timm.create_model("vit_large_patch16_224", pretrained=True)

# Replace the classification head so it predicts one logit per dataset class
# (head.in_features is the embedding width, not the number of classes).
num_classes = len(full_dataset.classes)
vit_model.head = torch.nn.Linear(in_features=vit_model.head.in_features, out_features=num_classes)

# Patch extractor and the extra encoder block that runs on the raw patches.
# Each flattened RGB patch has patch_size * patch_size * 3 features.
patch_size = 16
image_patches = ImagePatches(patch_size)
encoder = EncoderBlock(d_model=patch_size * patch_size * 3, nhead=8, feedforward_dim=2048)

num_epochs = 1

# Move the models to the device before building the optimizer.
encoder = encoder.to(device)
vit_model = vit_model.to(device)

criterion = nn.CrossEntropyLoss()
# Optimise both networks: the encoder sits in the forward path, so leaving
# its parameters out would keep it frozen at its random initialisation.
optimizer = optim.Adam(
    list(vit_model.parameters()) + list(encoder.parameters()),
    lr=0.001,
)




def patches_to_images(patch_tokens, channels, height, width, patch):
    """Reassemble flattened patches into images (inverse of ImagePatches).

    Args:
        patch_tokens: Tensor of shape (batch, grid_h * grid_w, channels * patch * patch)
            whose patches are ordered row by row over the patch grid.
        channels: Number of image channels.
        height: Height of the original image in pixels.
        width: Width of the original image in pixels.
        patch: Patch side length.

    Returns:
        Tensor of shape (batch, channels, grid_h * patch, grid_w * patch).
    """
    grid_h, grid_w = height // patch, width // patch
    batch = patch_tokens.size(0)
    # (B, gh, gw, C, p, p) -> (B, C, gh, p, gw, p) -> (B, C, gh*p, gw*p).
    # A plain reshape to (B, C, H, W) would scramble the pixel layout.
    grid = patch_tokens.reshape(batch, grid_h, grid_w, channels, patch, patch)
    return grid.permute(0, 3, 1, 4, 2, 5).reshape(batch, channels, grid_h * patch, grid_w * patch)


for epoch in range(num_epochs):
    # Both networks are in the forward path, so both go into training mode.
    vit_model.train()
    encoder.train()
    total_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        patches = image_patches(images)  # Convert images into flattened patches
        encoded_patches = encoder(patches)

        # Put the encoded patches back on the image grid so the ViT receives
        # a spatially coherent (batch, channels, height, width) tensor.
        _, channels, height, width = images.shape
        encoded_images = patches_to_images(encoded_patches, channels, height, width, patch_size)

        outputs = vit_model(encoded_images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        predicted = outputs.argmax(dim=1)
        correct_predictions += (predicted == labels).sum().item()
        total_predictions += labels.size(0)

    # Compute average loss and accuracy for the epoch
    # (max(..., 1) keeps an empty loader from raising ZeroDivisionError).
    epoch_loss = total_loss / max(len(trainloader), 1)
    epoch_accuracy = correct_predictions / max(total_predictions, 1)

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

