In [1]:
!kaggle datasets download -d xixuhu/office31 --unzip -p /content/

Dataset URL: https://www.kaggle.com/datasets/xixuhu/office31
License(s): unknown
Downloading office31.zip to /content
100% 75.9M/75.9M [00:05<00:00, 18.3MB/s]
100% 75.9M/75.9M [00:05<00:00, 14.1MB/s]


In [2]:
!pip install timm

Collecting timm
  Downloading timm-1.0.10-py3-none-any.whl.metadata (48 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/48.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading timm-1.0.10-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: timm
Successfully installed timm-1.0.10


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import timm
from torch.cuda.amp import autocast, GradScaler
from torchvision import datasets, transforms

# with `vit_base_patch16_224` (debo bhai)

In [None]:
class ImprovedModel(nn.Module):
    def __init__(self, latent_dim=512, num_classes=65):
        super(ImprovedModel, self).__init__()

        # Use ViT as the backbone encoder
        self.encoder = timm.create_model('vit_base_patch16_224', pretrained=True)

        # Get the feature size from ViT output
        self.encoder_head_dim = self.encoder.head.in_features
        self.encoder.head = nn.Identity()  # Remove the original classification head

        # Latent space projection
        self.fc_latent = nn.Linear(self.encoder_head_dim, latent_dim)

        # Classifier for 65 classes
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(latent_dim, num_classes)
        )

    def forward(self, x):
        # Forward through ViT (feature extraction)
        features = self.encoder(x)  # ViT returns the features

        # Project to latent space
        latent = self.fc_latent(features)

        # Classification head
        output = self.classifier(latent)
        return output, latent

In [None]:
# Function to train the classifier on the source dataset
def train_classifier(model, dataloader, num_classes=65, n_epochs=10, lr=1e-3, weight_decay=1e-4):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)
    criterion = nn.CrossEntropyLoss()

    # Scaler for mixed precision
    scaler = GradScaler()

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0
        correct_predictions = 0
        total_samples = 0

        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            with autocast():  # Use mixed precision
                outputs, _ = model(images)  # Model returns both output and latent vectors
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)  # Get class with highest score
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        # Step the scheduler after each epoch
        scheduler.step()

        # Calculate average loss and accuracy
        avg_loss = total_loss / len(dataloader)
        accuracy = correct_predictions / total_samples * 100

        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')


In [None]:
# Function to evaluate the classifier on the target dataset
def evaluate_classifier(model, dataloader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient calculation
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            with autocast():  # Use mixed precision
                outputs, _ = model(images)
            _, predicted = torch.max(outputs, 1)

            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = correct_predictions / total_samples * 100
    print(f'Accuracy on Target Domain: {accuracy:.2f}%')

In [None]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_eval = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [None]:
from tqdm import tqdm  # Import tqdm for progress bars

# Function to train the classifier on the source dataset with progress bar
def train_classifier(model, dataloader, num_classes=65, n_epochs=10, lr=1e-3, weight_decay=1e-4):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)
    criterion = nn.CrossEntropyLoss()

    # Scaler for mixed precision
    scaler = GradScaler()

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0
        correct_predictions = 0
        total_samples = 0

        # Add tqdm progress bar
        progress_bar = tqdm(dataloader, desc=f"Epoch [{epoch+1}/{n_epochs}]")

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            with autocast():  # Use mixed precision
                outputs, _ = model(images)  # Model returns both output and latent vectors
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)  # Get class with highest score
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

            # Update tqdm progress bar description with current loss and accuracy
            progress_bar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Accuracy': f'{(correct_predictions / total_samples * 100):.2f}%'
            })

        # Step the scheduler after each epoch
        scheduler.step()

        avg_loss = total_loss / len(dataloader)
        accuracy = correct_predictions / total_samples * 100

        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')


# Function to evaluate the classifier on the target dataset with progress bar
def evaluate_classifier(model, dataloader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    correct_predictions = 0
    total_samples = 0

    # Add tqdm progress bar
    progress_bar = tqdm(dataloader, desc="Evaluating")

    with torch.no_grad():  # Disable gradient calculation
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            with autocast():  # Use mixed precision
                outputs, _ = model(images)
            _, predicted = torch.max(outputs, 1)

            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

            # Update tqdm progress bar description with current accuracy
            progress_bar.set_postfix({
                'Accuracy': f'{(correct_predictions / total_samples * 100):.2f}%'
            })

    accuracy = correct_predictions / total_samples * 100
    print(f'Accuracy on Target Domain: {accuracy:.2f}%')


num_classes = 31
latent_dim = 1024
model = ImprovedModel(latent_dim=latent_dim, num_classes=num_classes)

source_dir = "/content/Office-31/amazon"
target_dir = "/content/Office-31/dslr"


dataset_source = datasets.ImageFolder(root=source_dir, transform=transform_train)
dataset_target = datasets.ImageFolder(root=target_dir, transform=transform_eval)

dataloader_source = DataLoader(dataset_source, batch_size=64, shuffle=True)
dataloader_target = DataLoader(dataset_target, batch_size=64, shuffle=True)

train_classifier(model, dataloader_source, num_classes=num_classes, n_epochs=10, lr=1e-3, weight_decay=1e-4)
evaluate_classifier(model, dataloader_target)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

  scaler = GradScaler()
  with autocast():  # Use mixed precision
Epoch [1/10]: 100%|██████████| 45/45 [00:40<00:00,  1.10it/s, Loss=3.0391, Accuracy=3.66%]


Epoch [1/10], Loss: 3.4673, Accuracy: 3.66%


Epoch [2/10]: 100%|██████████| 45/45 [00:42<00:00,  1.06it/s, Loss=3.4297, Accuracy=5.04%]


Epoch [2/10], Loss: 3.3820, Accuracy: 5.04%


Epoch [3/10]: 100%|██████████| 45/45 [00:39<00:00,  1.15it/s, Loss=3.4570, Accuracy=4.97%]


Epoch [3/10], Loss: 3.3577, Accuracy: 4.97%


Epoch [4/10]: 100%|██████████| 45/45 [00:40<00:00,  1.11it/s, Loss=3.5527, Accuracy=5.75%]


Epoch [4/10], Loss: 3.3295, Accuracy: 5.75%


Epoch [5/10]: 100%|██████████| 45/45 [00:39<00:00,  1.14it/s, Loss=3.6992, Accuracy=5.68%]


Epoch [5/10], Loss: 3.3502, Accuracy: 5.68%


Epoch [6/10]: 100%|██████████| 45/45 [00:40<00:00,  1.12it/s, Loss=3.6289, Accuracy=5.32%]


Epoch [6/10], Loss: 3.3484, Accuracy: 5.32%


Epoch [7/10]: 100%|██████████| 45/45 [00:39<00:00,  1.13it/s, Loss=3.5078, Accuracy=7.67%]


Epoch [7/10], Loss: 3.2856, Accuracy: 7.67%


Epoch [8/10]: 100%|██████████| 45/45 [00:39<00:00,  1.14it/s, Loss=3.3457, Accuracy=7.67%]


Epoch [8/10], Loss: 3.2610, Accuracy: 7.67%


Epoch [9/10]: 100%|██████████| 45/45 [00:39<00:00,  1.13it/s, Loss=2.6426, Accuracy=10.90%]


Epoch [9/10], Loss: 3.1682, Accuracy: 10.90%


Epoch [10/10]: 100%|██████████| 45/45 [00:39<00:00,  1.13it/s, Loss=3.4980, Accuracy=11.54%]


Epoch [10/10], Loss: 3.1280, Accuracy: 11.54%


  with autocast():  # Use mixed precision
Evaluating: 100%|██████████| 8/8 [00:07<00:00,  1.13it/s, Accuracy=3.61%]

Accuracy on Target Domain: 3.61%





# with `DeiT (data-efficient image transformers)`

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import timm
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm

class ImprovedModel(nn.Module):
    def __init__(self, latent_dim=1024, num_classes=31):
        super(ImprovedModel, self).__init__()
        self.encoder = timm.create_model('deit_small_patch16_224', pretrained=True)
        self.encoder_head_dim = self.encoder.head.in_features
        self.encoder.head = nn.Identity()
        self.fc_latent = nn.Linear(self.encoder_head_dim, latent_dim)
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(latent_dim, num_classes)
        )

    def forward(self, x):
        features = self.encoder(x)
        latent = self.fc_latent(features)
        output = self.classifier(latent)
        return output, latent

In [5]:
def train_classifier(model, dataloader, num_classes=31, n_epochs=10, lr=1e-3, weight_decay=1e-4):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0
        correct_predictions = 0
        total_samples = 0
        progress_bar = tqdm(dataloader, desc=f"Epoch [{epoch+1}/{n_epochs}]")

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs, _ = model(images)
            loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)
            progress_bar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Accuracy': f'{(correct_predictions / total_samples * 100):.2f}%'
            })

        scheduler.step()
        avg_loss = total_loss / len(dataloader)
        accuracy = correct_predictions / total_samples * 100
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

In [6]:
def evaluate_classifier(model, dataloader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    correct_predictions = 0
    total_samples = 0
    progress_bar = tqdm(dataloader, desc="Evaluating")

    with torch.no_grad():
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            with autocast():
                outputs, _ = model(images)
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)
            progress_bar.set_postfix({
                'Accuracy': f'{(correct_predictions / total_samples * 100):.2f}%'
            })

    accuracy = correct_predictions / total_samples * 100
    print(f'Accuracy on Target Domain: {accuracy:.2f}%')

In [7]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_eval = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [8]:
num_classes = 31
latent_dim = 1024
model = ImprovedModel(latent_dim=latent_dim, num_classes=num_classes)

source_dir = "/content/Office-31/amazon"
target_dir = "/content/Office-31/dslr"

dataset_source = datasets.ImageFolder(root=source_dir, transform=transform_train)
dataset_target = datasets.ImageFolder(root=target_dir, transform=transform_eval)

dataloader_source = DataLoader(dataset_source, batch_size=64, shuffle=True)
dataloader_target = DataLoader(dataset_target, batch_size=64, shuffle=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/88.2M [00:00<?, ?B/s]

In [9]:
train_classifier(model, dataloader_source, num_classes=num_classes, n_epochs=10, lr=1e-4, weight_decay=1e-4)
evaluate_classifier(model, dataloader_target)

  scaler = GradScaler()
Epoch [1/10]: 100%|██████████| 45/45 [00:52<00:00,  1.16s/it, Loss=2.3020, Accuracy=63.83%]


Epoch [1/10], Loss: 1.9667, Accuracy: 63.83%


Epoch [2/10]: 100%|██████████| 45/45 [00:45<00:00,  1.00s/it, Loss=0.1984, Accuracy=87.04%]


Epoch [2/10], Loss: 0.6053, Accuracy: 87.04%


Epoch [3/10]: 100%|██████████| 45/45 [00:45<00:00,  1.00s/it, Loss=0.0323, Accuracy=93.72%]


Epoch [3/10], Loss: 0.2879, Accuracy: 93.72%


Epoch [4/10]: 100%|██████████| 45/45 [00:45<00:00,  1.01s/it, Loss=0.0335, Accuracy=96.84%]


Epoch [4/10], Loss: 0.1558, Accuracy: 96.84%


Epoch [5/10]: 100%|██████████| 45/45 [00:44<00:00,  1.00it/s, Loss=0.0176, Accuracy=98.44%]


Epoch [5/10], Loss: 0.0872, Accuracy: 98.44%


Epoch [6/10]: 100%|██████████| 45/45 [00:45<00:00,  1.01s/it, Loss=0.0065, Accuracy=99.08%]


Epoch [6/10], Loss: 0.0588, Accuracy: 99.08%


Epoch [7/10]: 100%|██████████| 45/45 [00:47<00:00,  1.05s/it, Loss=0.0065, Accuracy=99.47%]


Epoch [7/10], Loss: 0.0385, Accuracy: 99.47%


Epoch [8/10]: 100%|██████████| 45/45 [00:45<00:00,  1.02s/it, Loss=0.0054, Accuracy=99.61%]


Epoch [8/10], Loss: 0.0333, Accuracy: 99.61%


Epoch [9/10]: 100%|██████████| 45/45 [00:45<00:00,  1.02s/it, Loss=0.0062, Accuracy=99.65%]


Epoch [9/10], Loss: 0.0239, Accuracy: 99.65%


Epoch [10/10]: 100%|██████████| 45/45 [00:45<00:00,  1.02s/it, Loss=0.0059, Accuracy=99.75%]


Epoch [10/10], Loss: 0.0238, Accuracy: 99.75%


  with autocast():
Evaluating: 100%|██████████| 8/8 [00:07<00:00,  1.11it/s, Accuracy=75.90%]

Accuracy on Target Domain: 75.90%



