1. Import Libraries

In [2]:
import os
import re
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import ViTForImageClassification, ViTFeatureExtractor


  from .autonotebook import tqdm as notebook_tqdm


2. Define Custom Dataset

In [3]:
class CarDamageDataset(Dataset):
    def __init__(self, image_dir, label_csv, transform=None):
        self.image_dir = image_dir
        self.labels = pd.read_csv(label_csv)
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        img_name = self.labels.iloc[idx, 0]  
        label = self.labels.iloc[idx, 1]     

        image_path = os.path.join(self.image_dir, img_name)
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label), img_name

3. Prepare DataLoader

In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

train_dataset = CarDamageDataset(
    image_dir='data/train/images',
    label_csv='data/train/train.csv',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)


4. Load Pretrained ViT Model

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels=3  # Update this to match number of classes
)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = torch.nn.CrossEntropyLoss()

RuntimeError: Error(s) in loading state_dict for Linear:
	size mismatch for bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([3]).

 5. Training Loop with Error Handling

In [None]:
def extract_number(filename):
    matches = re.findall(r'\d+', filename)
    return int(matches[0]) if matches else None

epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for images, labels, filenames in tqdm(train_loader):
        try:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = loss_fn(outputs.logits, labels)

          

6. Model Evaluation

In [None]:
# 🧪 Model Evaluation on Test Set

test_dataset = CarDamageDataset(
    image_dir='data/test/images',
    label_csv='data/test/test.csv',
    transform=transform
)

test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels, _ in tqdm(test_loader):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"✅ Test Accuracy: {accuracy:.2f}%")

7. Save the Model

In [None]:
MODEL_PATH = "car_dent_transformer.pth"
torch.save(model.state_dict(), MODEL_PATH)
print(f"✅ Model saved to {MODEL_PATH}")
