In [1]:
pip install torch torchvision transformers opencv-python scikit-learn pillow datasets




In [None]:
import os
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Where the raw images are read from and where the preprocessed copies are written
DATASET_DIR = "./datasets/raw"
PROCESSED_DIR = "./datasets/processed"

# Preprocessing pipeline: resize each image to 224x224, then convert it to a tensor
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

# Preprocess images
def preprocess_images(input_dir, output_dir):
    """Run every image under ``input_dir`` through ``transform`` and save it.

    Each sub-folder of ``input_dir`` is mirrored under ``output_dir``. Every
    file inside a sub-folder is opened as an RGB image, passed through the
    module-level ``transform`` and written to the mirrored folder under its
    original file name.

    Args:
        input_dir: Directory whose sub-folders contain the images to process.
        output_dir: Directory that receives the processed images; it is
            created when missing.

    Files that fail to load, transform or save are reported with ``print``
    and skipped, so one bad file does not abort the whole run.
    """
    os.makedirs(output_dir, exist_ok=True)

    for folder in os.listdir(input_dir):
        folder_path = os.path.join(input_dir, folder)
        if not os.path.isdir(folder_path):
            # A stray file next to the sub-folders would make the inner
            # os.listdir() raise NotADirectoryError, so skip it.
            continue

        output_folder = os.path.join(output_dir, folder)
        os.makedirs(output_folder, exist_ok=True)

        for img_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_file)
            try:
                # The context manager releases the file handle once the
                # converted copy has been produced.
                with Image.open(img_path) as raw_img:
                    img = transform(raw_img.convert("RGB"))

                if not isinstance(img, Image.Image):
                    # `transform` ends with ToTensor(), and tensors have no
                    # .save(); convert back to a PIL image before writing.
                    img = transforms.ToPILImage()(img)

                output_path = os.path.join(output_folder, img_file)
                img.save(output_path)
            except Exception as e:
                print(f"Error processing {img_path}: {e}")

# Preprocess the raw dataset into the processed directory
preprocess_images(input_dir=DATASET_DIR, output_dir=PROCESSED_DIR)


In [None]:
from transformers import ViTForImageClassification, ViTFeatureExtractor
from torch.utils.data import DataLoader, Dataset
import torch
from torchvision import datasets

# Checkpoint shared by the model and its preprocessor
checkpoint = "google/vit-base-patch16-224-in21k"

# ViT classifier configured for binary classification: helmet or no-helmet
model = ViTForImageClassification.from_pretrained(checkpoint, num_labels=2)

# Preprocessor loaded from the same checkpoint as the model.
# NOTE(review): ViTFeatureExtractor appears to be deprecated in recent
# transformers releases in favor of ViTImageProcessor - confirm against the
# installed version.
feature_extractor = ViTFeatureExtractor.from_pretrained(checkpoint)


In [None]:
class HelmetDataset(Dataset):
    """Dataset pairing image files with class labels.

    Indexing returns ``(pixel_values, label)``: the ``"pixel_values"`` entry
    produced by ``feature_extractor`` for the image (with its leading batch
    dimension squeezed away) and the label as a ``torch.long`` tensor.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of class labels, aligned with ``image_paths``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")`` that
            returns a mapping containing ``"pixel_values"``.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, feature_extractor):
        # Fail at construction time instead of with an IndexError (or silently
        # ignored labels) somewhere in the middle of an epoch.
        if len(image_paths) != len(labels):
            raise ValueError(
                "image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.feature_extractor = feature_extractor

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(pixel_values, label)``."""
        # The context manager closes the file once the RGB copy exists.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")

        inputs = self.feature_extractor(images=image, return_tensors="pt")
        # CrossEntropyLoss expects integer class indices, so pin the dtype.
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        # The extractor returns a batch of one; drop that batch dimension so
        # the DataLoader can stack items itself.
        return inputs["pixel_values"].squeeze(0), label

# Load data
# Build the path/label lists from the sub-folders of PROCESSED_DIR: every
# sub-folder is treated as one class and gets an integer label by its position
# in the sorted folder names.
# NOTE(review): which index means "helmet" therefore depends on the folder
# names, and the model is configured for exactly 2 labels - confirm the
# dataset layout matches.
class_names = sorted(
    entry
    for entry in os.listdir(PROCESSED_DIR)
    if os.path.isdir(os.path.join(PROCESSED_DIR, entry))
)

image_paths, image_labels = [], []
for class_idx, class_name in enumerate(class_names):
    class_dir = os.path.join(PROCESSED_DIR, class_name)
    for file_name in sorted(os.listdir(class_dir)):
        image_paths.append(os.path.join(class_dir, file_name))
        image_labels.append(class_idx)

if not image_paths:
    raise RuntimeError(f"No images found under {PROCESSED_DIR}")

# Hold out 20% of the images for validation. Stratifying keeps the class
# balance equal in both splits; the fixed seed makes the split reproducible.
train_image_paths, val_image_paths, train_labels, val_labels = train_test_split(
    image_paths,
    image_labels,
    test_size=0.2,
    stratify=image_labels,
    random_state=42,
)

train_data = HelmetDataset(train_image_paths, train_labels, feature_extractor)
val_data = HelmetDataset(val_image_paths, val_labels, feature_extractor)

# Only the training set is shuffled; validation order does not matter.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)


In [None]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from transformers import get_scheduler

# Number of full passes over the training data; tune as needed.
num_epochs = 3

# Move the model to the target device before building the optimizer, as the
# PyTorch optimizer documentation recommends.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Define optimizer and scheduler
optimizer = Adam(model.parameters(), lr=5e-5)
loss_fn = CrossEntropyLoss()

# The scheduler is stepped once per batch, so its horizon is the total number
# of batches across all epochs.
num_training_steps = len(train_loader) * num_epochs
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = loss_fn(outputs.logits, labels)
        total_loss += loss.item()

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

    # Mean loss per batch for this epoch
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")


In [None]:
from sklearn.metrics import classification_report

# Evaluate on the validation set with the model in eval mode
model.eval()
all_preds, all_labels = [], []

# Gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in val_loader:
        logits = model(images.to(device)).logits
        # The predicted class is the index of the largest logit
        batch_preds = logits.argmax(dim=-1)

        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds))


In [None]:
# Write the model and the feature extractor to the same directory
save_dir = "./models/helmet_vit"
model.save_pretrained(save_dir)
feature_extractor.save_pretrained(save_dir)

# Reload the saved directory through an image-classification pipeline
from transformers import pipeline

helmet_detector = pipeline("image-classification", model=save_dir)
result = helmet_detector("./test_image.jpg")
print(result)
