In [3]:
import os
import numpy as np
import torch
import tensorflow as tf
from ultralytics import YOLO
from tensorflow.keras.applications import VGG16
from transformers import SwinForImageClassification
from torchvision import transforms
from PIL import Image
from tensorflow.keras import layers, models

# Paths to the dataset directories
train_dir = '/Users/rukmini/Documents/Project/newdata/reduced_train'
test_dir = '/Users/rukmini/Documents/Project/newdata/reduced_test'

# YOLO Model for Object Detection
def detect_objects(image_path, yolo_model):
    """Detect objects using YOLO and return cropped ROIs."""
    results = yolo_model(image_path)
    rois = []
    for result in results[0].boxes:
        x1, y1, x2, y2 = map(int, result.xyxy[0])
        rois.append((x1, y1, x2, y2))
    return rois

# VGG Model for Classification
def create_vgg_model(num_classes):
    """Build and return a VGG16-based classification model."""
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False
    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Swin Transformer for Classification
def create_swin_model(num_classes):
    """Load a Swin Transformer model for classification."""
    model = SwinForImageClassification.from_pretrained(
        "microsoft/swin-tiny-patch4-window7-224",
        num_labels=num_classes,
        ignore_mismatched_sizes=True
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    return model, device

# Preprocess image for Swin Transformer
def preprocess_image_for_swin(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return transform(image)

# Combined Prediction Pipeline
def predict_pipeline(image_path, yolo_model, vgg_model, swin_model, swin_device):
    """Run detection and classification pipeline on a single image."""
    # Detect objects using YOLO
    rois = detect_objects(image_path, yolo_model)
    image = Image.open(image_path).convert("RGB")
    
    predictions = []
    for roi in rois:
        x1, y1, x2, y2 = roi
        cropped_image = image.crop((x1, y1, x2, y2))
        
        # VGG Classification
        cropped_image_vgg = cropped_image.resize((224, 224))
        cropped_image_vgg = np.array(cropped_image_vgg) / 255.0
        cropped_image_vgg = np.expand_dims(cropped_image_vgg, axis=0)
        vgg_pred = np.argmax(vgg_model.predict(cropped_image_vgg))
        
        # Swin Classification
        cropped_image_swin = preprocess_image_for_swin(cropped_image)
        cropped_image_swin = cropped_image_swin.unsqueeze(0).to(swin_device)
        swin_outputs = swin_model(cropped_image_swin)
        swin_pred = torch.argmax(swin_outputs.logits, dim=1).item()
        
        # Combine predictions
        predictions.append((roi, vgg_pred, swin_pred))
    
    return predictions

# Initialize Models
yolo_model = YOLO("yolov8n.pt")  # Replace with your trained YOLO model
num_classes = len([d for d in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, d))])
vgg_model = create_vgg_model(num_classes)
vgg_model.load_weights("vgg_ucf_model.h5")  # Load pre-trained weights
swin_model, swin_device = create_swin_model(num_classes)

# Test Combined Pipeline
test_image_path = '/Users/rukmini/Downloads/archive/test/class1/sample.png'
predictions = predict_pipeline(test_image_path, yolo_model, vgg_model, swin_model, swin_device)

# Display Results
for roi, vgg_pred, swin_pred in predictions:
    print(f"ROI: {roi}, VGG Prediction: {vgg_pred}, Swin Prediction: {swin_pred}")


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([14, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([14]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.





FileNotFoundError: /Users/rukmini/Downloads/archive/test/class1/sample.png does not exist