In [2]:
import torch
from torchvision.datasets import ImageFolder
from PIL import Image
import os
from pathlib import Path
from transformers import ViTForImageClassification, ViTImageProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
device="cuda" if torch.cuda.is_available() else "cpu"

# Load the trained model
def load_trained_model(model_path, num_classes):
    model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224",num_labels=num_classes,ignore_mismatched_sizes=True)
    model.load_state_dict(torch.load(model_path))
    model.to(device)
    model.eval()
    return model

In [6]:
vit_processors = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")



In [4]:
# Get class names from training dataset
train_dataset = ImageFolder(root=r"C:\Users\Ahmed Pasha\OneDrive\Desktop\garbage\data\train")
class_names = train_dataset.classes

# Load the best model
model = load_trained_model(r"C:\Users\Ahmed Pasha\OneDrive\Desktop\garbage\src\models\best_vit_model.pth", len(class_names))

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([10, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
def vit_transformers(image):
    """Transform function for ViT model"""
    # Process the image using ViT processor
    inputs = vit_processors(images=image, return_tensors="pt")
    return inputs['pixel_values'].squeeze(0)

In [8]:
# ==================== SINGLE IMAGE PREDICTION ====================
def predict_single_image(image_path, model, transform, class_names):
    # Load and transform image
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    image_tensor = image_tensor.to(device)
    
    # Make prediction
    with torch.no_grad():
        output = model(pixel_values=image_tensor)
        logits = output.logits
        probabilities = torch.nn.functional.softmax(logits, dim=1)
        confidence, predicted_idx = torch.max(probabilities, 1)
    
    predicted_class = class_names[predicted_idx.item()]
    confidence_score = confidence.item()
    
    return predicted_class, confidence_score

# Example usage for single image
single_image_path = r"C:\Users\Ahmed Pasha\OneDrive\Desktop\garbage\data\test\biological\biological_7.jpg"
predicted_class, confidence = predict_single_image(single_image_path, model, vit_transformers, class_names)
print(f"Predicted Class: {predicted_class}")
print(f"Confidence: {confidence:.4f} ({confidence*100:.2f}%)")




Predicted Class: biological
Confidence: 0.9997 (99.97%)


In [None]:
# ==================== FOLDER PREDICTION ====================
def predict_folder(folder_path, model, transform, class_names, save_results=True):
    """
    Predict classes for all images in a folder
    
    Args:
        folder_path: Path to folder containing images
        model: Trained model
        transform: Image transformation pipeline
        class_names: List of class names
        save_results: Whether to save results to CSV
    
    Returns:
        results: List of dictionaries with predictions
    """
    results = []
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']
    
    # Get all image files in folder
    image_files = []
    for ext in image_extensions:
        image_files.extend(Path(folder_path).glob(f"*{ext}"))
        image_files.extend(Path(folder_path).glob(f"*{ext.upper()}"))
    
    print(f"Found {len(image_files)} images in folder")
    
    for img_path in image_files:
        try:
            # Predict
            predicted_class, confidence = predict_single_image(
                str(img_path), model, transform, class_names
            )
            
            result = {
                'image_name': img_path.name,
                'predicted_class': predicted_class,
                'confidence': confidence
            }
            results.append(result)
            
            print(f"{img_path.name}: {predicted_class} ({confidence*100:.2f}%)")
            
        except Exception as e:
            print(f"Error processing {img_path.name}: {str(e)}")
    
    # Save results to CSV
    if save_results and results:
        import pandas as pd
        df = pd.DataFrame(results)
        output_path = os.path.join(folder_path, "predictions.csv")
        df.to_csv(output_path, index=False)
        print(f"\nâœ… Results saved to: {output_path}")
    
    return results

# Example usage for folder
test_folder_path = r"C:\Users\Ahmed Pasha\OneDrive\Desktop\garbage\data\test"
results = predict_folder(test_folder_path, model, vit_transformers, class_names)

# Print summary
print("\n" + "="*50)
print("PREDICTION SUMMARY")
print("="*50)
class_counts = {}
for result in results:
    class_name = result['predicted_class']
    class_counts[class_name] = class_counts.get(class_name, 0) + 1

for class_name, count in class_counts.items():
    print(f"{class_name}: {count} images")


# ==================== BONUS: BATCH PREDICTION FOR FASTER PROCESSING ====================
def predict_folder_batch(folder_path, model, transform, class_names, batch_size=32):
    """
    Predict classes for folder images using batched processing (faster)
    """
    from torch.utils.data import Dataset, DataLoader
    
    class ImageDataset(Dataset):
        def __init__(self, image_paths, transform):
            self.image_paths = image_paths
            self.transform = transform
        
        def __len__(self):
            return len(self.image_paths)
        
        def __getitem__(self, idx):
            img_path = self.image_paths[idx]
            image = Image.open(img_path).convert("RGB")
            image_tensor = self.transform(image)
            return image_tensor, img_path.name
    
    # Get all image files
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']
    image_files = []
    for ext in image_extensions:
        image_files.extend(Path(folder_path).glob(f"*{ext}"))
        image_files.extend(Path(folder_path).glob(f"*{ext.upper()}"))
    
    dataset = ImageDataset(image_files, transform)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    results = []
    model.eval()
    
    with torch.no_grad():
        for images, image_names in loader:
            images = images.to(device)
            output = model(pixel_values=images)
            logits = output.logits
            probabilities = torch.nn.functional.softmax(logits, dim=1)
            confidences, predicted_indices = torch.max(probabilities, 1)
            
            for i in range(len(image_names)):
                result = {
                    'image_name': image_names[i],
                    'predicted_class': class_names[predicted_indices[i].item()],
                    'confidence': confidences[i].item()
                }
                results.append(result)
                print(f"{image_names[i]}: {result['predicted_class']} ({result['confidence']*100:.2f}%)")
    
    return results

# Example usage for batch prediction (faster)
# results_batch = predict_folder_batch(test_folder_path, model, vit_transformers, class_names, batch_size=32)

Found 0 images in folder

PREDICTION SUMMARY
