<a href="https://colab.research.google.com/github/ayagup/stablediffusion/blob/main/image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# %pip install "torch>=2.0.0" "transformers>=4.35.0" "accelerate>=0.24.0" "Pillow>=10.0.0" "requests>=2.31.0" "numpy<2.0.0" "timm>=0.9.0"


/bin/bash: line 1: 2.0.0: No such file or directory
Note: you may need to restart the kernel to use updated packages.


In [None]:
# %pip install torch transformers accelerate Pillow opencv-python matplotlib requests "numpy<2.0.0" timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
"""
Simple Image Classification Example
Quick test for image classification using Hugging Face models
"""

import gc
import os

# These settings are read by the native libraries when they start up, so they
# are written to the environment BEFORE torch / transformers are imported.
# Setting them after the imports (as this cell previously did) is too late.
# NOTE: they still have no effect if the libraries were already imported
# earlier in the same kernel session - restart the kernel in that case.

# Suppress TensorFlow C++ logging (transformers may pull TensorFlow in)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Memory optimization: let the CUDA caching allocator use expandable segments
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import torch  # noqa: E402  (deliberately imported after the env setup above)
from transformers import AutoImageProcessor, AutoModelForImageClassification  # noqa: E402
from PIL import Image  # noqa: E402


def _load_rgb_image(image_path: str):
    """Open ``image_path`` (local file path or http(s) URL) and return it as an RGB PIL image."""
    if image_path.startswith(('http://', 'https://')):
        # Imported lazily so classifying a local file does not require `requests`.
        import requests
        from io import BytesIO

        # The timeout keeps a stalled server from hanging the notebook, and
        # raise_for_status() reports HTTP errors (404, 403, ...) directly
        # instead of passing an error page on to PIL.
        response = requests.get(image_path, timeout=30)
        response.raise_for_status()
        source = BytesIO(response.content)
    else:
        source = image_path

    # convert() hands back a new image object, so the handle opened by
    # Image.open() can be released as soon as the with-block exits.
    with Image.open(source) as raw_image:
        return raw_image.convert('RGB')


def simple_image_classification(
    image_path: str,
    model_name: str = "google/vit-base-patch16-224",
    top_k: int = 5,
) -> list:
    """
    Classify an image using pre-trained models

    Loads the image, downloads/loads the Hugging Face processor and model,
    runs a single forward pass and prints the best predictions as a small
    bar chart.

    Args:
        image_path: Path to input image (local file or http/https URL)
        model_name: HuggingFace model name
        top_k: Number of top predictions to return; must be >= 1. Values
            larger than the model's number of classes are clamped to it.

    Returns:
        List of (label, score) tuples ordered from most to least likely,
        where score is a Python float probability in [0, 1].

    Raises:
        ValueError: if ``top_k`` is smaller than 1.
        requests.HTTPError: if a URL download returns an error status.
    """
    # Fail fast, before any model download, on a request that cannot succeed.
    if top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k}")

    cuda_available = torch.cuda.is_available()

    # Clear GPU memory from any previous runs. Collect garbage first so that
    # tensors freed by the collector can be returned by empty_cache().
    if cuda_available:
        gc.collect()
        torch.cuda.empty_cache()
        print("\U0001F9F9 Cleared GPU memory cache\n")  # U+1F9F9 broom

    device = "cuda" if cuda_available else "cpu"
    num_gpus = torch.cuda.device_count() if cuda_available else 0

    rule = '=' * 60
    print(f"\n{rule}")
    print("Simple Image Classification")
    print(rule)
    print(f"Device: {device}")
    if num_gpus > 0:
        print(f"GPUs: {num_gpus}")
        for gpu_index in range(num_gpus):
            print(f"  GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")
    print(f"{rule}\n")

    # Load image
    print(f"Loading image: {image_path}")
    image = _load_rgb_image(image_path)
    print(f"Image size: {image.size[0]}x{image.size[1]}")

    # Load model and processor
    print(f"\nLoading model: {model_name}")
    print("(This will download the model on first run)\n")

    processor = AutoImageProcessor.from_pretrained(model_name)
    model = AutoModelForImageClassification.from_pretrained(model_name)

    if num_gpus > 1:
        # NOTE(review): DataParallel splits work along the batch dimension, so
        # a single image presumably still runs on one GPU - confirm whether
        # this wrapper is actually wanted here.
        print(f"\U0001F680 Multi-GPU mode: distributing across {num_gpus} GPUs")  # U+1F680 rocket
        model = torch.nn.DataParallel(model)
    elif device == "cuda":
        print("\u26A1 Single GPU mode")  # U+26A1 high voltage
    model = model.to(device)
    model.eval()

    print("Classifying image...")

    # Prepare inputs on the same device as the model
    inputs = processor(images=image, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference (no gradients needed)
    with torch.no_grad():
        logits = model(**inputs).logits

    probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # torch.topk raises if k exceeds the size of the class dimension, so never
    # ask for more predictions than the model has classes.
    k = min(top_k, probabilities.shape[-1])
    top_probs, top_indices = torch.topk(probabilities, k)

    # Only one image was processed, so take row 0; tolist() yields plain
    # Python floats / ints (usable as id2label keys and JSON-serialisable).
    top_probs = top_probs[0].tolist()
    top_indices = top_indices[0].tolist()

    # A DataParallel wrapper keeps the real model (and its config) on `.module`.
    id2label = getattr(model, 'module', model).config.id2label

    bar_width = 50  # one bar cell per 2% of confidence

    results = []
    print(f"\n{rule}")
    print(f"Top {k} Predictions:")
    print(rule)

    for rank, (class_index, score) in enumerate(zip(top_indices, top_probs), 1):
        label = id2label[class_index]
        results.append((label, score))

        # Format probability as percentage
        confidence = score * 100
        filled = int(confidence / 2)  # Scale to 50 chars max
        # U+2588 full block for the filled part, U+2591 light shade for the rest
        bar = "\u2588" * filled + "\u2591" * (bar_width - filled)

        print(f"{rank}. {label}")
        print(f"   {bar} {confidence:.2f}%\n")

    print(f"{rule}\n")

    return results




2025-10-18 18:39:43.965227: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760812784.159675      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760812784.221188      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# if __name__ == "__main__":
#     import argparse

#     parser = argparse.ArgumentParser(description='Simple Image Classification')
#     parser.add_argument('--image', type=str, required=True,
#                         help='Path to input image or URL')
#     parser.add_argument('--model', type=str, default='google/vit-base-patch16-224',
#                         help='Model name (see README for options)')
#     parser.add_argument('--top-k', type=int, default=5,
#                         help='Number of top predictions to show')

#     args = parser.parse_args()

# Demo run: classify a sample image from the web. The predictions are kept in
# `predictions` so that later cells can inspect them (None if the run failed).
import traceback

predictions = None
try:
    predictions = simple_image_classification(
        image_path='https://eskipaper.com/images/dogs-20.jpg',
        model_name='google/vit-base-patch16-224',
        top_k=5,
    )
except Exception as e:
    # Best-effort demo cell: show the failure with its full traceback rather
    # than aborting a "Run All".
    print(f"\n\u274C Error: {e}")  # U+274C cross mark
    traceback.print_exc()


🧹 Cleared GPU memory cache


Simple Image Classification
Device: cuda
GPUs: 1
  GPU 0: Tesla P100-PCIE-16GB

Loading image: https://eskipaper.com/images/dogs-20.jpg
Image size: 1366x768

Loading model: google/vit-base-patch16-224
(This will download the model on first run)



preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

⚡ Single GPU mode
Classifying image...

Top 5 Predictions:
1. collie
   ██████████████████████████████████░░░░░░░░░░░░░░░░ 68.73%

2. Shetland sheepdog, Shetland sheep dog, Shetland
   ██████████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 29.59%

3. Border collie
   ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 0.92%

4. groenendael
   ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 0.08%

5. Australian terrier
   ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 0.05%


