In [1]:
# Check current PyTorch installation
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda if torch.cuda.is_available() else 'Not available'}")

if torch.cuda.is_available():
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("CUDA is not available - need to install CUDA-enabled PyTorch")

PyTorch version: 2.4.0+cpu
CUDA available: False
CUDA version: Not available
CUDA is not available - need to install CUDA-enabled PyTorch


In [None]:
# Uninstall current CPU-only PyTorch
import subprocess
import sys

print("Uninstalling CPU-only PyTorch...")
result = subprocess.run([sys.executable, '-m', 'pip', 'uninstall', 'torch', 'torchvision', 'torchaudio', '-y'], 
                       capture_output=True, text=True)
print(result.stdout)

print("\nInstalling CUDA-enabled PyTorch...")
# Install PyTorch with CUDA 12.1 support (compatible with your CUDA 12.7 driver)
result = subprocess.run([sys.executable, '-m', 'pip', 'install', 'torch', 'torchvision', 'torchaudio', 
                        '--index-url', 'https://download.pytorch.org/whl/cu121'], 
                       capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print("Errors:", result.stderr)

print("Installation complete! Please restart your kernel and run the verification cell.")

Uninstalling CPU-only PyTorch...
Found existing installation: torch 2.4.0
Uninstalling torch-2.4.0:
  Successfully uninstalled torch-2.4.0
Found existing installation: torchvision 0.19.0
Uninstalling torchvision-0.19.0:
  Successfully uninstalled torchvision-0.19.0


Installing CUDA-enabled PyTorch...
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch
  Using cached https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp311-cp311-win_amd64.whl (2449.4 MB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cu121/torchvision-0.20.1%2Bcu121-cp311-cp311-win_amd64.whl (6.1 MB)
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cu121/torchaudio-2.5.1%2Bcu121-cp311-cp311-win_amd64.whl (4.1 MB)
Collecting sympy==1.13.1 (from torch)
  Using cached https://download.pytorch.org/whl/sympy-1.13.1-py3-none-any.whl (6.2 MB)
Installing collected packages: sympy, torch, torchvision, torchaudio

  Attempting uninstall: sympy

    Foun

: 

In [2]:
# RESTART KERNEL FIRST, then run this cell
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda if torch.cuda.is_available() else 'Not available'}")

if torch.cuda.is_available():
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current device: {torch.cuda.current_device()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        props = torch.cuda.get_device_properties(i)
        print(f"  Memory: {props.total_memory // 1024**3} GB")
        print(f"  Compute Capability: {props.major}.{props.minor}")
        
    # Test CUDA functionality
    print("\n🧪 Testing CUDA functionality...")
    x = torch.randn(1000, 1000).cuda()
    y = torch.randn(1000, 1000).cuda()
    z = torch.matmul(x, y)
    print(f"✅ CUDA test successful! Result shape: {z.shape}")
else:
    print("❌ CUDA is still not available")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
Number of GPUs: 1
Current device: 0
GPU 0: NVIDIA GeForce RTX 3060 Laptop GPU
  Memory: 5 GB
  Compute Capability: 8.6

🧪 Testing CUDA functionality...
✅ CUDA test successful! Result shape: torch.Size([1000, 1000])


In [None]:
import os
import random
import shutil

def sample_and_split_images(source_dir, output_dir, sample_size=1000, train_ratio=0.85):
    """
    Randomly sample images from a directory and split them into train and test sets.

    Args:
        source_dir (str): Path to the directory containing the images.
        output_dir (str): Path to the output directory where train/test sets will be saved.
        sample_size (int): Number of images to sample.
        train_ratio (float): Ratio of images to use for training (default: 0.85).
    """
    # Ensure the source directory exists
    if not os.path.exists(source_dir):
        raise FileNotFoundError(f"Source directory '{source_dir}' does not exist.")

    # Get all .jpg files in the source directory
    all_images = [f for f in os.listdir(source_dir) if f.lower().endswith('.jpg')]
    if len(all_images) < sample_size:
        raise ValueError(f"Not enough images in the source directory. Found {len(all_images)}, need {sample_size}.")

    # Randomly sample 10,000 images
    sampled_images = random.sample(all_images, sample_size)

    # Split into train and test sets
    train_size = int(sample_size * train_ratio)
    train_images = sampled_images[:train_size]
    test_images = sampled_images[train_size:]

    # Create output directories
    train_dir = os.path.join(output_dir, 'train_big')
    test_dir = os.path.join(output_dir, 'test_big')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Copy images to train directory
    for img in train_images:
        shutil.copy(os.path.join(source_dir, img), os.path.join(train_dir, img))

    # Copy images to test directory
    for img in test_images:
        shutil.copy(os.path.join(source_dir, img), os.path.join(test_dir, img))

    print(f"✅ Successfully sampled and split images.")
    print(f"Train set: {len(train_images)} images -> {train_dir}")
    print(f"Test set: {len(test_images)} images -> {test_dir}")


In [None]:
# Example usage
source_directory = r"D:\Downloads\VIRAC\VIRAC-OCR\image_crawl\archive"
output_directory = r"D:\Downloads\VIRAC\VIRAC-OCR\image_crawl"
sample_and_split_images(source_directory, output_directory)

✅ Successfully sampled and split images.
Train set: 850 images -> D:\Downloads\VIRAC\VIRAC-OCR\image_crawl\train_big
Test set: 150 images -> D:\Downloads\VIRAC\VIRAC-OCR\image_crawl\test_big
