In [None]:
# Enable auto-reload for development
%load_ext autoreload
%autoreload 2

import sys

# Find the 'programming' folder in current path
from pathlib import Path

# Locate the 'programming' directory by walking up from the current working
# directory (the directory itself is checked first, then each parent in turn).
current_path = Path.cwd()
print(f"Current working directory: {current_path}")

programming_root = next(
    (
        candidate
        for candidate in (current_path, *current_path.parents)
        if candidate.name.lower() == 'programming'
    ),
    None,
)

if programming_root is None:
    print("⚠ 'programming' folder not found in path")
    print(f"Path checked: {current_path}")
    print("Cannot display structure without 'programming' root.")
    # Raise instead of calling exit(): inside a Jupyter kernel, exit() requests
    # a kernel shutdown rather than simply failing this cell, whereas an
    # exception stops "Run All" here and leaves a readable traceback.
    raise FileNotFoundError(
        f"No 'programming' directory found in {current_path} or any of its parents"
    )

print(f"✓ Found programming root: {programming_root}")
print("\nRelative to programming root:")

# Each package keeps its importable Python sources under <package>/src/python.
package_list = ("home-media-ai", "hydra-image-processor")
for package in package_list:
    package_path = programming_root / package / "src" / "python"
    if not package_path.exists():
        print(f" - {package} (not found)")
        continue

    # Only insert once so that re-running this cell does not keep growing
    # sys.path with duplicate entries.
    if str(package_path) not in sys.path:
        sys.path.insert(0, str(package_path))
        print(f"Added {package} to sys.path: {package_path}")
    else:
        print(f"{package} already on sys.path: {package_path}")

# Database
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Image processing
import rawpy
import numpy as np
from skimage.transform import rescale

# Add project to path.
# Path().resolve().parent and Path.cwd().parent normally name the same
# directory. Both candidates are still considered (in the original insertion
# order), but each is inserted only if it is not already on sys.path, so the
# same entry is no longer added twice per run or again on every re-run.
package_root = Path().resolve().parent
for import_root in (package_root, Path.cwd().parent):
    if str(import_root) not in sys.path:
        sys.path.insert(0, str(import_root))

import hydra_image_processor as Hydra

# Configure plotting.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'.
# Prefer the new name and fall back to the pre-3.6 name when only that one is
# installed. If neither is listed, the preferred name is still passed to
# plt.style.use() so the failure mode is the same error as before.
plot_style = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
        if style_name in plt.style.available
    ),
    'seaborn-v0_8-darkgrid',
)
plt.style.use(plot_style)
sns.set_palette('husl')
%matplotlib inline


# README Code Examples - Testing

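Below are all the code examples from the README. Run the setup cell above first, then run each test cell in order to verify the examples work correctly; a cell prints its `✅ Test N PASSED` line only if every statement in it ran without raising an exception.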

## Test 1: Basic MediaQuery Usage

In [None]:
from home_media_ai import MediaQuery

print("=" * 60)
print("Test 1: Basic MediaQuery Usage")
print("=" * 60)

# 1a. Context manager (recommended) - auto-closes session
print("\n1a. Context manager with chained filters:")
with MediaQuery() as query:
    results = query.canon().raw().rating_min(4).year(2024).all()
    print(f"Found {len(results)} Canon RAW photos rated 4+ from 2024")
    for photo in results[:3]:  # Show first 3
        print(f"  - {photo.filename}: {photo.rating} stars")

# 1b. Manual session management.
# close() sits in a finally clause so the query object is released even when
# the query itself raises; otherwise a failing example would leak it.
print("\n1b. Manual session management:")
query = MediaQuery()
try:
    results = query.dng().all()
    print(f"Found {len(results)} DNG files")
finally:
    query.close()  # Important: close when done

# 1c. Simple filter
print("\n1c. Simple filter (all RAW):")
with MediaQuery() as query:
    raw_files = query.raw().all()
    print(f"Found {len(raw_files)} RAW files total")

# 1d. Return as DataFrame
print("\n1d. Return as DataFrame:")
with MediaQuery() as query:
    df = query.rating_min(3).has_gps().limit(5).to_dataframe()
    print(f"DataFrame shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")

# 1e. Get file paths only
print("\n1e. Get file paths only:")
with MediaQuery() as query:
    paths = query.jpeg().year(2024).limit(3).to_paths()
    print(f"Found {len(paths)} JPEG paths from 2024")

# 1f. Random sampling
print("\n1f. Random sampling:")
with MediaQuery() as query:
    samples = query.rating(4).random(3)
    print(f"Random sample of {len(samples)} 4-star photos")

# 1g. Statistics - stats() is indexed like a mapping with the keys used below
print("\n1g. Statistics:")
with MediaQuery() as query:
    stats = query.canon().raw().stats()
    print("Canon RAW stats:")
    print(f"  - Count: {stats['count']}")
    print(f"  - Total size: {stats['total_size_mb']:.2f} MB")
    print(f"  - Avg rating: {stats['avg_rating']}")

# Only reached when none of the examples above raised.
print("\n✅ Test 1 PASSED\n")


## Test 2: Advanced Session Management

In [None]:
from home_media_ai import session_scope, get_session, Media, MediaQuery

# Banner for this test cell (three lines: rule, title, rule).
print("=" * 60, "Test 2: Advanced Session Management", "=" * 60, sep="\n")

# 2a. session_scope() used as a context manager around a raw ORM query.
print("\n2a. session_scope() context manager:")
with session_scope() as session:
    five_star_filter = Media.rating == 5
    results = session.query(Media).filter(five_star_filter).all()
    found = len(results)
    print(f"Found {found} 5-star photos using session_scope()")

# 2b. Session obtained by hand; the finally clause guarantees it is closed.
print("\n2b. Manual get_session():")
session = get_session()
try:
    first_five = session.query(Media).limit(5)
    results = first_five.all()
    retrieved = len(results)
    print(f"Retrieved {retrieved} media items using get_session()")
finally:
    session.close()

# 2c. MediaQuery built on top of a caller-supplied session.
print("\n2c. MediaQuery with custom session:")
with session_scope() as session:
    query = MediaQuery(session)
    five_star = query.rating(5)
    results = five_star.all()
    found = len(results)
    print(f"Found {found} 5-star photos using MediaQuery with custom session")

# Only reached when none of the examples above raised.
print("\n✅ Test 2 PASSED\n")


## Test 3: Reading Media Files

In [None]:
from home_media_ai import MediaQuery, read_image_as_array

# Banner for this test cell (three lines: rule, title, rule).
print("=" * 60, "Test 3: Reading Media Files", "=" * 60, sep="\n")

# 3a. Fetch one DNG record and load its pixels two different ways.
print("\n3a. Read single DNG file:")
with MediaQuery() as query:
    media = query.dng().first()

    if not media:
        print("  No DNG files found in database")
    else:
        # Route 1: the convenience method on the Media object itself.
        img_array = media.read_as_array()
        print(f"Read {media.filename} using media.read_as_array()")
        print(f"  Shape: {img_array.shape}, dtype: {img_array.dtype}")

        # Route 2: resolve the path first, then call the standalone reader.
        file_path = media.get_full_path()
        img_array2 = read_image_as_array(file_path)
        print(f"Read {media.filename} using read_image_as_array()")
        print(f"  Shape: {img_array2.shape}, dtype: {img_array2.dtype}")

# 3b. Load several query results one after another.
print("\n3b. Read multiple files:")
with MediaQuery() as query:
    well_rated_canon = query.canon().rating_min(4)
    images = well_rated_canon.limit(3).all()

    image_count = len(images)
    print(f"Reading {image_count} Canon photos rated 4+:")
    for media in images:
        img = media.read_as_array()
        print(f"  - {media.filename}: {img.shape}, {img.dtype}")

# Only reached when none of the examples above raised.
print("\n✅ Test 3 PASSED\n")


## Test 4: Complete Example Workflow

In [None]:
from home_media_ai import MediaQuery

# Banner for this test cell (three lines: rule, title, rule).
print("=" * 60, "Test 4: Complete Example Workflow", "=" * 60, sep="\n")

# End-to-end example: the with-block owns the query for the whole workflow.
print("\nQuery and process Canon RAW photos from 2024:")
with MediaQuery() as query:
    # Step 1: select the files of interest.
    selection = query.canon().raw().rating_min(4).year(2024)
    photos = selection.limit(5).all()

    photo_count = len(photos)
    print(f"Found {photo_count} Canon RAW photos rated 4+ from 2024")

    # Step 2: load each selected file and report its array shape.
    for photo in photos:
        img = photo.read_as_array()
        print(f"  - {photo.filename}: {img.shape}")
    # Leaving the with-block is where the session gets closed.

# Only reached when none of the steps above raised.
print("\n✅ Test 4 PASSED\n")


## Test 5: ML Integration Example

In [None]:
from home_media_ai import MediaQuery
import numpy as np

# Banner for this test cell (three lines: rule, title, rule).
print("=" * 60, "Test 5: ML Integration Example", "=" * 60, sep="\n")


# Stand-in for a real feature extractor; used for demonstration only.
def extract_features(img):
    """Summarise an image array with a few simple statistics (demo only).

    Args:
        img: Array-like image data that exposes a ``shape`` attribute and is
            accepted by ``np.mean`` / ``np.std``.

    Returns:
        dict with keys ``'mean_brightness'`` (``np.mean`` over all elements),
        ``'std_dev'`` (``np.std`` over all elements) and ``'shape'``
        (``img.shape``).
    """
    brightness = np.mean(img)
    spread = np.std(img)
    return dict(mean_brightness=brightness, std_dev=spread, shape=img.shape)

# Run the demo extractor over a handful of well-rated items, collecting one
# feature dict per item.
print("\nExtracting features from rated images:")
with MediaQuery() as query:
    rated_media = query.rating_min(4).limit(3).all()

    features_list = []
    for media in rated_media:
        img = media.read_as_array()
        features = extract_features(img)
        features_list.append(features)

        # Report the per-image results.
        shape = features['shape']
        brightness = features['mean_brightness']
        print(f"  - {media.filename}:")
        print(f"      Shape: {shape}")
        print(f"      Mean brightness: {brightness:.2f}")

extracted_count = len(features_list)
print(f"\nExtracted features from {extracted_count} images")
# Only reached when nothing above raised.
print("\n✅ Test 5 PASSED\n")


## ✅ All Tests Complete

Run all cells above to verify that all README examples work correctly.