# tool_unit_test.ipynb — Focused Unit Tests (No External Side Effects) ✨

This notebook provides focused, unit-level tests for the core utilities in this repository.
It emphasizes dependency injection (e.g., for file copying) and avoids external side effects
such as database writes. The goal is to validate behavior quickly and locally.

Guidelines:
- Keep tests deterministic and fast.
- Avoid network/DB side effects.
- Use temporary directories/files.
- Use dependency injection (e.g., copier) for IO seams.

Note: Database-related utilities are intentionally not exercised here. Consider separate,
environment-driven integration tests for those paths. 🛡️


In [1]:
# Environment and imports
from __future__ import annotations

import json
import logging
import shutil
import sys
import tempfile
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np

# Make the current working directory importable as the project root
# (presumes the notebook is launched from the repository root — confirm for your setup)
PROJECT_ROOT = Path.cwd()
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

# Surface INFO-level log records so the tests' progress is visible in cell output
logging.basicConfig(
    format='[%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Imports from the code-under-test
from core_engine.image_similarity_system.utils import (
    get_image_metadata,
    image_path_generator,
    save_similar_images_to_folder,
    convert_numpy_to_native_python,
)

# Image creation helper (uses PIL for simplicity; falls back to OpenCV only when PIL is not installed)
def create_temp_image(
    path: Union[str, Path],
    size: Tuple[int, int] = (64, 48),
    color: Tuple[int, int, int] = (255, 0, 0),
) -> None:
    """Create a solid-color RGB image file for testing.

    PIL is preferred. OpenCV is used only when PIL cannot be imported; a failure
    while PIL is writing the file is NOT masked by the fallback and propagates.

    Args:
        path (Union[str, Path]): Destination path.
        size (Tuple[int, int]): (width, height).
        color (Tuple[int, int, int]): RGB color tuple.

    Raises:
        ImportError: If PIL is unavailable and NumPy/OpenCV cannot be imported either.
        OSError: If the OpenCV fallback reports that the file was not written.
            Errors raised by PIL while creating or saving the image propagate unchanged.
    """
    try:
        from PIL import Image
    except ImportError:
        # Only a missing PIL triggers the fallback; real write errors must stay visible.
        Image = None

    if Image is not None:
        Image.new('RGB', size, color).save(str(path))
        return

    # Fallback to OpenCV if PIL is unavailable
    import numpy as np
    import cv2
    arr = np.zeros((size[1], size[0], 3), dtype=np.uint8)  # rows = height, cols = width
    arr[:, :] = (color[2], color[1], color[0])  # BGR
    # cv2.imwrite signals failure through its boolean return value, so check it explicitly.
    if not cv2.imwrite(str(path), arr):
        raise OSError(f'cv2.imwrite failed to write image to {path}')


[INFO] Loading faiss with AVX2 support.
[INFO] Successfully loaded faiss with AVX2 support.


## Test 1: get_image_metadata — non-existent and valid image paths 📐


In [2]:
# A path that is absent from disk must produce the safe fallback metadata
meta_missing = get_image_metadata('this_file_does_not_exist.png')
assert isinstance(meta_missing, dict)
assert 'N/A' == meta_missing['dimensions_str']
assert -1 == meta_missing['size_bytes']
print('Missing file metadata:', meta_missing)

# A real image on disk must report its dimensions and a positive byte size
with tempfile.TemporaryDirectory() as td:
    tmp_dir = Path(td)
    img_path = tmp_dir.joinpath('test_image.png')
    create_temp_image(img_path, size=(80, 60))  # 80 px wide, 60 px high

    meta = get_image_metadata(str(img_path))
    # Either axis order is tolerated by this check
    dims_ok = meta['dimensions_str'] in ('80 x 60', '60 x 80')
    assert dims_ok, 'Unexpected dimensions string'
    assert 0 < meta['size_bytes']
    print('Valid file metadata:', meta)




Missing file metadata: {'dimensions_str': 'N/A', 'size_bytes': -1}
Valid file metadata: {'dimensions_str': '80 x 60', 'size_bytes': 150}


## Test 2: save_similar_images_to_folder — dependency injection for copier 📦
Inject a recording fake through the `copier` parameter so that no image files are actually copied, then verify the recorded copy calls, the returned mapping, and the summary JSON path.


In [3]:
# Recording stub: remembers every (source, destination) pair it is handed and never touches disk
copier_calls: List[Tuple[Path, Path]] = []


def fake_copier(src: Union[str, Path], dst: Union[str, Path]) -> None:
    """Record the requested copy as a pair of Paths without performing any IO."""
    copier_calls.append((Path(src), Path(dst)))


# Options handed to the function under test via `config_for_summary`
config = {
    'search_task': dict(
        copy_query_image_to_output=True,
        copy_similar_images_to_output=True,
        save_search_summary_json=True,
        top_k=2,
        save_query_in_separate_subfolder_if_copied=True,
    )
}

with tempfile.TemporaryDirectory() as td:
    tmp_dir = Path(td)
    out_dir = tmp_dir / 'out'
    q_img = tmp_dir / 'query.png'
    s1 = tmp_dir / 'similar_1.png'
    s2 = tmp_dir / 'similar_2.png'

    # Fixture images: one query plus two "similar" hits
    create_temp_image(q_img, (50, 40))
    create_temp_image(s1, (30, 30))
    create_temp_image(s2, (30, 30))

    # (path, score) pairs passed as the search results
    results = list(zip((str(s1), str(s2)), (0.91, 0.89)))

    mapping, json_path, summary, missing_count = save_similar_images_to_folder(
        search_results=results,
        output_folder=out_dir,
        query_image_path=q_img,
        config_for_summary=config,
        search_method_actually_used='unit-test',
        model_name_used='unit-model',
        total_search_time_seconds=0.01,
        json_filename='summary.json',
        copier=fake_copier,
    )

    # The copier must have been called once for the query and once per result: 1 + 2 = 3
    assert len(copier_calls) == 3, f'Expected 3 copy calls, got {len(copier_calls)}'
    # A summary path named as requested must be returned
    assert json_path is not None and json_path.name == 'summary.json'
    assert isinstance(summary, dict)
    assert missing_count == 0
    # The mapping must mention the query image and both similar images
    assert any('query.png' in key for key in mapping)
    assert len([key for key in mapping if 'similar_' in key]) == 2

    print('Copier calls:', copier_calls)
    print('JSON summary path:', json_path)
    print('Mapping count:', len(mapping), 'Missing files:', missing_count)


Copier calls: [(WindowsPath('C:/Users/jeeb/AppData/Local/Temp/tmpr7qkr35t/query.png'), WindowsPath('C:/Users/jeeb/AppData/Local/Temp/tmpr7qkr35t/out/_query_image_source/query.png')), (WindowsPath('C:/Users/jeeb/AppData/Local/Temp/tmpr7qkr35t/similar_1.png'), WindowsPath('C:/Users/jeeb/AppData/Local/Temp/tmpr7qkr35t/out/rank_001_similar_1.png')), (WindowsPath('C:/Users/jeeb/AppData/Local/Temp/tmpr7qkr35t/similar_2.png'), WindowsPath('C:/Users/jeeb/AppData/Local/Temp/tmpr7qkr35t/out/rank_002_similar_2.png'))]
JSON summary path: C:\Users\jeeb\AppData\Local\Temp\tmpr7qkr35t\out\summary.json
Mapping count: 3 Missing files: 0


## Test 3: image_path_generator — extension filtering 🖼️


In [8]:
with tempfile.TemporaryDirectory() as td:
    p = Path(td)
    # Two image files (one with an upper-case extension) and one non-image file
    valid1, valid2, invalid = p / 'a.jpg', p / 'b.PNG', p / 'c.txt'

    for image_file in (valid1, valid2):
        create_temp_image(image_file, (5, 5))
    invalid.write_text('not an image', encoding='utf-8')

    found = [*image_path_generator(p, scan_subfolders=True)]
    # Compare lower-cased names so the upper-case extension is matched regardless of case
    names = {entry.name.lower() for entry in found}
    assert {'a.jpg', 'b.png'} <= names
    assert not ('c.txt' in names)
    print('Found images:', names)

Found images: {'b.png', 'a.jpg'}


## Test 4: convert_numpy_to_native_python — JSON serializability 🔁


In [11]:
# NumPy scalars and arrays of several dtypes, including values nested inside a list
payload = {
    'a': np.float32(1.5),
    'b': np.int32(7),
    'c': np.array([1, 2, 3], dtype=np.int16),
    'd': [np.float64(2.5), np.bool_(True)],
}
converted = convert_numpy_to_native_python(payload)

# Should be JSON serializable
json.dumps(converted)

# Serializability and isinstance() alone are not enough: np.float64 subclasses Python float,
# so an unconverted float64 would pass both. Check values and exact built-in types instead.
assert converted == {'a': 1.5, 'b': 7, 'c': [1, 2, 3], 'd': [2.5, True]}
assert type(converted['a']) is float
assert type(converted['b']) is int
assert type(converted['c']) is list
assert all(type(item) is int for item in converted['c'])
assert type(converted['d'][0]) is float
assert type(converted['d'][1]) is bool
print('Conversion succeeded and is JSON-serializable.')
print('converted json:', converted)


Conversion succeeded and is JSON-serializable.
converted json: {'a': 1.5, 'b': 7, 'c': [1, 2, 3], 'd': [2.5, True]}


### Notes
- Database-related functions (e.g., save_results_to_postgresql, save_tif_search_results_to_postgresql)
  are intentionally not executed here to avoid external side effects and environment coupling.
- Prefer running those paths in a separate integration test notebook or a pytest suite configured
  with a test database. 🧪
