# core

> Core utilities for file discovery and generic operations

In [None]:
#| default_exp core

In [None]:
#| export
import re
from pathlib import Path

In [None]:
#| exporti
def _find_files_recursive(
    dirs: list[str] | list[Path], # List of directories to search recursively
    exts: list[str],                # List of file extensions (e.g., [".jpg", ".png", ".mp3"])
    prefix: str = ""                      # File name prefix to match (optional, empty string matches all)
) -> list[str]:                          # List of file paths matching the criteria
    """Recursively search directories for files with specified extensions and prefix.
    
    Searches each directory in search_dirs recursively and returns all files
    that match the given extensions and prefix.
    
    Examples:
        >>> _find_files_recursive(["images"], [".jpg", ".png"])
        ['images/05_Keho.jpg', 'images/05_Kasvot.jpg', ...]
        
        >>> _find_files_recursive(["images", "audio"], [".jpg", ".mp3"], prefix="05_")
        ['images/05_Keho.jpg', 'audio/05_Keho_00.mp3', ...]
    """
    # Compile regex pattern for extensions (case-insensitive)
    ext_pattern = "|".join(re.escape(ext) for ext in exts)
    pattern = re.compile(f"^{re.escape(prefix)}.*({ext_pattern})$", re.IGNORECASE)
    
    found = []    
    for d in dirs:
        cur = Path(d)
        if not cur.exists(): continue
        for f in cur.rglob('*'):
            if not f.is_file(): continue
            if pattern.search(f.name):
                found.append(str(f))
    
    return sorted(found)

In [None]:
#| export
def ffr(
    dirs: list[str] | list[Path], # Directories to search, including all of their subdirectories
    exts: list[str],               # File extensions to accept (e.g., [".jpg", ".png", ".mp3"])
    prefix: str = ""               # Required start of the file name (empty string accepts every name)
) -> list[str]:                    # File paths that satisfy both the extension and the prefix filter
    """Find files by extension and name prefix under the given directories.

    Short public name for `_find_files_recursive`: the three arguments are
    forwarded unchanged and the helper's result is returned as-is.

    Examples:
        >>> ffr(["images"], [".jpg", ".png"])
        ['images/05_Keho.jpg', 'images/05_Kasvot.jpg', ...]

        >>> ffr(["images", "audio"], [".jpg", ".mp3"], prefix="05_")
        ['images/05_Keho.jpg', 'audio/05_Keho_00.mp3', ...]
    """
    matches = _find_files_recursive(dirs=dirs, exts=exts, prefix=prefix)
    return matches

In [None]:
#| eval: false
from suomi.core import *

In [None]:
# Demo: look for .jpg/.png files under "images" (relative to the current
# working directory). The recorded output below is an empty list.
ffr(["images"], [".jpg", ".png"])

[]

In [None]:
#| export
def cattxt(fn: str) -> list[str]:
    """Return the lines of the UTF-8 text file `fn`, without line terminators.

    The file is read in one go and split with `str.splitlines()`, so an empty
    file yields an empty list.
    """
    with open(fn, encoding="utf-8") as handle:
        contents = handle.read()
    return contents.splitlines()

#cattxt("lääkarissä_kasvot.txt")[:4]

## Tests

In [None]:
#| test
# Test: ffr returns only the files whose extension was requested
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as scratch:
    # Populate a fresh directory with one file per extension
    media_dir = Path(scratch) / "test"
    media_dir.mkdir()
    for fname in ("file1.jpg", "file2.png", "file3.mp3", "file4.txt"):
        (media_dir / fname).touch()

    # Asking for image extensions must yield exactly the two image files
    images = ffr([str(media_dir)], [".jpg", ".png"])
    assert len(images) == 2, f"Expected 2 files, got {len(images)}"
    for fname in ("file1.jpg", "file2.png"):
        assert any(fname in hit for hit in images), f"{fname} not found"

    # Asking for the audio extension must yield only the mp3
    audio = ffr([str(media_dir)], [".mp3"])
    assert len(audio) == 1, f"Expected 1 file, got {len(audio)}"
    assert any("file3.mp3" in hit for hit in audio), "file3.mp3 not found"

print("✓ ffr extension filtering test passed")

✓ ffr extension filtering test passed


In [None]:
#| test
# Test: a missing directory in the search list is skipped instead of failing
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as scratch:
    base = Path(scratch)
    present = base / "exists"
    absent = base / "does_not_exist"  # deliberately never created
    present.mkdir()
    (present / "file.jpg").touch()

    # Search a list that mixes the real directory with the missing one
    search_roots = [str(present), str(absent)]
    hits = ffr(search_roots, [".jpg"])
    assert len(hits) == 1, f"Expected 1 file (non-existent dir should be skipped), got {len(hits)}"
    assert any("file.jpg" in hit for hit in hits), "file.jpg not found"

print("✓ ffr non-existent directory handling test passed")

✓ ffr non-existent directory handling test passed


In [None]:
#| test
# Test: extension matching ignores letter case
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as scratch:
    mixed_case_dir = Path(scratch) / "test"
    mixed_case_dir.mkdir()
    odd_case_names = ("photo.JPG", "image.Png", "sound.MP3")
    for fname in odd_case_names:
        (mixed_case_dir / fname).touch()

    # Lower-case extensions must still find the upper/mixed-case files
    hits = ffr([str(mixed_case_dir)], [".jpg", ".png", ".mp3"])
    assert len(hits) == 3, f"Expected 3 files (case-insensitive), got {len(hits)}"
    for fname in odd_case_names:
        assert any(fname in hit for hit in hits), f"{fname} not found"

print("✓ ffr case-insensitive extension test passed")

✓ ffr case-insensitive extension test passed


In [None]:
#| test
# Test: files in nested subdirectories are found as well
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as scratch:
    top_dir = Path(scratch) / "test"
    nested_dir = top_dir / "subdir" / "nested"
    nested_dir.mkdir(parents=True)  # also creates top_dir and "subdir"
    (top_dir / "root.jpg").touch()
    (nested_dir / "deep.jpg").touch()

    # Searching from the top must reach the file two levels down
    hits = ffr([str(top_dir)], [".jpg"])
    assert len(hits) == 2, f"Expected 2 files (root + nested), got {len(hits)}"
    failure_messages = {
        "root.jpg": "root.jpg not found",
        "deep.jpg": "deep.jpg in nested dir not found",
    }
    for fname, message in failure_messages.items():
        assert any(fname in hit for hit in hits), message

print("✓ ffr recursive search test passed")

✓ ffr recursive search test passed


In [None]:
#| test
# Test: results are gathered from every directory in the list
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as scratch:
    # One directory per media type, each holding a single file
    layout = {"images": "photo.jpg", "audio": "sound.mp3"}
    search_roots = []
    for folder, fname in layout.items():
        folder_path = Path(scratch) / folder
        folder_path.mkdir()
        (folder_path / fname).touch()
        search_roots.append(str(folder_path))

    # Both directories searched in one call
    hits = ffr(search_roots, [".jpg", ".mp3"])
    assert len(hits) == 2, f"Expected 2 files from both directories, got {len(hits)}"
    for fname in layout.values():
        assert any(fname in hit for hit in hits), f"{fname} not found"

print("✓ ffr multiple directory test passed")

✓ ffr multiple directory test passed


In [None]:
#| test
# Test: only files whose name starts with the prefix are returned
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as scratch:
    prefix_dir = Path(scratch) / "test"
    prefix_dir.mkdir()
    wanted = ("05_Keho.jpg", "05_Kasvot.jpg")
    unwanted = ("06_Ruoka.jpg", "other.jpg")
    for fname in wanted + unwanted:
        (prefix_dir / fname).touch()

    # Restrict the search to names beginning with "05_"
    hits = ffr([str(prefix_dir)], [".jpg"], prefix="05_")
    assert len(hits) == 2, f"Expected 2 files with prefix '05_', got {len(hits)}"
    for fname in wanted:
        assert any(fname in hit for hit in hits), f"{fname} not found"
    for fname in unwanted:
        assert all(fname not in hit for hit in hits), f"{fname} should be filtered out"

print("✓ ffr prefix filtering test passed")

✓ ffr prefix filtering test passed


In [None]:
#| hide
# Run nbdev's export step from inside the notebook (presumably regenerates the
# Python module from the `#| export` cells — confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()