diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5e64f44..b65fa66 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,8 +3,8 @@ version: 2 updates: - # Monitor Python dependencies via pip - - package-ecosystem: "pip" + # Monitor Go module dependencies + - package-ecosystem: "gomod" directory: "/" schedule: interval: "weekly" @@ -12,7 +12,7 @@ updates: time: "09:00" # Group all minor and patch updates together groups: - python-dependencies: + go-dependencies: patterns: - "*" update-types: @@ -23,7 +23,7 @@ updates: # Add labels for easy filtering labels: - "dependencies" - - "python" + - "go" # Target the main branch target-branch: "main" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 62c3ff9..f623fe3 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -2,20 +2,22 @@ name: Continuous integration on: push: - branches: [ main, develop ] + branches: + - main + - develop paths: - - 'src/**' - - 'tests/**' - - 'pyproject.toml' - - 'uv.lock' + - '**/*.go' + - 'go.mod' + - 'go.sum' - '.github/workflows/CI.yml' pull_request: - branches: [ main, develop ] + branches: + - main + - develop paths: - - 'src/**' - - 'tests/**' - - 'pyproject.toml' - - 'uv.lock' + - '**/*.go' + - 'go.mod' + - 'go.sum' - '.github/workflows/CI.yml' jobs: @@ -24,70 +26,70 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.11'] + go-version: ['1.22'] steps: - uses: actions/checkout@v6 - - name: Install ffmpeg and tkinter (Ubuntu) - run: | - sudo apt-get update - sudo apt-get install -y ffmpeg python3-tk + - name: Install ffmpeg + run: sudo apt-get update && sudo apt-get install -y ffmpeg - - name: Set up UV - uses: astral-sh/setup-uv@v7 + - name: Set up Go + uses: actions/setup-go@v5 with: - python-version: ${{ matrix.python-version }} + go-version: ${{ matrix.go-version }} - - name: Install the project dependencies - run: uv sync --locked --all-extras --dev + - name: Download dependencies + run: go mod 
download - name: Run tests - run: | - uv run pytest + run: go test ./... coverage: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - name: Install ffmpeg run: sudo apt-get update && sudo apt-get install -y ffmpeg - - name: Set up UV - uses: astral-sh/setup-uv@v7 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.22' - - name: Install the project dependencies - run: uv sync --locked --all-extras --dev + - name: Download dependencies + run: go mod download - - name: Run tests - run: | - uv run pytest --cov=morphic --cov-report=xml + - name: Run tests with coverage + run: go test -coverprofile=coverage.out ./... + + - name: Render HTML coverage report + run: go tool cover -html=coverage.out -o coverage.html - name: Upload coverage to Codecov - uses: codecov/codecov-action@v6 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} slug: exTerEX/morphic fail_ci_if_error: true - files: ./coverage.xml + files: ./coverage.out flags: unittests verbose: true - syntax-check: + lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - - name: Set up UV - uses: astral-sh/setup-uv@v7 + - uses: actions/checkout@v4 - - name: Install the project dependencies - run: uv sync --locked --all-extras --dev + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.22' - - name: Run Ruff format - run: uv run ruff format --check + - name: Run go vet + run: go vet ./... - - name: Run Ruff linter - run: uv run ruff check --output-format=github . \ No newline at end of file + - name: Build + run: go build ./... 
\ No newline at end of file diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index f84aba1..152ce67 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -2,16 +2,24 @@ name: "CodeQL Advanced" on: push: - branches: [ "main" ] + branches: + - main + - develop + - feature/** paths: - - 'src/**' - - 'tests/**' + - '**/*.go' + - 'go.mod' + - 'go.sum' - '.github/workflows/codeql.yml' pull_request: - branches: [ "main" ] + branches: + - main + - develop + - feature/** paths: - - 'src/**' - - 'tests/**' + - '**/*.go' + - 'go.mod' + - 'go.sum' - '.github/workflows/codeql.yml' schedule: - cron: '25 8 * * 5' @@ -40,10 +48,8 @@ jobs: fail-fast: false matrix: include: - - language: actions - build-mode: none - - language: python - build-mode: none + - language: go + build-mode: autobuild # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift' # Use `c-cpp` to analyze code written in C, C++ or both # Use 'java-kotlin' to analyze code written in Java, Kotlin or both @@ -56,11 +62,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 - # Add any setup steps before running the `github/codeql-action/init` action. - # This includes steps like installing compilers or runtimes (`actions/setup-node` - # or others). This is typically only required for manual builds. - # - name: Setup runtime (example) - # uses: actions/setup-example@v1 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 80e7f01..74c65b8 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -5,26 +5,24 @@ on: branches: - main paths: - - 'src/**' - 'docs/**' - - 'pyproject.toml' + - 'README.md' - '.github/workflows/documentation.yml' pull_request: branches: - main paths: - - 'src/**' - 'docs/**' - - 'pyproject.toml' + - 'README.md' - '.github/workflows/documentation.yml' -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +# Allow deployment to GitHub Pages permissions: contents: read pages: write id-token: write -# Allow only one concurrent deployment +# Only one concurrent deployment; skip queued runs but do not cancel in-progress concurrency: group: "pages" cancel-in-progress: false @@ -34,38 +32,42 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - name: Checkout + uses: actions/checkout@v6 - - name: Install uv - uses: astral-sh/setup-uv@v7 + - name: Setup Ruby (for Jekyll) + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.3' + bundler-cache: true + working-directory: docs - - name: Install the project dependencies - run: uv sync --dev + - name: Setup Pages + id: pages + uses: actions/configure-pages@v5 - - name: Build documentation + - name: Build with Jekyll run: | cd docs - uv run sphinx-build -b html . 
_build/html - touch _build/html/.nojekyll - - - name: Setup Pages - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: actions/configure-pages@v6 + bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" + env: + JEKYLL_ENV: production - name: Upload artifact for GitHub Pages uses: actions/upload-pages-artifact@v5 if: github.event_name == 'push' && github.ref == 'refs/heads/main' with: - path: docs/_build/html/ + path: docs/_site/ - - name: Upload documentation artifact (PR) - uses: actions/upload-artifact@v7 + - name: Upload artifact (PR preview) if: github.event_name == 'pull_request' + uses: actions/upload-artifact@v7 with: - name: documentation - path: docs/_build/html/ + name: docs-preview + path: docs/_site/ + retention-days: 7 - deploy: + deploy: # Only deploy on push to main, not on PRs if: github.event_name == 'push' && github.ref == 'refs/heads/main' needs: build runs-on: ubuntu-latest @@ -76,4 +78,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v5 \ No newline at end of file + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..2a04bb8 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,87 @@ +name: Release + +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' + +permissions: + contents: write + +jobs: + release: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Extract version components + id: version + run: | + TAG="${GITHUB_REF_NAME}" + MAJOR=$(echo "$TAG" | cut -d. -f1 | tr -d 'v') + MINOR=$(echo "$TAG" | cut -d. -f2) + PATCH=$(echo "$TAG" | cut -d. 
-f3) + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + echo "major=$MAJOR" >> "$GITHUB_OUTPUT" + if [[ "$MINOR" == "0" && "$PATCH" == "0" ]]; then + echo "is_major=true" >> "$GITHUB_OUTPUT" + else + echo "is_major=false" >> "$GITHUB_OUTPUT" + fi + + - name: Build commit changelog + run: | + PREV_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "") + if [[ -n "$PREV_TAG" ]]; then + git log "${PREV_TAG}..HEAD" --pretty=format:"- %s (%h)" --no-merges > /tmp/changelog.txt + else + git log --pretty=format:"- %s (%h)" --no-merges > /tmp/changelog.txt + fi + + - name: Generate AI release notes + run: | + TAG="${{ steps.version.outputs.tag }}" + CHANGELOG=$(cat /tmp/changelog.txt) + + # Build the request payload safely via jq so commit messages are properly escaped + jq -n \ + --arg model "gpt-4o-mini" \ + --arg system "You are a technical writer generating GitHub release notes for Morphic, a Go-based local media management tool with an image/video converter, perceptual duplicate finder, and a date/rename-based file organizer. Write concise, user-friendly release notes in markdown. Group changes under headers such as '## New Features', '## Bug Fixes', '## Improvements', and '## Other Changes'. Omit empty sections. Do not invent changes that are not present in the commit list." 
\ + --arg user "Generate release notes for ${TAG} based on these git commits:"$'\n\n'"${CHANGELOG}" \ + '{model: $model, messages: [{role: "system", content: $system}, {role: "user", content: $user}], max_tokens: 1024}' \ + > /tmp/ai_request.json + + RESPONSE=$(curl --silent \ + -X POST "https://models.inference.ai.azure.com/chat/completions" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -H "Content-Type: application/json" \ + --data-binary @/tmp/ai_request.json) || true + + NOTES=$(printf '%s' "$RESPONSE" | jq -r '.choices[0].message.content // empty' 2>/dev/null) || true + + if [[ -z "$NOTES" ]]; then + # Fallback: plain changelog when the AI call is unavailable + printf '## Changes in %s\n\n%s\n' "$TAG" "$CHANGELOG" > /tmp/release_notes.txt + else + printf '%s\n' "$NOTES" > /tmp/release_notes.txt + fi + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + name: "Morphic ${{ steps.version.outputs.tag }}" + body_path: /tmp/release_notes.txt + draft: false + prerelease: false + + - name: Create release branch for major version + if: steps.version.outputs.is_major == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + BRANCH="release/v${{ steps.version.outputs.major }}" + git checkout -b "$BRANCH" + git push origin "$BRANCH" diff --git a/.gitignore b/.gitignore index 633961c..1a25e65 100644 --- a/.gitignore +++ b/.gitignore @@ -1,108 +1,155 @@ -# Python -__pycache__/ -*.py[codz] -*$py.class +### Go ### +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll *.so +*.dylib +bin/ -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg 
-MANIFEST - -# PyInstaller -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py.cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Flask -instance/ -.webassets-cache - -# Sphinx documentation -docs/_build/ -docs/_static/ -docs/_templates/ - -# Jupyter Notebook -.ipynb_checkpoints - -# Environments -.env -.envrc -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# UV -# uv.lock - -# Ruff -.ruff_cache/ - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre / pytype -.pyre/ -.pytype/ - -# IDEs -#.idea/ -# .vscode/ - -# OS -.DS_Store -Thumbs.db - -# Temporary files -*.tmp -*.temp -*.swp -*~ -.#* -temp/ - -# PyPI -.pypirc +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +vendor/ + +# Go workspace file +go.work + +### GoLand ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### GoLand Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +.idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore 
all local history of files +.history +.ionide diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index bda641f..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 9b38853..050cbee 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,3 @@ { - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true + "go.testExplorer.enable": true } \ No newline at end of file diff --git a/Makefile b/Makefile index b982023..568efbf 100644 --- a/Makefile +++ b/Makefile @@ -1,46 +1,17 @@ -.PHONY: test coverage lint format clean help install run docs +# Go targets +.PHONY: build test run vet tidy -help: - @echo "Available targets:" - @echo " install - Install dependencies with uv" - @echo " run - Start the web UI" - @echo " test - Run all tests" - @echo " coverage - Run tests with coverage report" - @echo " lint - Run linting checks" - @echo " format - Format code with ruff" - @echo " docs - Build Sphinx documentation" - @echo " clean - Remove build artifacts" +tidy: + go mod tidy -install: - uv sync --dev --all-extras +build: tidy + go build -o ./bin/morphic ./cmd/morphic -run: install - uv run morphic +test: + go test ./... -test: install - uv run pytest +vet: + go vet ./... -coverage: install - uv run pytest --cov=morphic --cov-report=html --cov-report=term-missing - -lint: install - uv run ruff check src/morphic/ - uv run pyright src/morphic/ - -format: install - uv run ruff format src/morphic/ tests/ - uv run ruff check --fix src/morphic/ tests/ - -docs: install - uv run sphinx-build -b html docs/ docs/_build/html - -clean: - rm -rf build/ - rm -rf dist/ - rm -rf *.egg-info/ - rm -rf .pytest_cache/ - rm -rf htmlcov/ - rm -rf .coverage - rm -rf .ruff_cache/ - rm -rf docs/_build/ - find . 
-type d -name __pycache__ -exec rm -rf {} + +run: build + ./bin/morphic diff --git a/README.md b/README.md index 00d2faa..80c0c53 100644 --- a/README.md +++ b/README.md @@ -1,114 +1,130 @@ # morphic -Unified media toolkit — format conversion, duplicate detection, EXIF inspection, batch resizing, and file organization in a single tabbed web UI. +A self-hosted media toolkit — format conversion, duplicate detection, and file organisation in a single dark-themed web UI, built entirely in Go. + +[![Build](https://github.com/exterex/morphic/actions/workflows/documentation.yml/badge.svg)](https://github.com/exterex/morphic/actions/workflows/documentation.yml) + +--- ## Features -### Converter -- **Folder scanning** — enter any path, toggle subfolder recursion, filter by images / videos / both -- **File summary** — colour-coded bar chart and badge counts per extension -- **Batch conversion** — select files, pick a target format, convert in one click -- **Live progress** — real-time progress bar with per-file success / error feedback -- **Image formats** — jpg, png, tif, bmp, webp, gif, ico, heic, heif, avif (via Pillow) -- **Video formats** — mp4, mov, avi, mkv, webm, flv, wmv, m4v, mpeg, 3gp, ts (via ffmpeg) - -### Dupfinder -- **Perceptual hashing** — find visually similar images and videos, not just exact matches -- **GPU acceleration** — CUDA (PyTorch/CuPy), ROCm, OpenCL, with CPU fallback -- **Video analysis** — extract and hash frames to detect duplicate video content -- **Batch processing** — process thousands of files with configurable thresholds -- **Space savings** — see how much disk space you'd recover by removing duplicates - -### Inspector -- **EXIF metadata** — read, edit, and strip EXIF tags from images (via piexif) -- **Integrity checking** — validate images (Pillow verify + load) and videos (ffprobe) -- **Background scanning** — scan entire folders with progress tracking -- **GPS decoding** — automatic DMS-to-decimal coordinate conversion - -### Resizer 
-- **Batch resize** — resize images in bulk with configurable dimensions -- **Four modes** — fit (contain), fill (cover + crop), stretch (exact), pad (letterbox) -- **Quality control** — configurable JPEG/WebP quality and background color for padding -- **Format override** — optionally convert output format during resize - -### Organizer -- **Date sorting** — sort files into date-based folder structures (EXIF → mtime fallback) -- **Batch renaming** — rename files with template tokens ({date}, {seq}, {original}, {ext}) -- **Plan & execute** — preview the plan before committing (move or copy) -- **Conflict detection** — automatically detects and skips naming conflicts - -### Shared -- **Native folder browser** — OS-native file dialog (tkinter, zenity, kdialog, etc.) -- **Thumbnail generation** — image and video thumbnails in the web UI -- **Dark theme** — clean, responsive interface +### 🔄 Converter -## Quick Start +Convert images and videos between popular formats directly in the browser. -```bash -# Install and launch -uv sync -morphic -``` +- Scan any folder (with optional subfolder recursion) filtered by images, videos, or both +- Per-extension summary with badge counts and filter pills +- Select individual files or batch-select by extension +- Pick a target format and convert in one click — live progress bar with per-file feedback +- **Images** — jpg, png, tif, bmp, webp, gif, ico, avif (via the `imaging` library) +- **Videos** — mp4, mov, avi, mkv, webm, flv, wmv, m4v, mpeg, 3gp, ts (via ffmpeg) +- Optionally delete originals after a successful conversion +- Stop an in-flight conversion at any time -The browser opens automatically at **http://127.0.0.1:8000**. +### 🔍 Dupfinder -```bash -# With options -morphic --port 9000 --folder ~/Pictures --no-browser -``` +Find visually similar media using perceptual hashing — catches re-encoded or resized duplicates that byte-comparison misses. 
-## Prerequisites +- Three hash types combined (pHash, aHash, dHash) for higher accuracy +- Configurable similarity threshold per type (images: 90 %, videos: 85 % default) +- Video analysis by comparing frame hashes across a clip +- Multi-worker concurrent hashing with a configurable worker count +- Grouped results with thumbnail previews, file sizes, and space-savings estimate +- One-click auto-select of duplicates (keeps the largest file in each group) +- Stop scan mid-flight and discard partial results + +### 📂 Organizer + +Restructure a media collection into clean date-based folders or rename files in bulk. -- **Python ≥ 3.10** -- **[uv](https://docs.astral.sh/uv/)** — fast Python package manager -- **ffmpeg** (optional) — required for video conversion +- **Date sort** — moves/copies files into `{year}/{month}/{day}` trees using EXIF date or mtime fallback +- **Rename** — template tokens: `{date}`, `{datetime}`, `{original}`, `{ext}`, `{seq}`, `{seq:N}` (zero-padded) +- Preview the full plan before executing — see every source → destination path +- Move or copy mode +- Conflict detection — skips renames that would overwrite another file + +--- + +## Quick Start ```bash -# Ubuntu / Debian -sudo apt install ffmpeg +git clone https://github.com/exterex/morphic +cd morphic +make build # compiles to ./bin/morphic +./bin/morphic # opens http://127.0.0.1:8000 in the browser ``` -### Optional Extras - ```bash -uv sync --extra gpu # NVIDIA CUDA via PyTorch + CuPy -uv sync --extra heif # HEIF/HEIC support via pillow-heif +# Custom options +./bin/morphic --port 9000 --no-browser ``` -> GPU environment requirements -> - Use Python 3.10-3.13 for the torchvision/CuPy stack (PyTorch 1.13.x + CuPy 13.x). -> - Set up a dedicated venv with `python3.11 -m venv .venv` and activate it before running `uv sync --extra gpu`. 
-> - On Python 3.14 the recommended GPU extras are skipped because PyTorch/CuPy wheels are not yet published for that interpreter in this branch. +--- + +## Prerequisites + +| Dependency | Required for | Install | +|---|---|---| +| Go 1.22+ | Building | https://go.dev/dl/ | +| ffmpeg | Video conversion & video duplicate detection | `sudo apt install ffmpeg` | + +ffmpeg is optional — the app starts without it and greys out video features automatically. -> GPU note for GTX 10-series (sm_61): -> - This repository uses `torch` from the optional `gpu` group. -> - For NVIDIA GeForce GTX 1070, install torch 1.13.x (CUDA 11.6/11.7) in a Python 3.11 environment -> (`torch>=1.13.1,<2.0.0`), because newer PyTorch binary builds drop support for sm_61. -> - Example: -> `pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html` -> - Then `uv sync --extra gpu` can resolve `cupy` and `pyopencl` components normally. +--- ## Development ```bash -make install # Install all dependencies -make test # Run tests -make coverage # Run with coverage report (94%+) -make lint # Lint (ruff + pyright) -make format # Auto-format -make docs # Build Sphinx documentation -make clean # Remove build artifacts +make tidy # go mod tidy +make build # build binary → ./bin/morphic +make test # go test ./... +make vet # go vet ./... 
+make run # build + start dev server ``` -## Documentation +--- -Build and view the full documentation: +## Architecture -```bash -make docs -open docs/_build/html/index.html ``` +cmd/morphic/ CLI entry-point (flags, server startup) +internal/ + converter/ Folder scanner, image/video conversion logic + dupfinder/ Perceptual hashing, duplicate grouping, job runner + organizer/ Date sorter, batch renamer, plan executor + shared/ Job store, file browser, thumbnail generator, constants +web/ + server.go Gin router, embedded static assets + routes_*.go HTTP handlers per module + templates/ index.html (single-page UI) + static/ app.js, style.css +``` + +--- + +## API Overview + +All endpoints are under `/api/`. + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/converter/scan` | Scan a folder for convertible media | +| `POST` | `/api/converter/convert` | Start a batch conversion job | +| `GET` | `/api/converter/progress/:id/poll` | Poll conversion progress | +| `POST` | `/api/converter/progress/:id/cancel` | Cancel a running conversion | +| `POST` | `/api/dupfinder/scan` | Start a duplicate scan job | +| `GET` | `/api/dupfinder/scan/:id/status` | Poll scan status | +| `GET` | `/api/dupfinder/scan/:id/results` | Fetch scan results | +| `POST` | `/api/dupfinder/scan/:id/cancel` | Cancel a running scan | +| `POST` | `/api/organizer/plan` | Create an organisation plan | +| `POST` | `/api/organizer/execute/:id` | Execute a previewed plan | +| `POST` | `/api/organizer/cancel/:id` | Cancel a running plan job | +| `GET` | `/api/browse` | List directories (in-page browser) | +| `GET` | `/api/thumbnail` | Generate a JPEG thumbnail | +| `GET` | `/api/system_info` | Report Go/ffmpeg version info | + +--- ## License -MIT +Released under the [MIT License](LICENSE). 
diff --git a/assets/test/anim.gif b/assets/test/anim.gif new file mode 100644 index 0000000..53b23de Binary files /dev/null and b/assets/test/anim.gif differ diff --git a/assets/test/checkerboard.png b/assets/test/checkerboard.png new file mode 100644 index 0000000..5713c47 Binary files /dev/null and b/assets/test/checkerboard.png differ diff --git a/assets/test/gradient.png b/assets/test/gradient.png new file mode 100644 index 0000000..1dd1f78 Binary files /dev/null and b/assets/test/gradient.png differ diff --git a/assets/test/grayscale.png b/assets/test/grayscale.png new file mode 100644 index 0000000..e583ac4 Binary files /dev/null and b/assets/test/grayscale.png differ diff --git a/assets/test/large_1000x1000.png b/assets/test/large_1000x1000.png new file mode 100644 index 0000000..305102b Binary files /dev/null and b/assets/test/large_1000x1000.png differ diff --git a/assets/test/panorama.jpg b/assets/test/panorama.jpg new file mode 100644 index 0000000..30f59eb Binary files /dev/null and b/assets/test/panorama.jpg differ diff --git a/assets/test/portrait.jpg b/assets/test/portrait.jpg new file mode 100644 index 0000000..1f29157 Binary files /dev/null and b/assets/test/portrait.jpg differ diff --git a/assets/test/sample1.avif b/assets/test/sample1.avif new file mode 100644 index 0000000..2ee09f4 Binary files /dev/null and b/assets/test/sample1.avif differ diff --git a/assets/test/solid_blue.jpg b/assets/test/solid_blue.jpg new file mode 100644 index 0000000..0a3ddb8 Binary files /dev/null and b/assets/test/solid_blue.jpg differ diff --git a/assets/test/solid_green.jpg b/assets/test/solid_green.jpg new file mode 100644 index 0000000..475913f Binary files /dev/null and b/assets/test/solid_green.jpg differ diff --git a/assets/test/solid_red.jpg b/assets/test/solid_red.jpg new file mode 100644 index 0000000..5b065c3 Binary files /dev/null and b/assets/test/solid_red.jpg differ diff --git a/assets/test/solid_red_copy.jpg b/assets/test/solid_red_copy.jpg new file 
mode 100644 index 0000000..5b065c3 Binary files /dev/null and b/assets/test/solid_red_copy.jpg differ diff --git a/assets/test/tiny_1x1.png b/assets/test/tiny_1x1.png new file mode 100644 index 0000000..a8e858d Binary files /dev/null and b/assets/test/tiny_1x1.png differ diff --git a/cmd/morphic/main.go b/cmd/morphic/main.go new file mode 100644 index 0000000..58b3294 --- /dev/null +++ b/cmd/morphic/main.go @@ -0,0 +1,54 @@ +package main + +import ( + "flag" + "fmt" + "log" + "net" + "os/exec" + "runtime" + "strconv" + + "github.com/exterex/morphic/internal/shared" + "github.com/exterex/morphic/web" +) + +// main parses the command-line flags, optionally launches the browser, and runs the router on the chosen address. +func main() { + host := flag.String("host", "127.0.0.1", "Host to bind to") + port := flag.Int("port", 8000, "Port to listen on") + noBrowser := flag.Bool("no-browser", false, "Don't open browser automatically") + flag.Parse() + + // JoinHostPort brackets IPv6 literals such as ::1, which a plain "%s:%d" format would not. + addr := net.JoinHostPort(*host, strconv.Itoa(*port)) + url := fmt.Sprintf("http://%s", addr) + + if !*noBrowser { + go openBrowser(url) + } + + log.Printf("Morphic %s starting on %s", shared.Version, url) + + router := web.SetupRouter() + if err := router.Run(addr); err != nil { + log.Fatalf("Failed to start server: %v", err) + } +} + +// openBrowser starts the platform's URL opener for url; platforms other than linux, darwin and windows are skipped. +func openBrowser(url string) { + var cmd *exec.Cmd + switch runtime.GOOS { + case "linux": + cmd = exec.Command("xdg-open", url) + case "darwin": + cmd = exec.Command("open", url) + case "windows": + cmd = exec.Command("cmd", "/c", "start", url) + default: + return + } + // Best effort: a failure to launch the browser is deliberately ignored. + _ = cmd.Start() +} diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 0000000..bc7584d --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,4 @@ +source "https://rubygems.org" + +gem "github-pages", group: :jekyll_plugins +gem "jekyll-theme-cayman" diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 0000000..98448de --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,7 @@ +title: morphic +description: A self-hosted media toolkit — format conversion, duplicate detection, and file organisation in a single web UI. 
+theme: jekyll-theme-cayman +show_downloads: false +repository: exterex/morphic +github: + repository_url: https://github.com/exterex/morphic diff --git a/docs/api/converter.rst b/docs/api/converter.rst deleted file mode 100644 index 66b1337..0000000 --- a/docs/api/converter.rst +++ /dev/null @@ -1,25 +0,0 @@ -morphic.converter -================= - -File format conversion engine for images (Pillow) and videos (ffmpeg). - -.. module:: morphic.converter - -Constants ---------- - -.. automodule:: morphic.converter.constants - :members: - :undoc-members: - -Converter ---------- - -.. automodule:: morphic.converter.converter - :members: - -Scanner -------- - -.. automodule:: morphic.converter.scanner - :members: diff --git a/docs/api/dupfinder.rst b/docs/api/dupfinder.rst deleted file mode 100644 index 2e255c4..0000000 --- a/docs/api/dupfinder.rst +++ /dev/null @@ -1,40 +0,0 @@ -morphic.dupfinder -================= - -Duplicate image and video detection via perceptual hashing with optional -GPU acceleration. - -.. module:: morphic.dupfinder - -Accelerator ------------ - -GPU/CPU backend selection and batch operations. - -.. automodule:: morphic.dupfinder.accelerator - :members: - :undoc-members: - -Images ------- - -Image duplicate detection. - -.. automodule:: morphic.dupfinder.images - :members: - -Videos ------- - -Video duplicate detection. - -.. automodule:: morphic.dupfinder.videos - :members: - -Scanner -------- - -Background scan job management (used by the web API). - -.. automodule:: morphic.dupfinder.scanner - :members: diff --git a/docs/api/frontend.rst b/docs/api/frontend.rst deleted file mode 100644 index 67393b7..0000000 --- a/docs/api/frontend.rst +++ /dev/null @@ -1,30 +0,0 @@ -morphic.frontend -================ - -Flask web application providing a unified tabbed interface. - -.. module:: morphic.frontend - -Application Factory -------------------- - -.. automodule:: morphic.frontend.app - :members: - -Shared Routes -------------- - -.. 
automodule:: morphic.frontend.routes_shared - :members: - -Converter Routes ----------------- - -.. automodule:: morphic.frontend.routes_converter - :members: - -Dupfinder Routes ----------------- - -.. automodule:: morphic.frontend.routes_dupfinder - :members: diff --git a/docs/api/shared.rst b/docs/api/shared.rst deleted file mode 100644 index 5bbc682..0000000 --- a/docs/api/shared.rst +++ /dev/null @@ -1,31 +0,0 @@ -morphic.shared -============== - -Constants, utilities, and helpers shared across all morphic modules. - -.. module:: morphic.shared - -Constants ---------- - -.. automodule:: morphic.shared.constants - :members: - :undoc-members: - -Utilities ---------- - -.. automodule:: morphic.shared.utils - :members: - -File Browser ------------- - -.. automodule:: morphic.shared.file_browser - :members: - -Thumbnails ----------- - -.. automodule:: morphic.shared.thumbnails - :members: diff --git a/docs/changelog.rst b/docs/changelog.rst deleted file mode 100644 index 3a9e79c..0000000 --- a/docs/changelog.rst +++ /dev/null @@ -1,23 +0,0 @@ -Changelog -========= - -1.0.0 (2026-03-06) -------------------- - -Initial public release. 
- -- Unified media toolkit: format conversion and duplicate detection in a - single tabbed web UI -- **Converter** — scan folders and batch-convert image/video files; - 22 image formats (Pillow) and 21 video formats (ffmpeg) -- **Dupfinder** — find visually similar images and videos using - perceptual hashing; GPU acceleration via CUDA, ROCm, and OpenCL -- **Shared UI** — tabbed Flask web interface with in-page folder - browser, thumbnail preview, and dark theme -- Native folder-picker dialogs (tkinter / zenity / kdialog / osascript / - PowerShell) with in-page fallback -- GPU-accelerated batch processing (CUDA/PyTorch, CuPy, OpenCL) -- ``morphic`` CLI with ``--host``, ``--port``, ``--folder``, - ``--debug``, and ``--no-browser`` options -- Sphinx documentation published to GitHub Pages -- Comprehensive test suite with 94 %+ code coverage diff --git a/docs/cli.rst b/docs/cli.rst deleted file mode 100644 index cc77af1..0000000 --- a/docs/cli.rst +++ /dev/null @@ -1,56 +0,0 @@ -Command-Line Interface -====================== - -morphic -------- - -.. code-block:: text - - usage: morphic [-h] [--host HOST] [--port PORT] [--folder FOLDER] - [--debug] [--no-browser] - - Morphic — media format converter & duplicate finder - -Options -^^^^^^^ - -``--host HOST`` - Host to bind to. Default: ``127.0.0.1``. - -``--port PORT`` - Port to listen on. Default: ``8000``. - -``--folder FOLDER`` - Pre-populate the folder path in the UI. - -``--debug`` - Enable Flask debug mode with auto-reload. - -``--no-browser`` - Don't auto-open the browser on start. - -Examples -^^^^^^^^ - -.. code-block:: bash - - # Default: open browser on http://127.0.0.1:8000 - morphic - - # Custom port, no auto-open - morphic --port 9000 --no-browser - - # Pre-select a folder - morphic --folder ~/Pictures - - # Debug mode - morphic --debug - -Running as a module -^^^^^^^^^^^^^^^^^^^ - -You can also run morphic as a Python module: - -.. 
code-block:: bash - - python -m morphic.frontend diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 2f91912..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,52 +0,0 @@ -# Sphinx configuration for morphic -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -import importlib.metadata - -# -- Project information ----------------------------------------------------- - -project = "morphic" -author = "Andreas Sagen" -copyright = "2026, Andreas Sagen" # noqa: A001 -release = importlib.metadata.version("morphic") -version = ".".join(release.split(".")[:2]) - -# -- General configuration --------------------------------------------------- - -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx.ext.intersphinx", - "sphinx_copybutton", -] - -templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# -- Options for HTML output ------------------------------------------------- - -html_theme = "furo" -html_static_path = ["_static"] -html_title = f"morphic {release}" - -# -- Extension configuration ------------------------------------------------- - -autodoc_member_order = "bysource" -autodoc_typehints = "description" -autodoc_default_options = { - "members": True, - "undoc-members": True, - "show-inheritance": True, -} - -napoleon_google_docstring = True -napoleon_numpy_docstring = True -napoleon_include_init_with_doc = True - -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), - "flask": ("https://flask.palletsprojects.com/en/stable/", None), - "numpy": ("https://numpy.org/doc/stable/", None), - "pillow": ("https://pillow.readthedocs.io/en/stable/", None), -} diff --git a/docs/configuration.rst b/docs/configuration.rst deleted file mode 100644 index 9d97509..0000000 --- a/docs/configuration.rst +++ /dev/null @@ -1,63 +0,0 @@ -Configuration -============= - -morphic is configured through CLI arguments and module-level constants. 
-There is no configuration file. - -Shared Constants ----------------- - -The shared module defines default thresholds and extensions used by both -the converter and dupfinder modules. - -.. list-table:: Key Constants - :header-rows: 1 - :widths: 35 15 50 - - * - Constant - - Value - - Description - * - ``IMAGE_EXTENSIONS`` - - 22 formats - - ``.jpg``, ``.png``, ``.webp``, ``.tif``, ``.bmp``, ``.heif``, etc. - * - ``VIDEO_EXTENSIONS`` - - 21 formats - - ``.mp4``, ``.avi``, ``.mkv``, ``.mov``, ``.webm``, etc. - * - ``EXCLUDED_FOLDERS`` - - 25 names - - ``node_modules``, ``.git``, ``__pycache__``, etc. - * - ``DEFAULT_IMAGE_THRESHOLD`` - - 0.90 - - Similarity threshold for image duplicate detection - * - ``DEFAULT_VIDEO_THRESHOLD`` - - 0.85 - - Similarity threshold for video duplicate detection - * - ``DEFAULT_HASH_SIZE`` - - 16 - - Hash size for perceptual hashing - * - ``DEFAULT_NUM_FRAMES`` - - 10 - - Number of frames extracted from each video - * - ``DEFAULT_NUM_WORKERS`` - - 4 - - Default worker thread count - -GPU Acceleration ----------------- - -The dupfinder module automatically detects available GPU backends in -this priority order: - -1. **CUDA** (via PyTorch) — NVIDIA GPUs -2. **CUDA** (via CuPy) — NVIDIA GPUs (fallback) -3. **ROCm** (via PyTorch) — AMD GPUs -4. **OpenCL** (via PyOpenCL) — Any OpenCL-capable GPU -5. **CPU** — Multiprocessing fallback (always available) - -Install optional extras to enable GPU support: - -.. code-block:: bash - - uv sync --extra cuda # NVIDIA - uv sync --extra rocm # AMD - uv sync --extra opencl # OpenCL diff --git a/docs/development.rst b/docs/development.rst deleted file mode 100644 index 9978239..0000000 --- a/docs/development.rst +++ /dev/null @@ -1,106 +0,0 @@ -Development -=========== - -Setting up ----------- - -Clone the repository and install dependencies: - -.. code-block:: bash - - git clone https://github.com/andreassagen/morphic.git - cd morphic - uv sync --dev - -Project Layout --------------- - -.. 
code-block:: text - - morphic/ - ├── src/morphic/ - │ ├── __init__.py # Package root, version - │ ├── shared/ # Constants, utils, thumbnails, file browser - │ │ ├── constants.py - │ │ ├── utils.py - │ │ ├── file_browser.py - │ │ └── thumbnails.py - │ ├── converter/ # Format conversion engine - │ │ ├── constants.py - │ │ ├── converter.py - │ │ └── scanner.py - │ ├── dupfinder/ # Duplicate detection - │ │ ├── accelerator.py - │ │ ├── images.py - │ │ ├── videos.py - │ │ └── scanner.py - │ └── frontend/ # Flask web UI - │ ├── app.py - │ ├── routes_shared.py - │ ├── routes_converter.py - │ ├── routes_dupfinder.py - │ ├── templates/ - │ └── static/ - ├── tests/ # pytest test suite (395+ tests) - ├── docs/ # Sphinx documentation - ├── pyproject.toml - └── Makefile - -Running Tests -------------- - -.. code-block:: bash - - # Run all tests - make test - - # Run with coverage report - make coverage - - # Run a specific test file - uv run pytest tests/test_shared_utils.py -v - -Linting & Formatting ---------------------- - -.. code-block:: bash - - # Lint with ruff + pyright - make lint - - # Auto-format - make format - -Building Documentation ----------------------- - -.. code-block:: bash - - make docs - # Output is in docs/_build/html/ - -Architecture ------------- - -morphic follows a modular architecture with three main modules that share -a common set of constants and utilities via ``morphic.shared``: - -**morphic.shared** - Common constants (file extensions, thresholds), utility functions - (file scanning, formatting), native folder browser dialog, and - thumbnail generation. - -**morphic.converter** - File format conversion engine. Uses Pillow for images and ffmpeg - (subprocess) for videos. The scanner discovers files and determines - compatible target formats. - -**morphic.dupfinder** - Duplicate detection via perceptual hashing. Images are hashed with - ``imagehash.phash``. Videos have frames extracted and hashed individually. 
- A GPU accelerator provides optional CUDA/ROCm/OpenCL acceleration for - batch operations. - -**morphic.frontend** - Flask web application that provides a unified tabbed interface for both - modules. Uses blueprints for route organization. diff --git a/docs/getting_started.rst b/docs/getting_started.rst deleted file mode 100644 index 503ad42..0000000 --- a/docs/getting_started.rst +++ /dev/null @@ -1,62 +0,0 @@ -Getting Started -=============== - -Installation ------------- - -Install morphic using `uv `_: - -.. code-block:: bash - - uv sync --dev - -Or with pip: - -.. code-block:: bash - - pip install -e . - -Quick Start ------------ - -Launch the web interface: - -.. code-block:: bash - - morphic - -This opens a tabbed web UI on ``http://127.0.0.1:8000`` with two modules: - -**Converter tab** - Scan folders for media files and batch-convert between formats. - Supports 22 image formats and 21 video formats. - -**Dupfinder tab** - Find duplicate images and videos using perceptual hashing. - Supports GPU acceleration (CUDA, ROCm, OpenCL) when available. - -Pre-populate a folder: - -.. code-block:: bash - - morphic --folder /path/to/media - -Requirements ------------- - -- Python 3.10+ -- ``ffmpeg`` on ``PATH`` (for video conversion) -- Optional: NVIDIA/AMD GPU for accelerated duplicate detection - -Optional GPU extras: - -.. code-block:: bash - - # NVIDIA CUDA - uv sync --extra cuda - - # AMD ROCm - uv sync --extra rocm - - # OpenCL - uv sync --extra opencl diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..41c8882 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,160 @@ +--- +layout: default +title: morphic — Self-hosted Media Toolkit +--- + +# morphic + +A self-hosted media toolkit written in Go. Convert formats, find duplicates, and organise your media library — all from a single dark-themed web UI with no cloud dependencies. 
+ +--- + +## Getting Started + +```bash +git clone https://github.com/exterex/morphic +cd morphic +make build +./bin/morphic # opens http://127.0.0.1:8000 +``` + +**Prerequisites** + +| Dependency | Required for | +|---|---| +| Go 1.22+ | Building | +| ffmpeg *(optional)* | Video conversion, video duplicate detection | + +Install ffmpeg on Ubuntu/Debian: + +```bash +sudo apt install ffmpeg +``` + +--- + +## Modules + +### 🔄 Converter + +Scan a folder and convert images or videos to any supported format. + +- **Images** — jpg, png, tif, bmp, webp, gif, ico, avif +- **Videos** — mp4, mov, avi, mkv, webm, flv, wmv, m4v, mpeg, 3gp, ts +- Filter by type, click a format pill, batch-select files, pick a target +- Live progress bar, per-file success/error feedback +- Stop an in-flight conversion at any time + +### 🔍 Dupfinder + +Find visually similar media using perceptual hashing — catches re-encoded and resized duplicates that byte-comparison misses. + +- Combines pHash, aHash, and dHash +- Configurable similarity thresholds +- Video support: hashes sampled frames across each clip +- Grouped results with thumbnail preview and space-savings estimate +- Auto-select duplicates (keeps the largest file per group) +- Cancel scan mid-flight + +### 📂 Organizer + +Restructure and rename a media collection. + +**Date sort** — moves or copies files into `{year}/{month}/{day}` trees: +- Reads EXIF `DateTimeOriginal` first; falls back to file modification time +- Preview the full plan before executing + +**Rename** — template tokens available: + +| Token | Output | +|---|---| +| `{date}` | `YYYYMMDD` | +| `{datetime}` | `YYYYMMDD_HHMMSS` | +| `{original}` | Original filename without extension | +| `{ext}` | File extension without dot | +| `{seq}` | Sequential integer | +| `{seq:N}` | Sequential integer zero-padded to N digits | + +--- + +## API Reference + +All endpoints are under `/api/`. 
+ +### Converter + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/converter/scan` | Scan folder for media | +| `GET` | `/api/converter/formats` | List supported conversion formats | +| `POST` | `/api/converter/convert` | Start batch conversion job | +| `GET` | `/api/converter/progress/:id` | Fetch job progress page | +| `GET` | `/api/converter/progress/:id/poll` | Poll job progress (JSON) | +| `POST` | `/api/converter/progress/:id/cancel` | Cancel running job | +| `POST` | `/api/converter/delete` | Delete listed files | + +### Dupfinder + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/dupfinder/scan` | Start duplicate scan | +| `GET` | `/api/dupfinder/scan/:id/status` | Poll scan status | +| `GET` | `/api/dupfinder/scan/:id/results` | Fetch grouped results | +| `POST` | `/api/dupfinder/scan/:id/cancel` | Cancel running scan | +| `POST` | `/api/dupfinder/delete` | Delete selected files | + +### Organizer + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/organizer/plan` | Create organisation plan | +| `GET` | `/api/organizer/plan/:id` | Get plan status & preview | +| `POST` | `/api/organizer/execute/:id` | Execute a previewed plan | +| `GET` | `/api/organizer/execute/:id/status` | Poll execution status | +| `POST` | `/api/organizer/cancel/:id` | Cancel plan/execute job | + +### Shared + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/api/browse` | List directories | +| `POST` | `/api/browse/native` | Open OS native folder picker | +| `GET` | `/api/thumbnail` | Generate JPEG thumbnail by path | +| `GET` | `/api/system_info` | Go/ffmpeg build info | +| `GET` | `/api/media` | Serve a media file for preview | + +--- + +## Architecture + +``` +cmd/morphic/ CLI entry-point and server startup +internal/ + converter/ Folder scanner, image/video conversion + dupfinder/ Perceptual hashing, grouping, async job runner + organizer/ Date 
sorter, batch renamer, plan executor + shared/ Generic job store, file utilities, thumbnails +web/ + server.go Gin router with embedded static assets + routes_*.go HTTP handlers per module + templates/ index.html — single-page UI + static/ app.js, style.css +``` + +--- + +## Development + +```bash +make tidy # tidy go.mod / go.sum +make build # compile → ./bin/morphic +make test # run unit tests +make vet # go vet +make run # build + start server +``` + +--- + +## License + +[MIT License](https://github.com/exterex/morphic/blob/main/LICENSE) diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 751ae0c..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,44 +0,0 @@ -morphic -======= - -**Unified media toolkit** — format conversion and duplicate detection in a -single tabbed web interface. - -.. image:: https://img.shields.io/badge/python-3.10+-blue.svg - :alt: Python 3.10+ - -.. image:: https://img.shields.io/badge/license-MIT-green.svg - :alt: MIT License - ---------- - -.. toctree:: - :maxdepth: 2 - :caption: User Guide - - getting_started - cli - configuration - -.. toctree:: - :maxdepth: 2 - :caption: API Reference - - api/shared - api/converter - api/dupfinder - api/frontend - -.. 
toctree:: - :maxdepth: 1 - :caption: Development - - development - changelog - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..1eff467 --- /dev/null +++ b/go.mod @@ -0,0 +1,47 @@ +module github.com/exterex/morphic + +go 1.22 + +require ( + github.com/corona10/goimagehash v1.1.0 + github.com/disintegration/imaging v1.6.2 + github.com/dsoprea/go-exif/v3 v3.0.1 + github.com/gin-gonic/gin v1.10.0 + github.com/google/uuid v1.6.0 +) + +require ( + github.com/bytedance/sonic v1.11.6 // indirect + github.com/bytedance/sonic/loader v0.1.1 // indirect + github.com/cloudwego/base64x v0.1.4 // indirect + github.com/cloudwego/iasm v0.2.0 // indirect + github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd // indirect + github.com/dsoprea/go-utility/v2 v2.0.0-20221003172846-a3e1774ef349 // indirect + github.com/gabriel-vasile/mimetype v1.4.3 // indirect + github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-errors/errors v1.4.2 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.20.0 // indirect + github.com/goccy/go-json v0.10.2 // indirect + github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.7 // indirect + github.com/leodido/go-urn v1.4.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect + github.com/pelletier/go-toml/v2 v2.2.2 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect + golang.org/x/arch v0.8.0 // indirect + golang.org/x/crypto 
v0.23.0 // indirect + golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..d33a3d9 --- /dev/null +++ b/go.sum @@ -0,0 +1,147 @@ +github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= +github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= +github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= +github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI= +github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c= +github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= +github.com/dsoprea/go-exif/v2 v2.0.0-20200321225314-640175a69fe4/go.mod h1:Lm2lMM2zx8p4a34ZemkaUV95AnMl4ZvLbCUbwOvLC2E= +github.com/dsoprea/go-exif/v3 v3.0.0-20200717053412-08f1b6708903/go.mod 
h1:0nsO1ce0mh5czxGeLo4+OCZ/C6Eo6ZlMWsz7rH/Gxv8= +github.com/dsoprea/go-exif/v3 v3.0.0-20210625224831-a6301f85c82b/go.mod h1:cg5SNYKHMmzxsr9X6ZeLh/nfBRHHp5PngtEPcujONtk= +github.com/dsoprea/go-exif/v3 v3.0.0-20221003160559-cf5cd88aa559/go.mod h1:rW6DMEv25U9zCtE5ukC7ttBRllXj7g7TAHl7tQrT5No= +github.com/dsoprea/go-exif/v3 v3.0.0-20221003171958-de6cb6e380a8/go.mod h1:akyZEJZ/k5bmbC9gA612ZLQkcED8enS9vuTiuAkENr0= +github.com/dsoprea/go-exif/v3 v3.0.1 h1:/IE4iW7gvY7BablV1XY0unqhMv26EYpOquVMwoBo/wc= +github.com/dsoprea/go-exif/v3 v3.0.1/go.mod h1:10HkA1Wz3h398cDP66L+Is9kKDmlqlIJGPv8pk4EWvc= +github.com/dsoprea/go-logging v0.0.0-20190624164917-c4f10aab7696/go.mod h1:Nm/x2ZUNRW6Fe5C3LxdY1PyZY5wmDv/s5dkPJ/VB3iA= +github.com/dsoprea/go-logging v0.0.0-20200517223158-a10564966e9d/go.mod h1:7I+3Pe2o/YSU88W0hWlm9S22W7XI1JFNJ86U0zPKMf8= +github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd h1:l+vLbuxptsC6VQyQsfD7NnEC8BZuFpz45PgY+pH8YTg= +github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd/go.mod h1:7I+3Pe2o/YSU88W0hWlm9S22W7XI1JFNJ86U0zPKMf8= +github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8= +github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= +github.com/dsoprea/go-utility/v2 v2.0.0-20221003142440-7a1927d49d9d/go.mod h1:LVjRU0RNUuMDqkPTxcALio0LWPFPXxxFCvVGVAwEpFc= +github.com/dsoprea/go-utility/v2 v2.0.0-20221003160719-7bc88537c05e/go.mod h1:VZ7cB0pTjm1ADBWhJUOHESu4ZYy9JN+ZPqjfiW09EPU= +github.com/dsoprea/go-utility/v2 v2.0.0-20221003172846-a3e1774ef349 h1:DilThiXje0z+3UQ5YjYiSRRzVdtamFpvBQXKwMglWqw= +github.com/dsoprea/go-utility/v2 v2.0.0-20221003172846-a3e1774ef349/go.mod h1:4GC5sXji84i/p+irqghpPFZBF8tRN/Q7+700G0/DLe8= +github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= +github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= 
+github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= +github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= +github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs= +github.com/go-errors/errors v1.1.1/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= +github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= +github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/golang/geo v0.0.0-20190916061304-5b978397cfec/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= +github.com/golang/geo v0.0.0-20200319012246-673a6f80352d/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= 
+github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/jessevdk/go-flags v1.5.0/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= +github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= +github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/twitchyliquid64/golang-asm v0.15.1 
h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= +golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod 
// VideoContainerConfig describes a supported output video container.
// The struct tags give the lowercase keys used when a value is encoded as JSON.
type VideoContainerConfig struct {
	Name       string   `json:"name"`       // container name, e.g. "MP4"
	Codecs     []string `json:"codecs"`     // codec identifiers listed for this container
	Extensions []string `json:"extensions"` // file extensions, each with a leading dot
}

// VideoContainers lists the three supported output video containers.
// init flattens every Extensions entry of this slice into canonicalVideo.
var VideoContainers = []VideoContainerConfig{
	{
		Name:       "MP4",
		Codecs:     []string{"h264", "h265", "av1"},
		Extensions: []string{".mp4", ".m4a", ".m4p", ".m4b", ".m4r", ".m4v"},
	},
	{
		Name:       "Matroska",
		Codecs:     []string{"h264", "h265", "av1", "vp9"},
		Extensions: []string{".mkv", ".mk3d", ".mka", ".mks"},
	},
	{
		Name:       "WebM",
		Codecs:     []string{"vp8", "vp9", "av1"},
		Extensions: []string{".webm"},
	},
}

// Canonical image formats we can write to.
// Keys are lowercase extensions with a leading dot. init uses this set twice:
// to decide which source extensions receive an ImageConversions entry, and as
// the pool of conversion targets offered for each of them.
var canonicalImage = map[string]struct{}{
	".jpg":  {},
	".png":  {},
	".tif":  {},
	".bmp":  {},
	".webp": {},
	".gif":  {},
	".ico":  {},
	".avif": {},
}

// canonicalVideo is the set of all supported output video extensions (derived from VideoContainers).
+var canonicalVideo map[string]struct{} + +// ImageConversions maps source image extension to list of target extensions. +var ImageConversions map[string][]string + +// VideoConversions maps source video extension to list of all canonical video output extensions. +var VideoConversions map[string][]string + +func init() { + canonicalVideo = make(map[string]struct{}) + for _, c := range VideoContainers { + for _, ext := range c.Extensions { + canonicalVideo[ext] = struct{}{} + } + } + + allVideoTargets := make([]string, 0, len(canonicalVideo)) + for ext := range canonicalVideo { + allVideoTargets = append(allVideoTargets, ext) + } + sort.Strings(allVideoTargets) + + ImageConversions = make(map[string][]string) + for ext := range shared.ImageExtensions { + norm := shared.NormaliseExt(ext) + if _, ok := canonicalImage[norm]; !ok { + continue + } + var targets []string + for t := range canonicalImage { + if t != norm { + targets = append(targets, t) + } + } + sort.Strings(targets) + ImageConversions[ext] = targets + } + + VideoConversions = make(map[string][]string) + for ext := range shared.VideoExtensions { + norm := shared.NormaliseExt(ext) + var targets []string + for _, t := range allVideoTargets { + if t != norm { + targets = append(targets, t) + } + } + VideoConversions[ext] = targets + } +} + +// GetCompatibleTargets returns the list of extensions a source can convert to. +func GetCompatibleTargets(sourcePath string) []string { + ext := shared.NormaliseExt(strings.ToLower(extOf(sourcePath))) + if targets, ok := ImageConversions[ext]; ok { + return targets + } + if targets, ok := VideoConversions[ext]; ok { + return targets + } + return nil +} + +func extOf(path string) string { + for i := len(path) - 1; i >= 0; i-- { + if path[i] == '.' 
{ + return path[i:] + } + } + return "" +} diff --git a/internal/converter/converter.go b/internal/converter/converter.go new file mode 100644 index 0000000..c4d5d13 --- /dev/null +++ b/internal/converter/converter.go @@ -0,0 +1,319 @@ +package converter + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + + "github.com/disintegration/imaging" + "github.com/exterex/morphic/internal/shared" +) + +// toWindowsPath converts a WSL /mnt/X/... path to a Windows X:\... path. +// Windows-native executables (e.g. ffmpeg.exe) cannot access /mnt/ paths directly. +// Paths not matching /mnt// are returned unchanged. +func toWindowsPath(p string) string { + // /mnt/d/foo/bar → D:\foo\bar + if !strings.HasPrefix(p, "/mnt/") || len(p) < 7 { + return p + } + rest := p[5:] // strip "/mnt/" + slash := strings.IndexByte(rest, '/') + var drive, tail string + if slash == -1 { + drive = rest + tail = "" + } else { + drive = rest[:slash] + tail = rest[slash+1:] + } + if len(drive) != 1 { + return p + } + return strings.ToUpper(drive) + ":\\" + strings.ReplaceAll(tail, "/", "\\") +} + +// pathForBin returns the path in the format expected by the given binary. +// When bin is a Windows executable (.exe), WSL /mnt/ paths are converted. +func pathForBin(bin, p string) string { + if strings.HasSuffix(strings.ToLower(bin), ".exe") { + return toWindowsPath(p) + } + return p +} + +// On WSL2, ffmpeg.exe (Windows build) supports AVIF output; /usr/bin/ffmpeg does not. +func ffmpegCandidates() []string { + var bins []string + for _, name := range []string{"ffmpeg", "ffmpeg.exe"} { + if _, err := exec.LookPath(name); err == nil { + bins = append(bins, name) + } + } + return bins +} + +// probeVideoBitrate returns the total bitrate (bits/s) of source, or 0 on failure. +// It derives the ffprobe binary from the ffmpeg binary (ffmpeg → ffprobe, ffmpeg.exe → ffprobe.exe). 
+func probeVideoBitrate(source, ffmpegBin string) int64 { + probeBin := strings.Replace(ffmpegBin, "ffmpeg", "ffprobe", 1) + if _, err := exec.LookPath(probeBin); err != nil { + return 0 + } + out, err := exec.Command(probeBin, + "-v", "quiet", + "-show_entries", "format=bit_rate", + "-of", "default=noprint_wrappers=1", + source).Output() + if err != nil { + return 0 + } + for _, line := range strings.Split(string(out), "\n") { + if strings.HasPrefix(line, "bit_rate=") { + val := strings.TrimSpace(strings.TrimPrefix(line, "bit_rate=")) + if n, err := strconv.ParseInt(val, 10, 64); err == nil && n > 0 { + return n + } + } + } + return 0 +} + +// ffmpegHasEncoder checks if the given binary has a particular encoder. +func ffmpegHasEncoder(bin, encoder string) bool { + out, err := exec.Command(bin, "-hide_banner", "-encoders").Output() + if err != nil { + return false + } + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, encoder) { + return true + } + } + return false +} + +// getVideoEncoder returns the ffmpeg encoder name for the given codec ID. +// Codec IDs: h264, h265, av1, vp8, vp9. +func getVideoEncoder(codec string) (string, error) { + bin := "ffmpeg" + if candidates := ffmpegCandidates(); len(candidates) > 0 { + bin = candidates[0] + } + + switch codec { + case "h264": + return "libx264", nil + case "h265": + return "libx265", nil + case "av1": + for _, enc := range []string{"libsvtav1", "libaom-av1"} { + if ffmpegHasEncoder(bin, enc) { + return enc, nil + } + } + return "", fmt.Errorf("no AV1 encoder available (libsvtav1 or libaom-av1 required)") + case "vp8": + return "libvpx", nil + case "vp9": + return "libvpx-vp9", nil + } + return "", fmt.Errorf("unknown codec: %s", codec) +} + +// ConvertImage converts an image file using the imaging library. 
+func ConvertImage(source, targetExt, outputDir string) (string, error) { + ext := shared.NormaliseExt(normaliseTargetExt(targetExt)) + + stem := strings.TrimSuffix(filepath.Base(source), filepath.Ext(source)) + var dest string + if outputDir != "" { + os.MkdirAll(outputDir, 0755) + dest = filepath.Join(outputDir, stem+ext) + } else { + dest = filepath.Join(filepath.Dir(source), stem+ext) + } + + // Avoid overwriting + if _, err := os.Stat(dest); err == nil { + dest = filepath.Join(filepath.Dir(dest), + strings.TrimSuffix(filepath.Base(dest), ext)+"_converted"+ext) + } + + sourceExt := shared.NormaliseExt(strings.ToLower(filepath.Ext(source))) + if sourceExt == ".avif" || ext == ".avif" { + return convertImageByFFmpeg(source, dest, ext) + } + + img, err := imaging.Open(source) + if err != nil { + // Relax: fallback to ffmpeg conversion for special unsupported formats + return convertImageByFFmpeg(source, dest, ext) + } + + opts := []imaging.EncodeOption{} + extLower := strings.ToLower(ext) + if extLower == ".jpg" || extLower == ".jpeg" { + opts = append(opts, imaging.JPEGQuality(95)) + } + + if err := imaging.Save(img, dest, opts...); err != nil { + // Fallback to ffmpeg for formats imaging can't encode + return convertImageByFFmpeg(source, dest, ext) + } + + return dest, nil +} + +func convertImageByFFmpeg(source, dest, ext string) (string, error) { + candidates := ffmpegCandidates() + if len(candidates) == 0 { + return "", fmt.Errorf("ffmpeg is not installed or not on PATH") + } + + extLower := strings.ToLower(ext) + + var lastErr error + for _, bin := range candidates { + src := pathForBin(bin, source) + dst := pathForBin(bin, dest) + args := []string{"-y", "-i", src} + + if extLower == ".avif" { + // AV1 (YUV 4:2:0) requires even dimensions and no alpha channel. + // crop: trim 1px from odd dimensions. format=yuv420p: strip alpha (rgba → yuv420p). 
+ args = append(args, "-vf", "crop=trunc(iw/2)*2:trunc(ih/2)*2,format=yuv420p") + if ffmpegHasEncoder(bin, "libsvtav1") { + args = append(args, "-c:v", "libsvtav1", "-crf", "28", "-preset", "8") + } else if ffmpegHasEncoder(bin, "libaom-av1") { + args = append(args, "-c:v", "libaom-av1", "-crf", "28", "-cpu-used", "4") + } else { + args = append(args, "-c:v", "libx264") + } + } else if extLower == ".webp" { + args = append(args, "-c:v", "libwebp") + } else if extLower == ".png" || extLower == ".jpg" || extLower == ".jpeg" || extLower == ".bmp" || extLower == ".gif" { + // no explicit codec required + } else { + // generic fallback for unknown image extensions + } + + args = append(args, dst) + + out, err := exec.Command(bin, args...).CombinedOutput() + if err == nil { + return dest, nil + } + lastErr = fmt.Errorf("ffmpeg image conversion failed: %s", strings.TrimSpace(string(out))) + } + return "", lastErr +} + +// ConvertVideo converts a video file using ffmpeg. +// codec is one of: h264, h265, av1, vp8, vp9. Defaults to h264 when empty. 
+func ConvertVideo(source, targetExt, codec, outputDir string, av1CRF int) (string, error) { + candidates := ffmpegCandidates() + if len(candidates) == 0 { + return "", fmt.Errorf("ffmpeg is not installed or not on PATH") + } + bin := candidates[0] + + ext := normaliseTargetExt(targetExt) + + stem := strings.TrimSuffix(filepath.Base(source), filepath.Ext(source)) + var dest string + if outputDir != "" { + os.MkdirAll(outputDir, 0755) + dest = filepath.Join(outputDir, stem+ext) + } else { + dest = filepath.Join(filepath.Dir(source), stem+ext) + } + + // Avoid overwriting + if _, err := os.Stat(dest); err == nil { + dest = filepath.Join(filepath.Dir(dest), + strings.TrimSuffix(filepath.Base(dest), ext)+"_converted"+ext) + } + + if codec == "" { + codec = "h264" + } + + encoder, err := getVideoEncoder(codec) + if err != nil { + return "", err + } + + cmd := []string{bin, "-y", "-i", source, "-c:v", encoder, "-c:a", "aac"} + + isAV1 := encoder == "libsvtav1" || encoder == "libaom-av1" + if isAV1 { + // AV1 requires even dimensions for YUV 4:2:0 + cmd = append(cmd, "-vf", "crop=trunc(iw/2)*2:trunc(ih/2)*2") + } + + switch encoder { + case "libsvtav1": + crf := 35 + if av1CRF >= 10 && av1CRF <= 63 { + crf = av1CRF + } + cmd = append(cmd, "-preset", "8", "-crf", fmt.Sprintf("%d", crf)) + case "libaom-av1": + crf := 35 + if av1CRF >= 10 && av1CRF <= 63 { + crf = av1CRF + } + cmd = append(cmd, "-cpu-used", "4", "-crf", fmt.Sprintf("%d", crf)) + case "libvpx", "libvpx-vp9": + crf := 35 + if av1CRF >= 10 && av1CRF <= 63 { + crf = av1CRF + } + cmd = append(cmd, "-crf", fmt.Sprintf("%d", crf), "-b:v", "0") + case "libx265": + cmd = append(cmd, "-preset", "fast", "-crf", "28") + default: // libx264 + cmd = append(cmd, "-preset", "fast", "-crf", "23") + } + + // For AV1, cap output bitrate at 65% of source to guarantee a size reduction. 
+ if isAV1 { + if br := probeVideoBitrate(source, bin); br > 0 { + maxrate := br * 65 / 100 + cmd = append(cmd, "-maxrate", fmt.Sprintf("%d", maxrate), + "-bufsize", fmt.Sprintf("%d", br*2)) + } + } + + cmd = append(cmd, dest) + + out, err2 := exec.Command(cmd[0], cmd[1:]...).CombinedOutput() + if err2 != nil { + return "", fmt.Errorf("ffmpeg error: %s", strings.TrimSpace(string(out))) + } + + return dest, nil +} + +// ConvertFile is the high-level converter — routes to image or video handler. +// codec is used only for video conversion (h264, h265, av1, vp8, vp9). +func ConvertFile(source, targetExt, codec, outputDir string, av1CRF int) (string, error) { + if shared.IsImage(source) { + return ConvertImage(source, targetExt, outputDir) + } + if shared.IsVideo(source) { + return ConvertVideo(source, targetExt, codec, outputDir, av1CRF) + } + return "", fmt.Errorf("unsupported file type: %s", source) +} + +func normaliseTargetExt(ext string) string { + if !strings.HasPrefix(ext, ".") { + ext = "." + ext + } + return shared.NormaliseExt(ext) +} diff --git a/internal/converter/converter_test.go b/internal/converter/converter_test.go new file mode 100644 index 0000000..d81cb97 --- /dev/null +++ b/internal/converter/converter_test.go @@ -0,0 +1,200 @@ +package converter_test + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/exterex/morphic/internal/converter" +) + +// assetsDir returns the absolute path to assets/test relative to this file. +func assetsDir(t *testing.T) string { + t.Helper() + // From internal/converter/ we go up three levels to reach the repo root. 
+ dir, err := filepath.Abs(filepath.Join("..", "..", "assets", "test")) + if err != nil { + t.Fatalf("cannot resolve assets dir: %v", err) + } + return dir +} + +// ── ScanFolder ───────────────────────────────────────────────────────────── + +func TestScanFolder_basic(t *testing.T) { + dir := assetsDir(t) + result, err := converter.ScanFolder(dir, false, "both") + if err != nil { + t.Fatalf("ScanFolder returned error: %v", err) + } + if result.Total == 0 { + t.Error("expected at least one file in assets/test, got 0") + } + if result.Folder != dir { + t.Errorf("folder mismatch: got %q want %q", result.Folder, dir) + } +} + +func TestScanFolder_imageFilter(t *testing.T) { + dir := assetsDir(t) + result, err := converter.ScanFolder(dir, false, "images") + if err != nil { + t.Fatalf("ScanFolder(images) error: %v", err) + } + for _, f := range result.Files { + if f.Type != "image" { + t.Errorf("expected image type, got %q for %s", f.Type, f.Name) + } + } +} + +func TestScanFolder_videoFilter(t *testing.T) { + dir := assetsDir(t) + result, err := converter.ScanFolder(dir, false, "videos") + if err != nil { + t.Fatalf("ScanFolder(videos) error: %v", err) + } + for _, f := range result.Files { + if f.Type != "video" { + t.Errorf("expected video type, got %q for %s", f.Type, f.Name) + } + } +} + +func TestScanFolder_invalidPath(t *testing.T) { + _, err := converter.ScanFolder("/nonexistent/path/xyz", false, "both") + if err == nil { + t.Error("expected error for nonexistent folder, got nil") + } +} + +func TestScanFolder_summaryMatchesTotal(t *testing.T) { + dir := assetsDir(t) + result, err := converter.ScanFolder(dir, true, "both") + if err != nil { + t.Fatalf("ScanFolder error: %v", err) + } + sum := 0 + for _, v := range result.Summary { + sum += v + } + if sum != result.Total { + t.Errorf("summary counts (%d) don't match total (%d)", sum, result.Total) + } +} + +// ── ConvertImage ──────────────────────────────────────────────────────────── + +func 
TestConvertImage_jpgToPng(t *testing.T) { + src := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := os.Stat(src); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + + tmp := t.TempDir() + out, err := converter.ConvertImage(src, ".png", tmp) + if err != nil { + t.Fatalf("ConvertImage jpg→png failed: %v", err) + } + if _, err := os.Stat(out); err != nil { + t.Errorf("output file not created: %v", err) + } + if !strings.HasSuffix(strings.ToLower(out), ".png") { + t.Errorf("expected .png output, got %s", out) + } +} + +func TestConvertImage_pngToWebp(t *testing.T) { + src := filepath.Join(assetsDir(t), "sample2.png") + if _, err := os.Stat(src); os.IsNotExist(err) { + t.Skip("sample2.png not present") + } + + tmp := t.TempDir() + out, err := converter.ConvertImage(src, ".webp", tmp) + if err != nil { + t.Fatalf("ConvertImage png→webp failed: %v", err) + } + if _, err := os.Stat(out); err != nil { + t.Errorf("output file not created: %v", err) + } +} + +func TestConvertImage_tifToJpg(t *testing.T) { + src := filepath.Join(assetsDir(t), "sample3.tif") + if _, err := os.Stat(src); os.IsNotExist(err) { + t.Skip("sample3.tif not present") + } + + tmp := t.TempDir() + out, err := converter.ConvertImage(src, ".jpg", tmp) + if err != nil { + t.Fatalf("ConvertImage tif→jpg failed: %v", err) + } + if _, err := os.Stat(out); err != nil { + t.Errorf("output file not created: %v", err) + } +} + +func TestConvertImage_rgbaPngToJpg(t *testing.T) { + src := filepath.Join(assetsDir(t), "sample_rgba.png") + if _, err := os.Stat(src); os.IsNotExist(err) { + t.Skip("sample_rgba.png not present") + } + + tmp := t.TempDir() + out, err := converter.ConvertImage(src, ".jpg", tmp) + if err != nil { + t.Fatalf("ConvertImage rgba.png→jpg failed: %v", err) + } + if _, err := os.Stat(out); err != nil { + t.Errorf("output file not created: %v", err) + } +} + +func TestConvertImage_noOverwrite(t *testing.T) { + src := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := 
os.Stat(src); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + + tmp := t.TempDir() + // Convert twice — second call must not overwrite; it adds "_converted" suffix + out1, err := converter.ConvertImage(src, ".png", tmp) + if err != nil { + t.Fatalf("first convert failed: %v", err) + } + out2, err := converter.ConvertImage(src, ".png", tmp) + if err != nil { + t.Fatalf("second convert failed: %v", err) + } + if out1 == out2 { + t.Error("expected second output path to differ (no-overwrite), got same path") + } +} + +// ── ImageConversions table ────────────────────────────────────────────────── + +func TestImageConversions_nonEmpty(t *testing.T) { + if len(converter.ImageConversions) == 0 { + t.Error("ImageConversions is empty") + } + for src, targets := range converter.ImageConversions { + if len(targets) == 0 { + t.Errorf("no targets for source format %q", src) + } + // Source must not appear in its own target list + for _, tgt := range targets { + if tgt == src { + t.Errorf("format %q lists itself as a target", src) + } + } + } +} + +func TestVideoConversions_nonEmpty(t *testing.T) { + if len(converter.VideoConversions) == 0 { + t.Error("VideoConversions is empty") + } +} diff --git a/internal/converter/scanner.go b/internal/converter/scanner.go new file mode 100644 index 0000000..0969782 --- /dev/null +++ b/internal/converter/scanner.go @@ -0,0 +1,130 @@ +package converter + +import ( + "os" + "path/filepath" + "sort" + "strings" + + "github.com/exterex/morphic/internal/shared" +) + +// ScanResult holds the output of a folder scan. +type ScanResult struct { + Folder string `json:"folder"` + Summary map[string]int `json:"summary"` + Files []FileEntry `json:"files"` + Total int `json:"total"` +} + +// FileEntry describes one file found during scanning. 
+type FileEntry struct { + Path string `json:"path"` + Name string `json:"name"` + Ext string `json:"ext"` + Size int64 `json:"size"` + Type string `json:"type"` + Targets []string `json:"targets"` +} + +// ScanFolder walks folder and returns an inventory of convertible media files. +func ScanFolder(folder string, includeSubfolders bool, filterType string) (*ScanResult, error) { + allowed := make(map[string]struct{}) + if filterType == "images" || filterType == "both" { + for k, v := range shared.ImageExtensions { + allowed[k] = v + } + } + if filterType == "videos" || filterType == "both" { + for k, v := range shared.VideoExtensions { + allowed[k] = v + } + } + + summary := make(map[string]int) + var files []FileEntry + + if includeSubfolders { + filepath.WalkDir(folder, func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + name := strings.ToLower(d.Name()) + if _, excl := shared.ExcludedFolders[name]; excl { + return filepath.SkipDir + } + return nil + } + addFileEntry(path, d, allowed, summary, &files) + return nil + }) + } else { + entries, err := os.ReadDir(folder) + if err != nil { + return nil, err + } + for _, d := range entries { + if d.IsDir() { + continue + } + path := filepath.Join(folder, d.Name()) + addFileEntry(path, d, allowed, summary, &files) + } + } + + // Sort summary by count descending + type kv struct { + K string + V int + } + var sorted []kv + for k, v := range summary { + sorted = append(sorted, kv{k, v}) + } + sort.Slice(sorted, func(i, j int) bool { return sorted[i].V > sorted[j].V }) + sortedSummary := make(map[string]int, len(sorted)) + for _, s := range sorted { + sortedSummary[s.K] = s.V + } + + // Sort files by name + sort.Slice(files, func(i, j int) bool { + return strings.ToLower(files[i].Name) < strings.ToLower(files[j].Name) + }) + + return &ScanResult{ + Folder: folder, + Summary: sortedSummary, + Files: files, + Total: len(files), + }, nil +} + +func addFileEntry(path string, 
d os.DirEntry, allowed map[string]struct{}, summary map[string]int, files *[]FileEntry) { + ext := shared.NormaliseExt(strings.ToLower(filepath.Ext(path))) + if _, ok := allowed[ext]; !ok { + return + } + + var size int64 + if info, err := d.Info(); err == nil { + size = info.Size() + } + + ftype := "image" + if _, ok := shared.VideoExtensions[ext]; ok { + ftype = "video" + } + + targets := GetCompatibleTargets(path) + summary[ext]++ + *files = append(*files, FileEntry{ + Path: path, + Name: d.Name(), + Ext: ext, + Size: size, + Type: ftype, + Targets: targets, + }) +} diff --git a/internal/dupfinder/dupfinder_test.go b/internal/dupfinder/dupfinder_test.go new file mode 100644 index 0000000..27ef45e --- /dev/null +++ b/internal/dupfinder/dupfinder_test.go @@ -0,0 +1,224 @@ +package dupfinder_test + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/exterex/morphic/internal/dupfinder" + "github.com/exterex/morphic/internal/shared" +) + +// assetsDir returns the path to assets/test from internal/dupfinder/. 
+func assetsDir(t *testing.T) string { + t.Helper() + dir, err := filepath.Abs(filepath.Join("..", "..", "assets", "test")) + if err != nil { + t.Fatalf("cannot resolve assets dir: %v", err) + } + return dir +} + +// ── ComputeImageHashes ────────────────────────────────────────────────────── + +func TestComputeImageHashes_jpg(t *testing.T) { + path := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + info := dupfinder.ComputeImageHashes(path) + if !info.HasHash { + t.Error("expected HasHash=true for a valid JPEG") + } + if info.PHash == 0 && info.AHash == 0 && info.DHash == 0 { + t.Error("all hashes are zero for a valid JPEG") + } + if info.Width == 0 || info.Height == 0 { + t.Errorf("unexpected zero dimensions: %dx%d", info.Width, info.Height) + } +} + +func TestComputeImageHashes_png(t *testing.T) { + path := filepath.Join(assetsDir(t), "sample2.png") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample2.png not present") + } + info := dupfinder.ComputeImageHashes(path) + if !info.HasHash { + t.Error("expected HasHash=true for a valid PNG") + } +} + +func TestComputeImageHashes_nonexistent(t *testing.T) { + info := dupfinder.ComputeImageHashes("/nonexistent/file.jpg") + if info.HasHash { + t.Error("expected HasHash=false for a missing file") + } + if info.FileSize != 0 { + t.Error("expected zero FileSize for a missing file") + } +} + +func TestComputeImageHashes_sameFileSameHash(t *testing.T) { + path := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + a := dupfinder.ComputeImageHashes(path) + b := dupfinder.ComputeImageHashes(path) + if a.PHash != b.PHash || a.AHash != b.AHash || a.DHash != b.DHash { + t.Error("same file produced different hashes on two reads") + } +} + +// ── ComputeSimilarity ─────────────────────────────────────────────────────── + +func 
TestComputeSimilarity_identicalImage(t *testing.T) { + path := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + info := dupfinder.ComputeImageHashes(path) + sim := dupfinder.ComputeSimilarity(&info, &info) + if sim < 0.999 { + t.Errorf("self-similarity expected ≈1.0, got %f", sim) + } +} + +func TestComputeSimilarity_differentImages(t *testing.T) { + dir := assetsDir(t) + // Use visually distinct images: smooth gradient vs high-frequency checkerboard + pathA := filepath.Join(dir, "gradient.png") + pathB := filepath.Join(dir, "checkerboard.png") + if _, e := os.Stat(pathA); os.IsNotExist(e) { + t.Skip("gradient.png not present") + } + if _, e := os.Stat(pathB); os.IsNotExist(e) { + t.Skip("checkerboard.png not present") + } + a := dupfinder.ComputeImageHashes(pathA) + b := dupfinder.ComputeImageHashes(pathB) + // Different images should be less than 99% similar + sim := dupfinder.ComputeSimilarity(&a, &b) + if sim > 0.99 { + t.Errorf("distinct images have unexpectedly high similarity: %f", sim) + } +} + +// ── ProcessImages ─────────────────────────────────────────────────────────── + +func TestProcessImages_basic(t *testing.T) { + dir := assetsDir(t) + files := []shared.FileInfo{ + {Path: filepath.Join(dir, "sample1.jpg"), Ext: ".jpg"}, + {Path: filepath.Join(dir, "sample2.png"), Ext: ".png"}, + } + // Filter to only existing files + var existing []shared.FileInfo + for _, f := range files { + if _, err := os.Stat(f.Path); err == nil { + existing = append(existing, f) + } + } + if len(existing) == 0 { + t.Skip("no test images available") + } + + result := dupfinder.ProcessImages(context.Background(), existing, 2) + if len(result) == 0 { + t.Error("ProcessImages returned empty result for valid images") + } + for path, info := range result { + if !info.HasHash { + t.Errorf("image %s has no hash after processing", path) + } + } +} + +func TestProcessImages_cancelledContext(t 
*testing.T) { + dir := assetsDir(t) + files := []shared.FileInfo{ + {Path: filepath.Join(dir, "sample1.jpg"), Ext: ".jpg"}, + {Path: filepath.Join(dir, "sample2.png"), Ext: ".png"}, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + + result := dupfinder.ProcessImages(ctx, files, 2) + // With cancelled context, we should get 0 results (cancelled before first work) + _ = result // either 0 or partial — just must not panic +} + +// ── FindImageDuplicates ───────────────────────────────────────────────────── + +func TestFindImageDuplicates_exactDuplicate(t *testing.T) { + path := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + + info := dupfinder.ComputeImageHashes(path) + // Register the same image under two different paths + infos := map[string]*dupfinder.ImageInfo{ + "/fake/a.jpg": &info, + "/fake/b.jpg": &info, + } + groups := dupfinder.FindImageDuplicates(infos, 0.9) + if len(groups) == 0 { + t.Error("expected at least one duplicate group for identical hashes") + } +} + +func TestFindImageDuplicates_noFalsePositives(t *testing.T) { + dir := assetsDir(t) + // Use visually distinct images: smooth gradient vs high-frequency checkerboard + pathA := filepath.Join(dir, "gradient.png") + pathB := filepath.Join(dir, "checkerboard.png") + if _, e := os.Stat(pathA); os.IsNotExist(e) { + t.Skip("gradient.png not present") + } + if _, e := os.Stat(pathB); os.IsNotExist(e) { + t.Skip("checkerboard.png not present") + } + + infoA := dupfinder.ComputeImageHashes(pathA) + infoB := dupfinder.ComputeImageHashes(pathB) + infos := map[string]*dupfinder.ImageInfo{ + pathA: &infoA, + pathB: &infoB, + } + // High threshold — visually dissimilar images must not be grouped + groups := dupfinder.FindImageDuplicates(infos, 0.99) + for _, g := range groups { + if len(g) > 1 { + t.Errorf("distinct images grouped as duplicates at threshold 0.99: %v", g) + } + 
} +} + +// ── StartJob / GetJob ─────────────────────────────────────────────────────── + +func TestStartJob_createsJob(t *testing.T) { + dir := assetsDir(t) + id := dupfinder.StartJob(dir, "images", 0.9, 0.85) + if id == "" { + t.Fatal("StartJob returned empty job ID") + } + + job, ok := dupfinder.GetJob(id) + if !ok { + t.Fatal("GetJob returned false for a just-created job") + } + if job.ID != id { + t.Errorf("job ID mismatch: got %q want %q", job.ID, id) + } +} + +func TestGetJob_unknownID(t *testing.T) { + _, ok := dupfinder.GetJob("00000000-0000-0000-0000-000000000000") + if ok { + t.Error("expected false for unknown job ID") + } +} diff --git a/internal/dupfinder/images.go b/internal/dupfinder/images.go new file mode 100644 index 0000000..2025b9a --- /dev/null +++ b/internal/dupfinder/images.go @@ -0,0 +1,189 @@ +package dupfinder + +import ( + "context" + "log" + "math/bits" + "os" + "sort" + "sync" + + "github.com/corona10/goimagehash" + "github.com/exterex/morphic/internal/shared" +) + +// ImageInfo stores information about an image file. +type ImageInfo struct { + Path string `json:"path"` + Width int `json:"width"` + Height int `json:"height"` + FileSize int64 `json:"file_size"` + Format string `json:"format"` + PHash uint64 `json:"-"` + AHash uint64 `json:"-"` + DHash uint64 `json:"-"` + HasHash bool `json:"-"` +} + +// ComputeImageHashes loads an image and computes perceptual hashes. 
+func ComputeImageHashes(path string) ImageInfo { + info := ImageInfo{Path: path} + + st, err := os.Stat(path) + if err != nil { + return info + } + info.FileSize = st.Size() + + img, err := shared.OpenImageFile(path) + if err != nil { + log.Printf("dupfinder: cannot open image %s: %v", path, err) + return info + } + + bounds := img.Bounds() + info.Width = bounds.Dx() + info.Height = bounds.Dy() + + ph, err := goimagehash.PerceptionHash(img) + if err == nil { + info.PHash = ph.GetHash() + } + ah, err := goimagehash.AverageHash(img) + if err == nil { + info.AHash = ah.GetHash() + } + dh, err := goimagehash.DifferenceHash(img) + if err == nil { + info.DHash = dh.GetHash() + } + + info.HasHash = info.PHash != 0 || info.AHash != 0 || info.DHash != 0 + return info +} + +// ProcessImages hashes all images concurrently and returns successful results. +// It stops accepting new work when ctx is cancelled. +func ProcessImages(ctx context.Context, files []shared.FileInfo, numWorkers int) map[string]*ImageInfo { + result := make(map[string]*ImageInfo) + var mu sync.Mutex + var wg sync.WaitGroup + sem := make(chan struct{}, numWorkers) + + for _, f := range files { + select { + case <-ctx.Done(): + wg.Wait() + return result + default: + } + wg.Add(1) + sem <- struct{}{} + go func(fi shared.FileInfo) { + defer wg.Done() + defer func() { <-sem }() + info := ComputeImageHashes(fi.Path) + if info.HasHash { + mu.Lock() + result[fi.Path] = &info + mu.Unlock() + } + }(f) + } + wg.Wait() + return result +} + +// hashSimilarity returns the similarity (0-1) between two 64-bit hashes. +func hashSimilarity(a, b uint64) float64 { + dist := bits.OnesCount64(a ^ b) + return 1.0 - float64(dist)/64.0 +} + +// ComputeSimilarity computes average similarity across phash, ahash, dhash. 
+func ComputeSimilarity(a, b *ImageInfo) float64 { + var total, count float64 + if a.PHash != 0 && b.PHash != 0 { + total += hashSimilarity(a.PHash, b.PHash) + count++ + } + if a.AHash != 0 && b.AHash != 0 { + total += hashSimilarity(a.AHash, b.AHash) + count++ + } + if a.DHash != 0 && b.DHash != 0 { + total += hashSimilarity(a.DHash, b.DHash) + count++ + } + if count == 0 { + return 0 + } + return total / count +} + +// DuplicateEntry represents one file in a duplicate group. +type DuplicateEntry struct { + Path string `json:"path"` + Similarity float64 `json:"similarity"` +} + +// FindImageDuplicates finds groups of duplicate images. +func FindImageDuplicates(infos map[string]*ImageInfo, threshold float64) [][]DuplicateEntry { + // Bucket exact PHash matches first + buckets := make(map[uint64][]string) + for path, info := range infos { + if info.PHash != 0 { + buckets[info.PHash] = append(buckets[info.PHash], path) + } + } + + var groups [][]DuplicateEntry + assigned := make(map[string]bool) + + // Exact hash groups + for _, paths := range buckets { + if len(paths) > 1 { + sort.Strings(paths) + var group []DuplicateEntry + for _, p := range paths { + group = append(group, DuplicateEntry{Path: p, Similarity: 1.0}) + assigned[p] = true + } + groups = append(groups, group) + } + } + + // Near-duplicate detection on remaining images + var remaining []string + for path := range infos { + if !assigned[path] { + remaining = append(remaining, path) + } + } + sort.Strings(remaining) + + for i := 0; i < len(remaining); i++ { + if assigned[remaining[i]] { + continue + } + group := []DuplicateEntry{{Path: remaining[i], Similarity: 1.0}} + + for j := i + 1; j < len(remaining); j++ { + if assigned[remaining[j]] { + continue + } + sim := ComputeSimilarity(infos[remaining[i]], infos[remaining[j]]) + if sim >= threshold { + group = append(group, DuplicateEntry{Path: remaining[j], Similarity: sim}) + assigned[remaining[j]] = true + } + } + + if len(group) > 1 { + 
assigned[remaining[i]] = true + groups = append(groups, group) + } + } + + return groups +} diff --git a/internal/dupfinder/scanner.go b/internal/dupfinder/scanner.go new file mode 100644 index 0000000..4118f44 --- /dev/null +++ b/internal/dupfinder/scanner.go @@ -0,0 +1,318 @@ +package dupfinder + +import ( + "fmt" + "path/filepath" + "sort" + "sync" + "time" + + "github.com/exterex/morphic/internal/shared" +) + +// ScanJob represents a running or completed dupfinder job. +type ScanJob struct { + shared.Job + mu sync.Mutex + + Folder string `json:"folder"` + ScanType string `json:"scan_type"` // "images", "videos", "both" + ImageThreshold float64 `json:"image_threshold"` + VideoThreshold float64 `json:"video_threshold"` + ImageGroups [][]map[string]interface{} `json:"image_groups,omitempty"` + VideoGroups [][]map[string]interface{} `json:"video_groups,omitempty"` + TotalFound int `json:"total_files_found"` + TotalProcessed int `json:"total_files_processed"` + SpaceSavings int64 `json:"space_savings"` +} + +var store = shared.NewJobStore[ScanJob]() + +func init() { + store.StartCleanup(30*time.Minute, func(j *ScanJob) time.Time { + return j.DoneAt + }) +} + +// StartJob creates and launches a new dupfinder job. +func StartJob(folder, scanType string, imageThreshold, videoThreshold float64) string { + job := &ScanJob{ + Job: shared.NewJob(), + Folder: folder, + ScanType: scanType, + ImageThreshold: imageThreshold, + VideoThreshold: videoThreshold, + } + job.Status = shared.JobStatusRunning + store.Set(job.ID, job) + go runScan(job) + return job.ID +} + +// GetJob retrieves a job by ID. 
+func GetJob(id string) (*ScanJob, bool) { + return store.Get(id) +} + +func runScan(job *ScanJob) { + defer func() { + if r := recover(); r != nil { + job.mu.Lock() + job.Status = shared.JobStatusFailed + job.Error = fmt.Sprintf("%v", r) + job.DoneAt = time.Now() + job.mu.Unlock() + } + }() + + job.mu.Lock() + job.Message = fmt.Sprintf("Scanning folder: %s", job.Folder) + job.mu.Unlock() + + // Image scan + if job.ScanType == "images" || job.ScanType == "both" { + scanImages(job) + } + + // Check for cancellation between phases + select { + case <-job.Ctx().Done(): + job.mu.Lock() + job.Status = shared.JobStatusCancelled + job.DoneAt = time.Now() + job.Message = "Scan was interrupted" + job.mu.Unlock() + return + default: + } + + // Video scan + if job.ScanType == "videos" || job.ScanType == "both" { + scanVideos(job) + } + + // Check for cancellation before finalising + select { + case <-job.Ctx().Done(): + job.mu.Lock() + job.Status = shared.JobStatusCancelled + job.DoneAt = time.Now() + job.Message = "Scan was interrupted" + job.mu.Unlock() + return + default: + } + + // Finalise + job.mu.Lock() + job.SpaceSavings = calculateSpaceSavings(job) + job.Status = shared.JobStatusDone + job.Progress = 1.0 + job.DoneAt = time.Now() + elapsed := job.DoneAt.Sub(job.StartedAt).Seconds() + totalGroups := len(job.ImageGroups) + len(job.VideoGroups) + job.Message = fmt.Sprintf("Done! Found %d duplicate group(s) in %.1fs. Potential savings: %s", + totalGroups, elapsed, shared.FormatFileSize(job.SpaceSavings)) + job.mu.Unlock() +} + +func scanImages(job *ScanJob) { + job.mu.Lock() + job.Message = "Finding image files..." + job.mu.Unlock() + + files, err := shared.FindImageFiles(job.Folder) + if err != nil { + return + } + + job.mu.Lock() + job.TotalFound += len(files) + job.Message = fmt.Sprintf("Found %d images. 
Processing hashes...", len(files)) + job.Progress = 0.1 + job.mu.Unlock() + + if len(files) == 0 { + return + } + + infos := ProcessImages(job.Ctx(), files, shared.DefaultNumWorkers) + + // Return early if cancelled during hash processing + select { + case <-job.Ctx().Done(): + return + default: + } + + job.mu.Lock() + job.TotalProcessed += len(infos) + job.Progress = 0.4 + job.Message = fmt.Sprintf("Processed %d images. Finding duplicates...", len(infos)) + job.mu.Unlock() + + groups := FindImageDuplicates(infos, job.ImageThreshold) + formatted := formatImageGroups(groups, infos) + + job.mu.Lock() + job.ImageGroups = formatted + if job.ScanType == "both" { + job.Progress = 0.5 + } else { + job.Progress = 0.95 + } + job.mu.Unlock() +} + +func scanVideos(job *ScanJob) { + job.mu.Lock() + job.Message = "Finding video files..." + job.mu.Unlock() + + files, err := shared.FindVideoFiles(job.Folder) + if err != nil { + return + } + + job.mu.Lock() + job.TotalFound += len(files) + job.Message = fmt.Sprintf("Found %d videos. Processing hashes...", len(files)) + if job.ScanType == "both" { + job.Progress = 0.6 + } else { + job.Progress = 0.1 + } + job.mu.Unlock() + + if len(files) == 0 { + return + } + + infos := ProcessVideos(job.Ctx(), files, shared.DefaultNumFrames, shared.DefaultNumWorkers) + + // Return early if cancelled during hash processing + select { + case <-job.Ctx().Done(): + return + default: + } + + job.mu.Lock() + job.TotalProcessed += len(infos) + if job.ScanType == "both" { + job.Progress = 0.8 + } else { + job.Progress = 0.7 + } + job.Message = fmt.Sprintf("Processed %d videos. 
Finding duplicates...", len(infos)) + job.mu.Unlock() + + groups := FindVideoDuplicates(infos, job.VideoThreshold) + formatted := formatVideoGroups(groups, infos) + + job.mu.Lock() + job.VideoGroups = formatted + job.Progress = 0.95 + job.mu.Unlock() +} + +func formatImageGroups(groups [][]DuplicateEntry, infos map[string]*ImageInfo) [][]map[string]interface{} { + var result [][]map[string]interface{} + for _, group := range groups { + // Sort by file size descending + sort.Slice(group, func(i, j int) bool { + ai := infos[group[i].Path] + aj := infos[group[j].Path] + if ai == nil || aj == nil { + return false + } + return ai.FileSize > aj.FileSize + }) + + var formatted []map[string]interface{} + for _, entry := range group { + info := infos[entry.Path] + if info == nil { + continue + } + formatted = append(formatted, map[string]interface{}{ + "path": entry.Path, + "filename": filepath.Base(entry.Path), + "directory": filepath.Dir(entry.Path), + "width": info.Width, + "height": info.Height, + "resolution": fmt.Sprintf("%dx%d", info.Width, info.Height), + "format": info.Format, + "file_size": info.FileSize, + "file_size_formatted": shared.FormatFileSize(info.FileSize), + "similarity": float64(int(entry.Similarity*1000)) / 10, + "type": "image", + }) + } + if len(formatted) > 1 { + result = append(result, formatted) + } + } + return result +} + +func formatVideoGroups(groups [][]DuplicateEntry, infos map[string]*VideoInfo) [][]map[string]interface{} { + var result [][]map[string]interface{} + for _, group := range groups { + sort.Slice(group, func(i, j int) bool { + ai := infos[group[i].Path] + aj := infos[group[j].Path] + if ai == nil || aj == nil { + return false + } + return ai.FileSize > aj.FileSize + }) + + var formatted []map[string]interface{} + for _, entry := range group { + info := infos[entry.Path] + if info == nil { + continue + } + formatted = append(formatted, map[string]interface{}{ + "path": entry.Path, + "filename": filepath.Base(entry.Path), + 
"directory": filepath.Dir(entry.Path), + "width": info.Width, + "height": info.Height, + "resolution": fmt.Sprintf("%dx%d", info.Width, info.Height), + "duration": info.Duration, + "duration_formatted": shared.FormatDuration(info.Duration), + "fps": float64(int(info.FPS*10)) / 10, + "file_size": info.FileSize, + "file_size_formatted": shared.FormatFileSize(info.FileSize), + "similarity": float64(int(entry.Similarity*1000)) / 10, + "type": "video", + }) + } + if len(formatted) > 1 { + result = append(result, formatted) + } + } + return result +} + +func calculateSpaceSavings(job *ScanJob) int64 { + var total int64 + allGroups := append(job.ImageGroups, job.VideoGroups...) + for _, group := range allGroups { + var sizes []int64 + for _, item := range group { + if s, ok := item["file_size"].(int64); ok { + sizes = append(sizes, s) + } + } + if len(sizes) > 1 { + sort.Slice(sizes, func(i, j int) bool { return sizes[i] < sizes[j] }) + for _, s := range sizes[:len(sizes)-1] { + total += s + } + } + } + return total +} diff --git a/internal/dupfinder/videos.go b/internal/dupfinder/videos.go new file mode 100644 index 0000000..6073cdf --- /dev/null +++ b/internal/dupfinder/videos.go @@ -0,0 +1,241 @@ +package dupfinder + +import ( + "context" + "fmt" + "log" + "math/bits" + "os" + "os/exec" + "sort" + "strconv" + "strings" + "sync" + + "github.com/corona10/goimagehash" + "github.com/disintegration/imaging" + "github.com/exterex/morphic/internal/shared" +) + +// VideoInfo stores information about a video file. +type VideoInfo struct { + Path string `json:"path"` + Duration float64 `json:"duration"` + FPS float64 `json:"fps"` + FrameCount int `json:"frame_count"` + Width int `json:"width"` + Height int `json:"height"` + FileSize int64 `json:"file_size"` + FrameHashes []uint64 `json:"-"` + HasHash bool `json:"-"` +} + +// ComputeVideoHashes extracts frames and computes perceptual hashes. 
+func ComputeVideoHashes(path string, numFrames int) VideoInfo { + info := VideoInfo{Path: path} + + st, err := os.Stat(path) + if err != nil { + return info + } + info.FileSize = st.Size() + + // Get video metadata via ffprobe + probeOut, err := exec.Command("ffprobe", + "-v", "error", + "-select_streams", "v:0", + "-show_entries", "stream=width,height,duration,r_frame_rate,nb_frames", + "-of", "csv=p=0", + path, + ).Output() + if err != nil { + log.Printf("dupfinder: ffprobe failed for %s: %v", path, err) + return info + } + + parts := strings.Split(strings.TrimSpace(string(probeOut)), ",") + if len(parts) >= 1 { + info.Width, _ = strconv.Atoi(parts[0]) + } + if len(parts) >= 2 { + info.Height, _ = strconv.Atoi(parts[1]) + } + if len(parts) >= 3 && parts[2] != "" && parts[2] != "N/A" { + info.Duration, _ = strconv.ParseFloat(parts[2], 64) + } + if len(parts) >= 4 { + info.FPS = parseFPS(parts[3]) + } + if len(parts) >= 5 { + info.FrameCount, _ = strconv.Atoi(parts[4]) + } + + if info.Duration <= 0 && info.FrameCount > 0 && info.FPS > 0 { + info.Duration = float64(info.FrameCount) / info.FPS + } + + if info.Duration <= 0 { + return info + } + + // Extract frames at regular intervals using ffmpeg + frameHashes := extractAndHashFrames(path, info.Duration, numFrames) + info.FrameHashes = frameHashes + info.HasHash = len(frameHashes) > 0 + + return info +} + +// extractAndHashFrames extracts frames at intervals and hashes them. 
+func extractAndHashFrames(path string, duration float64, numFrames int) []uint64 { + startTime := duration * 0.05 + endTime := duration * 0.95 + if endTime <= startTime { + startTime = 0 + endTime = duration + } + + interval := (endTime - startTime) / float64(numFrames+1) + var hashes []uint64 + + for i := 0; i < numFrames; i++ { + ts := startTime + float64(i+1)*interval + frameFile := fmt.Sprintf("/tmp/morphic_frame_%d_%d.jpg", os.Getpid(), i) + + cmd := exec.Command("ffmpeg", "-y", + "-ss", fmt.Sprintf("%.3f", ts), + "-i", path, + "-vframes", "1", + "-q:v", "2", + frameFile, + ) + cmd.Stdout = nil + cmd.Stderr = nil + + if err := cmd.Run(); err != nil { + continue + } + + img, err := imaging.Open(frameFile) + os.Remove(frameFile) + if err != nil { + continue + } + + ph, err := goimagehash.PerceptionHash(img) + if err != nil { + continue + } + hashes = append(hashes, ph.GetHash()) + } + + return hashes +} + +func parseFPS(s string) float64 { + s = strings.TrimSpace(s) + if strings.Contains(s, "/") { + parts := strings.Split(s, "/") + if len(parts) == 2 { + num, err1 := strconv.ParseFloat(parts[0], 64) + den, err2 := strconv.ParseFloat(parts[1], 64) + if err1 == nil && err2 == nil && den != 0 { + return num / den + } + } + } + f, _ := strconv.ParseFloat(s, 64) + return f +} + +// ProcessVideos hashes all videos concurrently and returns successful results. +// It stops accepting new work when ctx is cancelled. 
+func ProcessVideos(ctx context.Context, files []shared.FileInfo, numFrames, numWorkers int) map[string]*VideoInfo { + result := make(map[string]*VideoInfo) + var mu sync.Mutex + var wg sync.WaitGroup + sem := make(chan struct{}, numWorkers) + + for _, f := range files { + select { + case <-ctx.Done(): + wg.Wait() + return result + default: + } + wg.Add(1) + sem <- struct{}{} + go func(fi shared.FileInfo) { + defer wg.Done() + defer func() { <-sem }() + info := ComputeVideoHashes(fi.Path, numFrames) + if info.HasHash { + mu.Lock() + result[fi.Path] = &info + mu.Unlock() + } + }(f) + } + wg.Wait() + return result +} + +// ComputeVideoSimilarity computes similarity between two videos using +// frame-level hash comparison. +func ComputeVideoSimilarity(a, b *VideoInfo) float64 { + if len(a.FrameHashes) == 0 || len(b.FrameHashes) == 0 { + return 0 + } + + var total float64 + for _, h1 := range a.FrameHashes { + bestSim := 0.0 + for _, h2 := range b.FrameHashes { + dist := bits.OnesCount64(h1 ^ h2) + sim := 1.0 - float64(dist)/64.0 + if sim > bestSim { + bestSim = sim + } + } + total += bestSim + } + + return total / float64(len(a.FrameHashes)) +} + +// FindVideoDuplicates finds groups of duplicate videos. 
+func FindVideoDuplicates(infos map[string]*VideoInfo, threshold float64) [][]DuplicateEntry { + paths := make([]string, 0, len(infos)) + for p := range infos { + paths = append(paths, p) + } + sort.Strings(paths) + + assigned := make(map[string]bool) + var groups [][]DuplicateEntry + + for i := 0; i < len(paths); i++ { + if assigned[paths[i]] { + continue + } + group := []DuplicateEntry{{Path: paths[i], Similarity: 1.0}} + + for j := i + 1; j < len(paths); j++ { + if assigned[paths[j]] { + continue + } + sim := ComputeVideoSimilarity(infos[paths[i]], infos[paths[j]]) + if sim >= threshold { + group = append(group, DuplicateEntry{Path: paths[j], Similarity: sim}) + assigned[paths[j]] = true + } + } + + if len(group) > 1 { + assigned[paths[i]] = true + groups = append(groups, group) + } + } + + return groups +} diff --git a/internal/organizer/date_sorter.go b/internal/organizer/date_sorter.go new file mode 100644 index 0000000..bbd4e5a --- /dev/null +++ b/internal/organizer/date_sorter.go @@ -0,0 +1,156 @@ +package organizer + +import ( + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + exif "github.com/dsoprea/go-exif/v3" +) + +// SortPlanEntry is a single planned file move/copy. +type SortPlanEntry struct { + Source string `json:"source"` + Destination string `json:"destination"` + Status string `json:"status"` + Error string `json:"error,omitempty"` +} + +// GetFileDate extracts the date from EXIF data or falls back to modification time. 
+func GetFileDate(path string) time.Time { + if t, err := getExifDate(path); err == nil { + return t + } + + info, err := os.Stat(path) + if err != nil { + return time.Now() + } + return info.ModTime() +} + +func getExifDate(path string) (time.Time, error) { + rawExif, err := exif.SearchFileAndExtractExif(path) + if err != nil { + return time.Time{}, err + } + + entries, _, err := exif.GetFlatExifData(rawExif, nil) + if err != nil { + return time.Time{}, err + } + + // Look for DateTimeOriginal (0x9003) or DateTime (0x0132) + for _, entry := range entries { + if entry.TagId == 0x9003 || entry.TagId == 0x0132 { + if s, ok := entry.Value.(string); ok { + s = strings.TrimRight(strings.TrimSpace(s), "\x00") + if s != "" && s != "0000:00:00 00:00:00" { + t, err := time.Parse("2006:01:02 15:04:05", s) + if err == nil { + return t, nil + } + } + } + } + } + + return time.Time{}, fmt.Errorf("no date tag found") +} + +// RenderTemplate replaces date tokens in a template string. +func RenderTemplate(tmpl string, date time.Time) string { + r := strings.NewReplacer( + "{year}", fmt.Sprintf("%04d", date.Year()), + "{month}", fmt.Sprintf("%02d", date.Month()), + "{day}", fmt.Sprintf("%02d", date.Day()), + "{hour}", fmt.Sprintf("%02d", date.Hour()), + "{minute}", fmt.Sprintf("%02d", date.Minute()), + ) + return r.Replace(tmpl) +} + +// PlanSort creates a plan for sorting files into date-based folders. +func PlanSort(files []string, template string, destination string) []SortPlanEntry { + var plan []SortPlanEntry + + for _, path := range files { + date := GetFileDate(path) + subDir := RenderTemplate(template, date) + destDir := filepath.Join(destination, subDir) + destPath := filepath.Join(destDir, filepath.Base(path)) + + plan = append(plan, SortPlanEntry{ + Source: path, + Destination: destPath, + Status: "pending", + }) + } + + return plan +} + +// ExecuteSort executes the sort plan using the given operation (move or copy). 
+func ExecuteSort(plan []SortPlanEntry, operation string) { + for i := range plan { + destDir := filepath.Dir(plan[i].Destination) + if err := os.MkdirAll(destDir, 0o755); err != nil { + plan[i].Status = "error" + plan[i].Error = err.Error() + continue + } + + var err error + switch operation { + case "move": + err = os.Rename(plan[i].Source, plan[i].Destination) + if err != nil { + // Cross-device move: copy + remove + err = copyFile(plan[i].Source, plan[i].Destination) + if err == nil { + os.Remove(plan[i].Source) + } + } + case "copy": + err = copyFile(plan[i].Source, plan[i].Destination) + default: + err = fmt.Errorf("unknown operation: %s", operation) + } + + if err != nil { + plan[i].Status = "error" + plan[i].Error = err.Error() + } else { + plan[i].Status = "done" + } + } +} + +func copyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return err + } + + if _, err := io.Copy(out, in); err != nil { + out.Close() + return err + } + + // Preserve modification time + info, err := os.Stat(src) + if err == nil { + os.Chtimes(dst, info.ModTime(), info.ModTime()) + } + + return out.Close() +} diff --git a/internal/organizer/organizer_test.go b/internal/organizer/organizer_test.go new file mode 100644 index 0000000..5908250 --- /dev/null +++ b/internal/organizer/organizer_test.go @@ -0,0 +1,245 @@ +package organizer_test + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/exterex/morphic/internal/organizer" +) + +// assetsDir returns the abs path of assets/test from internal/organizer/. +func assetsDir(t *testing.T) string { + t.Helper() + dir, err := filepath.Abs(filepath.Join("..", "..", "assets", "test")) + if err != nil { + t.Fatalf("cannot resolve assets dir: %v", err) + } + return dir +} + +// tempCopy copies src into a new temp dir and returns the copy's path. 
+func tempCopy(t *testing.T, src string) string { + t.Helper() + data, err := os.ReadFile(src) + if err != nil { + t.Fatalf("tempCopy read %s: %v", src, err) + } + dir := t.TempDir() + dst := filepath.Join(dir, filepath.Base(src)) + if err := os.WriteFile(dst, data, 0o644); err != nil { + t.Fatalf("tempCopy write %s: %v", dst, err) + } + return dst +} + +// ── RenderName ────────────────────────────────────────────────────────────── + +func TestRenderName_dateToken(t *testing.T) { + tmp := t.TempDir() + f := filepath.Join(tmp, "photo.jpg") + os.WriteFile(f, []byte{0xFF, 0xD8, 0xFF}, 0o644) + + name := organizer.RenderName("{date}_{original}", f, 1) + if !strings.Contains(name, "_photo") { + t.Errorf("expected {original} → 'photo', got %q", name) + } + // {date} should be 8 digits: YYYYMMDD + parts := strings.SplitN(name, "_", 2) + if len(parts[0]) != 8 { + t.Errorf("expected 8-digit date, got %q", parts[0]) + } +} + +func TestRenderName_seqPadded(t *testing.T) { + tmp := t.TempDir() + f := filepath.Join(tmp, "img.jpg") + os.WriteFile(f, []byte{}, 0o644) + + name := organizer.RenderName("{seq:4}", f, 7) + if !strings.HasPrefix(name, "0007") { + t.Errorf("expected zero-padded seq '0007', got %q", name) + } +} + +func TestRenderName_seqPlain(t *testing.T) { + tmp := t.TempDir() + f := filepath.Join(tmp, "img.jpg") + os.WriteFile(f, []byte{}, 0o644) + + name := organizer.RenderName("{seq}", f, 42) + if !strings.HasPrefix(name, "42") { + t.Errorf("expected seq '42', got %q", name) + } +} + +func TestRenderName_extToken(t *testing.T) { + tmp := t.TempDir() + f := filepath.Join(tmp, "clip.mp4") + os.WriteFile(f, []byte{}, 0o644) + + name := organizer.RenderName("video_{seq}.{ext}", f, 1) + if !strings.Contains(name, ".mp4") { + t.Errorf("expected '.mp4' in output, got %q", name) + } +} + +func TestRenderName_datetimeToken(t *testing.T) { + tmp := t.TempDir() + f := filepath.Join(tmp, "img.jpg") + os.WriteFile(f, []byte{}, 0o644) + + name := 
organizer.RenderName("{datetime}", f, 1) + // datetime token → YYYYMMDD_HHMMSS.jpg (15 chars before ext) + base := strings.TrimSuffix(name, ".jpg") + if len(base) != 15 { + t.Errorf("expected 15-char datetime (YYYYMMDD_HHMMSS), got %q (%d chars)", base, len(base)) + } +} + +// ── RenderTemplate ────────────────────────────────────────────────────────── + +func TestRenderTemplate_basic(t *testing.T) { + date := time.Date(2024, 6, 15, 10, 30, 0, 0, time.UTC) + got := organizer.RenderTemplate("{year}/{month}/{day}", date) + if got != "2024/06/15" { + t.Errorf("RenderTemplate = %q, want %q", got, "2024/06/15") + } +} + +func TestRenderTemplate_zeropadded(t *testing.T) { + date := time.Date(2024, 1, 5, 9, 3, 0, 0, time.UTC) + got := organizer.RenderTemplate("{year}-{month}-{day}", date) + if got != "2024-01-05" { + t.Errorf("RenderTemplate = %q, want %q", got, "2024-01-05") + } +} + +// ── PlanSort ──────────────────────────────────────────────────────────────── + +func TestPlanSort_producesPendingEntries(t *testing.T) { + tmp := t.TempDir() + files := make([]string, 3) + for i := range files { + f := filepath.Join(tmp, fmt.Sprintf("img%d.jpg", i)) + os.WriteFile(f, []byte{0xFF, 0xD8, 0xFF}, 0o644) + files[i] = f + } + + dest := filepath.Join(tmp, "sorted") + plan := organizer.PlanSort(files, "{year}/{month}/{day}", dest) + + if len(plan) != 3 { + t.Fatalf("expected 3 plan entries, got %d", len(plan)) + } + for _, e := range plan { + if e.Status != "pending" { + t.Errorf("expected 'pending', got %q for %s", e.Status, e.Source) + } + if !strings.HasPrefix(e.Destination, dest) { + t.Errorf("destination %q does not start with %q", e.Destination, dest) + } + } +} + +// ── ExecuteSort (copy) ────────────────────────────────────────────────────── + +func TestExecuteSort_copy(t *testing.T) { + tmp := t.TempDir() + src := filepath.Join(tmp, "photo.jpg") + os.WriteFile(src, []byte{0xFF, 0xD8, 0xFF}, 0o644) + + dest := filepath.Join(tmp, "out") + plan := 
organizer.PlanSort([]string{src}, "{year}/{month}", dest) + organizer.ExecuteSort(plan, "copy") + + for _, e := range plan { + if e.Status == "error" { + t.Errorf("ExecuteSort copy error for %s: %s", e.Source, e.Error) + } + if _, err := os.Stat(e.Destination); err != nil { + t.Errorf("expected destination file to exist: %v", err) + } + // Original must still exist (copy, not move) + if _, err := os.Stat(src); err != nil { + t.Error("original file was removed after copy") + } + } +} + +func TestExecuteSort_move(t *testing.T) { + src := tempCopy(t, filepath.Join(assetsDir(t), "sample1.jpg")) + if _, err := os.Stat(filepath.Join(assetsDir(t), "sample1.jpg")); os.IsNotExist(err) { + // Create a minimal JPEG if assets not present + src2 := filepath.Join(filepath.Dir(src), "dummy.jpg") + os.WriteFile(src2, []byte{0xFF, 0xD8, 0xFF}, 0o644) + src = src2 + } + + dest := filepath.Join(filepath.Dir(src), "moved") + plan := organizer.PlanSort([]string{src}, "{year}/{month}", dest) + organizer.ExecuteSort(plan, "move") + + for _, e := range plan { + if e.Status == "error" { + t.Errorf("ExecuteSort move error: %s", e.Error) + continue + } + if _, err := os.Stat(e.Destination); err != nil { + t.Errorf("destination file missing after move: %v", err) + } + if _, err := os.Stat(e.Source); err == nil { + t.Error("source file still exists after move") + } + } +} + +// ── PlanRename ────────────────────────────────────────────────────────────── + +func TestPlanRename_noConflicts(t *testing.T) { + tmp := t.TempDir() + files := []string{} + for i := 0; i < 5; i++ { + f := filepath.Join(tmp, fmt.Sprintf("raw%d.jpg", i)) + os.WriteFile(f, []byte{}, 0o644) + files = append(files, f) + } + + plan := organizer.PlanRename(files, "photo_{seq:3}", "move", 1) + if len(plan) != 5 { + t.Fatalf("expected 5 plan entries, got %d", len(plan)) + } + for _, e := range plan { + if e.Status == "conflict" { + t.Errorf("unexpected conflict for %s → %s", e.Source, e.Destination) + } + } +} + +func 
TestPlanRename_conflictDetected(t *testing.T) { + tmp := t.TempDir() + // Two files that will produce the same renamed output: same template, same seq + f1 := filepath.Join(tmp, "a.jpg") + f2 := filepath.Join(tmp, "b.jpg") + os.WriteFile(f1, []byte{}, 0o644) + os.WriteFile(f2, []byte{}, 0o644) + // Force same timestamp on both + now := time.Now() + os.Chtimes(f1, now, now) + os.Chtimes(f2, now, now) + + // Use a static name with no seq: both will map to the same destination + plan := organizer.PlanRename([]string{f1, f2}, "samename", "move", 1) + conflicts := 0 + for _, e := range plan { + if e.Status == "conflict" { + conflicts++ + } + } + if conflicts < 2 { + t.Errorf("expected ≥2 conflict entries, got %d", conflicts) + } +} diff --git a/internal/organizer/renamer.go b/internal/organizer/renamer.go new file mode 100644 index 0000000..304ace7 --- /dev/null +++ b/internal/organizer/renamer.go @@ -0,0 +1,126 @@ +package organizer + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "time" +) + +// RenamePlanEntry is a single planned rename. +type RenamePlanEntry struct { + Source string `json:"source"` + Destination string `json:"destination"` + Status string `json:"status"` + Error string `json:"error,omitempty"` +} + +var seqPaddedRegex = regexp.MustCompile(`\{seq:(\d+)\}`) + +// RenderName applies the rename template to a file. 
+func RenderName(tmpl string, path string, seq int) string { + date := GetFileDate(path) + ext := filepath.Ext(path) + original := strings.TrimSuffix(filepath.Base(path), ext) + + result := tmpl + result = strings.ReplaceAll(result, "{date}", date.Format("20060102")) + result = strings.ReplaceAll(result, "{datetime}", date.Format("20060102_150405")) + result = strings.ReplaceAll(result, "{original}", original) + result = strings.ReplaceAll(result, "{ext}", strings.TrimPrefix(ext, ".")) + + // Handle {seq:N} with zero-padded sequence + result = seqPaddedRegex.ReplaceAllStringFunc(result, func(match string) string { + sub := seqPaddedRegex.FindStringSubmatch(match) + if len(sub) < 2 { + return match + } + width := 0 + fmt.Sscanf(sub[1], "%d", &width) + return fmt.Sprintf("%0*d", width, seq) + }) + + // Handle plain {seq} + result = strings.ReplaceAll(result, "{seq}", fmt.Sprintf("%d", seq)) + + return result + ext +} + +type fileWithDate struct { + Path string + Date time.Time +} + +// PlanRename creates a rename plan for the given files. 
+func PlanRename(files []string, tmpl string, operation string, startSeq int) []RenamePlanEntry { + // Sort by (date, path) for consistent sequencing + fwd := make([]fileWithDate, len(files)) + for i, f := range files { + fwd[i] = fileWithDate{Path: f, Date: GetFileDate(f)} + } + sort.Slice(fwd, func(i, j int) bool { + if fwd[i].Date.Equal(fwd[j].Date) { + return fwd[i].Path < fwd[j].Path + } + return fwd[i].Date.Before(fwd[j].Date) + }) + + plan := make([]RenamePlanEntry, len(fwd)) + destSet := make(map[string]int) // destination -> first index + + for i, f := range fwd { + newName := RenderName(tmpl, f.Path, startSeq+i) + destPath := filepath.Join(filepath.Dir(f.Path), newName) + + entry := RenamePlanEntry{ + Source: f.Path, + Destination: destPath, + Status: "pending", + } + + // Check for conflicts + if prevIdx, exists := destSet[destPath]; exists { + entry.Status = "conflict" + entry.Error = fmt.Sprintf("conflicts with file #%d", prevIdx+1) + plan[prevIdx].Status = "conflict" + if plan[prevIdx].Error == "" { + plan[prevIdx].Error = fmt.Sprintf("conflicts with file #%d", i+1) + } + } else { + destSet[destPath] = i + } + + plan[i] = entry + } + + return plan +} + +// ExecuteRename executes the rename plan. 
+func ExecuteRename(plan []RenamePlanEntry, operation string) { + for i := range plan { + if plan[i].Status == "conflict" { + continue + } + + var err error + switch operation { + case "move": + err = os.Rename(plan[i].Source, plan[i].Destination) + case "copy": + err = copyFile(plan[i].Source, plan[i].Destination) + default: + err = fmt.Errorf("unknown operation: %s", operation) + } + + if err != nil { + plan[i].Status = "error" + plan[i].Error = err.Error() + } else { + plan[i].Status = "done" + } + } +} diff --git a/internal/organizer/scanner.go b/internal/organizer/scanner.go new file mode 100644 index 0000000..0fdab57 --- /dev/null +++ b/internal/organizer/scanner.go @@ -0,0 +1,267 @@ +package organizer + +import ( + "sync" + "time" + + "github.com/exterex/morphic/internal/shared" +) + +// ScanJob represents an organizer scan/plan/execute job. +type ScanJob struct { + shared.Job + mu sync.Mutex + + Folder string `json:"folder"` + Phase string `json:"phase"` + Template string `json:"template"` + Destination string `json:"destination"` + Operation string `json:"operation"` + Mode string `json:"mode"` + StartSeq int `json:"start_seq"` + Files []string `json:"files,omitempty"` + SortPlan []SortPlanEntry `json:"sort_plan,omitempty"` + RenamePlan []RenamePlanEntry `json:"rename_plan,omitempty"` + Total int `json:"total"` + Processed int `json:"processed"` +} + +var store = shared.NewJobStore[ScanJob]() + +func init() { + store.StartCleanup(30*time.Minute, func(j *ScanJob) time.Time { + return j.DoneAt + }) +} + +// StartPlanJob starts a new planning job. 
+func StartPlanJob(folder, mode, template, destination, operation string, startSeq int) string { + job := &ScanJob{ + Job: shared.NewJob(), + Folder: folder, + Phase: "scanning", + Mode: mode, + Template: template, + Destination: destination, + Operation: operation, + StartSeq: startSeq, + } + job.Status = shared.JobStatusRunning + + store.Set(job.ID, job) + + go runPlan(job) + + return job.ID +} + +// GetJob retrieves a job by ID. +func GetJob(id string) (*ScanJob, bool) { + return store.Get(id) +} + +// ExecuteJob starts the execution phase of a planned job. +func ExecuteJob(id string) bool { + job, ok := store.Get(id) + if !ok || job.Phase != "planned" { + return false + } + + job.mu.Lock() + job.Phase = "executing" + job.Progress = 0 + job.Processed = 0 + job.mu.Unlock() + + go runExecute(job) + return true +} + +func runPlan(job *ScanJob) { + files, err := shared.FindAllMediaFiles(job.Folder) + + if err != nil { + job.mu.Lock() + job.Status = shared.JobStatusFailed + job.Error = err.Error() + job.DoneAt = time.Now() + job.mu.Unlock() + return + } + + // Check for cancellation after file discovery + select { + case <-job.Ctx().Done(): + job.mu.Lock() + job.Status = shared.JobStatusCancelled + job.DoneAt = time.Now() + job.Message = "Scan was interrupted" + job.mu.Unlock() + return + default: + } + + paths := make([]string, len(files)) + for i, f := range files { + paths[i] = f.Path + } + + job.mu.Lock() + job.Files = paths + job.Total = len(paths) + job.Phase = "planning" + job.Progress = 0.3 + job.mu.Unlock() + + switch job.Mode { + case "sort": + dest := job.Destination + if dest == "" { + dest = job.Folder + } + plan := PlanSort(paths, job.Template, dest) + job.mu.Lock() + job.SortPlan = plan + job.mu.Unlock() + case "rename": + plan := PlanRename(paths, job.Template, job.Operation, job.StartSeq) + job.mu.Lock() + job.RenamePlan = plan + job.mu.Unlock() + } + + // Check for cancellation after planning + select { + case <-job.Ctx().Done(): + job.mu.Lock() + 
job.Status = shared.JobStatusCancelled + job.DoneAt = time.Now() + job.Message = "Scan was interrupted" + job.mu.Unlock() + return + default: + } + + job.mu.Lock() + job.Phase = "planned" + job.Progress = 1.0 + job.Message = "Plan ready for review" + job.mu.Unlock() +} + +func runExecute(job *ScanJob) { + // Check for cancellation before starting execution + select { + case <-job.Ctx().Done(): + job.mu.Lock() + job.Status = shared.JobStatusCancelled + job.DoneAt = time.Now() + job.Message = "Execution was interrupted" + job.mu.Unlock() + return + default: + } + + switch job.Mode { + case "sort": + ExecuteSort(job.SortPlan, job.Operation) + job.mu.Lock() + for _, e := range job.SortPlan { + if e.Status == "done" { + job.Processed++ + } + } + job.mu.Unlock() + case "rename": + ExecuteRename(job.RenamePlan, job.Operation) + job.mu.Lock() + for _, e := range job.RenamePlan { + if e.Status == "done" { + job.Processed++ + } + } + job.mu.Unlock() + } + + job.mu.Lock() + job.Phase = "done" + job.Status = shared.JobStatusDone + job.Progress = 1.0 + job.DoneAt = time.Now() + job.mu.Unlock() +} + +// GetUnifiedPlan returns the plan entries in a unified format matching the +// Python API's response (each entry has "source", "destination", "conflict"). 
+func GetUnifiedPlan(job *ScanJob) []map[string]interface{} { + job.mu.Lock() + defer job.mu.Unlock() + + if job.Mode == "sort" { + plan := make([]map[string]interface{}, len(job.SortPlan)) + for i, e := range job.SortPlan { + entry := map[string]interface{}{ + "source": e.Source, + "destination": e.Destination, + } + if e.Status == "conflict" { + entry["conflict"] = true + } + plan[i] = entry + } + return plan + } + + plan := make([]map[string]interface{}, len(job.RenamePlan)) + for i, e := range job.RenamePlan { + entry := map[string]interface{}{ + "source": e.Source, + "destination": e.Destination, + } + if e.Status == "conflict" { + entry["conflict"] = true + } + plan[i] = entry + } + return plan +} + +// GetExecutionResult returns execution stats matching the Python API format. +func GetExecutionResult(job *ScanJob) map[string]interface{} { + job.mu.Lock() + defer job.mu.Unlock() + + completed := 0 + errors := 0 + skipped := 0 + + if job.Mode == "sort" { + for _, e := range job.SortPlan { + switch e.Status { + case "done": + completed++ + case "error": + errors++ + case "conflict", "skipped": + skipped++ + } + } + } else { + for _, e := range job.RenamePlan { + switch e.Status { + case "done": + completed++ + case "error": + errors++ + case "conflict", "skipped": + skipped++ + } + } + } + + return map[string]interface{}{ + "completed": completed, + "errors": errors, + "skipped": skipped, + } +} diff --git a/internal/shared/constants.go b/internal/shared/constants.go new file mode 100644 index 0000000..2f9f9ca --- /dev/null +++ b/internal/shared/constants.go @@ -0,0 +1,107 @@ +package shared + +const Version = "0.1.0" + +const ( + DefaultImageThreshold = 0.90 + DefaultVideoThreshold = 0.85 + DefaultHashSize = 16 + DefaultNumFrames = 10 + DefaultNumWorkers = 4 + DefaultBatchSize = 1000 +) + +// ImageExtensions contains all supported image file extensions (lowercase, with dot prefix). 
+var ImageExtensions = map[string]struct{}{ + ".jpg": {}, + ".jpeg": {}, + ".png": {}, + ".tif": {}, + ".tiff": {}, + ".bmp": {}, + ".webp": {}, + ".gif": {}, + ".ico": {}, + ".heic": {}, + ".heif": {}, + ".avif": {}, + ".svg": {}, + ".raw": {}, + ".cr2": {}, + ".nef": {}, + ".arw": {}, + ".dng": {}, + ".orf": {}, + ".rw2": {}, + ".pef": {}, + ".srw": {}, +} + +// VideoExtensions contains all supported video file extensions. +var VideoExtensions = map[string]struct{}{ + ".mp4": {}, + ".mov": {}, + ".avi": {}, + ".mkv": {}, + ".webm": {}, + ".flv": {}, + ".wmv": {}, + ".m4v": {}, + ".mpeg": {}, + ".mpg": {}, + ".3gp": {}, + ".ts": {}, + ".ogv": {}, + ".mts": {}, + ".m2ts": {}, + ".vob": {}, + ".divx": {}, + ".xvid": {}, + ".asf": {}, + ".rm": {}, + ".rmvb": {}, +} + +// ExcludedFolders contains folder names to skip during scanning. +var ExcludedFolders = map[string]struct{}{ + // Windows + "$recycle.bin": {}, + "$recycle": {}, + "recycler": {}, + "recycled": {}, + "system volume information": {}, + "windows": {}, + "appdata": {}, + // macOS + ".trash": {}, + ".trashes": {}, + ".spotlight-v100": {}, + ".fseventsd": {}, + ".ds_store": {}, + // Linux + "lost+found": {}, + "trash": {}, + // Thumbnails + ".thumbnails": {}, + ".thumb": {}, + "thumbs": {}, + // NAS + "@eadir": {}, + // Version control + ".git": {}, + ".svn": {}, + ".hg": {}, + // Development + "__pycache__": {}, + ".cache": {}, + "node_modules": {}, + ".venv": {}, + "venv": {}, +} + +// Aliases maps alternative extensions to their canonical form. +var Aliases = map[string]string{ + ".jpeg": ".jpg", + ".tiff": ".tif", + ".mpg": ".mpeg", +} diff --git a/internal/shared/file_browser.go b/internal/shared/file_browser.go new file mode 100644 index 0000000..a97c061 --- /dev/null +++ b/internal/shared/file_browser.go @@ -0,0 +1,72 @@ +package shared + +import ( + "os" + "os/exec" + "runtime" + "strings" +) + +// OpenNativeFolderDialog opens a native folder selection dialog. 
+// Returns the selected folder path, whether a dialog tool is available, and any error. +func OpenNativeFolderDialog() (string, bool, error) { + // Test mode: allow test to set folder via environment + if testFolder := os.Getenv("MORPHIC_TEST_FOLDER"); testFolder != "" { + return testFolder, true, nil + } + + switch runtime.GOOS { + case "linux": + return linuxFolderDialog() + case "darwin": + return macFolderDialog() + case "windows": + return windowsFolderDialog() + default: + return "", false, nil + } +} + +func linuxFolderDialog() (string, bool, error) { + // Try zenity first + if path, err := exec.LookPath("zenity"); err == nil && path != "" { + cmd := exec.Command("zenity", "--file-selection", "--directory", "--title=Select Folder") + out, err := cmd.Output() + if err == nil { + return strings.TrimSpace(string(out)), true, nil + } + return "", true, nil // tool available but user cancelled + } + + // Try kdialog + if path, err := exec.LookPath("kdialog"); err == nil && path != "" { + cmd := exec.Command("kdialog", "--getexistingdirectory", ".") + out, err := cmd.Output() + if err == nil { + return strings.TrimSpace(string(out)), true, nil + } + return "", true, nil // tool available but user cancelled + } + + // No dialog tool found + return "", false, nil +} + +func macFolderDialog() (string, bool, error) { + cmd := exec.Command("osascript", "-e", `POSIX path of (choose folder with prompt "Select Folder")`) + out, err := cmd.Output() + if err != nil { + return "", true, nil + } + return strings.TrimSpace(string(out)), true, nil +} + +func windowsFolderDialog() (string, bool, error) { + script := `Add-Type -AssemblyName System.Windows.Forms; $f = New-Object System.Windows.Forms.FolderBrowserDialog; if ($f.ShowDialog() -eq 'OK') { $f.SelectedPath }` + cmd := exec.Command("powershell", "-NoProfile", "-Command", script) + out, err := cmd.Output() + if err != nil { + return "", true, nil + } + return strings.TrimSpace(string(out)), true, nil +} diff --git 
a/internal/shared/jobs.go b/internal/shared/jobs.go new file mode 100644 index 0000000..9d55b7c --- /dev/null +++ b/internal/shared/jobs.go @@ -0,0 +1,117 @@ +package shared + +import ( + "context" + "sync" + "time" + + "github.com/google/uuid" +) + +// JobStatus represents the state of a background job. +type JobStatus string + +const ( + JobStatusPending JobStatus = "pending" + JobStatusRunning JobStatus = "running" + JobStatusDone JobStatus = "done" + JobStatusFailed JobStatus = "failed" + JobStatusPlanned JobStatus = "planned" + JobStatusCancelled JobStatus = "cancelled" +) + +// Job is a base type embedded in all module-specific jobs. +type Job struct { + ID string `json:"id"` + Status JobStatus `json:"status"` + Progress float64 `json:"progress"` + Message string `json:"message,omitempty"` + Error string `json:"error,omitempty"` + StartedAt time.Time `json:"started_at"` + DoneAt time.Time `json:"done_at,omitempty"` + + ctx context.Context `json:"-"` + cancel context.CancelFunc `json:"-"` +} + +// NewJob creates a new job with a unique ID. +func NewJob() Job { + ctx, cancel := context.WithCancel(context.Background()) + return Job{ + ID: uuid.New().String(), + Status: JobStatusPending, + Progress: 0, + StartedAt: time.Now(), + ctx: ctx, + cancel: cancel, + } +} + +// Ctx returns the job's context. It is cancelled when Cancel is called. +func (j *Job) Ctx() context.Context { + if j.ctx == nil { + return context.Background() + } + return j.ctx +} + +// Cancel signals the job to stop. It is safe to call multiple times. +func (j *Job) Cancel() { + if j.cancel != nil { + j.cancel() + } +} + +// JobStore is a thread-safe generic store for background jobs. +type JobStore[T any] struct { + mu sync.RWMutex + jobs map[string]*T +} + +// NewJobStore creates a new JobStore. +func NewJobStore[T any]() *JobStore[T] { + return &JobStore[T]{ + jobs: make(map[string]*T), + } +} + +// Set stores a job. 
+func (s *JobStore[T]) Set(id string, job *T) { + s.mu.Lock() + defer s.mu.Unlock() + s.jobs[id] = job +} + +// Get retrieves a job by ID. +func (s *JobStore[T]) Get(id string) (*T, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + job, ok := s.jobs[id] + return job, ok +} + +// Delete removes a job by ID. +func (s *JobStore[T]) Delete(id string) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.jobs, id) +} + +// StartCleanup runs a background goroutine that removes jobs older than ttl. +func (s *JobStore[T]) StartCleanup(ttl time.Duration, getDoneAt func(*T) time.Time) { + go func() { + ticker := time.NewTicker(ttl / 2) + defer ticker.Stop() + for range ticker.C { + now := time.Now() + s.mu.Lock() + for id, job := range s.jobs { + doneAt := getDoneAt(job) + if !doneAt.IsZero() && now.Sub(doneAt) > ttl { + delete(s.jobs, id) + } + } + s.mu.Unlock() + } + }() +} diff --git a/internal/shared/shared_test.go b/internal/shared/shared_test.go new file mode 100644 index 0000000..0e6d78b --- /dev/null +++ b/internal/shared/shared_test.go @@ -0,0 +1,334 @@ +package shared_test + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/exterex/morphic/internal/shared" +) + +// assetsDir returns assets/test relative to internal/shared/. 
+func assetsDir(t *testing.T) string { + t.Helper() + dir, err := filepath.Abs(filepath.Join("..", "..", "assets", "test")) + if err != nil { + t.Fatalf("cannot resolve assets dir: %v", err) + } + return dir +} + +// ── NormaliseExt ──────────────────────────────────────────────────────────── + +func TestNormaliseExt_jpeg(t *testing.T) { + if got := shared.NormaliseExt(".jpeg"); got != ".jpg" { + t.Errorf("NormaliseExt(.jpeg) = %q, want %q", got, ".jpg") + } +} + +func TestNormaliseExt_tiff(t *testing.T) { + if got := shared.NormaliseExt(".tiff"); got != ".tif" { + t.Errorf("NormaliseExt(.tiff) = %q, want %q", got, ".tif") + } +} + +func TestNormaliseExt_uppercase(t *testing.T) { + if got := shared.NormaliseExt(".JPG"); got != ".jpg" { + t.Errorf("NormaliseExt(.JPG) = %q, want %q", got, ".jpg") + } +} + +func TestNormaliseExt_unknownPassthrough(t *testing.T) { + if got := shared.NormaliseExt(".xyz"); got != ".xyz" { + t.Errorf("NormaliseExt(.xyz) = %q, want %q", got, ".xyz") + } +} + +// ── IsImage / IsVideo ─────────────────────────────────────────────────────── + +func TestIsImage(t *testing.T) { + cases := []struct { + path string + want bool + }{ + {"photo.jpg", true}, + {"photo.JPEG", true}, + {"clip.mp4", false}, + {"document.pdf", false}, + {"image.png", true}, + {"image.webp", true}, + {"video.mkv", false}, + } + for _, tc := range cases { + t.Run(tc.path, func(t *testing.T) { + if got := shared.IsImage(tc.path); got != tc.want { + t.Errorf("IsImage(%q) = %v, want %v", tc.path, got, tc.want) + } + }) + } +} + +func TestIsVideo(t *testing.T) { + cases := []struct { + path string + want bool + }{ + {"clip.mp4", true}, + {"clip.MOV", true}, + {"photo.jpg", false}, + {"clip.mkv", true}, + {"clip.avi", true}, + {"photo.png", false}, + } + for _, tc := range cases { + t.Run(tc.path, func(t *testing.T) { + if got := shared.IsVideo(tc.path); got != tc.want { + t.Errorf("IsVideo(%q) = %v, want %v", tc.path, got, tc.want) + } + }) + } +} + +// ── FormatFileSize 
────────────────────────────────────────────────────────── + +func TestFormatFileSize(t *testing.T) { + cases := []struct { + bytes int64 + want string + }{ + {0, "0.00 B"}, + {512, "512.00 B"}, + {1024, "1.00 KB"}, + {1536, "1.50 KB"}, + {1024 * 1024, "1.00 MB"}, + {1024 * 1024 * 1024, "1.00 GB"}, + } + for _, tc := range cases { + t.Run(fmt.Sprintf("%d", tc.bytes), func(t *testing.T) { + got := shared.FormatFileSize(tc.bytes) + if got != tc.want { + t.Errorf("FormatFileSize(%d) = %q, want %q", tc.bytes, got, tc.want) + } + }) + } +} + +// ── FormatDuration ────────────────────────────────────────────────────────── + +func TestFormatDuration(t *testing.T) { + cases := []struct { + secs float64 + want string + }{ + {0, "0s"}, + {45, "45s"}, + {90, "1m 30s"}, + {3661, "1h 1m 1s"}, + } + for _, tc := range cases { + t.Run(tc.want, func(t *testing.T) { + got := shared.FormatDuration(tc.secs) + if got != tc.want { + t.Errorf("FormatDuration(%v) = %q, want %q", tc.secs, got, tc.want) + } + }) + } +} + +// ── FindFilesByExtension ──────────────────────────────────────────────────── + +func TestFindFilesByExtension_findsImages(t *testing.T) { + dir := assetsDir(t) + exts := map[string]struct{}{".jpg": {}, ".png": {}, ".tif": {}} + files, err := shared.FindFilesByExtension(dir, exts, shared.ExcludedFolders) + if err != nil { + t.Fatalf("FindFilesByExtension error: %v", err) + } + if len(files) == 0 { + t.Error("expected at least one image file in assets/test") + } + for _, f := range files { + if _, ok := exts[f.Ext]; !ok { + t.Errorf("unexpected extension %q for file %s", f.Ext, f.Path) + } + } +} + +func TestFindFilesByExtension_emptyDir(t *testing.T) { + dir := t.TempDir() + files, err := shared.FindFilesByExtension(dir, map[string]struct{}{".jpg": {}}, shared.ExcludedFolders) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(files) != 0 { + t.Errorf("expected 0 files in empty dir, got %d", len(files)) + } +} + +func 
TestFindFilesByExtension_noDuplicates(t *testing.T) { + dir := assetsDir(t) + files, err := shared.FindFilesByExtension(dir, shared.ImageExtensions, shared.ExcludedFolders) + if err != nil { + t.Fatalf("error: %v", err) + } + seen := make(map[string]int) + for _, f := range files { + seen[f.Path]++ + } + for path, count := range seen { + if count > 1 { + t.Errorf("file %s appeared %d times (expected 1)", path, count) + } + } +} + +func TestFindImageFiles_basic(t *testing.T) { + files, err := shared.FindImageFiles(assetsDir(t)) + if err != nil { + t.Fatalf("FindImageFiles error: %v", err) + } + if len(files) == 0 { + t.Error("expected images in assets/test") + } +} + +func TestFindAllMediaFiles_includesBoth(t *testing.T) { + files, err := shared.FindAllMediaFiles(assetsDir(t)) + if err != nil { + t.Fatalf("FindAllMediaFiles error: %v", err) + } + hasImage, hasVideo := false, false + for _, f := range files { + if _, ok := shared.ImageExtensions[f.Ext]; ok { + hasImage = true + } + if _, ok := shared.VideoExtensions[f.Ext]; ok { + hasVideo = true + } + } + if !hasImage { + t.Error("expected at least one image") + } + if !hasVideo { + t.Error("expected at least one video") + } +} + +// ── IsExcludedPath ────────────────────────────────────────────────────────── + +func TestIsExcludedPath(t *testing.T) { + cases := []struct { + path string + want bool + }{ + {"/home/user/photos/img.jpg", false}, + {"/home/user/$Recycle.Bin/img.jpg", true}, + {"/home/user/.Trash/img.jpg", true}, + {"/home/user/node_modules/img.jpg", true}, // node_modules is excluded + {"/home/user/Pictures/img.jpg", false}, + } + for _, tc := range cases { + t.Run(tc.path, func(t *testing.T) { + got := shared.IsExcludedPath(tc.path, shared.ExcludedFolders) + if got != tc.want { + t.Errorf("IsExcludedPath(%q) = %v, want %v", tc.path, got, tc.want) + } + }) + } +} + +// ── GenerateImageThumbnail ────────────────────────────────────────────────── + +func TestGenerateImageThumbnail_jpg(t *testing.T) { + 
path := filepath.Join(assetsDir(t), "sample1.jpg") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample1.jpg not present") + } + data, err := shared.GenerateImageThumbnail(path, 64) + if err != nil { + t.Fatalf("GenerateImageThumbnail error: %v", err) + } + if len(data) == 0 { + t.Error("expected non-empty thumbnail bytes") + } + // JPEG starts with FF D8 + if len(data) < 2 || data[0] != 0xFF || data[1] != 0xD8 { + t.Error("thumbnail is not a valid JPEG (missing FF D8 header)") + } +} + +func TestGenerateImageThumbnail_png(t *testing.T) { + path := filepath.Join(assetsDir(t), "sample2.png") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("sample2.png not present") + } + data, err := shared.GenerateImageThumbnail(path, 64) + if err != nil { + t.Fatalf("GenerateImageThumbnail png error: %v", err) + } + if len(data) == 0 { + t.Error("expected non-empty thumbnail bytes") + } +} + +// ── JobStore ──────────────────────────────────────────────────────────────── + +func TestJobStore_setAndGet(t *testing.T) { + type item struct{ shared.Job } + store := shared.NewJobStore[item]() + + job := item{Job: shared.NewJob()} + store.Set(job.ID, &job) + + got, ok := store.Get(job.ID) + if !ok { + t.Fatal("Get returned false after Set") + } + if got.ID != job.ID { + t.Errorf("ID mismatch: got %q want %q", got.ID, job.ID) + } +} + +func TestJobStore_missingKey(t *testing.T) { + type item struct{ shared.Job } + store := shared.NewJobStore[item]() + + _, ok := store.Get("does-not-exist") + if ok { + t.Error("expected false for missing key") + } +} + +// ── Job context / cancel ──────────────────────────────────────────────────── + +func TestJob_cancelStopsContext(t *testing.T) { + job := shared.NewJob() + ctx := job.Ctx() + + select { + case <-ctx.Done(): + t.Fatal("context should not be done before Cancel()") + default: + } + + job.Cancel() + + select { + case <-ctx.Done(): + // expected + default: + t.Error("context should be done after Cancel()") + } +} + 
+func TestJob_doubleCancelNoPanic(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Errorf("double Cancel() panicked: %v", r) + } + }() + job := shared.NewJob() + job.Cancel() + job.Cancel() // must not panic +} diff --git a/internal/shared/thumbnails.go b/internal/shared/thumbnails.go new file mode 100644 index 0000000..4bd2db7 --- /dev/null +++ b/internal/shared/thumbnails.go @@ -0,0 +1,233 @@ +package shared + +import ( + "bytes" + "fmt" + "image" + "image/jpeg" + "os/exec" + "path/filepath" + "strings" + "sync" + + "github.com/disintegration/imaging" +) + +const ( + DefaultThumbnailSize = 200 + DefaultThumbnailQuality = 80 +) + +// ThumbnailCache provides a thread-safe cache for generated thumbnails. +type ThumbnailCache struct { + mu sync.RWMutex + store map[string][]byte +} + +var thumbnailCache = &ThumbnailCache{ + store: make(map[string][]byte), +} + +// GenerateImageThumbnail creates a JPEG thumbnail for an image file. +func GenerateImageThumbnail(path string, size int) ([]byte, error) { + if size <= 0 { + size = DefaultThumbnailSize + } + + cacheKey := fmt.Sprintf("%s:%d", path, size) + thumbnailCache.mu.RLock() + if data, ok := thumbnailCache.store[cacheKey]; ok { + thumbnailCache.mu.RUnlock() + return data, nil + } + thumbnailCache.mu.RUnlock() + + ext := strings.ToLower(filepath.Ext(path)) + if alias, ok := Aliases[ext]; ok { + ext = alias + } + + if ext == ".avif" { + data, err := extractImageFrame(path, "00:00:00", size) + if err != nil { + return nil, fmt.Errorf("failed to generate AVIF thumbnail %s: %w", path, err) + } + + thumbnailCache.mu.Lock() + thumbnailCache.store[cacheKey] = data + thumbnailCache.mu.Unlock() + + return data, nil + } + + img, err := imaging.Open(path, imaging.AutoOrientation(true)) + if err != nil { + // Fallback to ffmpeg for formats that imaging can't decode. 
+ ffData, ffErr := extractImageFrame(path, "00:00:00", size) + if ffErr == nil { + thumbnailCache.mu.Lock() + thumbnailCache.store[cacheKey] = ffData + thumbnailCache.mu.Unlock() + return ffData, nil + } + return nil, fmt.Errorf("failed to open image %s: %w (ffmpeg fallback: %v)", path, err, ffErr) + } + + thumb := imaging.Fit(img, size, size, imaging.Lanczos) + + var buf bytes.Buffer + if err := jpeg.Encode(&buf, thumb, &jpeg.Options{Quality: DefaultThumbnailQuality}); err != nil { + return nil, fmt.Errorf("failed to encode thumbnail: %w", err) + } + + data := buf.Bytes() + thumbnailCache.mu.Lock() + thumbnailCache.store[cacheKey] = data + thumbnailCache.mu.Unlock() + + return data, nil +} + +// GenerateVideoThumbnail creates a JPEG thumbnail for a video file using ffmpeg. +func GenerateVideoThumbnail(path string, size int) ([]byte, error) { + if size <= 0 { + size = DefaultThumbnailSize + } + + cacheKey := fmt.Sprintf("video:%s:%d", path, size) + thumbnailCache.mu.RLock() + if data, ok := thumbnailCache.store[cacheKey]; ok { + thumbnailCache.mu.RUnlock() + return data, nil + } + thumbnailCache.mu.RUnlock() + + // Try extracting frame at 1 second, fallback to 0 seconds + data, err := extractVideoFrame(path, "00:00:01", size) + if err != nil { + data, err = extractVideoFrame(path, "00:00:00", size) + if err != nil { + return nil, fmt.Errorf("failed to extract video frame from %s: %w", path, err) + } + } + + thumbnailCache.mu.Lock() + thumbnailCache.store[cacheKey] = data + thumbnailCache.mu.Unlock() + + return data, nil +} + +func extractVideoFrame(videoPath, seekTime string, size int) ([]byte, error) { + img, err := extractImageFromFFmpeg(videoPath, seekTime, size) + if err != nil { + return nil, err + } + + var buf bytes.Buffer + if err := jpeg.Encode(&buf, imaging.Fit(img, size, size, imaging.Lanczos), &jpeg.Options{Quality: DefaultThumbnailQuality}); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func extractImageFrame(imagePath, seekTime 
string, size int) ([]byte, error) { + img, err := extractImageFromFFmpeg(imagePath, seekTime, size) + if err != nil { + return nil, err + } + + var buf bytes.Buffer + if err := jpeg.Encode(&buf, imaging.Fit(img, size, size, imaging.Lanczos), &jpeg.Options{Quality: DefaultThumbnailQuality}); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func ffmpegCandidates() []string { + var bins []string + for _, name := range []string{"ffmpeg"} { + if _, err := exec.LookPath(name); err == nil { + bins = append(bins, name) + } + } + return bins +} + +// OpenImageFile opens an image from any format supported by imaging or ffmpeg. +// It uses imaging.Open for common formats and falls back to ffmpeg for formats +// that imaging cannot handle (e.g. AVIF). +func OpenImageFile(path string) (image.Image, error) { + img, err := imaging.Open(path, imaging.AutoOrientation(true)) + if err == nil { + return img, nil + } + // imaging failed — try ffmpeg (handles AVIF, HEIC, …) + return extractImageFromFFmpeg(path, "00:00:00", 0) +} + +func extractImageFromFFmpeg(srcPath, seekTime string, size int) (image.Image, error) { + bins := ffmpegCandidates() + if len(bins) == 0 { + return nil, fmt.Errorf("ffmpeg not found in PATH") + } + + var lastErr error + for _, bin := range bins { + for _, codec := range []string{"png", "mjpeg"} { + args := []string{"-ss", seekTime, "-i", srcPath, "-frames:v", "1"} + if size > 0 { + args = append(args, "-vf", fmt.Sprintf("scale=%d:%d:force_original_aspect_ratio=decrease", size, size)) + } + args = append(args, "-f", "image2pipe", "-vcodec", codec) + if codec == "mjpeg" { + args = append(args, "-q:v", "5") + } + args = append(args, "pipe:1") + + var stdout, stderr bytes.Buffer + cmd := exec.Command(bin, args...) 
+ cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + lastErr = fmt.Errorf("%s/%s failed: %w (stderr: %s)", bin, codec, err, stderr.String()) + continue + } + if stdout.Len() == 0 { + lastErr = fmt.Errorf("%s/%s: no output produced", bin, codec) + continue + } + + img, _, err := image.Decode(bytes.NewReader(stdout.Bytes())) + if err != nil { + lastErr = fmt.Errorf("%s/%s: decode failed: %w", bin, codec, err) + continue + } + return img, nil + } + } + return nil, fmt.Errorf("all ffmpeg variants failed for %s: %w", srcPath, lastErr) +} + +// IsImageFile checks if a path has an image extension. +func IsImageFile(path string) bool { + ext := strings.ToLower(filepath.Ext(path)) + if alias, ok := Aliases[ext]; ok { + ext = alias + } + _, ok := ImageExtensions[ext] + return ok +} + +// IsVideoFile checks if a path has a video extension. +func IsVideoFile(path string) bool { + ext := strings.ToLower(filepath.Ext(path)) + if alias, ok := Aliases[ext]; ok { + ext = alias + } + _, ok := VideoExtensions[ext] + return ok +} diff --git a/internal/shared/utils.go b/internal/shared/utils.go new file mode 100644 index 0000000..7d129c0 --- /dev/null +++ b/internal/shared/utils.go @@ -0,0 +1,164 @@ +package shared + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" +) + +// FileInfo holds metadata about a discovered file. +type FileInfo struct { + Path string + Name string + Size int64 + Ext string +} + +// FindFilesByExtension walks the folder tree once and returns all files +// matching the given extensions. This replaces the Python version which +// called rglob() twice per extension (82+ traversals). 
+func FindFilesByExtension(folder string, extensions map[string]struct{}, excludedFolders map[string]struct{}) ([]FileInfo, error) { + var files []FileInfo + seen := make(map[string]struct{}) + + err := filepath.WalkDir(folder, func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil // skip inaccessible entries + } + + if d.IsDir() { + name := strings.ToLower(d.Name()) + if _, excluded := excludedFolders[name]; excluded { + return filepath.SkipDir + } + return nil + } + + ext := strings.ToLower(filepath.Ext(path)) + if alias, ok := Aliases[ext]; ok { + ext = alias + } + + if _, ok := extensions[ext]; !ok { + return nil + } + + // Deduplicate by resolved path + abs, err := filepath.Abs(path) + if err != nil { + abs = path + } + if _, dup := seen[abs]; dup { + return nil + } + seen[abs] = struct{}{} + + info, err := d.Info() + if err != nil { + return nil + } + + files = append(files, FileInfo{ + Path: abs, + Name: d.Name(), + Size: info.Size(), + Ext: ext, + }) + + return nil + }) + + if err != nil { + return nil, err + } + + sort.Slice(files, func(i, j int) bool { + return files[i].Path < files[j].Path + }) + + return files, nil +} + +// IsExcludedPath checks if any component of the path is in the exclusion set. +func IsExcludedPath(path string, excludedFolders map[string]struct{}) bool { + parts := strings.Split(filepath.ToSlash(path), "/") + for _, part := range parts { + if _, excluded := excludedFolders[strings.ToLower(part)]; excluded { + return true + } + } + return false +} + +// FindImageFiles returns all image files in the given folder. +func FindImageFiles(folder string) ([]FileInfo, error) { + return FindFilesByExtension(folder, ImageExtensions, ExcludedFolders) +} + +// FindVideoFiles returns all video files in the given folder. +func FindVideoFiles(folder string) ([]FileInfo, error) { + return FindFilesByExtension(folder, VideoExtensions, ExcludedFolders) +} + +// FindAllMediaFiles returns all image and video files. 
+func FindAllMediaFiles(folder string) ([]FileInfo, error) { + allExts := make(map[string]struct{}) + for k, v := range ImageExtensions { + allExts[k] = v + } + for k, v := range VideoExtensions { + allExts[k] = v + } + return FindFilesByExtension(folder, allExts, ExcludedFolders) +} + +// FormatFileSize formats file size in human-readable format. +func FormatFileSize(sizeBytes int64) string { + size := float64(sizeBytes) + for _, unit := range []string{"B", "KB", "MB", "GB"} { + if size < 1024 { + return fmt.Sprintf("%.2f %s", size, unit) + } + size /= 1024 + } + return fmt.Sprintf("%.2f TB", size) +} + +// NormaliseExt lowercases and resolves aliases (.jpeg → .jpg). +func NormaliseExt(ext string) string { + ext = strings.ToLower(ext) + if alias, ok := Aliases[ext]; ok { + return alias + } + return ext +} + +// IsImage returns true if the file extension is a known image type. +func IsImage(path string) bool { + ext := NormaliseExt(strings.ToLower(filepath.Ext(path))) + _, ok := ImageExtensions[ext] + return ok +} + +// IsVideo returns true if the file extension is a known video type. +func IsVideo(path string) bool { + ext := NormaliseExt(strings.ToLower(filepath.Ext(path))) + _, ok := VideoExtensions[ext] + return ok +} + +// FormatDuration formats duration in human-readable format. +func FormatDuration(seconds float64) string { + hours := int(seconds) / 3600 + minutes := (int(seconds) % 3600) / 60 + secs := int(seconds) % 60 + if hours > 0 { + return fmt.Sprintf("%dh %dm %ds", hours, minutes, secs) + } + if minutes > 0 { + return fmt.Sprintf("%dm %ds", minutes, secs) + } + return fmt.Sprintf("%ds", secs) +} diff --git a/src/morphic/__init__.py b/src/morphic/__init__.py deleted file mode 100644 index 1e330e4..0000000 --- a/src/morphic/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -morphic - Unified media toolkit: format conversion and duplicate detection. 
- -This package provides: - -- **morphic.converter** - Scan folders and batch-convert images/videos -- **morphic.dupfinder** - Find duplicate images/videos via perceptual hashing -- **morphic.frontend** - Shared Flask web UI with tabbed interface - -Quick start:: - - # Launch the web UI - morphic - - # With options - morphic --port 9000 --folder /path/to/media -""" - -from importlib.metadata import metadata as _metadata - -_meta = _metadata(__name__) -__version__ = _meta["Version"] -__author__ = _meta["Author"] diff --git a/src/morphic/converter/__init__.py b/src/morphic/converter/__init__.py deleted file mode 100644 index b94d98e..0000000 --- a/src/morphic/converter/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -morphic.converter - File format conversion engine. - -Provides image conversion (Pillow) and video conversion (ffmpeg). -""" - -from morphic.converter.constants import ( - IMAGE_CONVERSIONS, - VIDEO_CONVERSIONS, -) -from morphic.converter.converter import ( - convert_file, - convert_image, - convert_video, -) -from morphic.converter.scanner import scan_folder - -__all__ = [ - "IMAGE_CONVERSIONS", - "VIDEO_CONVERSIONS", - "convert_file", - "convert_image", - "convert_video", - "scan_folder", -] diff --git a/src/morphic/converter/constants.py b/src/morphic/converter/constants.py deleted file mode 100644 index 1d8c26e..0000000 --- a/src/morphic/converter/constants.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Conversion-specific constants: which formats can be converted to which. - -Uses the shared extension sets from :mod:`morphic.shared.constants` and -builds canonical conversion mapping tables. -""" - -from __future__ import annotations - -from morphic.shared.constants import ( - ALIASES, - IMAGE_EXTENSIONS, - VIDEO_EXTENSIONS, -) - -# ── Canonical (de-aliased) sets ──────────────────────────────────────────── -# Only the "primary" extension for each format. 
- -_CANONICAL_IMAGE: set[str] = { - ".jpg", - ".png", - ".tif", - ".bmp", - ".webp", - ".gif", - ".ico", - ".heic", - ".heif", - ".avif", -} - -_CANONICAL_VIDEO: set[str] = { - ".mp4", - ".mov", - ".avi", - ".mkv", - ".webm", - ".flv", - ".wmv", - ".m4v", - ".mpeg", - ".3gp", - ".ts", -} - - -def _normalise(ext: str) -> str: - """Resolve aliases (e.g. .jpeg -> .jpg).""" - return ALIASES.get(ext.lower(), ext.lower()) - - -# ── Conversion mappings ─────────────────────────────────────────────────── -# source ext -> list of compatible target extensions - -# Only generate mappings for extensions that have a canonical form we can -# actually write to. Raw/vector/exotic formats are read-only. - -_CONVERTIBLE_IMAGE: set[str] = { - ext for ext in IMAGE_EXTENSIONS if _normalise(ext) in _CANONICAL_IMAGE -} - -_CONVERTIBLE_VIDEO: set[str] = { - ext for ext in VIDEO_EXTENSIONS if _normalise(ext) in _CANONICAL_VIDEO -} - -IMAGE_CONVERSIONS: dict[str, list[str]] = { - ext: sorted(_CANONICAL_IMAGE - {_normalise(ext)}) - for ext in _CONVERTIBLE_IMAGE -} - -VIDEO_CONVERSIONS: dict[str, list[str]] = { - ext: sorted(_CANONICAL_VIDEO - {_normalise(ext)}) - for ext in _CONVERTIBLE_VIDEO -} diff --git a/src/morphic/converter/converter.py b/src/morphic/converter/converter.py deleted file mode 100644 index 20c3039..0000000 --- a/src/morphic/converter/converter.py +++ /dev/null @@ -1,295 +0,0 @@ -""" -File conversion engine – Pillow for images, ffmpeg for videos. 
-""" - -from __future__ import annotations - -import importlib -import os -import shutil -import subprocess -from pathlib import Path - -from PIL import Image - -from morphic.shared.utils import is_image, is_video, normalise_ext - - -def _ffmpeg_available() -> bool: - """Return True if ffmpeg is on PATH.""" - return shutil.which("ffmpeg") is not None - - -def _is_torch_cuda_available() -> bool: - """Return True if torch is installed and CUDA is available.""" - try: - torch = importlib.import_module("torch") - - return torch.cuda.is_available() - except Exception: - return False - - -def _ffmpeg_has_encoder(encoder: str) -> bool: - """Check if ffmpeg has a particular video encoder available.""" - try: - output = subprocess.check_output( - ["ffmpeg", "-hide_banner", "-encoders"], - stderr=subprocess.STDOUT, - text=True, - timeout=15, - ) - return any(encoder in line for line in output.splitlines()) - except Exception: - return False - - -def _ffmpeg_has_hwaccel(hwaccel: str) -> bool: - """Check if ffmpeg supports a particular hardware acceleration method.""" - try: - output = subprocess.check_output( - ["ffmpeg", "-hide_banner", "-hwaccels"], - stderr=subprocess.STDOUT, - text=True, - timeout=15, - ) - return any(hwaccel in line for line in output.splitlines()) - except Exception: - return False - - -def _get_video_encoder(target_ext: str) -> tuple[str, bool, str]: - """Select a video encoder (and whether to request hardware acceleration). - - Returns (encoder, use_hwaccel, output_extension). - """ - ext = target_ext.lower().lstrip(".") - output_ext = ext - - if ext.endswith("-av1"): - output_ext = ext.split("-")[0] - # For AV1 try available encoders in preferred order. 
- use_cuda = _is_torch_cuda_available() and _ffmpeg_has_hwaccel("cuda") - if use_cuda and _ffmpeg_has_encoder("av1_nvenc"): - return "av1_nvenc", True, output_ext - if _ffmpeg_has_encoder("libsvtav1"): - return "libsvtav1", False, output_ext - if _ffmpeg_has_encoder("libaom-av1"): - return "libaom-av1", False, output_ext - if _ffmpeg_has_encoder("libvpx-vp9"): - # VP9 older fallback if AV1 is unavailable - return "libvpx-vp9", False, output_ext - # fall back to H.264 if no AV1 encoder installed - return "libx264", False, output_ext - - # Prefer NVIDIA nvenc if available for standard containers. - use_cuda = _is_torch_cuda_available() and _ffmpeg_has_hwaccel("cuda") - if output_ext in ("mp4", "mkv", "mov") and use_cuda: - if _ffmpeg_has_encoder("h264_nvenc"): - return "h264_nvenc", True, output_ext - if _ffmpeg_has_encoder("hevc_nvenc"): - return "hevc_nvenc", True, output_ext - # fallback to GPU-like to ensure we do not use missing nvenc - if output_ext in ("mp4", "mkv", "mov"): - return "libx264", False, output_ext - if output_ext == "webm" and _ffmpeg_has_encoder("vp9_nvenc"): - return "vp9_nvenc", True, output_ext - - # Fallback to software encoders. - if output_ext in ("mp4", "mkv", "mov"): - return "libx264", False, output_ext - if output_ext == "webm": - return "libvpx-vp9", False, output_ext - if output_ext == "avi": - return "mpeg4", False, output_ext - if output_ext in ("flv", "mpeg", "3gp", "ts"): - return "libx264", False, output_ext - - # Generic fallback - return "libx264", False, output_ext - - -def convert_image( - source: str, - target_ext: str, - output_dir: str | None = None, -) -> str: - """ - Convert an image file using Pillow. - - Parameters - ---------- - source : str - Path to the source image. - target_ext : str - Target extension (with or without leading dot). - output_dir : str, optional - Directory for the output file. Defaults to the source directory. - - Returns - ------- - str - Path of the converted file. 
- """ - src = Path(source) - target_ext = target_ext if target_ext.startswith(".") else f".{target_ext}" - target_ext = normalise_ext(target_ext) - - if output_dir: - os.makedirs(output_dir, exist_ok=True) - dest = Path(output_dir) / (src.stem + target_ext) - else: - dest = src.with_suffix(target_ext) - - # Avoid overwriting existing files - if dest.exists(): - dest = dest.with_stem(dest.stem + "_converted") - - img = Image.open(source) - - # Handle RGBA -> formats that don't support alpha - if img.mode == "RGBA" and target_ext in {".jpg", ".jpeg", ".bmp", ".ico"}: - img = img.convert("RGB") - elif img.mode == "P" and target_ext in {".jpg", ".jpeg"}: - img = img.convert("RGB") - - save_kwargs: dict = {} - if target_ext in {".jpg", ".jpeg"}: - save_kwargs["quality"] = 95 - elif target_ext == ".webp": - save_kwargs["quality"] = 90 - elif target_ext in {".tif", ".tiff"}: - save_kwargs["compression"] = "tiff_lzw" - - img.save(str(dest), **save_kwargs) - return str(dest) - - -def convert_video( - source: str, - target_ext: str, - output_dir: str | None = None, - av1_crf: int | None = None, -) -> str: - """ - Convert a video file using ffmpeg. - - Parameters - ---------- - source : str - Path to the source video. - target_ext : str - Target extension (with or without leading dot). - output_dir : str, optional - Directory for the output file. Defaults to the source directory. - - Returns - ------- - str - Path of the converted file. - - Raises - ------ - RuntimeError - If ffmpeg is not installed or conversion fails. - """ - if not _ffmpeg_available(): - raise RuntimeError( - "ffmpeg is not installed or not on PATH. 
" - "Install it: https://ffmpeg.org/download.html" - ) - - src = Path(source) - target_ext = target_ext if target_ext.startswith(".") else f".{target_ext}" - target_ext = normalise_ext(target_ext) - - codec_target_ext = target_ext - if ( - target_ext.startswith(".mp4-av1") - or target_ext.startswith(".mkv-av1") - or target_ext.startswith(".webm-av1") - ): - # select container from it, preserve compatible extension - container_ext = target_ext.split("-", 1)[0] - codec_target_ext = container_ext - else: - codec_target_ext = target_ext - - if output_dir: - os.makedirs(output_dir, exist_ok=True) - dest = Path(output_dir) / (src.stem + codec_target_ext) - else: - dest = src.with_suffix(codec_target_ext) - - if dest.exists(): - dest = dest.with_stem(dest.stem + "_converted") - - # Use stream-copy for container-only conversions - if codec_target_ext in {".mkv", ".ts"}: - cmd = ["ffmpeg", "-y", "-i", str(src), "-c", "copy", str(dest)] - else: - encoder, hwaccel, _out_ext = _get_video_encoder(target_ext) - cmd = ["ffmpeg", "-y"] - - if hwaccel and _ffmpeg_has_hwaccel("cuda"): - cmd += ["-hwaccel", "cuda"] - - audio_codec = "aac" - if codec_target_ext == ".avi": - # AVI prefers mp3 audio or PCM - audio_codec = "libmp3lame" - - if codec_target_ext == ".webm" and encoder in ( - "libx264", - "h264_nvenc", - "h264", - ): - # WebM should use VP9/AV1; keep default - pass - - cmd += ["-i", str(src), "-c:v", encoder, "-c:a", audio_codec] - - if encoder.endswith("nvenc"): - cmd += ["-preset", "fast", "-rc", "vbr", "-cq", "23"] - else: - if encoder in ("libaom-av1", "libsvtav1", "av1_nvenc"): - # AV1 quality presets and CRF management - av1_default_crf = ( - 32 if encoder in ("libaom-av1", "av1_nvenc") else 28 - ) - if av1_crf is not None and 10 <= av1_crf <= 63: - chosen_crf = av1_crf - else: - chosen_crf = av1_default_crf - cmd += ["-preset", "fast", "-crf", str(chosen_crf)] - else: - cmd += ["-preset", "fast", "-crf", "23"] - - cmd.append(str(dest)) - - result = subprocess.run( - 
cmd, - capture_output=True, - text=True, - timeout=600, - ) - if result.returncode != 0: - raise RuntimeError( - f"ffmpeg error (code {result.returncode}): {result.stderr.strip()}" - ) - - return str(dest) - - -def convert_file( - source: str, - target_ext: str, - output_dir: str | None = None, - av1_crf: int | None = None, -) -> str: - """High-level converter – routes to image or video handler.""" - if is_image(source): - return convert_image(source, target_ext, output_dir) - if is_video(source): - return convert_video(source, target_ext, output_dir, av1_crf=av1_crf) - raise ValueError(f"Unsupported file type: {source}") diff --git a/src/morphic/converter/scanner.py b/src/morphic/converter/scanner.py deleted file mode 100644 index 981c9f7..0000000 --- a/src/morphic/converter/scanner.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Folder scanner – walks directories and inventories image/video files -for the format converter. -""" - -from __future__ import annotations - -import os -from pathlib import Path - -from morphic.converter.constants import IMAGE_CONVERSIONS, VIDEO_CONVERSIONS -from morphic.shared.constants import IMAGE_EXTENSIONS, VIDEO_EXTENSIONS -from morphic.shared.utils import normalise_ext - - -def get_compatible_targets(source_path: str) -> list[str]: - """Return list of extensions the source can be converted to.""" - ext = normalise_ext(Path(source_path).suffix) - if ext in IMAGE_CONVERSIONS: - return IMAGE_CONVERSIONS[ext] - if ext in VIDEO_CONVERSIONS: - return VIDEO_CONVERSIONS[ext] - return [] - - -def scan_folder( - folder: str, - include_subfolders: bool = True, - filter_type: str = "both", -) -> dict: - """ - Walk *folder* and return a summary + full file list. - - Parameters - ---------- - folder : str - Absolute path to the directory to scan. - include_subfolders : bool - Whether to recurse into subdirectories. - filter_type : str - One of ``"images"``, ``"videos"``, ``"both"``. 
- - Returns - ------- - dict - ``{"folder", "summary", "files", "total"}`` - """ - allowed: set[str] = set() - if filter_type in ("images", "both"): - allowed |= IMAGE_EXTENSIONS - if filter_type in ("videos", "both"): - allowed |= VIDEO_EXTENSIONS - - summary: dict[str, int] = {} - files: list[dict] = [] - - if include_subfolders: - walker = os.walk(folder) - else: - try: - entries = os.listdir(folder) - except PermissionError: - entries = [] - walker = [(folder, [], entries)] # type: ignore[assignment] - - for dirpath, _dirs, filenames in walker: - for fname in filenames: - full = os.path.join(dirpath, fname) - ext = normalise_ext(Path(fname).suffix) - if ext not in allowed: - continue - try: - size = os.path.getsize(full) - except OSError: - size = 0 - - ftype = "image" if ext in IMAGE_EXTENSIONS else "video" - targets = get_compatible_targets(full) - - summary[ext] = summary.get(ext, 0) + 1 - files.append( - { - "path": full, - "name": fname, - "ext": ext, - "size": size, - "type": ftype, - "targets": targets, - } - ) - - sorted_summary = dict(sorted(summary.items(), key=lambda x: -x[1])) - - return { - "folder": folder, - "summary": sorted_summary, - "files": sorted(files, key=lambda f: f["name"].lower()), - "total": len(files), - } diff --git a/src/morphic/dupfinder/__init__.py b/src/morphic/dupfinder/__init__.py deleted file mode 100644 index edf157d..0000000 --- a/src/morphic/dupfinder/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -morphic.dupfinder - Duplicate image/video detection via perceptual hashing. - -Ported from the standalone dupfinder project, now sharing constants and -utilities with the rest of morphic. 
-""" - -from morphic.dupfinder.images import ( - ImageDuplicateFinder, - ImageHasher, - ImageInfo, -) -from morphic.dupfinder.videos import ( - VideoDuplicateFinder, - VideoHasher, - VideoInfo, -) -from morphic.dupfinder.accelerator import ( - AcceleratorType, - GPUAccelerator, - get_accelerator, -) - -__all__ = [ - "AcceleratorType", - "GPUAccelerator", - "ImageDuplicateFinder", - "ImageHasher", - "ImageInfo", - "VideoDuplicateFinder", - "VideoHasher", - "VideoInfo", - "get_accelerator", -] diff --git a/src/morphic/dupfinder/accelerator.py b/src/morphic/dupfinder/accelerator.py deleted file mode 100644 index 25dea1c..0000000 --- a/src/morphic/dupfinder/accelerator.py +++ /dev/null @@ -1,599 +0,0 @@ -""" -GPU Accelerator Module - -Provides GPU-accelerated operations for image/video processing with automatic -fallback through: CUDA -> AMD/ROCm -> OpenCL -> CPU multiprocessing - -Accelerates: -1. Image resizing/preprocessing -2. DCT computation for perceptual hashing -3. Hamming distance computation for similarity matrix -""" - -from __future__ import annotations - -import importlib -import logging -import multiprocessing as mp -import warnings -from enum import Enum, auto -from typing import Any, Sequence - -import numpy as np - -# Suppress PyTorch CUDA capability warnings during detection -warnings.filterwarnings("ignore", message=".*CUDA capability.*") -warnings.filterwarnings("ignore", message=".*cuda capability.*") -warnings.filterwarnings("ignore", message=".*Please install PyTorch.*") - -logger = logging.getLogger(__name__) - - -class AcceleratorType(Enum): - """Available acceleration backends.""" - - CUDA = auto() - ROCM = auto() - OPENCL = auto() - CPU = auto() - - -class GPUAccelerator: - """ - GPU-accelerated operations with automatic backend detection and fallback. 
- - Priority: CUDA -> ROCm -> OpenCL -> CPU multiprocessing - """ - - _instance: GPUAccelerator | None = None - _initialized: bool = False - - def __new__(cls) -> GPUAccelerator: - """Singleton pattern to avoid multiple GPU initializations.""" - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__(self) -> None: - if self._initialized: - return - - self.backend: AcceleratorType = AcceleratorType.CPU - self.device = None - self.torch_device = None - self._torch: Any = None - self._cp: Any = None - self._cl: Any = None - self._cl_ctx: Any = None - self._cl_queue: Any = None - self.num_cpus = mp.cpu_count() - - self._detect_backend() - self._initialized = True - - def _detect_backend(self) -> None: - """Detect available GPU backend with priority fallback.""" - if self._try_cuda(): - return # pragma: no cover - if self._try_rocm(): - return # pragma: no cover - if self._try_opencl(): - return # pragma: no cover - self._setup_cpu() - - def _try_cuda(self) -> bool: # pragma: no cover - """Try to initialize CUDA backend.""" - try: - torch = importlib.import_module("torch") - if ( - getattr(torch, "cuda", None) is not None - and torch.cuda.is_available() - ): - try: - test_device = torch.device("cuda") - test_tensor = torch.zeros(1, device=test_device) - _ = test_tensor + 1 - del test_tensor - torch.cuda.empty_cache() - - self._torch = torch - self.torch_device = test_device - self.backend = AcceleratorType.CUDA - gpu_name = torch.cuda.get_device_name(0) - logger.info( - "Using CUDA acceleration via PyTorch: %s", - gpu_name, - ) - return True - except RuntimeError as e: - logger.warning( - "PyTorch CUDA available but not functional: %s", - e, - ) - except (ImportError, Exception): - pass - - try: - cp = importlib.import_module("cupy") - if getattr(cp, "cuda", None) is not None: - device_count = cp.cuda.runtime.getDeviceCount() - if device_count > 0: - test_arr = cp.zeros(1) - _ = test_arr + 1 - del test_arr - 
cp.get_default_memory_pool().free_all_blocks() - - self._cp = cp - self.backend = AcceleratorType.CUDA - device_props = cp.cuda.runtime.getDeviceProperties(0) - gpu_name = ( - device_props["name"].decode() - if isinstance(device_props["name"], bytes) - else device_props["name"] - ) - logger.info( - "Using CUDA acceleration via CuPy: %s", - gpu_name, - ) - return True - except (ImportError, Exception) as e: - logger.debug("CuPy CUDA not available: %s", e) - - return False - - def _try_rocm(self) -> bool: # pragma: no cover - """Try to initialize AMD ROCm backend via PyTorch.""" - try: - torch = importlib.import_module("torch") - - if ( - getattr(torch, "cuda", None) is not None - and torch.cuda.is_available() - ): - return False - - if ( - hasattr(torch, "hip") - and getattr(torch.hip, "is_available", lambda: False)() - ): - self._torch = torch - self.torch_device = torch.device("cuda") - self.backend = AcceleratorType.ROCM - logger.info("Using AMD ROCm acceleration via PyTorch") - return True - except (ImportError, AttributeError): - pass - return False - - def _try_opencl(self) -> bool: # pragma: no cover - """Try to initialize OpenCL backend.""" - try: - cl = importlib.import_module("pyopencl") - - try: - platforms = cl.get_platforms() - except Exception as e: - logger.debug("OpenCL platform enumeration failed: %s", e) - return False - - if not platforms: - return False - - for platform in platforms: - try: - devices = platform.get_devices( - device_type=cl.device_type.GPU, - ) - if devices: - self._cl = cl - self._cl_ctx = cl.Context(devices=[devices[0]]) - self._cl_queue = cl.CommandQueue(self._cl_ctx) - self.backend = AcceleratorType.OPENCL - logger.info( - "Using OpenCL acceleration: %s", - devices[0].name, - ) - return True - except Exception: - continue - except ImportError: - pass - return False - - def _setup_cpu(self) -> None: - """Set up CPU multiprocessing backend.""" - self.backend = AcceleratorType.CPU - logger.info( - "Using CPU multiprocessing with 
%d cores", - self.num_cpus, - ) - - @property - def is_gpu_available(self) -> bool: - """Check if any GPU acceleration is available.""" - return self.backend in ( - AcceleratorType.CUDA, - AcceleratorType.ROCM, - AcceleratorType.OPENCL, - ) - - def get_backend_name(self) -> str: - """Get human-readable backend name.""" - names = { - AcceleratorType.CUDA: "CUDA (NVIDIA GPU)", - AcceleratorType.ROCM: "ROCm (AMD GPU)", - AcceleratorType.OPENCL: "OpenCL (GPU)", - AcceleratorType.CPU: "CPU Multiprocessing", - } - return names.get(self.backend, "Unknown") - - # ── Batch operations ─────────────────────────────────────────────── - - def resize_image_batch( - self, - images: list[np.ndarray], - target_size: tuple[int, int], - ) -> list[np.ndarray]: - """Resize a batch of images using the best available backend.""" - if not images: - return [] - if ( - self.backend == AcceleratorType.CUDA and self._torch is not None - ): # pragma: no cover - return self._resize_batch_torch( - images, target_size - ) # pragma: no cover - if ( - self.backend == AcceleratorType.CUDA and self._cp is not None - ): # pragma: no cover - return self._resize_batch_cupy( - images, target_size - ) # pragma: no cover - return self._resize_batch_cpu(images, target_size) - - def _resize_batch_torch( # pragma: no cover - self, - images: list[np.ndarray], - target_size: tuple[int, int], - ) -> list[np.ndarray]: - torch_nn_functional = importlib.import_module("torch.nn.functional") - functional = torch_nn_functional - - assert self._torch is not None - results = [] - target_h, target_w = target_size[1], target_size[0] - for img in images: - if len(img.shape) == 2: - img = img[:, :, np.newaxis] - tensor = ( - self._torch.from_numpy(img) - .float() - .permute(2, 0, 1) - .unsqueeze(0) - .to(self.torch_device) - ) - resized = functional.interpolate( - tensor, - size=(target_h, target_w), - mode="bilinear", - align_corners=False, - ) - result = ( - resized.squeeze(0) - .permute(1, 2, 0) - .cpu() - .numpy() - 
.astype(np.uint8) - ) - if result.shape[2] == 1: - result = result.squeeze(2) - results.append(result) - return results - - def _resize_batch_cupy( # pragma: no cover - self, - images: list[np.ndarray], - target_size: tuple[int, int], - ) -> list[np.ndarray]: - cupyx_ndimage = importlib.import_module("cupyx.scipy.ndimage") - zoom = getattr(cupyx_ndimage, "zoom") - - assert self._cp is not None - results = [] - for img in images: - if len(img.shape) == 2: - zoom_factors = ( - target_size[1] / img.shape[0], - target_size[0] / img.shape[1], - ) - else: - zoom_factors = ( - target_size[1] / img.shape[0], - target_size[0] / img.shape[1], - 1, - ) - gpu_img = self._cp.asarray(img) - resized = zoom(gpu_img, zoom_factors, order=1) - results.append(self._cp.asnumpy(resized).astype(np.uint8)) - return results - - def _resize_batch_cpu( - self, - images: list[np.ndarray], - target_size: tuple[int, int], - ) -> list[np.ndarray]: - import cv2 - - return [ - cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) - for img in images - ] - - def compute_dct_batch( - self, - images: list[np.ndarray], - ) -> list[np.ndarray]: - """Compute DCT for a batch of images (used in pHash).""" - if ( - self.backend == AcceleratorType.CUDA and self._torch is not None - ): # pragma: no cover - return self._dct_batch_torch(images) # pragma: no cover - if ( - self.backend == AcceleratorType.CUDA and self._cp is not None - ): # pragma: no cover - return self._dct_batch_cupy(images) # pragma: no cover - return self._dct_batch_cpu(images) - - def _dct_batch_torch( # pragma: no cover - self, - images: list[np.ndarray], - ) -> list[np.ndarray]: - results = [] - for img in images: - tensor = self._torch.from_numpy(img.astype(np.float32)).to( - self.torch_device - ) - dct = self._torch.fft.fft2(tensor).real - results.append(dct.cpu().numpy()) - return results - - def _dct_batch_cupy( # pragma: no cover - self, - images: list[np.ndarray], - ) -> list[np.ndarray]: - results = [] - for img in images: 
- gpu_img = self._cp.asarray(img.astype(np.float32)) - dct = self._cp.fft.fft2(gpu_img).real - results.append(self._cp.asnumpy(dct)) - return results - - def _dct_batch_cpu( - self, - images: list[np.ndarray], - ) -> list[np.ndarray]: - from scipy.fftpack import dct - - results = [] - for img in images: - dct_result = dct( - dct(img.astype(np.float32).T, norm="ortho").T, - norm="ortho", - ) - results.append(dct_result) - return results - - def compute_similarity_matrix( - self, - hashes: list[np.ndarray], - threshold: float = 0.0, - ) -> np.ndarray: - """Compute pairwise similarity matrix for hash arrays.""" - if not hashes: - return np.array([]) - - n = len(hashes) - hash_matrix = np.vstack( - [h.flatten() for h in hashes], - ).astype(np.float32) - - if ( - self.backend == AcceleratorType.CUDA and self._torch is not None - ): # pragma: no cover - return self._similarity_matrix_torch( - hash_matrix, n - ) # pragma: no cover - if ( - self.backend == AcceleratorType.CUDA and self._cp is not None - ): # pragma: no cover - return self._similarity_matrix_cupy( - hash_matrix, n - ) # pragma: no cover - return self._similarity_matrix_cpu(hash_matrix, n) - - def _similarity_matrix_torch( # pragma: no cover - self, - hash_matrix: np.ndarray, - n: int, - ) -> np.ndarray: - gpu_hashes = self._torch.from_numpy(hash_matrix).to(self.torch_device) - h1 = gpu_hashes.unsqueeze(1) - h2 = gpu_hashes.unsqueeze(0) - diff = (h1 != h2).float().sum(dim=2) - max_dist = hash_matrix.shape[1] - similarity = 1.0 - (diff / max_dist) - return similarity.cpu().numpy() - - def _similarity_matrix_cupy( # pragma: no cover - self, - hash_matrix: np.ndarray, - n: int, - ) -> np.ndarray: - gpu_hashes = self._cp.asarray(hash_matrix) - h1 = gpu_hashes[:, self._cp.newaxis, :] - h2 = gpu_hashes[self._cp.newaxis, :, :] - diff = self._cp.sum(h1 != h2, axis=2).astype(self._cp.float32) - max_dist = hash_matrix.shape[1] - similarity = 1.0 - (diff / max_dist) - return self._cp.asnumpy(similarity) - - def 
_similarity_matrix_cpu( - self, - hash_matrix: np.ndarray, - n: int, - ) -> np.ndarray: - from scipy.spatial.distance import cdist - - distances = cdist(hash_matrix, hash_matrix, metric="hamming") - return (1.0 - distances).astype(np.float32) - - def batch_hamming_distance( - self, - hashes1: list[str], - hashes2: list[str], - ) -> np.ndarray: - """Compute Hamming distances between two lists of hex hash strings.""" - - def hex_to_binary(hex_str: str) -> np.ndarray: - return np.array( - [ - int(b) - for b in bin(int(hex_str, 16))[2:].zfill( - len(hex_str) * 4, - ) - ], - ) - - arr1 = np.vstack( - [hex_to_binary(h) for h in hashes1], - ).astype(np.float32) - arr2 = np.vstack( - [hex_to_binary(h) for h in hashes2], - ).astype(np.float32) - - if ( - self.backend == AcceleratorType.CUDA and self._torch is not None - ): # pragma: no cover - return self._batch_hamming_torch(arr1, arr2) # pragma: no cover - if ( - self.backend == AcceleratorType.CUDA and self._cp is not None - ): # pragma: no cover - return self._batch_hamming_cupy(arr1, arr2) # pragma: no cover - return self._batch_hamming_cpu(arr1, arr2) - - def _batch_hamming_torch( # pragma: no cover - self, - arr1: np.ndarray, - arr2: np.ndarray, - ) -> np.ndarray: - gpu1 = self._torch.from_numpy(arr1).to(self.torch_device) - gpu2 = self._torch.from_numpy(arr2).to(self.torch_device) - distances = (gpu1.unsqueeze(1) != gpu2.unsqueeze(0)).float().sum(dim=2) - return distances.cpu().numpy() - - def _batch_hamming_cupy( # pragma: no cover - self, - arr1: np.ndarray, - arr2: np.ndarray, - ) -> np.ndarray: - gpu1 = self._cp.asarray(arr1) - gpu2 = self._cp.asarray(arr2) - distances = self._cp.sum( - gpu1[:, self._cp.newaxis, :] != gpu2[self._cp.newaxis, :, :], - axis=2, - ) - return self._cp.asnumpy(distances) - - def _batch_hamming_cpu( - self, - arr1: np.ndarray, - arr2: np.ndarray, - ) -> np.ndarray: - from scipy.spatial.distance import cdist - - distances = cdist(arr1, arr2, metric="hamming") * arr1.shape[1] - return 
distances.astype(np.float32) - - -# ── Module-level convenience functions ───────────────────────────────── - -_accelerator: GPUAccelerator | None = None - - -def get_accelerator() -> GPUAccelerator: - """Get the global GPU accelerator instance.""" - global _accelerator - if _accelerator is None: - _accelerator = GPUAccelerator() - return _accelerator - - -def compute_phash_gpu( - images: list[np.ndarray], - hash_size: int = 8, -) -> list[np.ndarray]: - """Compute perceptual hashes for images using GPU acceleration.""" - acc = get_accelerator() - if not images: - return [] - - dct_size = hash_size * 4 - - gray_images = [] - for img in images: - if len(img.shape) == 3: - gray = np.dot(img[..., :3], [0.299, 0.587, 0.114]) - else: - gray = img - gray_images.append(gray.astype(np.float32)) - - resized = acc.resize_image_batch( - [img.astype(np.uint8) for img in gray_images], - (dct_size, dct_size), - ) - - dct_results = acc.compute_dct_batch( - [r.astype(np.float32) for r in resized], - ) - - hashes = [] - for dct in dct_results: - low_freq = dct[:hash_size, :hash_size] - median = np.median(low_freq) - hash_bits = (low_freq > median).astype(np.uint8) - hashes.append(hash_bits.flatten()) - - return hashes - - -def compute_similarity_matrix_gpu( - hashes: Sequence[str | np.ndarray], - hash_size: int = 16, -) -> np.ndarray: - """Compute pairwise similarity matrix for hashes using GPU.""" - acc = get_accelerator() - if not hashes: - return np.array([]) - - if isinstance(hashes[0], str): - - def hex_to_binary(hex_str: str) -> np.ndarray: - try: - bits = bin(int(hex_str, 16))[2:].zfill( - hash_size * hash_size, - ) - return np.array( - [int(b) for b in bits], - dtype=np.uint8, - ) - except ValueError: - return np.zeros(hash_size * hash_size, dtype=np.uint8) - - hash_arrays = [hex_to_binary(h) for h in hashes if isinstance(h, str)] - else: - hash_arrays = [ - h.flatten() if isinstance(h, np.ndarray) else np.array(h) - for h in hashes - ] - - return 
acc.compute_similarity_matrix(hash_arrays) diff --git a/src/morphic/dupfinder/images.py b/src/morphic/dupfinder/images.py deleted file mode 100644 index 723a8de..0000000 --- a/src/morphic/dupfinder/images.py +++ /dev/null @@ -1,437 +0,0 @@ -""" -Image Duplicate Finder module. - -Detects duplicate images based on content similarity using perceptual hashing. -""" - -from __future__ import annotations - -import logging -import os -from collections import defaultdict -from concurrent.futures import ThreadPoolExecutor, as_completed -from dataclasses import dataclass - -import imagehash -import numpy as np -from PIL import Image -from tqdm import tqdm - -from morphic.shared.constants import ( - DEFAULT_BATCH_SIZE, - DEFAULT_HASH_SIZE, - DEFAULT_IMAGE_THRESHOLD, - DEFAULT_NUM_WORKERS, - EXCLUDED_FOLDERS, - IMAGE_EXTENSIONS, -) -from morphic.shared.utils import ( - find_files_by_extension, -) - -logger = logging.getLogger(__name__) - -# Lazy import GPU accelerator -_gpu_available: bool | None = None -_get_accelerator = None -_compute_similarity_matrix_gpu = None - - -def _init_gpu() -> bool: - """Initialize GPU module lazily.""" - global _gpu_available, _get_accelerator, _compute_similarity_matrix_gpu - if _gpu_available is None: - try: - from morphic.dupfinder.accelerator import ( - compute_similarity_matrix_gpu, - get_accelerator, - ) - - _get_accelerator = get_accelerator - _compute_similarity_matrix_gpu = compute_similarity_matrix_gpu - _gpu_available = True - except ImportError: - _gpu_available = False - return _gpu_available - - -@dataclass -class ImageInfo: - """Stores information about an image file.""" - - path: str - width: int = 0 - height: int = 0 - file_size: int = 0 - format: str = "" - mode: str = "" - phash: str | None = None - ahash: str | None = None - dhash: str | None = None - whash: str | None = None - - def to_dict(self) -> dict: - """Convert to dictionary for JSON serialization.""" - return { - "path": self.path, - "width": self.width, - "height": 
self.height, - "file_size": self.file_size, - "format": self.format, - "mode": self.mode, - "phash": self.phash, - "ahash": self.ahash, - "dhash": self.dhash, - } - - -class ImageHasher: - """Handles image loading and perceptual hashing.""" - - def __init__(self, hash_size: int = DEFAULT_HASH_SIZE) -> None: - self.hash_size = hash_size - - def compute_hashes(self, image_path: str) -> ImageInfo: - """Compute perceptual hashes for an image.""" - image_info = ImageInfo(path=image_path) - - try: - image_info.file_size = os.path.getsize(image_path) - - with Image.open(image_path) as img: - image_info.width = img.width - image_info.height = img.height - image_info.format = img.format or "" - image_info.mode = img.mode - - if img.mode not in ("RGB", "L"): - try: - img = img.convert("RGB") - except Exception: - img = img.convert("L") - - image_info.phash = str( - imagehash.phash(img, hash_size=self.hash_size), - ) - image_info.ahash = str( - imagehash.average_hash(img, hash_size=self.hash_size), - ) - image_info.dhash = str( - imagehash.dhash(img, hash_size=self.hash_size), - ) - image_info.whash = str( - imagehash.whash(img, hash_size=self.hash_size), - ) - - except Exception as e: - logger.warning("Error processing image %s: %s", image_path, e) - - return image_info - - -class ImageDuplicateFinder: - """Finds duplicate images based on perceptual hash similarity.""" - - def __init__( - self, - similarity_threshold: float = DEFAULT_IMAGE_THRESHOLD, - hash_size: int = DEFAULT_HASH_SIZE, - num_workers: int = DEFAULT_NUM_WORKERS, - hash_type: str = "combined", - use_gpu: bool = True, - batch_size: int = DEFAULT_BATCH_SIZE, - ) -> None: - self.similarity_threshold = similarity_threshold - self.num_workers = num_workers - self.hash_type = hash_type - self.hash_size = hash_size - self.batch_size = batch_size - self.hasher = ImageHasher(hash_size=hash_size) - self.image_infos: dict[str, ImageInfo] = {} - - self.use_gpu = use_gpu and _init_gpu() - self.accelerator = None - if 
self.use_gpu and _get_accelerator is not None: - try: - self.accelerator = _get_accelerator() - logger.info( - "GPU acceleration enabled: %s", - self.accelerator.get_backend_name(), - ) - except Exception as e: - logger.warning("GPU acceleration not available: %s", e) - self.use_gpu = False - - def find_images(self, folder: str) -> list[str]: - """Find all image files in a folder recursively.""" - return find_files_by_extension( - folder, - IMAGE_EXTENSIONS, - EXCLUDED_FOLDERS, - ) - - def process_images( - self, - image_files: list[str], - ) -> dict[str, ImageInfo]: - """Process all images and compute their hashes.""" - logger.info("Processing images and computing hashes...") - - with ThreadPoolExecutor(max_workers=self.num_workers) as executor: - futures = { - executor.submit(self.hasher.compute_hashes, img): img - for img in image_files - } - - for future in tqdm( - as_completed(futures), - total=len(futures), - desc="Processing", - ): - image_path = futures[future] - try: - image_info = future.result() - if image_info.phash: - self.image_infos[image_path] = image_info - except Exception as e: - logger.error("Error processing %s: %s", image_path, e) - - logger.info( - "Successfully processed %d images", - len(self.image_infos), - ) - return self.image_infos - - def compute_similarity( - self, - info1: ImageInfo, - info2: ImageInfo, - ) -> float: - """Compute similarity between two images based on their hashes.""" - similarities: list[float] = [] - hash_pairs: list[tuple[str, str]] = [] - - if self.hash_type in ("combined", "phash"): - if info1.phash and info2.phash: - hash_pairs.append((info1.phash, info2.phash)) - - if self.hash_type in ("combined", "ahash"): - if info1.ahash and info2.ahash: - hash_pairs.append((info1.ahash, info2.ahash)) - - if self.hash_type in ("combined", "dhash"): - if info1.dhash and info2.dhash: - hash_pairs.append((info1.dhash, info2.dhash)) - - if self.hash_type in ("combined", "whash"): - if info1.whash and info2.whash: - 
hash_pairs.append((info1.whash, info2.whash)) - - for hash1, hash2 in hash_pairs: - h1 = imagehash.hex_to_hash(hash1) - h2 = imagehash.hex_to_hash(hash2) - distance = h1 - h2 - max_distance = len(h1.hash.flatten()) - similarity = 1 - (distance / max_distance) - similarities.append(similarity) - - return sum(similarities) / len(similarities) if similarities else 0.0 - - def find_duplicates_fast( - self, - ) -> list[list[tuple[str, float]]]: - """Find groups of duplicate images using hash bucketing.""" - logger.info("Finding duplicate images (fast mode)...") - - hash_buckets: dict[str, list[str]] = defaultdict(list) - for path, info in self.image_infos.items(): - if info.phash: - hash_buckets[info.phash].append(path) - - exact_groups: list[list[tuple[str, float]]] = [] - processed: set[str] = set() - - for _, paths in hash_buckets.items(): - if len(paths) > 1: - group = [(p, 1.0) for p in paths] - exact_groups.append(group) - processed.update(paths) - - remaining = [p for p in self.image_infos if p not in processed] - - if self.use_gpu and len(remaining) > 1: - near_groups = self._find_near_duplicates_gpu(remaining) - else: - near_groups = self._find_near_duplicates(remaining) - - return exact_groups + near_groups - - def _find_near_duplicates_gpu( - self, - image_paths: list[str], - ) -> list[list[tuple[str, float]]]: - """Find near-duplicate images using GPU-accelerated similarity.""" - n = len(image_paths) - if n == 0: - return [] - - logger.info( - "Computing similarity matrix for %d images using GPU...", - n, - ) - - all_hashes: list[str] = [] - for path in image_paths: - info = self.image_infos[path] - if self.hash_type == "phash" and info.phash: - all_hashes.append(info.phash) - elif self.hash_type == "ahash" and info.ahash: - all_hashes.append(info.ahash) - elif self.hash_type == "dhash" and info.dhash: - all_hashes.append(info.dhash) - elif self.hash_type == "whash" and info.whash: - all_hashes.append(info.whash) - else: - all_hashes.append( - info.phash or 
info.ahash or info.dhash or "", - ) - - valid_indices = [i for i, h in enumerate(all_hashes) if h] - valid_paths = [image_paths[i] for i in valid_indices] - valid_hashes = [all_hashes[i] for i in valid_indices] - - if len(valid_hashes) < 2: - return [] - - try: - if _compute_similarity_matrix_gpu is None: - raise RuntimeError("GPU not initialized") - sim_matrix = _compute_similarity_matrix_gpu( - valid_hashes, - self.hash_size, - ) - except Exception as e: - logger.warning( - "GPU similarity failed, falling back to CPU: %s", - e, - ) - return self._find_near_duplicates(image_paths) - - duplicate_groups: list[list[tuple[str, float]]] = [] - assigned: set[int] = set() - - for i in range(len(valid_paths)): - if i in assigned: - continue - - similar_indices = np.where( - sim_matrix[i] >= self.similarity_threshold, - )[0] - - if len(similar_indices) > 1: - group: list[tuple[str, float]] = [] - for j in similar_indices: - if j not in assigned or j == i: - group.append( - (valid_paths[j], float(sim_matrix[i, j])), - ) - assigned.add(j) - if len(group) > 1: - duplicate_groups.append(group) - - logger.info( - "Found %d groups using GPU acceleration", - len(duplicate_groups), - ) - return duplicate_groups - - def _find_near_duplicates( - self, - image_paths: list[str], - ) -> list[list[tuple[str, float]]]: - """Find near-duplicate images using pairwise comparison.""" - n = len(image_paths) - assigned: set[str] = set() - duplicate_groups: list[list[tuple[str, float]]] = [] - - total_comparisons = n * (n - 1) // 2 - - with tqdm(total=total_comparisons, desc="Comparing") as pbar: - for i in range(n): - if image_paths[i] in assigned: - pbar.update(n - i - 1) - continue - - current_group: list[tuple[str, float]] = [ - (image_paths[i], 1.0), - ] - - for j in range(i + 1, n): - pbar.update(1) - if image_paths[j] in assigned: - continue - - info1 = self.image_infos[image_paths[i]] - info2 = self.image_infos[image_paths[j]] - similarity = self.compute_similarity(info1, info2) - - if 
similarity >= self.similarity_threshold: - current_group.append( - (image_paths[j], similarity), - ) - assigned.add(image_paths[j]) - - if len(current_group) > 1: - assigned.add(image_paths[i]) - duplicate_groups.append(current_group) - - return duplicate_groups - - def find_duplicates(self) -> list[list[tuple[str, float]]]: - """Find groups of duplicate images.""" - if len(self.image_infos) > 100 or self.use_gpu: - return self.find_duplicates_fast() - - logger.info("Finding duplicate images...") - - image_paths = list(self.image_infos.keys()) - n = len(image_paths) - assigned: set[str] = set() - duplicate_groups: list[list[tuple[str, float]]] = [] - - total_comparisons = n * (n - 1) // 2 - - with tqdm(total=total_comparisons, desc="Comparing") as pbar: - for i in range(n): - if image_paths[i] in assigned: - pbar.update(n - i - 1) - continue - - current_group: list[tuple[str, float]] = [ - (image_paths[i], 1.0), - ] - - for j in range(i + 1, n): - pbar.update(1) - if image_paths[j] in assigned: - continue - - info1 = self.image_infos[image_paths[i]] - info2 = self.image_infos[image_paths[j]] - similarity = self.compute_similarity(info1, info2) - - if similarity >= self.similarity_threshold: - current_group.append( - (image_paths[j], similarity), - ) - assigned.add(image_paths[j]) - - if len(current_group) > 1: - assigned.add(image_paths[i]) - duplicate_groups.append(current_group) - - logger.info( - "Found %d groups of duplicates", - len(duplicate_groups), - ) - return duplicate_groups diff --git a/src/morphic/dupfinder/scanner.py b/src/morphic/dupfinder/scanner.py deleted file mode 100644 index 6ff9354..0000000 --- a/src/morphic/dupfinder/scanner.py +++ /dev/null @@ -1,299 +0,0 @@ -""" -Background scan job management for the dupfinder web UI. - -Handles running duplicate-detection scans in background threads and -converting results into JSON-serializable formats. 
-""" - -from __future__ import annotations - -import logging -import os -import threading -import time -import uuid -from dataclasses import dataclass, field - -from morphic.shared.constants import ( - DEFAULT_BATCH_SIZE, - DEFAULT_HASH_SIZE, - DEFAULT_IMAGE_THRESHOLD, - DEFAULT_NUM_FRAMES, - DEFAULT_NUM_WORKERS, - DEFAULT_VIDEO_THRESHOLD, -) -from morphic.shared.utils import format_duration, format_file_size -from morphic.dupfinder.images import ImageDuplicateFinder, ImageInfo -from morphic.dupfinder.videos import VideoDuplicateFinder, VideoInfo - -logger = logging.getLogger(__name__) - - -# ── Data Structures ──────────────────────────────────────────────────────── - - -@dataclass -class ScanJob: - """Represents a running or completed scan job.""" - - id: str - folder: str - scan_type: str # "images", "videos", "both" - status: str = "pending" - progress: float = 0.0 - message: str = "" - error: str | None = None - # Results - image_groups: list[list[dict]] = field(default_factory=list) - video_groups: list[list[dict]] = field(default_factory=list) - image_infos: dict[str, ImageInfo] = field(default_factory=dict) - video_infos: dict[str, VideoInfo] = field(default_factory=dict) - total_files_found: int = 0 - total_files_processed: int = 0 - space_savings: int = 0 - started_at: float = 0.0 - finished_at: float = 0.0 - # Settings - image_threshold: float = DEFAULT_IMAGE_THRESHOLD - video_threshold: float = DEFAULT_VIDEO_THRESHOLD - - -# ── Job Registry ─────────────────────────────────────────────────────────── - -_scan_jobs: dict[str, ScanJob] = {} -_scan_lock = threading.Lock() - - -def get_job(job_id: str) -> ScanJob | None: - """Retrieve a scan job by ID.""" - with _scan_lock: - return _scan_jobs.get(job_id) - - -def start_job( - folder: str, - scan_type: str, - image_threshold: float = DEFAULT_IMAGE_THRESHOLD, - video_threshold: float = DEFAULT_VIDEO_THRESHOLD, -) -> str: - """Create and launch a new scan job. 
Returns the job ID.""" - job_id = str(uuid.uuid4())[:8] - job = ScanJob( - id=job_id, - folder=folder, - scan_type=scan_type, - image_threshold=image_threshold, - video_threshold=video_threshold, - ) - with _scan_lock: - _scan_jobs[job_id] = job - - thread = threading.Thread(target=_run_scan, args=(job,), daemon=True) - thread.start() - return job_id - - -# ── Background Worker ────────────────────────────────────────────────────── - - -def _run_scan(job: ScanJob) -> None: - """Execute the scan in a background thread.""" - try: - job.status = "scanning" - job.started_at = time.time() - job.message = f"Scanning folder: {job.folder}" - - # ── Image Scan ───────────────────────────────────────────── - if job.scan_type in ("images", "both"): - job.message = "Finding image files..." - finder = ImageDuplicateFinder( - similarity_threshold=job.image_threshold, - hash_size=DEFAULT_HASH_SIZE, - num_workers=DEFAULT_NUM_WORKERS, - use_gpu=True, - batch_size=DEFAULT_BATCH_SIZE, - ) - image_files = finder.find_images(job.folder) - job.total_files_found += len(image_files) - job.message = ( - f"Found {len(image_files)} images. Processing hashes..." - ) - job.progress = 0.1 - - if image_files: - job.status = "processing" - finder.process_images(image_files) - job.image_infos = finder.image_infos - job.total_files_processed += len(finder.image_infos) - job.progress = 0.4 if job.scan_type == "both" else 0.7 - job.message = ( - f"Processed {len(finder.image_infos)} images. " - "Finding duplicates..." - ) - - job.status = "comparing" - groups = finder.find_duplicates() - job.image_groups = _format_image_groups( - groups, - finder.image_infos, - ) - job.progress = 0.5 if job.scan_type == "both" else 0.95 - - # ── Video Scan ───────────────────────────────────────────── - if job.scan_type in ("videos", "both"): - job.message = "Finding video files..." 
- vfinder = VideoDuplicateFinder( - similarity_threshold=job.video_threshold, - num_frames=DEFAULT_NUM_FRAMES, - hash_size=DEFAULT_HASH_SIZE, - num_workers=DEFAULT_NUM_WORKERS, - use_gpu=True, - ) - video_files = vfinder.find_videos(job.folder) - job.total_files_found += len(video_files) - job.message = ( - f"Found {len(video_files)} videos. Processing hashes..." - ) - job.progress = 0.6 if job.scan_type == "both" else 0.1 - - if video_files: - job.status = "processing" - vfinder.process_videos(video_files) - job.video_infos = vfinder.video_infos - job.total_files_processed += len(vfinder.video_infos) - job.progress = 0.8 if job.scan_type == "both" else 0.7 - job.message = ( - f"Processed {len(vfinder.video_infos)} videos. " - "Finding duplicates..." - ) - - job.status = "comparing" - groups = vfinder.find_duplicates() - job.video_groups = _format_video_groups( - groups, - vfinder.video_infos, - ) - job.progress = 0.95 - - # ── Finalise ─────────────────────────────────────────────── - job.space_savings = _calculate_space_savings(job) - job.status = "done" - job.progress = 1.0 - job.finished_at = time.time() - elapsed = job.finished_at - job.started_at - total_groups = len(job.image_groups) + len(job.video_groups) - job.message = ( - f"Done! Found {total_groups} duplicate group(s) " - f"in {elapsed:.1f}s. 
" - f"Potential savings: {format_file_size(job.space_savings)}" - ) - - except Exception as e: - logger.exception("Scan failed") - job.status = "error" - job.error = str(e) - job.message = f"Error: {e}" - job.finished_at = time.time() - - -# ── Result Formatters ────────────────────────────────────────────────────── - - -def _format_image_groups( - groups: list[list[tuple[str, float]]], - infos: dict[str, ImageInfo], -) -> list[list[dict]]: - """Convert image duplicate groups to JSON-serializable dicts.""" - result = [] - for group in groups: - formatted = [] - sorted_group = sorted( - group, - key=lambda x: ( - infos.get( - x[0], - ImageInfo(path=""), - ).file_size - ), - reverse=True, - ) - for path, similarity in sorted_group: - info = infos.get(path) - if info: - formatted.append( - { - "path": path, - "filename": os.path.basename(path), - "directory": os.path.dirname(path), - "width": info.width, - "height": info.height, - "resolution": f"{info.width}x{info.height}", - "format": info.format or "Unknown", - "file_size": info.file_size, - "file_size_formatted": format_file_size( - info.file_size, - ), - "similarity": round(similarity * 100, 1), - "type": "image", - } - ) - if len(formatted) > 1: - result.append(formatted) - return result - - -def _format_video_groups( - groups: list[list[tuple[str, float]]], - infos: dict[str, VideoInfo], -) -> list[list[dict]]: - """Convert video duplicate groups to JSON-serializable dicts.""" - result = [] - for group in groups: - formatted = [] - sorted_group = sorted( - group, - key=lambda x: ( - infos.get( - x[0], - VideoInfo(path=""), - ).file_size - ), - reverse=True, - ) - for path, similarity in sorted_group: - info = infos.get(path) - if info: - formatted.append( - { - "path": path, - "filename": os.path.basename(path), - "directory": os.path.dirname(path), - "width": info.width, - "height": info.height, - "resolution": f"{info.width}x{info.height}", - "duration": info.duration, - "duration_formatted": format_duration( 
- info.duration, - ), - "fps": round(info.fps, 1), - "file_size": info.file_size, - "file_size_formatted": format_file_size( - info.file_size, - ), - "similarity": round(similarity * 100, 1), - "type": "video", - } - ) - if len(formatted) > 1: - result.append(formatted) - return result - - -def _calculate_space_savings(job: ScanJob) -> int: - """Sum up file sizes for all but the largest file in each group.""" - total = 0 - for group in job.image_groups + job.video_groups: - sizes = [item["file_size"] for item in group] - if len(sizes) > 1: - total += sum(sorted(sizes)[:-1]) - return total diff --git a/src/morphic/dupfinder/videos.py b/src/morphic/dupfinder/videos.py deleted file mode 100644 index 09a1ec4..0000000 --- a/src/morphic/dupfinder/videos.py +++ /dev/null @@ -1,445 +0,0 @@ -""" -Video Duplicate Finder module. - -Detects duplicate videos based on content similarity using perceptual hashing -of extracted frames. -""" - -from __future__ import annotations - -import hashlib -import logging -import os -from concurrent.futures import ThreadPoolExecutor, as_completed -from dataclasses import dataclass, field - -import cv2 -import imagehash -import numpy as np -from PIL import Image -from tqdm import tqdm - -from morphic.shared.constants import ( - DEFAULT_HASH_SIZE, - DEFAULT_NUM_FRAMES, - DEFAULT_NUM_WORKERS, - DEFAULT_VIDEO_THRESHOLD, - EXCLUDED_FOLDERS, - VIDEO_EXTENSIONS, -) -from morphic.shared.utils import ( - find_files_by_extension, - suppress_stderr, -) - -logger = logging.getLogger(__name__) - -# Lazy import GPU accelerator -_gpu_available: bool | None = None -_get_accelerator = None -_compute_similarity_matrix_gpu = None - - -def _init_gpu() -> bool: - """Initialize GPU module lazily.""" - global _gpu_available, _get_accelerator, _compute_similarity_matrix_gpu - if _gpu_available is None: - try: - from morphic.dupfinder.accelerator import ( - compute_similarity_matrix_gpu, - get_accelerator, - ) - - _get_accelerator = get_accelerator - 
_compute_similarity_matrix_gpu = compute_similarity_matrix_gpu - _gpu_available = True - except ImportError: - _gpu_available = False - return _gpu_available - - -@dataclass -class VideoInfo: - """Stores information about a video file.""" - - path: str - duration: float = 0.0 - fps: float = 0.0 - frame_count: int = 0 - width: int = 0 - height: int = 0 - file_size: int = 0 - frame_hashes: list[str] = field(default_factory=list) - average_hash: str | None = None - - def to_dict(self) -> dict: - """Convert to dictionary for JSON serialization.""" - return { - "path": self.path, - "duration": self.duration, - "fps": self.fps, - "frame_count": self.frame_count, - "width": self.width, - "height": self.height, - "file_size": self.file_size, - "average_hash": self.average_hash, - } - - -class VideoHasher: - """Handles video frame extraction and perceptual hashing.""" - - def __init__( - self, - num_frames: int = DEFAULT_NUM_FRAMES, - hash_size: int = DEFAULT_HASH_SIZE, - ) -> None: - self.num_frames = num_frames - self.hash_size = hash_size - - def extract_frames( - self, - video_path: str, - ) -> tuple[list[np.ndarray], VideoInfo]: - """Extract frames from a video at regular intervals.""" - video_info = VideoInfo(path=video_path) - frames: list[np.ndarray] = [] - - try: - video_info.file_size = os.path.getsize(video_path) - - with suppress_stderr(): - cap = cv2.VideoCapture(video_path) - - if not cap.isOpened(): - logger.warning("Could not open video: %s", video_path) - return frames, video_info - - video_info.fps = cap.get(cv2.CAP_PROP_FPS) or 30.0 - video_info.frame_count = int( - cap.get(cv2.CAP_PROP_FRAME_COUNT), - ) - video_info.width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - video_info.height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - - if video_info.frame_count > 0 and video_info.fps > 0: - video_info.duration = video_info.frame_count / video_info.fps - - if video_info.frame_count <= 0: - logger.warning( - "Could not determine frame count: %s", - video_path, - ) 
- cap.release() - return frames, video_info - - start_frame = int(video_info.frame_count * 0.05) - end_frame = int(video_info.frame_count * 0.95) - - if end_frame <= start_frame: - start_frame = 0 - end_frame = video_info.frame_count - 1 - - frame_interval = max( - 1, - (end_frame - start_frame) // (self.num_frames + 1), - ) - frame_indices = [ - start_frame + (i + 1) * frame_interval - for i in range(self.num_frames) - ] - - for frame_idx in frame_indices: - if frame_idx >= video_info.frame_count: - continue - - cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) - with suppress_stderr(): - ret, frame = cap.read() - - if ret and frame is not None: - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - frames.append(frame_rgb) - - cap.release() - - except Exception as e: - logger.error("Error processing video %s: %s", video_path, e) - - return frames, video_info - - def compute_frame_hash(self, frame: np.ndarray) -> str: - """Compute perceptual hash for a single frame.""" - try: - img = Image.fromarray(frame) - phash = imagehash.phash(img, hash_size=self.hash_size) - return str(phash) - except Exception as e: - logger.error("Error computing hash: %s", e) - return "" - - def compute_video_hashes(self, video_path: str) -> VideoInfo: - """Compute perceptual hashes for a video.""" - frames, video_info = self.extract_frames(video_path) - - if not frames: - return video_info - - for frame in frames: - frame_hash = self.compute_frame_hash(frame) - if frame_hash: - video_info.frame_hashes.append(frame_hash) - - if video_info.frame_hashes: - combined = "".join(video_info.frame_hashes) - video_info.average_hash = hashlib.md5( - combined.encode(), - ).hexdigest() - - return video_info - - -class VideoDuplicateFinder: - """Finds duplicate videos based on perceptual hash similarity.""" - - def __init__( - self, - similarity_threshold: float = DEFAULT_VIDEO_THRESHOLD, - num_frames: int = DEFAULT_NUM_FRAMES, - hash_size: int = DEFAULT_HASH_SIZE, - num_workers: int = DEFAULT_NUM_WORKERS, 
- use_gpu: bool = True, - ) -> None: - self.similarity_threshold = similarity_threshold - self.num_workers = num_workers - self.hash_size = hash_size - self.hasher = VideoHasher( - num_frames=num_frames, - hash_size=hash_size, - ) - self.video_infos: dict[str, VideoInfo] = {} - - self.use_gpu = use_gpu and _init_gpu() - self.accelerator = None - if self.use_gpu and _get_accelerator is not None: - try: - self.accelerator = _get_accelerator() - logger.info( - "GPU acceleration enabled: %s", - self.accelerator.get_backend_name(), - ) - except Exception as e: - logger.warning("GPU acceleration not available: %s", e) - self.use_gpu = False - - def find_videos(self, folder: str) -> list[str]: - """Find all video files in a folder recursively.""" - return find_files_by_extension( - folder, - VIDEO_EXTENSIONS, - EXCLUDED_FOLDERS, - ) - - def process_videos( - self, - video_files: list[str], - ) -> dict[str, VideoInfo]: - """Process all videos and compute their hashes.""" - logger.info("Processing videos and computing hashes...") - - with ThreadPoolExecutor(max_workers=self.num_workers) as executor: - futures = { - executor.submit(self.hasher.compute_video_hashes, vf): vf - for vf in video_files - } - - for future in tqdm( - as_completed(futures), - total=len(futures), - desc="Processing", - ): - video_path = futures[future] - try: - video_info = future.result() - if video_info.frame_hashes: - self.video_infos[video_path] = video_info - except Exception as e: - logger.error( - "Error processing %s: %s", - video_path, - e, - ) - - logger.info( - "Successfully processed %d videos", - len(self.video_infos), - ) - return self.video_infos - - def compute_similarity( - self, - info1: VideoInfo, - info2: VideoInfo, - ) -> float: - """Compute similarity between two videos.""" - if not info1.frame_hashes or not info2.frame_hashes: - return 0.0 - - similarities: list[float] = [] - - for hash1 in info1.frame_hashes: - best_sim = 0.0 - h1 = imagehash.hex_to_hash(hash1) - - for hash2 in 
info2.frame_hashes: - h2 = imagehash.hex_to_hash(hash2) - distance = h1 - h2 - max_distance = len(h1.hash.flatten()) * len( - h1.hash.flatten(), - ) - similarity = 1 - (distance / max_distance) - best_sim = max(best_sim, similarity) - - similarities.append(best_sim) - - return sum(similarities) / len(similarities) if similarities else 0.0 - - def find_duplicates( - self, - ) -> list[list[tuple[str, float]]]: - """Find groups of duplicate videos.""" - logger.info("Finding duplicate videos...") - - video_paths = list(self.video_infos.keys()) - n = len(video_paths) - - if self.use_gpu and n > 1: - return self._find_duplicates_gpu(video_paths) - return self._find_duplicates_cpu(video_paths) - - def _find_duplicates_gpu( - self, - video_paths: list[str], - ) -> list[list[tuple[str, float]]]: - """Find duplicates using GPU-accelerated frame hash comparison.""" - n = len(video_paths) - logger.info( - "Computing video similarities using GPU for %d videos...", - n, - ) - - combined_hashes: list[str] = [] - valid_paths: list[str] = [] - - for path in video_paths: - info = self.video_infos[path] - if info.frame_hashes: - combined_hashes.append(info.frame_hashes[0]) - valid_paths.append(path) - - if len(combined_hashes) < 2: - return [] - - try: - if _compute_similarity_matrix_gpu is None: - raise RuntimeError("GPU not initialized") - sim_matrix = _compute_similarity_matrix_gpu( - combined_hashes, - self.hash_size, - ) - except Exception as e: - logger.warning( - "GPU computation failed, falling back to CPU: %s", - e, - ) - return self._find_duplicates_cpu(video_paths) - - duplicate_groups: list[list[tuple[str, float]]] = [] - assigned: set[int] = set() - - for i in range(len(valid_paths)): - if i in assigned: - continue - - pre_threshold = max( - 0.5, - self.similarity_threshold - 0.2, - ) - candidate_indices = np.where( - sim_matrix[i] >= pre_threshold, - )[0] - - if len(candidate_indices) <= 1: - continue - - current_group: list[tuple[str, float]] = [ - (valid_paths[i], 
1.0), - ] - - for j in candidate_indices: - if j <= i or j in assigned: - continue - - info1 = self.video_infos[valid_paths[i]] - info2 = self.video_infos[valid_paths[j]] - similarity = self.compute_similarity(info1, info2) - - if similarity >= self.similarity_threshold: - current_group.append((valid_paths[j], similarity)) - assigned.add(j) - - if len(current_group) > 1: - assigned.add(i) - duplicate_groups.append(current_group) - - logger.info( - "Found %d groups of duplicates", - len(duplicate_groups), - ) - return duplicate_groups - - def _find_duplicates_cpu( - self, - video_paths: list[str], - ) -> list[list[tuple[str, float]]]: - """Find duplicates using CPU-based comparison.""" - n = len(video_paths) - assigned: set[str] = set() - duplicate_groups: list[list[tuple[str, float]]] = [] - - total_comparisons = n * (n - 1) // 2 - - with tqdm(total=total_comparisons, desc="Comparing") as pbar: - for i in range(n): - if video_paths[i] in assigned: - pbar.update(n - i - 1) - continue - - current_group: list[tuple[str, float]] = [ - (video_paths[i], 1.0), - ] - - for j in range(i + 1, n): - pbar.update(1) - if video_paths[j] in assigned: - continue - - info1 = self.video_infos[video_paths[i]] - info2 = self.video_infos[video_paths[j]] - similarity = self.compute_similarity(info1, info2) - - if similarity >= self.similarity_threshold: - current_group.append( - (video_paths[j], similarity), - ) - assigned.add(video_paths[j]) - - if len(current_group) > 1: - assigned.add(video_paths[i]) - duplicate_groups.append(current_group) - - logger.info( - "Found %d groups of duplicates", - len(duplicate_groups), - ) - return duplicate_groups diff --git a/src/morphic/frontend/__init__.py b/src/morphic/frontend/__init__.py deleted file mode 100644 index d37faeb..0000000 --- a/src/morphic/frontend/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -morphic.frontend — Unified web interface for all morphic modules. 
- -Usage:: - - morphic - python -m morphic.frontend -""" - -from morphic.frontend.app import create_app, main - -__all__ = [ - "create_app", - "main", -] diff --git a/src/morphic/frontend/__main__.py b/src/morphic/frontend/__main__.py deleted file mode 100644 index e2ab11c..0000000 --- a/src/morphic/frontend/__main__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Allow running with ``python -m morphic.frontend``.""" - -from morphic.frontend.app import main - -main() diff --git a/src/morphic/frontend/app.py b/src/morphic/frontend/app.py deleted file mode 100644 index e160ff7..0000000 --- a/src/morphic/frontend/app.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -Flask application factory and CLI entry-point for morphic. -""" - -from __future__ import annotations - -import logging -import os -import webbrowser - -from flask import Flask - - -def create_app(initial_folder: str | None = None) -> Flask: - """Create and configure the Flask application. - - Parameters - ---------- - initial_folder : str, optional - Folder path to pre-populate in the UI. 
- """ - app = Flask( - __name__, - template_folder=os.path.join(os.path.dirname(__file__), "templates"), - static_folder=os.path.join(os.path.dirname(__file__), "static"), - ) - app.secret_key = os.urandom(24) - - # Store initial folder so templates/routes can access it - app.config["INITIAL_FOLDER"] = initial_folder or "" - - # Disable caching during development - @app.after_request - def _no_cache(response): - response.headers["Cache-Control"] = ( - "no-cache, no-store, must-revalidate" - ) - response.headers["Pragma"] = "no-cache" - response.headers["Expires"] = "0" - return response - - # Register blueprints - from morphic.frontend.routes_shared import bp as shared_bp - from morphic.frontend.routes_converter import bp as converter_bp - from morphic.frontend.routes_dupfinder import bp as dupfinder_bp - from morphic.frontend.routes_inspector import bp as inspector_bp - from morphic.frontend.routes_resizer import bp as resizer_bp - from morphic.frontend.routes_organizer import bp as organizer_bp - - app.register_blueprint(shared_bp) - app.register_blueprint(converter_bp, url_prefix="/api/converter") - app.register_blueprint(dupfinder_bp, url_prefix="/api/dupfinder") - app.register_blueprint(inspector_bp, url_prefix="/api/inspector") - app.register_blueprint(resizer_bp, url_prefix="/api/resizer") - app.register_blueprint(organizer_bp, url_prefix="/api/organizer") - - return app - - -def main() -> None: - """CLI entry-point: ``morphic``.""" - import argparse - - parser = argparse.ArgumentParser( - prog="morphic", - description="Morphic — media format converter & duplicate finder", - ) - parser.add_argument( - "--host", - default="127.0.0.1", - help="Host to bind to (default: 127.0.0.1)", - ) - parser.add_argument( - "--port", - type=int, - default=8000, - help="Port to listen on (default: 8000)", - ) - parser.add_argument( - "--folder", - default=None, - help="Pre-populate the folder path in the UI", - ) - parser.add_argument( - "--debug", - action="store_true", - 
help="Enable Flask debug mode", - ) - parser.add_argument( - "--no-browser", - action="store_true", - help="Don't auto-open the browser", - ) - args = parser.parse_args() - - logging.basicConfig( - level=logging.DEBUG if args.debug else logging.INFO, - format="%(asctime)s - %(levelname)s - %(message)s", - ) - - app = create_app(initial_folder=args.folder) - - url = f"http://{args.host}:{args.port}" - print("\n Morphic") - print(f" {'─' * 34}") - print(f" Open in browser: {url}") - print(" Press Ctrl+C to stop\n") - - browser_opened = False - if not args.no_browser and not args.debug: - try: - browser_opened = webbrowser.open(url) - if not browser_opened: - # fallback to a platform-specific browser if available - for browser_name in [ - "windows-default", - "macosx", - "gnome", - "kde", - "safari", - "firefox", - "chrome", - ]: - try: - if webbrowser.get(browser_name).open(url): - browser_opened = True - break - except Exception: - continue - except Exception as exc: - logging.debug("Could not open browser: %s", exc) - - if not browser_opened: - print( - " Warning: Could not automatically open the browser. Please open the URL manually:" - ) - print(f" {url}\n") - - try: - app.run(host=args.host, port=args.port, debug=args.debug) - except OSError as exc: - if "Address already in use" in str(exc): - print( - f"\n Error: Port {args.port} is already in use.\n" - f" Try: morphic --port {args.port + 1}\n" - ) - else: - raise - - -if __name__ == "__main__": - main() diff --git a/src/morphic/frontend/routes_converter.py b/src/morphic/frontend/routes_converter.py deleted file mode 100644 index 1fc8f82..0000000 --- a/src/morphic/frontend/routes_converter.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Converter-tab API routes — scan, formats, convert, delete, progress. 
-""" - -from __future__ import annotations - -import os -import threading -import time -import uuid - -from flask import Blueprint, jsonify, request - -from morphic.converter.constants import IMAGE_CONVERSIONS, VIDEO_CONVERSIONS -from morphic.converter.converter import convert_file -from morphic.converter.scanner import scan_folder -from morphic.shared.utils import format_file_size - -bp = Blueprint("converter", __name__) - -# ── In-memory progress store ─────────────────────────────────────────────── - -_conversion_jobs: dict[str, dict] = {} -_jobs_lock = threading.Lock() - - -# ── Scan ─────────────────────────────────────────────────────────────────── - - -@bp.route("/scan", methods=["POST"]) -def api_scan(): - """Scan a folder for convertible media files.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - folder = data.get("folder", "").strip() - if not folder or not os.path.isdir(folder): - return jsonify({"error": f"Invalid folder: {folder}"}), 400 - - include_sub = data.get("include_subfolders", True) - filter_type = data.get("filter_type", "both") - if filter_type not in ("images", "videos", "both"): - filter_type = "both" - - result = scan_folder( - folder, - include_subfolders=include_sub, - filter_type=filter_type, - ) - return jsonify(result) - - -# ── Formats ──────────────────────────────────────────────────────────────── - - -@bp.route("/formats") -def api_formats(): - """Return all conversion mappings (source -> targets).""" - return jsonify( - { - "image": IMAGE_CONVERSIONS, - "video": VIDEO_CONVERSIONS, - } - ) - - -# ── Convert ──────────────────────────────────────────────────────────────── - - -@bp.route("/convert", methods=["POST"]) -def api_convert(): - """Convert files and return progress job id.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - files = data.get("files", []) - target_ext = data.get("target_ext", "").strip() - delete_original = 
bool(data.get("delete_original", False)) - av1_crf = data.get("av1_crf") - - # Safety check: only allow source deletion if explicitly set true (boolean-like via strict cast). - if "delete_original" in data and data.get("delete_original") not in ( - True, - "true", - "True", - 1, - "1", - ): - delete_original = False - - if av1_crf is not None: - try: - av1_crf = int(av1_crf) - except (TypeError, ValueError): - return jsonify({"error": "av1_crf must be an integer"}), 400 - - if not files or not target_ext: - return jsonify({"error": "files and target_ext required"}), 400 - - job_id = str(uuid.uuid4())[:8] - job = { - "id": job_id, - "status": "running", - "total": len(files), - "completed": 0, - "current_file": "", - "results": [], - "error": None, - } - with _jobs_lock: - _conversion_jobs[job_id] = job - - thread = threading.Thread( - target=_run_conversion, - args=(job, files, target_ext, delete_original, av1_crf), - daemon=True, - ) - thread.start() - return jsonify({"job_id": job_id}), 202 - - -def _run_conversion( - job: dict, - files: list[str], - target_ext: str, - delete_original: bool, - av1_crf: int | None = None, -) -> None: - """Background worker for batch conversion.""" - for i, source in enumerate(files): - job["current_file"] = os.path.basename(source) - try: - original_size = ( - os.path.getsize(source) if os.path.isfile(source) else 0 - ) - dest = convert_file(source, target_ext, av1_crf=av1_crf) - - if not dest or not os.path.isfile(dest): - raise RuntimeError( - "Conversion completed but output file is missing" - ) - - new_size = os.path.getsize(dest) - - result = { - "source": source, - "destination": dest, - "status": "ok", - "original_size": original_size, - "new_size": new_size, - "original_size_fmt": format_file_size(original_size), - "new_size_fmt": format_file_size(new_size), - "source_deleted": False, - } - - # Delete original only if explicitly requested and conversion definitely succeeded. 
- # Avoid deleting source if destination is missing, same as source, or if input was not a real file. - if ( - delete_original - and os.path.isfile(source) - and os.path.isfile(dest) - and os.path.abspath(source) != os.path.abspath(dest) - and new_size > 0 - ): - try: - os.remove(source) - result["source_deleted"] = True - except OSError: - result["source_deleted"] = False - except Exception as e: - result = { - "source": source, - "destination": None, - "status": "error", - "error": str(e), - "source_deleted": False, - } - job["results"].append(result) - job["completed"] = i + 1 - - job["status"] = "done" - job["current_file"] = "" - - -# ── Progress ─────────────────────────────────────────────────────────────── - - -@bp.route("/progress/") -def api_progress(job_id: str): - """Get progress of a conversion job.""" - with _jobs_lock: - job = _conversion_jobs.get(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - return jsonify(job) - - -@bp.route("/progress//poll") -def api_progress_poll(job_id: str): - """Long-poll for conversion progress (blocks up to 10s).""" - with _jobs_lock: - job = _conversion_jobs.get(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - # Lightweight polling — wait until progress changes or timeout - last_completed = int(request.args.get("last", -1)) - deadline = time.time() + 10 - while time.time() < deadline: - if job["completed"] != last_completed or job["status"] == "done": - return jsonify(job) - time.sleep(0.3) - return jsonify(job) - - -# ── Delete (standalone) ─────────────────────────────────────────────────── - - -@bp.route("/delete", methods=["POST"]) -def api_delete(): - """Delete selected files.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - files = data.get("files", []) - if not files: - return jsonify({"error": "No files specified"}), 400 - - results = [] - total_freed = 0 - - for fp in files: - try: - if not os.path.isfile(fp): 
- results.append({"path": fp, "status": "not_found"}) - continue - size = os.path.getsize(fp) - os.remove(fp) - total_freed += size - results.append( - { - "path": fp, - "status": "deleted", - "size_freed": size, - } - ) - except PermissionError: - results.append({"path": fp, "status": "permission_denied"}) - except Exception as e: - results.append({"path": fp, "status": "error", "error": str(e)}) - - return jsonify( - { - "results": results, - "total_freed": total_freed, - "total_freed_formatted": format_file_size(total_freed), - } - ) diff --git a/src/morphic/frontend/routes_dupfinder.py b/src/morphic/frontend/routes_dupfinder.py deleted file mode 100644 index 47124c8..0000000 --- a/src/morphic/frontend/routes_dupfinder.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -Dupfinder-tab API routes — scan, status, results, delete. -""" - -from __future__ import annotations - -import os -import time - -from flask import Blueprint, jsonify, request - -from morphic.shared.constants import ( - DEFAULT_IMAGE_THRESHOLD, - DEFAULT_VIDEO_THRESHOLD, -) -from morphic.shared.utils import format_file_size -from morphic.dupfinder.scanner import get_job, start_job - -bp = Blueprint("dupfinder", __name__) - - -# ── Scan ─────────────────────────────────────────────────────────────────── - - -@bp.route("/scan", methods=["POST"]) -def api_start_scan(): - """Start a new duplicate scan.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - folder = data.get("folder", "").strip() - scan_type = data.get("type", "both") - image_threshold = float( - data.get("image_threshold", DEFAULT_IMAGE_THRESHOLD), - ) - video_threshold = float( - data.get("video_threshold", DEFAULT_VIDEO_THRESHOLD), - ) - - if not folder or not os.path.isdir(folder): - return jsonify({"error": f"Invalid folder: {folder}"}), 400 - if scan_type not in ("images", "videos", "both"): - return jsonify( - { - "error": "type must be images, videos, or both", - } - ), 400 - - job_id = 
start_job( - folder=folder, - scan_type=scan_type, - image_threshold=image_threshold, - video_threshold=video_threshold, - ) - return jsonify({"job_id": job_id}), 202 - - -# ── Status & results ────────────────────────────────────────────────────── - - -@bp.route("/scan//status") -def api_scan_status(job_id: str): - """Get status of a scan job.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - elapsed = 0.0 - if job.started_at: - end = job.finished_at if job.finished_at else time.time() - elapsed = end - job.started_at - - return jsonify( - { - "id": job.id, - "status": job.status, - "progress": job.progress, - "message": job.message, - "error": job.error, - "total_files_found": job.total_files_found, - "total_files_processed": job.total_files_processed, - "elapsed_seconds": round(elapsed, 1), - } - ) - - -@bp.route("/scan//results") -def api_scan_results(job_id: str): - """Get results of a completed scan.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - if job.status not in ("done", "error"): - return jsonify({"error": "Scan not finished yet"}), 409 - - return jsonify( - { - "image_groups": job.image_groups, - "video_groups": job.video_groups, - "space_savings": job.space_savings, - "space_savings_formatted": format_file_size(job.space_savings), - } - ) - - -# ── Delete ───────────────────────────────────────────────────────────────── - - -@bp.route("/delete", methods=["POST"]) -def api_delete_files(): - """Delete selected duplicate files.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - files = data.get("files", []) - if not files: - return jsonify({"error": "No files specified"}), 400 - - results = [] - total_freed = 0 - - for file_path in files: - try: - if not os.path.isfile(file_path): - results.append({"path": file_path, "status": "not_found"}) - continue - file_size = os.path.getsize(file_path) - os.remove(file_path) - 
total_freed += file_size - results.append( - { - "path": file_path, - "status": "deleted", - "size_freed": file_size, - } - ) - except PermissionError: - results.append( - { - "path": file_path, - "status": "permission_denied", - } - ) - except Exception as e: - results.append( - { - "path": file_path, - "status": "error", - "error": str(e), - } - ) - - return jsonify( - { - "results": results, - "total_freed": total_freed, - "total_freed_formatted": format_file_size(total_freed), - } - ) diff --git a/src/morphic/frontend/routes_inspector.py b/src/morphic/frontend/routes_inspector.py deleted file mode 100644 index a1f9a71..0000000 --- a/src/morphic/frontend/routes_inspector.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Inspector-tab API routes — EXIF scan, integrity check, edit, strip. -""" - -from __future__ import annotations - -import os -import time - -from flask import Blueprint, jsonify, request - -from morphic.inspector.scanner import get_job, start_job - -bp = Blueprint("inspector", __name__) - - -# ── Scan (EXIF or Integrity) ────────────────────────────────────────────── - - -@bp.route("/scan", methods=["POST"]) -def api_start_scan(): - """Start a new inspector scan (EXIF or integrity).""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - folder = data.get("folder", "").strip() - mode = data.get("mode", "exif") - - if not folder or not os.path.isdir(folder): - return jsonify({"error": f"Invalid folder: {folder}"}), 400 - if mode not in ("exif", "integrity"): - return jsonify({"error": "mode must be 'exif' or 'integrity'"}), 400 - - job_id = start_job(folder=folder, mode=mode) - return jsonify({"job_id": job_id}), 202 - - -@bp.route("/scan//status") -def api_scan_status(job_id: str): - """Get status of an inspector job.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - elapsed = 0.0 - if job.started_at: - end = job.finished_at if job.finished_at else time.time() - elapsed 
= end - job.started_at - - return jsonify( - { - "id": job.id, - "status": job.status, - "mode": job.mode, - "progress": job.progress, - "message": job.message, - "error": job.error, - "total_files": job.total_files, - "processed_files": job.processed_files, - "elapsed_seconds": round(elapsed, 1), - } - ) - - -@bp.route("/scan//results") -def api_scan_results(job_id: str): - """Get results of a completed inspector job.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - if job.status not in ("done", "error"): - return jsonify({"error": "Scan not finished yet"}), 409 - - return jsonify( - { - "mode": job.mode, - "results": job.results, - "total_files": job.total_files, - } - ) - - -# ── EXIF Edit ────────────────────────────────────────────────────────────── - - -@bp.route("/exif/edit", methods=["POST"]) -def api_exif_edit(): - """Edit EXIF fields on a single file.""" - from morphic.inspector.exif import edit_exif - - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - file_path = data.get("file", "").strip() - updates = data.get("updates", {}) - - if not file_path or not os.path.isfile(file_path): - return jsonify({"error": f"Invalid file: {file_path}"}), 400 - if not updates: - return jsonify({"error": "No updates provided"}), 400 - - try: - edit_exif(file_path, updates) - return jsonify({"status": "ok", "file": file_path}) - except Exception as e: - return jsonify({"error": str(e)}), 500 - - -# ── EXIF Strip ───────────────────────────────────────────────────────────── - - -@bp.route("/exif/strip", methods=["POST"]) -def api_exif_strip(): - """Strip EXIF from one or more files.""" - from morphic.inspector.exif import strip_exif_batch - - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - files = data.get("files", []) - if not files: - return jsonify({"error": "No files specified"}), 400 - - results = strip_exif_batch(files) - 
success_count = sum(1 for r in results.values() if r.get("success")) - return jsonify( - { - "results": results, - "total": len(files), - "success_count": success_count, - } - ) diff --git a/src/morphic/frontend/routes_organizer.py b/src/morphic/frontend/routes_organizer.py deleted file mode 100644 index ea3c43f..0000000 --- a/src/morphic/frontend/routes_organizer.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Organizer-tab API routes — plan, execute, status for sort & rename. -""" - -from __future__ import annotations - -import os -import time - -from flask import Blueprint, jsonify, request - -from morphic.organizer.scanner import execute_job, get_job, start_job - -bp = Blueprint("organizer", __name__) - - -@bp.route("/plan", methods=["POST"]) -def api_plan(): - """Create a sort/rename plan (does not execute).""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - folder = data.get("folder", "").strip() - mode = data.get("mode", "sort") - operation = data.get("operation", "copy") - template = data.get("template", "{year}/{month}/{day}") - destination = data.get("destination", "").strip() or None - start_seq = int(data.get("start_seq", 1)) - - if not folder or not os.path.isdir(folder): - return jsonify({"error": f"Invalid folder: {folder}"}), 400 - if mode not in ("sort", "rename"): - return jsonify({"error": "mode must be 'sort' or 'rename'"}), 400 - if operation not in ("move", "copy"): - return jsonify({"error": "operation must be 'move' or 'copy'"}), 400 - - job_id = start_job( - folder=folder, - mode=mode, - operation=operation, - template=template, - destination=destination, - start_seq=start_seq, - ) - return jsonify({"job_id": job_id}), 202 - - -@bp.route("/execute", methods=["POST"]) -def api_execute(): - """Execute a previously planned job.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - job_id = data.get("job_id", "").strip() - if not job_id: - return 
jsonify({"error": "job_id required"}), 400 - - ok = execute_job(job_id) - if not ok: - return jsonify( - { - "error": "Job not found or not in 'planned' state", - } - ), 404 - - return jsonify({"status": "executing", "job_id": job_id}), 202 - - -@bp.route("/status/") -def api_status(job_id: str): - """Get status of an organizer job.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - elapsed = 0.0 - if job.started_at: - end = job.finished_at if job.finished_at else time.time() - elapsed = end - job.started_at - - response = { - "id": job.id, - "status": job.status, - "phase": job.phase, - "mode": job.mode, - "operation": job.operation, - "progress": job.progress, - "message": job.message, - "error": job.error, - "elapsed_seconds": round(elapsed, 1), - } - - # Include plan preview when planning is done - if job.phase in ("planned", "executing", "done"): - response["plan"] = job.plan - response["plan_count"] = len(job.plan) - conflicts = sum(1 for e in job.plan if e.get("conflict")) - response["conflicts"] = conflicts - - # Include execution results when done - if job.phase == "done" and job.execution_result: - response["execution"] = job.execution_result - - return jsonify(response) diff --git a/src/morphic/frontend/routes_resizer.py b/src/morphic/frontend/routes_resizer.py deleted file mode 100644 index b3f62f3..0000000 --- a/src/morphic/frontend/routes_resizer.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -Resizer-tab API routes — scan, status, results. 
-""" - -from __future__ import annotations - -import os -import time - -from flask import Blueprint, jsonify, request - -from morphic.resizer.operations import RESIZE_MODES -from morphic.resizer.scanner import get_job, start_job - -bp = Blueprint("resizer", __name__) - - -@bp.route("/scan", methods=["POST"]) -def api_start_resize(): - """Start a batch resize job.""" - data = request.get_json() - if not data: - return jsonify({"error": "No JSON body"}), 400 - - folder = data.get("folder", "").strip() - width = data.get("width", 0) - height = data.get("height", 0) - mode = data.get("mode", "fit") - bg_color = data.get("bg_color", "#000000") - quality = data.get("quality", 90) - output_folder = data.get("output_folder", "").strip() or None - - if not folder or not os.path.isdir(folder): - return jsonify({"error": f"Invalid folder: {folder}"}), 400 - - try: - width = int(width) - height = int(height) - except (TypeError, ValueError): - return jsonify({"error": "width and height must be integers"}), 400 - - if width <= 0 or height <= 0: - return jsonify({"error": "width and height must be positive"}), 400 - - if mode not in RESIZE_MODES: - return jsonify( - { - "error": f"mode must be one of {RESIZE_MODES}", - } - ), 400 - - job_id = start_job( - folder=folder, - width=width, - height=height, - mode=mode, - output_folder=output_folder, - bg_color=bg_color, - quality=quality, - ) - return jsonify({"job_id": job_id}), 202 - - -@bp.route("/scan//status") -def api_scan_status(job_id: str): - """Get status of a resize job.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - elapsed = 0.0 - if job.started_at: - end = job.finished_at if job.finished_at else time.time() - elapsed = end - job.started_at - - return jsonify( - { - "id": job.id, - "status": job.status, - "progress": job.progress, - "message": job.message, - "error": job.error, - "total_files": job.total_files, - "processed_files": job.processed_files, - "elapsed_seconds": 
round(elapsed, 1), - } - ) - - -@bp.route("/scan//results") -def api_scan_results(job_id: str): - """Get results of a completed resize job.""" - job = get_job(job_id) - if not job: - return jsonify({"error": "Job not found"}), 404 - - if job.status not in ("done", "error"): - return jsonify({"error": "Job not finished yet"}), 409 - - return jsonify( - { - "results": job.results, - "errors": job.errors, - "total_files": job.total_files, - "processed_files": job.processed_files, - } - ) diff --git a/src/morphic/frontend/routes_shared.py b/src/morphic/frontend/routes_shared.py deleted file mode 100644 index d4edd32..0000000 --- a/src/morphic/frontend/routes_shared.py +++ /dev/null @@ -1,295 +0,0 @@ -""" -Shared routes — index page, folder browsing, thumbnail & media serving. -""" - -from __future__ import annotations - -import importlib -import mimetypes -import os -import shutil -import subprocess -import sys -from pathlib import Path - -from flask import ( - Blueprint, - abort, - current_app, - jsonify, - render_template, - request, - send_file, -) - -from morphic.shared.constants import IMAGE_EXTENSIONS, VIDEO_EXTENSIONS -from morphic.shared.file_browser import open_native_folder_dialog -from morphic.shared.thumbnails import ( - generate_image_thumbnail, - generate_video_thumbnail, -) -from morphic.shared.utils import normalise_ext - -bp = Blueprint("shared", __name__) - - -# ── Page ──────────────────────────────────────────────────────────────────── - - -@bp.route("/") -def index(): - """Serve the single-page application.""" - return render_template( - "index.html", - initial_folder=current_app.config.get("INITIAL_FOLDER", ""), - ) - - -# ── Directory browsing ───────────────────────────────────────────────────── - - -@bp.route("/api/browse") -def browse_directory(): - """List directories for the in-page folder browser.""" - path = request.args.get("path", str(Path.home())) - try: - path = os.path.expanduser(path) - path = os.path.abspath(path) - - if not 
os.path.isdir(path): - return jsonify({"error": "Not a directory"}), 400 - - entries = [] - try: - for entry in sorted( - os.scandir(path), - key=lambda e: e.name.lower(), - ): - if entry.name.startswith("."): - continue - if entry.is_dir(follow_symlinks=False): - entries.append( - { - "name": entry.name, - "path": entry.path, - "type": "directory", - } - ) - except PermissionError: - pass - - parent = os.path.dirname(path) - return jsonify( - { - "current": path, - "parent": parent if parent != path else None, - "entries": entries, - } - ) - except Exception as e: - return jsonify({"error": str(e)}), 500 - - -@bp.route("/api/browse/native", methods=["POST"]) -def native_folder_dialog(): - """Open the OS-native folder picker dialog.""" - data = request.get_json(silent=True) or {} - initial_dir = data.get("initial_dir", str(Path.home())) - folder = open_native_folder_dialog(initial_dir) - if folder: - return jsonify({"folder": folder}) - return jsonify( - { - "folder": None, - "message": "Dialog cancelled or unavailable", - } - ), 200 - - -@bp.route("/api/system_info") -def api_system_info(): - """Return diagnostic info about GPU/cuda/ffmpeg availability.""" - info = { - "python_version": sys.version, - "torch": { - "installed": False, - "version": None, - "cuda_available": False, - "cuda_device_count": 0, - "device_names": [], - }, - "cupy": { - "installed": False, - "version": None, - "device_count": 0, - }, - "pyopencl": { - "installed": False, - "platforms": [], - }, - "ffmpeg": { - "installed": shutil.which("ffmpeg") is not None, - "hwaccels": [], - "encoders": [], - "nvenc_available": False, - }, - "duplicity_acceleration": { - "backend": "unknown", - "gpu_available": False, - }, - } - - try: - torch = importlib.import_module("torch") - - info["torch"].update( - { - "installed": True, - "version": getattr(torch, "__version__", None), - "cuda_available": bool( - getattr( - getattr(torch, "cuda", None), - "is_available", - lambda: False, - )() - ), - 
"cuda_device_count": torch.cuda.device_count() - if getattr( - getattr(torch, "cuda", None), "is_available", lambda: False - )() - else 0, - "device_names": [ - torch.cuda.get_device_name(i) - for i in range(torch.cuda.device_count()) - ] - if getattr( - getattr(torch, "cuda", None), "is_available", lambda: False - )() - else [], - } - ) - except Exception: - pass - - try: - cp = importlib.import_module("cupy") - - info["cupy"].update( - { - "installed": True, - "version": getattr(cp, "__version__", None), - "device_count": cp.cuda.runtime.getDeviceCount(), - } - ) - except Exception: - pass - - try: - cl = importlib.import_module("pyopencl") - - platforms = [] - for plat in cl.get_platforms(): - devices = [ - dev.name - for dev in plat.get_devices(device_type=cl.device_type.GPU) - ] - platforms.append( - {"name": plat.name, "vendor": plat.vendor, "devices": devices} - ) - info["pyopencl"].update({"installed": True, "platforms": platforms}) - except Exception: - pass - - if info["ffmpeg"]["installed"]: - try: - hw = subprocess.check_output( - ["ffmpeg", "-hide_banner", "-hwaccels"], - stderr=subprocess.STDOUT, - text=True, - timeout=10, - ) - info["ffmpeg"]["hwaccels"] = [ - line.strip() - for line in hw.splitlines() - if line.strip() and line.strip().isdigit() is False - ] - except Exception: - pass - - try: - enc = subprocess.check_output( - ["ffmpeg", "-hide_banner", "-encoders"], - stderr=subprocess.STDOUT, - text=True, - timeout=15, - ) - lines = [ - line.strip() - for line in enc.splitlines() - if line.strip() and line.strip()[0] in ("V", "A") - ] - info["ffmpeg"]["encoders"] = lines - nvenc = [ - line - for line in lines - if "nvenc" in line - or "h264_nvenc" in line - or "hevc_nvenc" in line - ] - info["ffmpeg"]["nvenc_available"] = bool(nvenc) - except Exception: - pass - - try: - from morphic.dupfinder.accelerator import get_accelerator - - acc = get_accelerator() - info["duplicity_acceleration"]["backend"] = acc.get_backend_name() - 
info["duplicity_acceleration"]["gpu_available"] = acc.is_gpu_available - except Exception: - pass - - return jsonify(info) - - -# ── Thumbnails & media ───────────────────────────────────────────────────── - - -@bp.route("/api/thumbnail") -def serve_thumbnail(): - """Generate and serve a thumbnail for a media file.""" - file_path = request.args.get("path", "") - if not file_path or not os.path.isfile(file_path): - abort(404) - - ext = normalise_ext(os.path.splitext(file_path)[1]) - - try: - if ext in IMAGE_EXTENSIONS: - buf = generate_image_thumbnail(file_path) - return send_file(buf, mimetype="image/jpeg") - elif ext in VIDEO_EXTENSIONS: - buf = generate_video_thumbnail(file_path) - if buf: - return send_file(buf, mimetype="image/jpeg") - abort(404) - else: - abort(403) - except Exception: - abort(500) - - -@bp.route("/api/media") -def serve_media(): - """Serve a media file for full-size preview.""" - file_path = request.args.get("path", "") - if not file_path or not os.path.isfile(file_path): - abort(404) - - ext = normalise_ext(os.path.splitext(file_path)[1]) - allowed = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS - if ext not in allowed: - abort(403) - - mimetype = mimetypes.guess_type(file_path)[0] or "application/octet-stream" - return send_file(file_path, mimetype=mimetype) diff --git a/src/morphic/inspector/__init__.py b/src/morphic/inspector/__init__.py deleted file mode 100644 index aa32505..0000000 --- a/src/morphic/inspector/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -morphic.inspector - EXIF metadata inspection and file integrity checking. - -Provides EXIF read/edit/strip operations for images, and integrity -validation for both images and videos. 
-""" - -from morphic.inspector.exif import ( - edit_exif, - read_exif, - strip_exif, - strip_exif_batch, -) -from morphic.inspector.integrity import ( - check_files, - check_image, - check_video, -) -from morphic.inspector.scanner import get_job, start_job - -__all__ = [ - "check_files", - "check_image", - "check_video", - "edit_exif", - "get_job", - "read_exif", - "start_job", - "strip_exif", - "strip_exif_batch", -] diff --git a/src/morphic/inspector/exif.py b/src/morphic/inspector/exif.py deleted file mode 100644 index 1fa2fa9..0000000 --- a/src/morphic/inspector/exif.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -EXIF metadata operations — read, edit, and strip. - -Uses piexif for read/write and Pillow's ExifTags for human-readable -tag name mapping. -""" - -from __future__ import annotations - -import logging -import os -from typing import Any - -import piexif -from PIL import Image -from PIL.ExifTags import GPSTAGS, TAGS - -logger = logging.getLogger(__name__) - -# ── Tag name mapping ─────────────────────────────────────────────────────── - -# Build reverse lookup: human name → (ifd_name, tag_id) -_NAME_TO_TAG: dict[str, tuple[str, int]] = {} -_IFD_KEYS = { - "0th": piexif.ImageIFD, - "Exif": piexif.ExifIFD, - "GPS": piexif.GPSIFD, - "1st": piexif.ImageIFD, -} - -for _ifd_name, _ifd_module in [ - ("0th", piexif.ImageIFD), - ("Exif", piexif.ExifIFD), - ("GPS", piexif.GPSIFD), -]: - for _attr in dir(_ifd_module): - if _attr.startswith("_"): - continue - _tag_id = getattr(_ifd_module, _attr) - if isinstance(_tag_id, int): - _human = TAGS.get(_tag_id, _attr) - _NAME_TO_TAG[_human] = (_ifd_name, _tag_id) - - -def _decode_value(value: Any) -> Any: - """Decode piexif byte values to strings where possible.""" - if isinstance(value, bytes): - try: - return value.decode("utf-8").rstrip("\x00") - except (UnicodeDecodeError, AttributeError): - return value.hex() - if isinstance(value, tuple) and len(value) == 2: - # Rational number (numerator, denominator) - num, den = value 
- if isinstance(num, int) and isinstance(den, int) and den != 0: - return round(num / den, 6) - return value - - -def _gps_to_decimal( - coords: tuple[tuple[int, int], ...], - ref: str, -) -> float: - """Convert GPS DMS (degrees/minutes/seconds) to decimal degrees.""" - degrees = coords[0][0] / coords[0][1] if coords[0][1] else 0 - minutes = coords[1][0] / coords[1][1] if coords[1][1] else 0 - seconds = coords[2][0] / coords[2][1] if coords[2][1] else 0 - decimal = degrees + minutes / 60 + seconds / 3600 - if ref in ("S", "W"): - decimal = -decimal - return round(decimal, 6) - - -# ── Public API ───────────────────────────────────────────────────────────── - - -def read_exif(path: str) -> dict[str, Any]: - """Read EXIF metadata from an image file. - - Parameters - ---------- - path : str - Path to the image file. - - Returns - ------- - dict - Flat dictionary of human-readable tag names to values. - Includes ``_gps_lat`` and ``_gps_lng`` if GPS data is present. - """ - if not os.path.isfile(path): - raise FileNotFoundError(f"File not found: {path}") - - try: - exif_dict = piexif.load(path) - except piexif.InvalidImageDataError: - # File exists but has no EXIF - return {} - except Exception: - # Try via Pillow as fallback - try: - img = Image.open(path) - exif_bytes = img.info.get("exif", b"") - if not exif_bytes: - return {} - exif_dict = piexif.load(exif_bytes) - except Exception: - return {} - - result: dict[str, Any] = {} - - for ifd_name in ("0th", "Exif", "1st"): - ifd_data = exif_dict.get(ifd_name, {}) - if not ifd_data: - continue - for tag_id, value in ifd_data.items(): - tag_name = TAGS.get(tag_id, f"Tag_{tag_id}") - result[tag_name] = _decode_value(value) - - # GPS data — special handling for lat/lng - gps_data = exif_dict.get("GPS", {}) - if gps_data: - for tag_id, value in gps_data.items(): - tag_name = GPSTAGS.get(tag_id, f"GPSTag_{tag_id}") - result[tag_name] = _decode_value(value) - - # Compute decimal coordinates - lat_data = 
gps_data.get(piexif.GPSIFD.GPSLatitude) - lat_ref = gps_data.get(piexif.GPSIFD.GPSLatitudeRef, b"N") - lng_data = gps_data.get(piexif.GPSIFD.GPSLongitude) - lng_ref = gps_data.get(piexif.GPSIFD.GPSLongitudeRef, b"E") - - if lat_data and lng_data: - if isinstance(lat_ref, bytes): - lat_ref = lat_ref.decode() - if isinstance(lng_ref, bytes): - lng_ref = lng_ref.decode() - result["_gps_lat"] = _gps_to_decimal(lat_data, lat_ref) - result["_gps_lng"] = _gps_to_decimal(lng_data, lng_ref) - - return result - - -def edit_exif(path: str, updates: dict[str, Any]) -> None: - """Edit EXIF fields on an image file in-place. - - Parameters - ---------- - path : str - Path to the image file. - updates : dict - Mapping of human-readable tag names to new values. - Example: ``{"Artist": "Alice", "Copyright": "2026"}`` - """ - if not os.path.isfile(path): - raise FileNotFoundError(f"File not found: {path}") - - try: - exif_dict = piexif.load(path) - except Exception: - # Start with empty EXIF - exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}} - - for name, value in updates.items(): - tag_info = _NAME_TO_TAG.get(name) - if not tag_info: - logger.warning("Unknown EXIF tag name: %s", name) - continue - - ifd_name, tag_id = tag_info - # Encode string values to bytes - if isinstance(value, str): - value = value.encode("utf-8") - - if ifd_name not in exif_dict or exif_dict[ifd_name] is None: - exif_dict[ifd_name] = {} - exif_dict[ifd_name][tag_id] = value - - exif_bytes = piexif.dump(exif_dict) - piexif.insert(exif_bytes, path) - - -def strip_exif(path: str) -> None: - """Remove all EXIF metadata from an image file. - - Parameters - ---------- - path : str - Path to the image file. - """ - if not os.path.isfile(path): - raise FileNotFoundError(f"File not found: {path}") - - piexif.remove(path) - - -def strip_exif_batch( - paths: list[str], -) -> dict[str, dict[str, str | bool]]: - """Strip EXIF from multiple files. 
- - Parameters - ---------- - paths : list[str] - List of image file paths. - - Returns - ------- - dict - Per-file results: ``{"path": {"success": True/False, "error": ...}}`` - """ - results: dict[str, dict[str, str | bool]] = {} - for path in paths: - try: - strip_exif(path) - results[path] = {"success": True} - except Exception as e: - results[path] = {"success": False, "error": str(e)} - return results diff --git a/src/morphic/inspector/integrity.py b/src/morphic/inspector/integrity.py deleted file mode 100644 index ac95a21..0000000 --- a/src/morphic/inspector/integrity.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -File integrity checking for images and videos. - -Uses Pillow's ``verify()`` / ``load()`` for images and ``ffprobe`` -for videos. -""" - -from __future__ import annotations - -import logging -import os -import subprocess -from concurrent.futures import ThreadPoolExecutor, as_completed - -from PIL import Image - -from morphic.shared.constants import IMAGE_EXTENSIONS, VIDEO_EXTENSIONS -from morphic.shared.utils import ( - find_files_by_extension, - format_file_size, - is_image, - is_video, -) - -logger = logging.getLogger(__name__) - - -def check_image(path: str) -> dict: - """Validate an image file's integrity. - - Parameters - ---------- - path : str - Path to the image file. 
- - Returns - ------- - dict - ``{"path", "valid", "error", "size", "size_formatted", - "width", "height", "format"}`` - """ - result: dict = { - "path": path, - "valid": False, - "error": None, - "size": 0, - "size_formatted": "0 B", - "width": 0, - "height": 0, - "format": None, - "type": "image", - } - - if not os.path.isfile(path): - result["error"] = "File not found" - return result - - result["size"] = os.path.getsize(path) - result["size_formatted"] = format_file_size(result["size"]) - - if result["size"] == 0: - result["error"] = "Zero-byte file" - return result - - try: - # First pass: verify structure - img = Image.open(path) - result["format"] = img.format - result["width"] = img.width - result["height"] = img.height - img.verify() - - # Second pass: actually decode all pixels - img = Image.open(path) - img.load() - - result["valid"] = True - except Exception as e: - result["error"] = str(e) - - return result - - -def check_video(path: str) -> dict: - """Validate a video file using ffprobe. - - Parameters - ---------- - path : str - Path to the video file. 
- - Returns - ------- - dict - ``{"path", "valid", "error", "size", "size_formatted", - "width", "height", "duration", "codec"}`` - """ - result: dict = { - "path": path, - "valid": False, - "error": None, - "size": 0, - "size_formatted": "0 B", - "width": 0, - "height": 0, - "duration": 0.0, - "codec": None, - "type": "video", - } - - if not os.path.isfile(path): - result["error"] = "File not found" - return result - - result["size"] = os.path.getsize(path) - result["size_formatted"] = format_file_size(result["size"]) - - if result["size"] == 0: - result["error"] = "Zero-byte file" - return result - - try: - cmd = [ - "ffprobe", - "-v", - "error", - "-select_streams", - "v:0", - "-show_entries", - "stream=codec_name,width,height,duration", - "-of", - "csv=p=0", - path, - ] - proc = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=30, - ) - - if proc.returncode != 0: - stderr = proc.stderr.strip() - result["error"] = stderr or f"ffprobe exit code {proc.returncode}" - return result - - output = proc.stdout.strip() - if not output: - result["error"] = "No video stream found" - return result - - parts = output.split(",") - if len(parts) >= 1: - result["codec"] = parts[0] - if len(parts) >= 2: - result["width"] = int(parts[1]) if parts[1] else 0 - if len(parts) >= 3: - result["height"] = int(parts[2]) if parts[2] else 0 - if len(parts) >= 4 and parts[3]: - try: - result["duration"] = float(parts[3]) - except ValueError: - pass - - result["valid"] = True - - except FileNotFoundError: - result["error"] = "ffprobe not found (install ffmpeg)" - except subprocess.TimeoutExpired: - result["error"] = "ffprobe timed out" - except Exception as e: - result["error"] = str(e) - - return result - - -def check_files( - folder: str, - max_workers: int = 4, -) -> list[dict]: - """Check integrity of all media files in a folder. - - Parameters - ---------- - folder : str - Root folder to scan. - max_workers : int - Number of threads for parallel checking. 
- - Returns - ------- - list[dict] - Per-file integrity results. - """ - all_ext = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS - paths = find_files_by_extension(folder, all_ext) - results: list[dict] = [] - - with ThreadPoolExecutor(max_workers=max_workers) as pool: - futures = {} - for path in paths: - if is_image(path): - futures[pool.submit(check_image, path)] = path - elif is_video(path): - futures[pool.submit(check_video, path)] = path - - for future in as_completed(futures): - try: - results.append(future.result()) - except Exception as e: - results.append( - { - "path": futures[future], - "valid": False, - "error": str(e), - } - ) - - return sorted(results, key=lambda r: r["path"]) diff --git a/src/morphic/inspector/scanner.py b/src/morphic/inspector/scanner.py deleted file mode 100644 index 0f0d6eb..0000000 --- a/src/morphic/inspector/scanner.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Background scan job management for the inspector module. - -Handles EXIF scanning and integrity checking in background threads. 
-""" - -from __future__ import annotations - -import logging -import os -import threading -import time -import uuid -from dataclasses import dataclass, field - -from morphic.shared.constants import ALL_EXTENSIONS, IMAGE_EXTENSIONS -from morphic.shared.utils import ( - find_files_by_extension, - format_duration, - is_image, - is_video, -) - -logger = logging.getLogger(__name__) - - -@dataclass -class ScanJob: - """Represents a running or completed inspector job.""" - - id: str - folder: str - mode: str # "exif" or "integrity" - status: str = "pending" - progress: float = 0.0 - message: str = "" - error: str | None = None - results: list[dict] = field(default_factory=list) - total_files: int = 0 - processed_files: int = 0 - started_at: float = 0.0 - finished_at: float = 0.0 - - -# ── Job Registry ─────────────────────────────────────────────────────────── - -_jobs: dict[str, ScanJob] = {} -_lock = threading.Lock() - - -def get_job(job_id: str) -> ScanJob | None: - """Retrieve a scan job by ID.""" - with _lock: - return _jobs.get(job_id) - - -def start_job(folder: str, mode: str) -> str: - """Create and launch a new inspector job. Returns the job ID.""" - job_id = str(uuid.uuid4())[:8] - job = ScanJob(id=job_id, folder=folder, mode=mode) - with _lock: - _jobs[job_id] = job - - thread = threading.Thread(target=_run_scan, args=(job,), daemon=True) - thread.start() - return job_id - - -def _run_scan(job: ScanJob) -> None: - """Execute the inspector scan in a background thread.""" - try: - job.status = "scanning" - job.started_at = time.time() - - # Determine extensions to look for - extensions = IMAGE_EXTENSIONS if job.mode == "exif" else ALL_EXTENSIONS - job.message = f"Scanning folder: {job.folder}" - paths = find_files_by_extension(job.folder, extensions) - job.total_files = len(paths) - - if not paths: - job.status = "done" - job.progress = 1.0 - job.finished_at = time.time() - job.message = "No files found." 
- return - - job.status = "processing" - - if job.mode == "exif": - _scan_exif(job, paths) - else: - _scan_integrity(job, paths) - - job.status = "done" - job.progress = 1.0 - job.finished_at = time.time() - elapsed = job.finished_at - job.started_at - job.message = ( - f"Done! Processed {job.processed_files} files " - f"in {format_duration(elapsed)}." - ) - - except Exception as e: - logger.exception("Inspector scan failed") - job.status = "error" - job.error = str(e) - job.message = f"Error: {e}" - job.finished_at = time.time() - - -def _scan_exif(job: ScanJob, paths: list[str]) -> None: - """Read EXIF from all image files.""" - from morphic.inspector.exif import read_exif - - for i, path in enumerate(paths): - if not is_image(path): - continue - try: - exif = read_exif(path) - job.results.append( - { - "path": path, - "filename": os.path.basename(path), - "directory": os.path.dirname(path), - "exif": exif, - "has_exif": bool(exif), - "has_gps": "_gps_lat" in exif, - } - ) - except Exception as e: - job.results.append( - { - "path": path, - "filename": os.path.basename(path), - "directory": os.path.dirname(path), - "exif": {}, - "has_exif": False, - "has_gps": False, - "error": str(e), - } - ) - job.processed_files = i + 1 - job.progress = (i + 1) / job.total_files - job.message = f"Reading EXIF: {i + 1}/{job.total_files}" - - -def _scan_integrity(job: ScanJob, paths: list[str]) -> None: - """Check integrity of all media files.""" - from morphic.inspector.integrity import check_image, check_video - - for i, path in enumerate(paths): - if is_image(path): - result = check_image(path) - elif is_video(path): - result = check_video(path) - else: - result = { - "path": path, - "valid": False, - "error": "Unknown file type", - "type": "unknown", - } - result["filename"] = os.path.basename(path) - result["directory"] = os.path.dirname(path) - job.results.append(result) - job.processed_files = i + 1 - job.progress = (i + 1) / job.total_files - job.message = f"Checking: {i 
+ 1}/{job.total_files}" diff --git a/src/morphic/organizer/__init__.py b/src/morphic/organizer/__init__.py deleted file mode 100644 index cfd83f1..0000000 --- a/src/morphic/organizer/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -morphic.organizer - File organisation: date sorting and batch renaming. - -Provides plan→preview→execute workflows for moving or copying media -files into date-based folder structures or with new naming patterns. -""" - -from morphic.organizer.date_sorter import ( - execute_sort, - get_file_date, - plan_sort, -) -from morphic.organizer.renamer import ( - execute_rename, - plan_rename, -) -from morphic.organizer.scanner import get_job, start_job - -__all__ = [ - "execute_rename", - "execute_sort", - "get_file_date", - "get_job", - "plan_rename", - "plan_sort", - "start_job", -] diff --git a/src/morphic/organizer/date_sorter.py b/src/morphic/organizer/date_sorter.py deleted file mode 100644 index 6898e29..0000000 --- a/src/morphic/organizer/date_sorter.py +++ /dev/null @@ -1,191 +0,0 @@ -""" -Date-based file sorting with configurable folder templates. - -Supports EXIF date extraction with fallback to file modification time, -configurable folder structure templates, and move/copy operations. -""" - -from __future__ import annotations - -import logging -import os -import shutil -from datetime import datetime - -import piexif - -from morphic.shared.constants import ALL_EXTENSIONS -from morphic.shared.utils import find_files_by_extension - -logger = logging.getLogger(__name__) - -# ── Date Extraction ──────────────────────────────────────────────────────── - - -def get_file_date(path: str) -> datetime: - """Extract the best available date for a file. - - Priority: EXIF DateTimeOriginal → EXIF DateTime → file mtime. - - Parameters - ---------- - path : str - Path to the file. - - Returns - ------- - datetime - The extracted date. 
- """ - # Try EXIF first (images only) - try: - exif_dict = piexif.load(path) - for ifd_key in ("Exif", "0th"): - ifd = exif_dict.get(ifd_key, {}) - if not ifd: - continue - # DateTimeOriginal = 36867, DateTime = 306 - for tag_id in (36867, 306): - val = ifd.get(tag_id) - if val: - if isinstance(val, bytes): - val = val.decode("utf-8", errors="ignore") - val = val.strip().rstrip("\x00") - if val and val != "0000:00:00 00:00:00": - return datetime.strptime( - val, - "%Y:%m:%d %H:%M:%S", - ) - except Exception: - pass - - # Fall back to file modification time - mtime = os.path.getmtime(path) - return datetime.fromtimestamp(mtime) - - -# ── Template Rendering ───────────────────────────────────────────────────── - - -def _render_template(template: str, dt: datetime) -> str: - """Expand a folder template with date tokens. - - Supported tokens: ``{year}``, ``{month}``, ``{day}``, - ``{hour}``, ``{minute}``. - """ - return template.format( - year=dt.strftime("%Y"), - month=dt.strftime("%m"), - day=dt.strftime("%d"), - hour=dt.strftime("%H"), - minute=dt.strftime("%M"), - ) - - -# ── Plan & Execute ───────────────────────────────────────────────────────── - - -def plan_sort( - folder: str, - template: str = "{year}/{month}/{day}", - destination: str | None = None, -) -> list[dict]: - """Generate a sort plan without executing it. - - Parameters - ---------- - folder : str - Source folder to scan. - template : str - Folder template using ``{year}``, ``{month}``, ``{day}``, - ``{hour}``, ``{minute}`` tokens. - destination : str, optional - Base destination folder. Defaults to *folder* itself. - - Returns - ------- - list[dict] - List of ``{"source", "destination", "date", "date_formatted"}`` - entries. 
- """ - base = destination or folder - paths = find_files_by_extension(folder, ALL_EXTENSIONS) - plan: list[dict] = [] - - for path in paths: - dt = get_file_date(path) - sub_path = _render_template(template, dt) - dest_dir = os.path.join(base, sub_path) - dest_file = os.path.join(dest_dir, os.path.basename(path)) - - plan.append( - { - "source": path, - "destination": dest_file, - "date": dt.isoformat(), - "date_formatted": dt.strftime("%Y-%m-%d %H:%M:%S"), - } - ) - - return plan - - -def execute_sort( - plan: list[dict], - operation: str = "copy", -) -> dict: - """Execute a previously generated sort plan. - - Parameters - ---------- - plan : list[dict] - Plan from :func:`plan_sort`. - operation : str - ``"move"`` or ``"copy"``. - - Returns - ------- - dict - ``{"completed", "errors", "total", "results"}`` - """ - if operation not in ("move", "copy"): - raise ValueError("operation must be 'move' or 'copy'") - - results: list[dict] = [] - completed = 0 - errors = 0 - - for entry in plan: - src = entry["source"] - dest = entry["destination"] - try: - os.makedirs(os.path.dirname(dest), exist_ok=True) - if operation == "move": - shutil.move(src, dest) - else: - shutil.copy2(src, dest) - completed += 1 - results.append( - { - "source": src, - "destination": dest, - "status": "ok", - } - ) - except Exception as e: - errors += 1 - results.append( - { - "source": src, - "destination": dest, - "status": "error", - "error": str(e), - } - ) - - return { - "completed": completed, - "errors": errors, - "total": len(plan), - "results": results, - } diff --git a/src/morphic/organizer/renamer.py b/src/morphic/organizer/renamer.py deleted file mode 100644 index cff5e6a..0000000 --- a/src/morphic/organizer/renamer.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -Batch file renaming with fixed token templates. - -Supported tokens: ``{date}``, ``{datetime}``, ``{seq}``, ``{seq:N}``, -``{original}``, ``{ext}``. 
-""" - -from __future__ import annotations - -import logging -import os -import re -import shutil -from pathlib import Path - -from morphic.organizer.date_sorter import get_file_date -from morphic.shared.constants import ALL_EXTENSIONS -from morphic.shared.utils import find_files_by_extension - -logger = logging.getLogger(__name__) - - -def _render_name( - template: str, - path: str, - seq: int, -) -> str: - """Expand a rename template for a single file. - - Tokens - ------ - ``{date}`` - ``YYYY-MM-DD`` from EXIF or mtime. - ``{datetime}`` - ``YYYY-MM-DD_HH-MM-SS``. - ``{seq}`` - Zero-padded sequence number (default 4 digits). - ``{seq:N}`` - Sequence number padded to *N* digits. - ``{original}`` - Original filename without extension. - ``{ext}`` - Original extension including the dot. - """ - dt = get_file_date(path) - p = Path(path) - - result = template - result = result.replace("{date}", dt.strftime("%Y-%m-%d")) - result = result.replace("{datetime}", dt.strftime("%Y-%m-%d_%H-%M-%S")) - result = result.replace("{original}", p.stem) - result = result.replace("{ext}", p.suffix) - - # Handle {seq:N} patterns - seq_pattern = re.compile(r"\{seq:(\d+)\}") - match = seq_pattern.search(result) - if match: - pad = int(match.group(1)) - result = seq_pattern.sub(str(seq).zfill(pad), result) - # Handle plain {seq} (default 4-digit padding) - result = result.replace("{seq}", str(seq).zfill(4)) - - return result - - -def plan_rename( - folder: str, - template: str = "{date}_{seq}_{original}{ext}", - operation: str = "move", - start_seq: int = 1, - output_folder: str | None = None, -) -> list[dict]: - """Generate a rename plan without executing it. - - Parameters - ---------- - folder : str - Source folder to scan. - template : str - Naming template with tokens. - operation : str - ``"move"`` (rename in place) or ``"copy"`` (write to - *output_folder*). - start_seq : int - Starting sequence number. - output_folder : str, optional - Destination folder for copies. 
Defaults to *folder*. - - Returns - ------- - list[dict] - List of ``{"source", "new_name", "destination", "conflict"}`` - entries. - """ - dest_base = output_folder or folder - paths = find_files_by_extension(folder, ALL_EXTENSIONS) - - # Sort by date then name for consistent sequencing - dated = [] - for path in paths: - dt = get_file_date(path) - dated.append((dt, path)) - dated.sort(key=lambda x: (x[0], x[1])) - - plan: list[dict] = [] - seen_destinations: set[str] = set() - - for i, (dt, path) in enumerate(dated): - seq = start_seq + i - new_name = _render_name(template, path, seq) - dest = os.path.join(dest_base, new_name) - - conflict = dest in seen_destinations or ( - os.path.exists(dest) - and os.path.abspath(dest) != os.path.abspath(path) - ) - seen_destinations.add(dest) - - plan.append( - { - "source": path, - "new_name": new_name, - "destination": dest, - "conflict": conflict, - } - ) - - return plan - - -def execute_rename( - plan: list[dict], - operation: str = "move", -) -> dict: - """Execute a previously generated rename plan. - - Parameters - ---------- - plan : list[dict] - Plan from :func:`plan_rename`. - operation : str - ``"move"`` or ``"copy"``. 
- - Returns - ------- - dict - ``{"completed", "errors", "skipped", "total", "results"}`` - """ - if operation not in ("move", "copy"): - raise ValueError("operation must be 'move' or 'copy'") - - results: list[dict] = [] - completed = 0 - errors = 0 - skipped = 0 - - for entry in plan: - src = entry["source"] - dest = entry["destination"] - - if entry.get("conflict"): - skipped += 1 - results.append( - { - "source": src, - "destination": dest, - "status": "skipped", - "reason": "name conflict", - } - ) - continue - - try: - os.makedirs(os.path.dirname(dest), exist_ok=True) - if operation == "move": - shutil.move(src, dest) - else: - shutil.copy2(src, dest) - completed += 1 - results.append( - { - "source": src, - "destination": dest, - "status": "ok", - } - ) - except Exception as e: - errors += 1 - results.append( - { - "source": src, - "destination": dest, - "status": "error", - "error": str(e), - } - ) - - return { - "completed": completed, - "errors": errors, - "skipped": skipped, - "total": len(plan), - "results": results, - } diff --git a/src/morphic/organizer/scanner.py b/src/morphic/organizer/scanner.py deleted file mode 100644 index 06cf4ed..0000000 --- a/src/morphic/organizer/scanner.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -Background job management for the organizer module. - -Handles plan→preview→execute workflows for date sorting and batch -renaming in background threads. 
-""" - -from __future__ import annotations - -import logging -import threading -import time -import uuid -from dataclasses import dataclass, field - -from morphic.shared.utils import format_duration - -logger = logging.getLogger(__name__) - - -@dataclass -class ScanJob: - """Represents a running or completed organizer job.""" - - id: str - folder: str - mode: str # "sort" or "rename" - operation: str = "copy" # "move" or "copy" - template: str = "{year}/{month}/{day}" - destination: str | None = None - start_seq: int = 1 - status: str = "pending" - phase: str = "idle" # "planning", "executing", "done" - progress: float = 0.0 - message: str = "" - error: str | None = None - plan: list[dict] = field(default_factory=list) - execution_result: dict = field(default_factory=dict) - started_at: float = 0.0 - finished_at: float = 0.0 - - -# ── Job Registry ─────────────────────────────────────────────────────────── - -_jobs: dict[str, ScanJob] = {} -_lock = threading.Lock() - - -def get_job(job_id: str) -> ScanJob | None: - """Retrieve an organizer job by ID.""" - with _lock: - return _jobs.get(job_id) - - -def start_job( - folder: str, - mode: str, - operation: str = "copy", - template: str = "{year}/{month}/{day}", - destination: str | None = None, - start_seq: int = 1, -) -> str: - """Create and launch a planning job. Returns the job ID.""" - job_id = str(uuid.uuid4())[:8] - job = ScanJob( - id=job_id, - folder=folder, - mode=mode, - operation=operation, - template=template, - destination=destination, - start_seq=start_seq, - ) - with _lock: - _jobs[job_id] = job - - thread = threading.Thread(target=_run_plan, args=(job,), daemon=True) - thread.start() - return job_id - - -def execute_job(job_id: str) -> bool: - """Execute a previously planned job. 
Returns False if not found.""" - with _lock: - job = _jobs.get(job_id) - if not job or job.phase != "planned": - return False - - thread = threading.Thread( - target=_run_execute, - args=(job,), - daemon=True, - ) - thread.start() - return True - - -def _run_plan(job: ScanJob) -> None: - """Generate the plan in a background thread.""" - try: - job.status = "scanning" - job.phase = "planning" - job.started_at = time.time() - job.message = f"Planning {job.mode} for: {job.folder}" - - if job.mode == "sort": - from morphic.organizer.date_sorter import plan_sort - - job.plan = plan_sort( - job.folder, - template=job.template, - destination=job.destination, - ) - else: - from morphic.organizer.renamer import plan_rename - - job.plan = plan_rename( - job.folder, - template=job.template, - operation=job.operation, - start_seq=job.start_seq, - output_folder=job.destination, - ) - - job.phase = "planned" - job.status = "planned" - job.progress = 0.5 - job.message = ( - f"Plan ready: {len(job.plan)} file(s) to {job.operation}." - ) - - except Exception as e: - logger.exception("Organizer planning failed") - job.status = "error" - job.error = str(e) - job.message = f"Error: {e}" - job.finished_at = time.time() - - -def _run_execute(job: ScanJob) -> None: - """Execute the plan in a background thread.""" - try: - job.status = "processing" - job.phase = "executing" - job.message = f"Executing {job.operation}..." - - if job.mode == "sort": - from morphic.organizer.date_sorter import execute_sort - - job.execution_result = execute_sort( - job.plan, - operation=job.operation, - ) - else: - from morphic.organizer.renamer import execute_rename - - job.execution_result = execute_rename( - job.plan, - operation=job.operation, - ) - - job.phase = "done" - job.status = "done" - job.progress = 1.0 - job.finished_at = time.time() - elapsed = job.finished_at - job.started_at - res = job.execution_result - job.message = ( - f"Done! 
{res.get('completed', 0)} files " - f"{job.operation}d, {res.get('errors', 0)} error(s) " - f"in {format_duration(elapsed)}." - ) - - except Exception as e: - logger.exception("Organizer execution failed") - job.status = "error" - job.error = str(e) - job.message = f"Error: {e}" - job.finished_at = time.time() diff --git a/src/morphic/resizer/__init__.py b/src/morphic/resizer/__init__.py deleted file mode 100644 index 14df605..0000000 --- a/src/morphic/resizer/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -morphic.resizer - Batch image resizing with multiple modes. - -Supports fit, fill, stretch, and pad operations with configurable -output format and background colour. -""" - -from morphic.resizer.operations import resize_image -from morphic.resizer.scanner import get_job, start_job - -__all__ = [ - "get_job", - "resize_image", - "start_job", -] diff --git a/src/morphic/resizer/operations.py b/src/morphic/resizer/operations.py deleted file mode 100644 index aaacb03..0000000 --- a/src/morphic/resizer/operations.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Image resize operations — fit, fill, stretch, and pad. - -All operations preserve the original format by default and support -configurable output quality and format override. -""" - -from __future__ import annotations - -import logging -import os -from pathlib import Path - -from PIL import Image, ImageOps - -logger = logging.getLogger(__name__) - -# Valid resize modes -RESIZE_MODES = ("fit", "fill", "stretch", "pad") - - -def resize_image( - path: str, - width: int, - height: int, - mode: str = "fit", - output_folder: str | None = None, - bg_color: str = "#000000", - quality: int = 90, - output_format: str | None = None, -) -> str: - """Resize a single image. - - Parameters - ---------- - path : str - Path to the source image. - width : int - Target width in pixels. - height : int - Target height in pixels. 
- mode : str - Resize mode: ``"fit"`` (within bounds), ``"fill"`` (cover + crop), - ``"stretch"`` (ignore ratio), ``"pad"`` (fit + pad borders). - output_folder : str, optional - Write output here instead of overwriting. Creates the folder - if needed. - bg_color : str - Background colour for pad mode (CSS hex, default black). - quality : int - JPEG/WebP quality (1-100). - output_format : str, optional - Force output format (e.g. ``".png"``). Uses original if *None*. - - Returns - ------- - str - Path to the output file. - - Raises - ------ - FileNotFoundError - If the source file does not exist. - ValueError - If an invalid mode is given. - """ - if not os.path.isfile(path): - raise FileNotFoundError(f"File not found: {path}") - if mode not in RESIZE_MODES: - raise ValueError( - f"Invalid mode '{mode}'. Must be one of {RESIZE_MODES}" - ) - if width <= 0 or height <= 0: - raise ValueError("Width and height must be positive integers") - - img = Image.open(path) - - # Convert palette/LA images to RGBA/RGB for processing - if img.mode in ("P", "LA"): - img = img.convert("RGBA") - elif img.mode == "L": - img = img.convert("RGB") - - size = (width, height) - - if mode == "fit": - img.thumbnail(size, Image.Resampling.LANCZOS) - elif mode == "fill": - img = ImageOps.fit(img, size, Image.Resampling.LANCZOS) - elif mode == "stretch": - img = img.resize(size, Image.Resampling.LANCZOS) - elif mode == "pad": - img = ImageOps.pad(img, size, Image.Resampling.LANCZOS, color=bg_color) - - # Determine output path - src = Path(path) - ext = output_format if output_format else src.suffix - if not ext.startswith("."): - ext = f".{ext}" - - if output_folder: - os.makedirs(output_folder, exist_ok=True) - dest = Path(output_folder) / f"{src.stem}{ext}" - else: - dest = src.with_suffix(ext) - - # Convert RGBA to RGB for formats that don't support alpha - if img.mode == "RGBA" and ext.lower() in (".jpg", ".jpeg", ".bmp"): - img = img.convert("RGB") - - save_kwargs: dict = {} - if 
ext.lower() in (".jpg", ".jpeg", ".webp"): - save_kwargs["quality"] = quality - if ext.lower() == ".png": - save_kwargs["optimize"] = True - - img.save(str(dest), **save_kwargs) - logger.info("Resized %s → %s (%s)", path, dest, mode) - return str(dest) diff --git a/src/morphic/resizer/scanner.py b/src/morphic/resizer/scanner.py deleted file mode 100644 index 70bfc68..0000000 --- a/src/morphic/resizer/scanner.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Background scan job management for the resizer module. - -Discovers images in a folder and resizes them in a background thread. -""" - -from __future__ import annotations - -import logging -import os -import threading -import time -import uuid -from dataclasses import dataclass, field - -from morphic.shared.constants import IMAGE_EXTENSIONS -from morphic.shared.utils import ( - find_files_by_extension, - format_duration, - format_file_size, -) - -logger = logging.getLogger(__name__) - - -@dataclass -class ScanJob: - """Represents a running or completed resize job.""" - - id: str - folder: str - width: int - height: int - mode: str - output_folder: str | None = None - bg_color: str = "#000000" - quality: int = 90 - status: str = "pending" - progress: float = 0.0 - message: str = "" - error: str | None = None - total_files: int = 0 - processed_files: int = 0 - errors: list[dict] = field(default_factory=list) - results: list[dict] = field(default_factory=list) - started_at: float = 0.0 - finished_at: float = 0.0 - - -# ── Job Registry ─────────────────────────────────────────────────────────── - -_jobs: dict[str, ScanJob] = {} -_lock = threading.Lock() - - -def get_job(job_id: str) -> ScanJob | None: - """Retrieve a resize job by ID.""" - with _lock: - return _jobs.get(job_id) - - -def start_job( - folder: str, - width: int, - height: int, - mode: str, - output_folder: str | None = None, - bg_color: str = "#000000", - quality: int = 90, -) -> str: - """Create and launch a new resize job. 
Returns the job ID.""" - job_id = str(uuid.uuid4())[:8] - job = ScanJob( - id=job_id, - folder=folder, - width=width, - height=height, - mode=mode, - output_folder=output_folder, - bg_color=bg_color, - quality=quality, - ) - with _lock: - _jobs[job_id] = job - - thread = threading.Thread(target=_run_resize, args=(job,), daemon=True) - thread.start() - return job_id - - -def _run_resize(job: ScanJob) -> None: - """Execute the resize operation in a background thread.""" - from morphic.resizer.operations import resize_image - - try: - job.status = "scanning" - job.started_at = time.time() - job.message = f"Scanning folder: {job.folder}" - - paths = find_files_by_extension(job.folder, IMAGE_EXTENSIONS) - job.total_files = len(paths) - - if not paths: - job.status = "done" - job.progress = 1.0 - job.finished_at = time.time() - job.message = "No image files found." - return - - job.status = "processing" - for i, path in enumerate(paths): - try: - original_size = os.path.getsize(path) - dest = resize_image( - path, - job.width, - job.height, - mode=job.mode, - output_folder=job.output_folder, - bg_color=job.bg_color, - quality=job.quality, - ) - new_size = os.path.getsize(dest) if os.path.isfile(dest) else 0 - job.results.append( - { - "source": path, - "destination": dest, - "status": "ok", - "original_size": original_size, - "new_size": new_size, - "original_size_fmt": format_file_size(original_size), - "new_size_fmt": format_file_size(new_size), - } - ) - except Exception as e: - job.errors.append({"path": path, "error": str(e)}) - job.results.append( - { - "source": path, - "destination": None, - "status": "error", - "error": str(e), - } - ) - - job.processed_files = i + 1 - job.progress = (i + 1) / job.total_files - job.message = ( - f"Resizing: {i + 1}/{job.total_files} " - f"({len(job.errors)} errors)" - ) - - job.status = "done" - job.progress = 1.0 - job.finished_at = time.time() - elapsed = job.finished_at - job.started_at - job.message = ( - f"Done! 
Resized {job.processed_files} images " - f"in {format_duration(elapsed)}. " - f"{len(job.errors)} error(s)." - ) - - except Exception as e: - logger.exception("Resize job failed") - job.status = "error" - job.error = str(e) - job.message = f"Error: {e}" - job.finished_at = time.time() diff --git a/src/morphic/shared/__init__.py b/src/morphic/shared/__init__.py deleted file mode 100644 index 3576858..0000000 --- a/src/morphic/shared/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -morphic.shared - Constants, utilities, and helpers shared across modules. -""" - -from morphic.shared.constants import ( - ALL_EXTENSIONS, - EXCLUDED_FOLDERS, - IMAGE_EXTENSIONS, - VIDEO_EXTENSIONS, -) -from morphic.shared.utils import ( - find_files_by_extension, - format_duration, - format_file_size, - is_excluded_path, - is_image, - is_video, - normalise_ext, -) - -__all__ = [ - "ALL_EXTENSIONS", - "EXCLUDED_FOLDERS", - "IMAGE_EXTENSIONS", - "VIDEO_EXTENSIONS", - "find_files_by_extension", - "format_duration", - "format_file_size", - "is_excluded_path", - "is_image", - "is_video", - "normalise_ext", -] diff --git a/src/morphic/shared/constants.py b/src/morphic/shared/constants.py deleted file mode 100644 index a499725..0000000 --- a/src/morphic/shared/constants.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -Shared constants for morphic – extension sets, exclusion lists, defaults. - -Merges constants from both the converter and dupfinder modules so that -every part of the project works with a single canonical set of -supported file types. -""" - -from __future__ import annotations - -# ── Supported extensions ─────────────────────────────────────────────────── -# Union of converter + dupfinder sets. 
- -IMAGE_EXTENSIONS: frozenset[str] = frozenset( - { - ".jpg", - ".jpeg", - ".png", - ".tif", - ".tiff", - ".bmp", - ".webp", - ".gif", - ".ico", - ".heic", - ".heif", - ".avif", - # Extra formats from dupfinder (raw / vector) - ".svg", - ".raw", - ".cr2", - ".nef", - ".arw", - ".dng", - ".orf", - ".rw2", - ".pef", - ".srw", - } -) - -VIDEO_EXTENSIONS: frozenset[str] = frozenset( - { - ".mp4", - ".mov", - ".avi", - ".mkv", - ".webm", - ".flv", - ".wmv", - ".m4v", - ".mpeg", - ".mpg", - ".3gp", - ".ts", - # Extra formats from dupfinder - ".ogv", - ".mts", - ".m2ts", - ".vob", - ".divx", - ".xvid", - ".asf", - ".rm", - ".rmvb", - } -) - -ALL_EXTENSIONS: frozenset[str] = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS - -# ── Folders to skip when scanning ────────────────────────────────────────── - -EXCLUDED_FOLDERS: frozenset[str] = frozenset( - { - # Windows - "$recycle.bin", - "$recycle", - "recycler", - "recycled", - "system volume information", - "windows", - "appdata", - # macOS - ".trash", - ".trashes", - ".spotlight-v100", - ".fseventsd", - ".ds_store", - # Linux - "lost+found", - "trash", - # Thumbnails - ".thumbnails", - ".thumb", - "thumbs", - # NAS - "@eadir", - # Version control - ".git", - ".svn", - ".hg", - # Development - "__pycache__", - ".cache", - "node_modules", - ".venv", - "venv", - } -) - -# ── Alias resolution ────────────────────────────────────────────────────── - -ALIASES: dict[str, str] = { - ".jpeg": ".jpg", - ".tiff": ".tif", - ".mpg": ".mpeg", -} - -# ── Dupfinder default thresholds ────────────────────────────────────────── - -DEFAULT_IMAGE_THRESHOLD: float = 0.90 -DEFAULT_VIDEO_THRESHOLD: float = 0.85 -DEFAULT_HASH_SIZE: int = 16 -DEFAULT_NUM_FRAMES: int = 10 -DEFAULT_NUM_WORKERS: int = 4 -DEFAULT_BATCH_SIZE: int = 1000 diff --git a/src/morphic/shared/file_browser.py b/src/morphic/shared/file_browser.py deleted file mode 100644 index 12121e3..0000000 --- a/src/morphic/shared/file_browser.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -Native OS 
file/folder dialog support. - -Attempts to open a native folder picker on Linux, macOS, and Windows. -Falls back gracefully when no GUI toolkit is available (e.g. headless server). -""" - -from __future__ import annotations - -import logging -import os -import platform -import subprocess -from pathlib import Path - -logger = logging.getLogger(__name__) - - -def open_native_folder_dialog( - initial_dir: str | None = None, -) -> str | None: - """ - Open the native OS folder picker dialog. - - Returns the selected folder path, or ``None`` if cancelled / unavailable. - - Tries, in order: - - 1. **tkinter** ``filedialog.askdirectory()`` - 2. **zenity** — GNOME / GTK-based Linux - 3. **kdialog** — KDE Linux - 4. **osascript** — macOS - 5. **powershell** — Windows - """ - # In test mode we prefer a headless preset path to avoid GUI dialog popups. - test_folder = os.environ.get("MORPHIC_TEST_FOLDER") - if test_folder and os.path.isdir(test_folder): - return test_folder - - if os.environ.get("PYTEST_CURRENT_TEST"): - asset_folder = Path(__file__).resolve().parents[2] / "assets" / "test" - if asset_folder.exists(): - return str(asset_folder) - - initial_dir = initial_dir or str(os.path.expanduser("~")) - - result = _try_tkinter(initial_dir) - if result is not None: - return result - - system = platform.system() - - if system == "Linux": - result = _try_zenity(initial_dir) - if result is not None: - return result - result = _try_kdialog(initial_dir) - if result is not None: - return result - - if system == "Darwin": - result = _try_osascript(initial_dir) - if result is not None: - return result - - if system == "Windows": - result = _try_powershell(initial_dir) - if result is not None: - return result - - logger.debug("No native folder dialog available on this system") - return None - - -# ── Backend implementations ──────────────────────────────────────────────── - - -def _try_tkinter(initial_dir: str) -> str | None: - try: - import tkinter as tk - from tkinter import 
filedialog - - root = tk.Tk() - root.withdraw() - root.attributes("-topmost", True) - folder = filedialog.askdirectory( - initialdir=initial_dir, - title="Select folder to scan", - ) - root.destroy() - return folder if folder else None - except Exception as exc: - logger.debug("tkinter dialog failed: %s", exc) - return None - - -def _try_zenity(initial_dir: str) -> str | None: - try: - result = subprocess.run( - [ - "zenity", - "--file-selection", - "--directory", - f"--filename={initial_dir}/", - "--title=Select folder to scan", - ], - capture_output=True, - text=True, - timeout=120, - ) - if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip() - return None - except (FileNotFoundError, subprocess.TimeoutExpired): - return None - - -def _try_kdialog(initial_dir: str) -> str | None: - try: - result = subprocess.run( - [ - "kdialog", - "--getexistingdirectory", - initial_dir, - "--title", - "Select folder to scan", - ], - capture_output=True, - text=True, - timeout=120, - ) - if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip() - return None - except (FileNotFoundError, subprocess.TimeoutExpired): - return None - - -def _try_osascript(initial_dir: str) -> str | None: - try: - script = ( - f'set defaultDir to POSIX file "{initial_dir}"\n' - f"set chosenDir to choose folder with prompt " - f'"Select folder to scan" default location defaultDir\n' - f"return POSIX path of chosenDir" - ) - result = subprocess.run( - ["osascript", "-e", script], - capture_output=True, - text=True, - timeout=120, - ) - if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip().rstrip("/") - return None - except (FileNotFoundError, subprocess.TimeoutExpired): - return None - - -def _try_powershell(initial_dir: str) -> str | None: - try: - script = ( - "[System.Reflection.Assembly]::LoadWithPartialName(" - "'System.Windows.Forms') | Out-Null; " - "$dialog = New-Object System.Windows.Forms.FolderBrowserDialog; 
" - f"$dialog.SelectedPath = '{initial_dir}'; " - "$dialog.Description = 'Select folder to scan'; " - "if ($dialog.ShowDialog() -eq 'OK') " - "{ $dialog.SelectedPath }" - ) - result = subprocess.run( - ["powershell", "-Command", script], - capture_output=True, - text=True, - timeout=120, - ) - if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip() - return None - except (FileNotFoundError, subprocess.TimeoutExpired): - return None diff --git a/src/morphic/shared/thumbnails.py b/src/morphic/shared/thumbnails.py deleted file mode 100644 index d183e43..0000000 --- a/src/morphic/shared/thumbnails.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -Thumbnail generation shared by converter and dupfinder frontends. - -Generates JPEG thumbnails for images (Pillow) and videos (ffmpeg subprocess). -""" - -from __future__ import annotations - -import io -import logging -import subprocess - -from PIL import Image - -logger = logging.getLogger(__name__) - - -def generate_image_thumbnail( - file_path: str, - size: int = 300, -) -> io.BytesIO: - """ - Create a JPEG thumbnail for an image file. - - Parameters - ---------- - file_path : str - Absolute path to the image. - size : int - Maximum width/height in pixels. - - Returns - ------- - io.BytesIO - JPEG image bytes (seeked to 0). - """ - img = Image.open(file_path) - img.thumbnail((size, size), Image.Resampling.LANCZOS) - - if img.mode in ("RGBA", "P", "LA"): - img = img.convert("RGB") - - buf = io.BytesIO() - img.save(buf, format="JPEG", quality=80) - buf.seek(0) - return buf - - -def generate_video_thumbnail( - file_path: str, - size: int = 300, -) -> io.BytesIO | None: - """ - Extract a single frame from a video and return it as a JPEG thumbnail. - - Uses ``ffmpeg`` piped to stdout. Returns ``None`` on failure. - - Parameters - ---------- - file_path : str - Absolute path to the video. - size : int - Maximum width/height in pixels. 
- - Returns - ------- - io.BytesIO | None - JPEG image bytes (seeked to 0), or ``None``. - """ - cmd = [ - "ffmpeg", - "-y", - "-i", - file_path, - "-ss", - "00:00:01", - "-vframes", - "1", - "-vf", - (f"scale={size}:{size}:force_original_aspect_ratio=decrease"), - "-f", - "image2pipe", - "-vcodec", - "mjpeg", - "-q:v", - "5", - "pipe:1", - ] - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - timeout=10, - ) - if result.returncode != 0 or not result.stdout: - # Retry at 0s for very short clips - cmd[5] = "00:00:00" - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - timeout=10, - ) - if result.stdout: - buf = io.BytesIO(result.stdout) - return buf - return None diff --git a/src/morphic/shared/utils.py b/src/morphic/shared/utils.py deleted file mode 100644 index 21a5ceb..0000000 --- a/src/morphic/shared/utils.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Shared utility helpers used across morphic modules. -""" - -from __future__ import annotations - -import logging -import os -from contextlib import contextmanager -from pathlib import Path -from typing import Generator - - -from morphic.shared.constants import ( - ALIASES, - EXCLUDED_FOLDERS, - IMAGE_EXTENSIONS, - VIDEO_EXTENSIONS, -) - -logger = logging.getLogger(__name__) - - -# ── Extension helpers ────────────────────────────────────────────────────── - - -def normalise_ext(ext: str) -> str: - """Lowercase and unify aliases (.jpeg -> .jpg, .tiff -> .tif).""" - ext = ext.lower() - return ALIASES.get(ext, ext) - - -def is_image(path: str) -> bool: - """Return True if the file extension is an image type.""" - return normalise_ext(Path(path).suffix) in IMAGE_EXTENSIONS - - -def is_video(path: str) -> bool: - """Return True if the file extension is a video type.""" - return normalise_ext(Path(path).suffix) in VIDEO_EXTENSIONS - - -# ── Formatting helpers ───────────────────────────────────────────────────── - - -def format_file_size(size_bytes: int) 
-> str: - """Format file size in human-readable format.""" - size = float(size_bytes) - for unit in ["B", "KB", "MB", "GB"]: - if size < 1024: - return f"{size:.2f} {unit}" - size /= 1024 - return f"{size:.2f} TB" - - -def format_duration(seconds: float) -> str: - """Format duration in human-readable format.""" - hours = int(seconds // 3600) - minutes = int((seconds % 3600) // 60) - secs = int(seconds % 60) - if hours > 0: - return f"{hours}h {minutes}m {secs}s" - if minutes > 0: - return f"{minutes}m {secs}s" - return f"{secs}s" - - -# ── File scanning helpers ────────────────────────────────────────────────── - - -def is_excluded_path( - file_path: str, - excluded_folders: frozenset[str] = EXCLUDED_FOLDERS, -) -> bool: - """Check if a file path contains any excluded folder.""" - path_parts = Path(file_path).parts - return any( - excluded in part.lower() - for part in path_parts - for excluded in excluded_folders - ) - - -def find_files_by_extension( - folder: str, - extensions: frozenset[str] | set[str], - excluded_folders: frozenset[str] = EXCLUDED_FOLDERS, -) -> list[str]: - """ - Find all files with given extensions in *folder* recursively. - - Parameters - ---------- - folder : str - Root folder to search. - extensions : set[str] - File extensions to match (with dot, e.g. ``".jpg"``). - excluded_folders : set[str] - Folder names to exclude. - - Returns - ------- - list[str] - Sorted list of absolute file paths. 
- """ - files: list[str] = [] - folder_path = Path(folder) - logger.info("Scanning for files in: %s", folder) - - for ext in extensions: - files.extend(str(p) for p in folder_path.rglob(f"*{ext}")) - files.extend(str(p) for p in folder_path.rglob(f"*{ext.upper()}")) - - # De-duplicate and filter - files = sorted( - {f for f in files if not is_excluded_path(f, excluded_folders)} - ) - logger.info("Found %d files", len(files)) - return files - - -# ── stderr suppression (for OpenCV/ffmpeg) ───────────────────────────────── - - -@contextmanager -def suppress_stderr() -> Generator[None, None, None]: - """ - Suppress stderr output at the file-descriptor level. - - Silences low-level library warnings (e.g. ffmpeg/OpenCV codec messages) - that cannot be caught by Python's logging framework. - """ - devnull_fd = os.open(os.devnull, os.O_WRONLY) - old_stderr_fd = os.dup(2) - os.dup2(devnull_fd, 2) - try: - yield - finally: - os.dup2(old_stderr_fd, 2) - os.close(devnull_fd) - os.close(old_stderr_fd) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 3dfd9ea..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Shared test fixtures.""" - -from __future__ import annotations - -from pathlib import Path - -import pytest -from PIL import Image - -from morphic.frontend.app import create_app - -ASSETS_TEST_DIR = Path(__file__).resolve().parents[1] / "assets" / "test" - - -@pytest.fixture() -def app(monkeypatch): - """Create a Flask app for testing.""" - if ASSETS_TEST_DIR.exists(): - monkeypatch.setenv("MORPHIC_TEST_FOLDER", str(ASSETS_TEST_DIR)) - - initial_folder = ( - str(ASSETS_TEST_DIR) if ASSETS_TEST_DIR.exists() else "/tmp" - ) - application = create_app(initial_folder=initial_folder) - application.config["TESTING"] = True - return application - - -@pytest.fixture() -def client(app): - """Flask test client.""" - with app.test_client() as c: - yield c - - -@pytest.fixture() -def tmp_media(tmp_path): - """Create a temp directory 
with sample image/video files.""" - - # Create images - for name in ["photo.jpg", "image.png", "pic.tif"]: - img = Image.new("RGB", (10, 10), color="red") - img.save(str(tmp_path / name)) - - # Create fake video files (0-byte placeholders) - for name in ["clip.mp4", "movie.mov"]: - (tmp_path / name).write_bytes(b"\x00" * 100) - - # Create a non-media file - (tmp_path / "readme.txt").write_text("hello") - - # Subfolder with more files - sub = tmp_path / "sub" - sub.mkdir() - img = Image.new("RGB", (20, 20), color="blue") - img.save(str(sub / "deep.jpg")) - (sub / "deep.mp4").write_bytes(b"\x00" * 50) - - return tmp_path - - -@pytest.fixture() -def test_image(tmp_path): - """Create a single test image and return its path.""" - path = tmp_path / "test.jpg" - img = Image.new("RGB", (100, 100), color="green") - img.save(str(path)) - return str(path) - - -@pytest.fixture() -def rgba_image(tmp_path): - """Create an RGBA test image and return its path.""" - path = tmp_path / "test_rgba.png" - img = Image.new("RGBA", (50, 50), color=(255, 0, 0, 128)) - img.save(str(path)) - return str(path) - - -@pytest.fixture() -def palette_image(tmp_path): - """Create a palette (P mode) test image and return its path.""" - path = tmp_path / "test_palette.png" - img = Image.new("P", (50, 50)) - img.save(str(path)) - return str(path) diff --git a/tests/converter/__init__.py b/tests/converter/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/converter/test_constants.py b/tests/converter/test_constants.py deleted file mode 100644 index 2c73126..0000000 --- a/tests/converter/test_constants.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Tests for morphic.converter.constants.""" - -from morphic.converter.constants import ( - IMAGE_CONVERSIONS, - VIDEO_CONVERSIONS, - _CANONICAL_IMAGE, - _CANONICAL_VIDEO, - _normalise, -) -from morphic.shared.constants import IMAGE_EXTENSIONS - - -class TestNormalise: - def test_aliases(self) -> None: - assert _normalise(".jpeg") == ".jpg" - 
assert _normalise(".tiff") == ".tif" - - def test_passthrough(self) -> None: - assert _normalise(".png") == ".png" - - def test_case_insensitive(self) -> None: - assert _normalise(".JPEG") == ".jpg" - - -class TestCanonicalSets: - def test_canonical_image_not_empty(self) -> None: - assert len(_CANONICAL_IMAGE) > 0 - - def test_canonical_video_not_empty(self) -> None: - assert len(_CANONICAL_VIDEO) > 0 - - def test_canonical_are_subsets(self) -> None: - normed_img = {_normalise(e) for e in IMAGE_EXTENSIONS} - for ext in _CANONICAL_IMAGE: - assert ext in normed_img - - def test_common_image_canonicals(self) -> None: - for ext in [".jpg", ".png", ".webp", ".bmp", ".gif"]: - assert ext in _CANONICAL_IMAGE - - def test_common_video_canonicals(self) -> None: - for ext in [".mp4", ".mov", ".avi", ".mkv", ".webm"]: - assert ext in _CANONICAL_VIDEO - - -class TestImageConversions: - def test_not_empty(self) -> None: - assert len(IMAGE_CONVERSIONS) > 0 - - def test_image_target_does_not_include_self(self) -> None: - for ext, targets in IMAGE_CONVERSIONS.items(): - norm = _normalise(ext) - assert norm not in targets, f"{norm} in targets for {ext}" - - def test_all_targets_are_canonical(self) -> None: - for ext, targets in IMAGE_CONVERSIONS.items(): - for t in targets: - assert t in _CANONICAL_IMAGE, f"{t} not canonical" - - def test_jpg_can_convert_to_png(self) -> None: - targets = IMAGE_CONVERSIONS.get(".jpg", []) - assert ".png" in targets - - def test_png_can_convert_to_jpg(self) -> None: - targets = IMAGE_CONVERSIONS.get(".png", []) - assert ".jpg" in targets - - def test_targets_are_sorted(self) -> None: - for ext, targets in IMAGE_CONVERSIONS.items(): - assert targets == sorted(targets), f"targets for {ext} not sorted" - - -class TestVideoConversions: - def test_not_empty(self) -> None: - assert len(VIDEO_CONVERSIONS) > 0 - - def test_video_target_does_not_include_self(self) -> None: - for ext, targets in VIDEO_CONVERSIONS.items(): - norm = _normalise(ext) - assert norm 
not in targets, f"{norm} in targets for {ext}" - - def test_all_targets_are_canonical(self) -> None: - for ext, targets in VIDEO_CONVERSIONS.items(): - for t in targets: - assert t in _CANONICAL_VIDEO, f"{t} not canonical" - - def test_mp4_can_convert_to_mkv(self) -> None: - targets = VIDEO_CONVERSIONS.get(".mp4", []) - assert ".mkv" in targets - - def test_targets_are_sorted(self) -> None: - for ext, targets in VIDEO_CONVERSIONS.items(): - assert targets == sorted(targets), f"targets for {ext} not sorted" diff --git a/tests/converter/test_converter.py b/tests/converter/test_converter.py deleted file mode 100644 index a5f5e1f..0000000 --- a/tests/converter/test_converter.py +++ /dev/null @@ -1,280 +0,0 @@ -"""Tests for morphic.converter.converter.""" - -from __future__ import annotations - -import os -from unittest.mock import MagicMock, patch - -import pytest -from PIL import Image - -from morphic.converter import converter -from morphic.converter.converter import ( - _ffmpeg_available, - convert_file, - convert_image, - convert_video, -) - - -class TestFfmpegAvailable: - def test_returns_bool(self) -> None: - result = _ffmpeg_available() - assert isinstance(result, bool) - - -class TestConvertImage: - def test_jpg_to_png(self, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - dest = convert_image(str(src), ".png") - assert os.path.isfile(dest) - assert dest.endswith(".png") - - def test_png_to_jpg(self, tmp_path) -> None: - src = tmp_path / "test.png" - Image.new("RGB", (50, 50), "blue").save(str(src)) - - dest = convert_image(str(src), ".jpg") - assert os.path.isfile(dest) - assert dest.endswith(".jpg") - - def test_rgba_to_jpg(self, tmp_path) -> None: - src = tmp_path / "rgba.png" - Image.new("RGBA", (50, 50), (255, 0, 0, 128)).save(str(src)) - - dest = convert_image(str(src), ".jpg") - assert os.path.isfile(dest) - img = Image.open(dest) - assert img.mode == "RGB" - - def test_palette_to_jpg(self, tmp_path) 
-> None: - src = tmp_path / "palette.png" - Image.new("P", (50, 50)).save(str(src)) - - dest = convert_image(str(src), ".jpg") - assert os.path.isfile(dest) - img = Image.open(dest) - assert img.mode == "RGB" - - def test_output_dir(self, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "green").save(str(src)) - out_dir = tmp_path / "output" - - dest = convert_image(str(src), ".png", output_dir=str(out_dir)) - assert os.path.isfile(dest) - assert str(out_dir) in dest - - def test_avoid_overwrite(self, tmp_path) -> None: - src = tmp_path / "img.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - existing = tmp_path / "img.png" - existing.write_text("existing") - - dest = convert_image(str(src), ".png") - assert os.path.isfile(dest) - assert "converted" in os.path.basename(dest) - - def test_ext_without_dot(self, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - dest = convert_image(str(src), "png") - assert dest.endswith(".png") - - def test_webp_quality(self, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - dest = convert_image(str(src), ".webp") - assert os.path.isfile(dest) - assert dest.endswith(".webp") - - def test_tiff_compression(self, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - dest = convert_image(str(src), ".tif") - assert os.path.isfile(dest) - - def test_bmp_conversion(self, tmp_path) -> None: - src = tmp_path / "test.png" - Image.new("RGBA", (50, 50), (255, 0, 0, 128)).save(str(src)) - - dest = convert_image(str(src), ".bmp") - assert os.path.isfile(dest) - img = Image.open(dest) - assert img.mode == "RGB" - - def test_ico_conversion(self, tmp_path) -> None: - src = tmp_path / "test.png" - Image.new("RGBA", (32, 32), (0, 255, 0, 200)).save(str(src)) - - dest = convert_image(str(src), ".ico") - assert os.path.isfile(dest) - - -class TestConvertVideo: 
- @patch("morphic.converter.converter._ffmpeg_available", return_value=False) - def test_no_ffmpeg(self, mock_ffmpeg, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - - with pytest.raises(RuntimeError, match="ffmpeg is not installed"): - convert_video(str(src), ".avi") - - @patch("morphic.converter.converter.subprocess.run") - @patch("morphic.converter.converter._ffmpeg_available", return_value=True) - def test_successful_conversion( - self, mock_ffmpeg, mock_run, tmp_path - ) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - - mock_run.return_value = MagicMock(returncode=0, stderr="") - - expected_dest = tmp_path / "test.avi" - expected_dest.write_bytes(b"\x00" * 50) - - dest = convert_video(str(src), ".avi") - assert dest.endswith(".avi") - mock_run.assert_called_once() - - @patch("morphic.converter.converter.subprocess.run") - @patch("morphic.converter.converter._ffmpeg_available", return_value=True) - def test_ffmpeg_error(self, mock_ffmpeg, mock_run, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - - mock_run.return_value = MagicMock( - returncode=1, - stderr="conversion error", - ) - - with pytest.raises(RuntimeError, match="ffmpeg error"): - convert_video(str(src), ".avi") - - @patch("morphic.converter.converter.subprocess.run") - @patch("morphic.converter.converter._ffmpeg_available", return_value=True) - def test_mkv_stream_copy(self, mock_ffmpeg, mock_run, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - - mock_run.return_value = MagicMock(returncode=0, stderr="") - - expected_dest = tmp_path / "test.mkv" - expected_dest.write_bytes(b"\x00" * 50) - - dest = convert_video(str(src), ".mkv") - assert dest.endswith(".mkv") - cmd_args = mock_run.call_args[0][0] - assert "-c" in cmd_args - assert "copy" in cmd_args - - @patch("morphic.converter.converter.subprocess.run") - @patch("morphic.converter.converter._ffmpeg_available", 
return_value=True) - def test_ts_stream_copy(self, mock_ffmpeg, mock_run, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - - mock_run.return_value = MagicMock(returncode=0, stderr="") - - expected_dest = tmp_path / "test.ts" - expected_dest.write_bytes(b"\x00" * 50) - - dest = convert_video(str(src), ".ts") - assert dest.endswith(".ts") - - @patch("morphic.converter.converter.subprocess.run") - @patch("morphic.converter.converter._ffmpeg_available", return_value=True) - def test_output_dir(self, mock_ffmpeg, mock_run, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - out_dir = tmp_path / "output" - - mock_run.return_value = MagicMock(returncode=0, stderr="") - - out_dir.mkdir() - (out_dir / "test.avi").write_bytes(b"\x00" * 50) - - dest = convert_video(str(src), ".avi", output_dir=str(out_dir)) - assert str(out_dir) in dest - - @patch("morphic.converter.converter.subprocess.run") - @patch("morphic.converter.converter._ffmpeg_available", return_value=True) - def test_avoid_overwrite(self, mock_ffmpeg, mock_run, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - - existing = tmp_path / "test.avi" - existing.write_bytes(b"\x00" * 50) - - mock_run.return_value = MagicMock(returncode=0, stderr="") - - dest = convert_video(str(src), ".avi") - assert "converted" in os.path.basename(dest) - - -class TestConvertFile: - def test_image_dispatch(self, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - dest = convert_file(str(src), ".png") - assert dest.endswith(".png") - - @patch("morphic.converter.converter.convert_video") - def test_video_dispatch(self, mock_convert, tmp_path) -> None: - src = tmp_path / "test.mp4" - src.write_bytes(b"\x00" * 100) - mock_convert.return_value = str(tmp_path / "test.avi") - - _ = convert_file(str(src), ".avi") - mock_convert.assert_called_once() - - def test_unsupported_type(self, tmp_path) 
-> None: - src = tmp_path / "test.txt" - src.write_text("hello") - - with pytest.raises(ValueError, match="Unsupported"): - convert_file(str(src), ".jpg") - - def test_ext_normalization(self, tmp_path) -> None: - src = tmp_path / "test.jpeg" - Image.new("RGB", (50, 50), "red").save(str(src), format="JPEG") - - dest = convert_file(str(src), "png") - assert dest.endswith(".png") - - -class TestConvertHelperFunctions: - def test_get_video_encoder_fallbacks(self, monkeypatch) -> None: - monkeypatch.setattr( - converter, "_is_torch_cuda_available", lambda: True - ) - monkeypatch.setattr(converter, "_ffmpeg_has_hwaccel", lambda x: True) - monkeypatch.setattr( - converter, "_ffmpeg_has_encoder", lambda e: e == "h264_nvenc" - ) - - encoder, hw, out = converter._get_video_encoder(".mp4") - assert encoder == "h264_nvenc" - assert hw is True - assert out == "mp4" - - monkeypatch.setattr(converter, "_ffmpeg_has_encoder", lambda e: False) - encoder, hw, out = converter._get_video_encoder(".avi") - assert encoder == "mpeg4" - assert hw is False - - monkeypatch.setattr( - converter, - "_ffmpeg_has_encoder", - lambda e: e in ("libsvtav1", "libaom-av1"), - ) - encoder, hw, out = converter._get_video_encoder(".webm-av1") - assert out == "webm" - assert encoder in ("libsvtav1", "libaom-av1", "libvpx-vp9") diff --git a/tests/converter/test_scanner.py b/tests/converter/test_scanner.py deleted file mode 100644 index 7590ee2..0000000 --- a/tests/converter/test_scanner.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Tests for morphic.converter.scanner.""" - -from __future__ import annotations - -import pytest - -from morphic.converter.scanner import get_compatible_targets, scan_folder - - -@pytest.mark.parametrize( - "filter_type,expected_names", - [ - ( - "images", - {"photo.jpg", "image.png", "pic.tif", "deep.jpg"}, - ), - ("videos", {"clip.mp4", "movie.mov"}), - ], -) -def test_scan_folder_filter_types( - tmp_media, filter_type, expected_names -) -> None: - result = scan_folder(str(tmp_media), 
filter_type=filter_type) - names = {f["name"] for f in result["files"]} - assert expected_names <= names - - -def test_scan_folder_both_and_subfolder_control(tmp_media) -> None: - both = scan_folder(str(tmp_media), filter_type="both") - types = {f["type"] for f in both["files"]} - assert "image" in types - assert "video" in types - - without_sub = scan_folder(str(tmp_media), include_subfolders=False) - names = {f["name"] for f in without_sub["files"]} - assert "deep.jpg" not in names - - with_sub = scan_folder(str(tmp_media), include_subfolders=True) - names = {f["name"] for f in with_sub["files"]} - assert "deep.jpg" in names - - -def test_scan_folder_summary_and_meta(tmp_media) -> None: - result = scan_folder(str(tmp_media), filter_type="images") - assert ".jpg" in result["summary"] - assert result["summary"][".jpg"] >= 2 - - full = scan_folder(str(tmp_media)) - assert full["folder"] == str(tmp_media) - for f in full["files"]: - assert all( - k in f for k in ["path", "name", "ext", "size", "type", "targets"] - ) - names = {f["name"] for f in full["files"]} - assert "readme.txt" not in names - - -@pytest.mark.parametrize( - "filename,expected_substr", - [ - ("photo.jpg", ".png"), - ("clip.mp4", ".mov"), - ("file.xyz", ""), - ("photo.JPG", ""), - ], -) -def test_compatible_targets(filename, expected_substr) -> None: - targets = get_compatible_targets(filename) - if expected_substr: - assert expected_substr in targets - else: - if filename == "file.xyz": - assert targets == [] - else: - assert len(targets) > 0 diff --git a/tests/dupfinder/__init__.py b/tests/dupfinder/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/dupfinder/test_accelerator.py b/tests/dupfinder/test_accelerator.py deleted file mode 100644 index ae4ee42..0000000 --- a/tests/dupfinder/test_accelerator.py +++ /dev/null @@ -1,299 +0,0 @@ -"""Tests for morphic.dupfinder.accelerator.""" - -from __future__ import annotations - -from typing import cast -from unittest.mock 
import patch - -import numpy as np -import pytest - -from morphic.dupfinder.accelerator import ( - AcceleratorType, - GPUAccelerator, - compute_phash_gpu, - compute_similarity_matrix_gpu, - get_accelerator, -) - - -class TestGPUAcceleratorProperties: - def test_is_gpu_available_on_cpu(self) -> None: - acc = GPUAccelerator() - if acc.backend == AcceleratorType.CPU: - assert acc.is_gpu_available is False - else: - assert acc.is_gpu_available is True - - def test_get_backend_name(self) -> None: - acc = GPUAccelerator() - name = acc.get_backend_name() - assert isinstance(name, str) - assert len(name) > 0 - expected = { - "CUDA (NVIDIA GPU)", - "ROCm (AMD GPU)", - "OpenCL (GPU)", - "CPU Multiprocessing", - } - assert name in expected - - def test_num_cpus(self) -> None: - acc = GPUAccelerator() - assert acc.num_cpus >= 1 - - -class TestResizeImageBatch: - def test_empty_batch(self) -> None: - acc = GPUAccelerator() - result = acc.resize_image_batch([], (32, 32)) - assert result == [] - - def test_single_image(self) -> None: - acc = GPUAccelerator() - img = np.random.randint(0, 255, (100, 80, 3), dtype=np.uint8) - result = acc.resize_image_batch([img], (32, 32)) - assert len(result) == 1 - assert result[0].shape[0] == 32 - assert result[0].shape[1] == 32 - - def test_multiple_images(self) -> None: - acc = GPUAccelerator() - imgs = [ - np.random.randint(0, 255, (100, 80, 3), dtype=np.uint8) - for _ in range(3) - ] - result = acc.resize_image_batch(imgs, (64, 64)) - assert len(result) == 3 - for r in result: - assert r.shape[:2] == (64, 64) - - def test_grayscale_image(self) -> None: - acc = GPUAccelerator() - img = np.random.randint(0, 255, (100, 80), dtype=np.uint8) - result = acc.resize_image_batch([img], (32, 32)) - assert len(result) == 1 - - -class TestComputeDctBatch: - def test_single_image(self) -> None: - acc = GPUAccelerator() - img = np.random.rand(32, 32).astype(np.float32) - result = acc.compute_dct_batch([img]) - assert len(result) == 1 - assert 
result[0].shape == (32, 32) - - def test_multiple_images(self) -> None: - acc = GPUAccelerator() - imgs = [np.random.rand(16, 16).astype(np.float32) for _ in range(3)] - result = acc.compute_dct_batch(imgs) - assert len(result) == 3 - - -class TestComputeSimilarityMatrix: - def test_empty_hashes(self) -> None: - acc = GPUAccelerator() - result = acc.compute_similarity_matrix([]) - assert result.size == 0 - - def test_identical_hashes(self) -> None: - acc = GPUAccelerator() - h = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float32) - result = acc.compute_similarity_matrix([h, h]) - assert result.shape == (2, 2) - assert result[0, 1] == pytest.approx(1.0) - assert result[1, 0] == pytest.approx(1.0) - - def test_different_hashes(self) -> None: - acc = GPUAccelerator() - h1 = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float32) - h2 = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.float32) - result = acc.compute_similarity_matrix([h1, h2]) - assert result.shape == (2, 2) - assert result[0, 1] < 0.5 - - -class TestBatchHammingDistance: - def test_identical_hashes(self) -> None: - acc = GPUAccelerator() - hashes = ["abcdef01", "abcdef01"] - result = acc.batch_hamming_distance(hashes, hashes) - assert result.shape == (2, 2) - assert result[0, 0] == pytest.approx(0.0) - assert result[1, 1] == pytest.approx(0.0) - - def test_different_hashes(self) -> None: - acc = GPUAccelerator() - h1 = ["ff000000"] - h2 = ["00ffffff"] - result = acc.batch_hamming_distance(h1, h2) - assert result.shape == (1, 1) - assert result[0, 0] > 0 - - -class TestGetAccelerator: - def test_returns_gpu_accelerator(self) -> None: - acc = get_accelerator() - assert isinstance(acc, GPUAccelerator) - - def test_returns_same_instance(self) -> None: - a1 = get_accelerator() - a2 = get_accelerator() - assert a1 is a2 - - -class TestComputePhashGpu: - def test_empty_list(self) -> None: - result = compute_phash_gpu([]) - assert result == [] - - def test_single_image(self) -> None: - img = np.random.randint(0, 
255, (100, 100, 3), dtype=np.uint8) - result = compute_phash_gpu([img], hash_size=8) - assert len(result) == 1 - assert isinstance(result[0], np.ndarray) - - def test_grayscale_image(self) -> None: - img = np.random.randint(0, 255, (100, 100), dtype=np.uint8) - result = compute_phash_gpu([img], hash_size=8) - assert len(result) == 1 - - -class TestComputeSimilarityMatrixGpu: - def test_empty(self) -> None: - result = compute_similarity_matrix_gpu([]) - assert result.size == 0 - - def test_hex_strings(self) -> None: - hashes = ["abcdef0123456789", "abcdef0123456789"] - result = compute_similarity_matrix_gpu( - cast(list[str | np.ndarray], hashes), hash_size=4 - ) - assert result.shape == (2, 2) - assert result[0, 1] == pytest.approx(1.0) - - def test_numpy_arrays(self) -> None: - h1 = np.array([1, 0, 1, 0], dtype=np.uint8) - h2 = np.array([1, 0, 1, 0], dtype=np.uint8) - result = compute_similarity_matrix_gpu([h1, h2], hash_size=2) - assert result.shape == (2, 2) - - def test_invalid_hex_string(self) -> None: - """Non-hex strings should trigger ValueError and return zeros.""" - result = compute_similarity_matrix_gpu(["gg", "hh"], hash_size=2) - assert result.shape == (2, 2) - - def test_all_invalid_hex(self) -> None: - """All invalid hex strings should all map to zeros.""" - result = compute_similarity_matrix_gpu(["zz", "xx"], hash_size=2) - assert result.shape == (2, 2) - - -class TestAcceleratorCPUMethods: - def test_resize_batch_cpu_grayscale(self) -> None: - acc = GPUAccelerator() - img = np.random.randint(0, 255, (100, 80), dtype=np.uint8) - result = acc._resize_batch_cpu([img], (32, 32)) - assert len(result) == 1 - - def test_dct_batch_cpu_multiple(self) -> None: - acc = GPUAccelerator() - imgs = [np.random.rand(16, 16).astype(np.float32) for _ in range(5)] - result = acc._dct_batch_cpu(imgs) - assert len(result) == 5 - - def test_similarity_matrix_cpu(self) -> None: - acc = GPUAccelerator() - h1 = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float32) - h2 = 
np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.float32) - matrix = np.vstack([h1, h2]) - result = acc._similarity_matrix_cpu(matrix, 2) - assert result.shape == (2, 2) - - def test_batch_hamming_cpu(self) -> None: - acc = GPUAccelerator() - arr1 = np.array([[1, 0, 1, 0]], dtype=np.float32) - arr2 = np.array([[1, 0, 1, 0]], dtype=np.float32) - result = acc._batch_hamming_cpu(arr1, arr2) - assert result.shape == (1, 1) - assert result[0, 0] == pytest.approx(0.0) - - -class TestAcceleratorTorchPath: - def test_try_cuda_no_torch(self) -> None: - acc = GPUAccelerator() - original_backend = acc.backend - with patch.dict("sys.modules", {"torch": None}): - result = acc._try_cuda() - assert isinstance(result, bool) - acc.backend = original_backend - - def test_try_rocm_no_torch(self) -> None: - acc = GPUAccelerator() - with patch.dict("sys.modules", {"torch": None}): - result = acc._try_rocm() - assert result is False - - def test_try_opencl_no_pyopencl(self) -> None: - acc = GPUAccelerator() - with patch.dict("sys.modules", {"pyopencl": None}): - result = acc._try_opencl() - assert result is False - - def test_setup_cpu(self) -> None: - acc = GPUAccelerator() - acc._setup_cpu() - assert acc.backend == AcceleratorType.CPU - - -class TestAcceleratorBranchSelection: - def test_resize_routes_to_cpu_when_cpu_backend(self) -> None: - acc = GPUAccelerator() - original = acc.backend - acc.backend = AcceleratorType.CPU - acc._torch = None - acc._cp = None - - img = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) - result = acc.resize_image_batch([img], (16, 16)) - assert len(result) == 1 - - acc.backend = original - - def test_dct_routes_to_cpu_when_cpu_backend(self) -> None: - acc = GPUAccelerator() - original = acc.backend - acc.backend = AcceleratorType.CPU - acc._torch = None - acc._cp = None - - img = np.random.rand(16, 16).astype(np.float32) - result = acc.compute_dct_batch([img]) - assert len(result) == 1 - - acc.backend = original - - def 
test_similarity_routes_to_cpu_when_cpu_backend(self) -> None: - acc = GPUAccelerator() - original = acc.backend - acc.backend = AcceleratorType.CPU - acc._torch = None - acc._cp = None - - h = np.array([1, 0, 1, 0], dtype=np.float32) - result = acc.compute_similarity_matrix([h, h]) - assert result.shape == (2, 2) - - acc.backend = original - - def test_hamming_routes_to_cpu_when_cpu_backend(self) -> None: - acc = GPUAccelerator() - original = acc.backend - acc.backend = AcceleratorType.CPU - acc._torch = None - acc._cp = None - - result = acc.batch_hamming_distance(["abcd"], ["abcd"]) - assert result.shape == (1, 1) - - acc.backend = original diff --git a/tests/dupfinder/test_images.py b/tests/dupfinder/test_images.py deleted file mode 100644 index 594b7d8..0000000 --- a/tests/dupfinder/test_images.py +++ /dev/null @@ -1,401 +0,0 @@ -"""Tests for morphic.dupfinder.images.""" - -from __future__ import annotations - -import os -from unittest.mock import patch - -import numpy as np -import pytest -from PIL import Image - -from morphic.dupfinder.images import ( - ImageDuplicateFinder, - ImageHasher, - ImageInfo, -) - - -# ── ImageInfo ────────────────────────────────────────────────────────────── - - -class TestImageInfoToDict: - def test_to_dict_keys(self) -> None: - info = ImageInfo( - path="/img.jpg", - width=100, - height=200, - file_size=999, - format="JPEG", - mode="RGB", - phash="abc", - ahash="def", - dhash="ghi", - ) - d = info.to_dict() - assert d["path"] == "/img.jpg" - assert d["width"] == 100 - assert d["height"] == 200 - assert d["file_size"] == 999 - assert d["format"] == "JPEG" - assert d["mode"] == "RGB" - assert d["phash"] == "abc" - assert d["ahash"] == "def" - assert d["dhash"] == "ghi" - - def test_to_dict_none_hashes(self) -> None: - info = ImageInfo(path="/x.jpg") - d = info.to_dict() - assert d["phash"] is None - assert d["ahash"] is None - assert d["dhash"] is None - - def test_defaults(self) -> None: - info = ImageInfo(path="/test.jpg") - 
assert info.path == "/test.jpg" - assert info.width == 0 - assert info.height == 0 - assert info.file_size == 0 - assert info.phash is None - - def test_custom_values(self) -> None: - info = ImageInfo( - path="/a.jpg", - width=1920, - height=1080, - format="JPEG", - file_size=5000, - phash="abc123", - ) - assert info.width == 1920 - assert info.format == "JPEG" - assert info.phash == "abc123" - - -# ── ImageHasher ──────────────────────────────────────────────────────────── - - -class TestImageHasher: - def test_default_hash_size(self) -> None: - hasher = ImageHasher() - assert hasher.hash_size == 16 - - def test_custom_hash_size(self) -> None: - hasher = ImageHasher(hash_size=8) - assert hasher.hash_size == 8 - - def test_compute_hashes_valid_image(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (100, 100), "red").save(str(img_path)) - - hasher = ImageHasher(hash_size=8) - info = hasher.compute_hashes(str(img_path)) - - assert info.path == str(img_path) - assert info.width == 100 - assert info.height == 100 - assert info.file_size > 0 - assert info.format == "JPEG" - assert info.mode == "RGB" - assert info.phash is not None - assert info.ahash is not None - assert info.dhash is not None - assert info.whash is not None - - def test_compute_hashes_rgba_image(self, tmp_path) -> None: - img_path = tmp_path / "rgba.png" - Image.new("RGBA", (50, 50), (255, 0, 0, 128)).save(str(img_path)) - - hasher = ImageHasher(hash_size=8) - info = hasher.compute_hashes(str(img_path)) - assert info.phash is not None - - def test_compute_hashes_palette_image(self, tmp_path) -> None: - img_path = tmp_path / "palette.png" - Image.new("P", (50, 50)).save(str(img_path)) - - hasher = ImageHasher(hash_size=8) - info = hasher.compute_hashes(str(img_path)) - assert info.phash is not None - - def test_compute_hashes_grayscale(self, tmp_path) -> None: - img_path = tmp_path / "gray.png" - Image.new("L", (50, 50), 128).save(str(img_path)) - - hasher = 
ImageHasher(hash_size=8) - info = hasher.compute_hashes(str(img_path)) - assert info.phash is not None - assert info.mode == "L" - - def test_compute_hashes_nonexistent(self) -> None: - hasher = ImageHasher(hash_size=8) - info = hasher.compute_hashes("/nonexistent/file.jpg") - assert info.phash is None - assert info.file_size == 0 - - def test_compute_hashes_corrupt_file(self, tmp_path) -> None: - img_path = tmp_path / "corrupt.jpg" - img_path.write_bytes(b"not an image") - - hasher = ImageHasher(hash_size=8) - info = hasher.compute_hashes(str(img_path)) - assert info.phash is None - - def test_cmyk_image(self, tmp_path) -> None: - img_path = tmp_path / "cmyk.jpg" - img = Image.new("CMYK", (50, 50), (0, 0, 0, 0)) - img.save(str(img_path)) - - hasher = ImageHasher(hash_size=8) - info = hasher.compute_hashes(str(img_path)) - assert info.phash is not None - - -# ── ImageDuplicateFinder ─────────────────────────────────────────────────── - - -class TestImageDuplicateFinder: - def test_init_defaults(self) -> None: - finder = ImageDuplicateFinder(use_gpu=False) - assert finder.similarity_threshold == 0.90 - assert finder.hash_type == "combined" - assert finder.use_gpu is False - - def test_find_images(self, tmp_path) -> None: - (tmp_path / "a.jpg").write_bytes(b"\xff\xd8\xff\xe0") - Image.new("RGB", (10, 10), "red").save(str(tmp_path / "b.png")) - (tmp_path / "c.txt").write_text("hello") - - finder = ImageDuplicateFinder(use_gpu=False) - files = finder.find_images(str(tmp_path)) - exts = {os.path.splitext(f)[1].lower() for f in files} - assert ".txt" not in exts - - def test_process_images(self, tmp_path) -> None: - for name in ["a.jpg", "b.jpg", "c.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - files = [str(tmp_path / n) for n in ["a.jpg", "b.jpg", "c.jpg"]] - infos = finder.process_images(files) - assert len(infos) == 3 - for info in infos.values(): - assert info.phash is not None - 
- def test_compute_similarity_identical(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(img_path)) - - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - info = finder.hasher.compute_hashes(str(img_path)) - similarity = finder.compute_similarity(info, info) - assert similarity == pytest.approx(1.0) - - def test_compute_similarity_different(self, tmp_path) -> None: - img1_path = tmp_path / "red.jpg" - img2_path = tmp_path / "noise.jpg" - Image.new("RGB", (50, 50), "red").save(str(img1_path)) - noise_arr = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) - Image.fromarray(noise_arr).save(str(img2_path)) - - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - info1 = finder.hasher.compute_hashes(str(img1_path)) - info2 = finder.hasher.compute_hashes(str(img2_path)) - similarity = finder.compute_similarity(info1, info2) - assert 0.0 <= similarity <= 1.0 - - def test_compute_similarity_no_hashes(self) -> None: - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - info1 = ImageInfo(path="/a.jpg") - info2 = ImageInfo(path="/b.jpg") - assert finder.compute_similarity(info1, info2) == 0.0 - - def test_compute_similarity_phash_only(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(img_path)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - hash_type="phash", - ) - info = finder.hasher.compute_hashes(str(img_path)) - similarity = finder.compute_similarity(info, info) - assert similarity == pytest.approx(1.0) - - def test_compute_similarity_ahash_only(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(img_path)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - hash_type="ahash", - ) - info = finder.hasher.compute_hashes(str(img_path)) - similarity = finder.compute_similarity(info, info) - assert similarity == pytest.approx(1.0) - - def 
test_compute_similarity_dhash_only(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(img_path)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - hash_type="dhash", - ) - info = finder.hasher.compute_hashes(str(img_path)) - similarity = finder.compute_similarity(info, info) - assert similarity == pytest.approx(1.0) - - def test_compute_similarity_whash_only(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(img_path)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - hash_type="whash", - ) - info = finder.hasher.compute_hashes(str(img_path)) - similarity = finder.compute_similarity(info, info) - assert similarity == pytest.approx(1.0) - - def test_find_duplicates_identical_images(self, tmp_path) -> None: - for name in ["a.jpg", "b.jpg", "c.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.9, - ) - files = [str(tmp_path / n) for n in ["a.jpg", "b.jpg", "c.jpg"]] - finder.process_images(files) - groups = finder.find_duplicates() - assert len(groups) >= 1 - - def test_find_duplicates_fast_identical(self, tmp_path) -> None: - for name in ["a.jpg", "b.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.9, - ) - files = [str(tmp_path / n) for n in ["a.jpg", "b.jpg"]] - finder.process_images(files) - groups = finder.find_duplicates_fast() - assert len(groups) >= 1 - - def test_find_duplicates_no_images(self) -> None: - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - groups = finder.find_duplicates() - assert groups == [] - - def test_find_near_duplicates_empty(self) -> None: - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - groups = finder._find_near_duplicates([]) - assert groups == [] 
- - def test_find_duplicates_fast_with_near_dups(self, tmp_path) -> None: - img1_path = tmp_path / "a.jpg" - img2_path = tmp_path / "b.jpg" - - arr = np.ones((50, 50, 3), dtype=np.uint8) * 128 - Image.fromarray(arr).save(str(img1_path)) - arr[25, 25] = [255, 0, 0] - Image.fromarray(arr).save(str(img2_path)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.8, - ) - files = [str(img1_path), str(img2_path)] - finder.process_images(files) - groups = finder.find_duplicates_fast() - assert isinstance(groups, list) - - @patch("morphic.dupfinder.images._compute_similarity_matrix_gpu") - @patch("morphic.dupfinder.images._gpu_available", True) - def test_find_near_duplicates_gpu(self, mock_sim, tmp_path) -> None: - for name in ["a.jpg", "b.jpg", "c.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.9, - ) - files = [str(tmp_path / n) for n in ["a.jpg", "b.jpg", "c.jpg"]] - finder.process_images(files) - finder.use_gpu = True - - n = len(finder.image_infos) - sim_matrix = np.ones((n, n), dtype=np.float32) - mock_sim.return_value = sim_matrix - - paths = list(finder.image_infos.keys()) - result = finder._find_near_duplicates_gpu(paths) - assert isinstance(result, list) - - @patch("morphic.dupfinder.images._compute_similarity_matrix_gpu") - @patch("morphic.dupfinder.images._gpu_available", True) - def test_find_near_duplicates_gpu_fallback( - self, mock_sim, tmp_path - ) -> None: - for name in ["a.jpg", "b.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.9, - ) - files = [str(tmp_path / n) for n in ["a.jpg", "b.jpg"]] - finder.process_images(files) - finder.use_gpu = True - - mock_sim.side_effect = RuntimeError("GPU failed") - paths = list(finder.image_infos.keys()) - result = finder._find_near_duplicates_gpu(paths) 
- assert isinstance(result, list) - - def test_find_near_duplicates_gpu_empty(self) -> None: - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - result = finder._find_near_duplicates_gpu([]) - assert result == [] - - def test_find_duplicates_uses_fast_for_large(self, tmp_path) -> None: - finder = ImageDuplicateFinder(use_gpu=False, hash_size=8) - for i in range(105): - path = f"/fake/img_{i}.jpg" - finder.image_infos[path] = ImageInfo( - path=path, - phash=f"hash{i:04d}", - ahash=f"ahash{i:04d}", - dhash=f"dhash{i:04d}", - ) - - with patch.object( - finder, "find_duplicates_fast", return_value=[] - ) as mock: - finder.find_duplicates() - mock.assert_called_once() - - def test_find_duplicates_regular_path(self, tmp_path) -> None: - for name in ["a.jpg", "b.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - finder = ImageDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.9, - ) - files = [str(tmp_path / n) for n in ["a.jpg", "b.jpg"]] - finder.process_images(files) - groups = finder.find_duplicates() - assert isinstance(groups, list) diff --git a/tests/dupfinder/test_scanner.py b/tests/dupfinder/test_scanner.py deleted file mode 100644 index 3615fb9..0000000 --- a/tests/dupfinder/test_scanner.py +++ /dev/null @@ -1,382 +0,0 @@ -"""Tests for morphic.dupfinder.scanner.""" - -from __future__ import annotations - -import time - -from PIL import Image - -from morphic.dupfinder.images import ImageInfo -from morphic.dupfinder.scanner import ( - ScanJob, - _calculate_space_savings, - _format_image_groups, - _format_video_groups, - _run_scan, - get_job, - start_job, -) -from morphic.dupfinder.videos import VideoInfo - - -# ── ScanJob ──────────────────────────────────────────────────────────────── - - -class TestScanJob: - def test_defaults(self) -> None: - job = ScanJob(id="test", folder="/tmp", scan_type="both") - assert job.status == "pending" - assert job.progress == 0.0 - assert job.image_groups == [] - assert 
job.video_groups == [] - - def test_custom_thresholds(self) -> None: - job = ScanJob( - id="test", - folder="/tmp", - scan_type="images", - image_threshold=0.95, - video_threshold=0.80, - ) - assert job.image_threshold == 0.95 - assert job.video_threshold == 0.80 - - -# ── get_job / start_job ──────────────────────────────────────────────────── - - -class TestGetJob: - def test_nonexistent_job(self) -> None: - result = get_job("nonexistent-id-xyz") - assert result is None - - -class TestStartJob: - def test_returns_job_id(self, tmp_path) -> None: - job_id = start_job( - folder=str(tmp_path), - scan_type="images", - ) - assert isinstance(job_id, str) - assert len(job_id) == 8 - - def test_job_is_retrievable(self, tmp_path) -> None: - job_id = start_job( - folder=str(tmp_path), - scan_type="images", - ) - time.sleep(0.2) - job = get_job(job_id) - assert job is not None - assert job.folder == str(tmp_path) - assert job.scan_type == "images" - - def test_custom_thresholds(self, tmp_path) -> None: - job_id = start_job( - folder=str(tmp_path), - scan_type="both", - image_threshold=0.95, - video_threshold=0.80, - ) - job = get_job(job_id) - assert job is not None - assert job.image_threshold == 0.95 - assert job.video_threshold == 0.80 - - -# ── _run_scan ────────────────────────────────────────────────────────────── - - -class TestRunScan: - def test_scan_empty_folder(self, tmp_path) -> None: - job = ScanJob( - id="test-empty", - folder=str(tmp_path), - scan_type="images", - ) - _run_scan(job) - assert job.status == "done" - assert job.progress == 1.0 - assert len(job.image_groups) == 0 - assert job.finished_at > 0 - - def test_scan_videos_type(self, tmp_path) -> None: - job = ScanJob( - id="test-vid", - folder=str(tmp_path), - scan_type="videos", - ) - _run_scan(job) - assert job.status == "done" - - def test_scan_both_type(self, tmp_path) -> None: - job = ScanJob( - id="test-both", - folder=str(tmp_path), - scan_type="both", - ) - _run_scan(job) - assert job.status == 
"done" - assert "Done!" in job.message - - def test_scan_nonexistent_folder(self) -> None: - job = ScanJob( - id="test-bad", - folder="/nonexistent_folder_xyz", - scan_type="images", - ) - _run_scan(job) - assert job.status in ("done", "error") - - def test_scan_folder_with_images(self, tmp_path) -> None: - for name in ["a.jpg", "b.jpg", "c.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - job = ScanJob( - id="test-imgs", - folder=str(tmp_path), - scan_type="images", - ) - _run_scan(job) - - assert job.status == "done" - assert job.total_files_found >= 3 - assert job.total_files_processed >= 3 - assert job.finished_at > job.started_at - assert "Done!" in job.message - - def test_scan_folder_with_duplicates(self, tmp_path) -> None: - for name in ["a.jpg", "b.jpg"]: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / name)) - - job = ScanJob( - id="test-dups", - folder=str(tmp_path), - scan_type="images", - image_threshold=0.9, - ) - _run_scan(job) - - assert job.status == "done" - assert len(job.image_groups) >= 1 - - def test_scan_both_image_and_video(self, tmp_path) -> None: - Image.new("RGB", (50, 50), "red").save(str(tmp_path / "img.jpg")) - (tmp_path / "vid.mp4").write_bytes(b"\x00" * 100) - - job = ScanJob( - id="test-both", - folder=str(tmp_path), - scan_type="both", - ) - _run_scan(job) - - assert job.status == "done" - assert job.progress == 1.0 - - def test_scan_progress_tracking(self, tmp_path) -> None: - Image.new("RGB", (10, 10), "red").save(str(tmp_path / "a.jpg")) - - job = ScanJob( - id="test-prog", - folder=str(tmp_path), - scan_type="images", - ) - _run_scan(job) - - assert job.progress == 1.0 - assert job.total_files_found >= 1 - assert job.space_savings >= 0 - - def test_scan_message_updates(self, tmp_path) -> None: - for i in range(3): - Image.new("RGB", (50, 50), "blue").save( - str(tmp_path / f"img{i}.jpg"), - ) - - job = ScanJob( - id="test-msg", - folder=str(tmp_path), - scan_type="images", - ) - _run_scan(job) - 
- assert "Done!" in job.message or "Error" in job.message - - -# ── _format_image_groups ─────────────────────────────────────────────────── - - -class TestFormatImageGroups: - def test_empty_groups(self) -> None: - result = _format_image_groups([], {}) - assert result == [] - - def test_formats_group(self) -> None: - infos = { - "/a.jpg": ImageInfo( - path="/a.jpg", - width=1920, - height=1080, - format="JPEG", - file_size=100000, - ), - "/b.jpg": ImageInfo( - path="/b.jpg", - width=1920, - height=1080, - format="JPEG", - file_size=50000, - ), - } - groups = [[("/a.jpg", 1.0), ("/b.jpg", 0.95)]] - result = _format_image_groups(groups, infos) - assert len(result) == 1 - assert len(result[0]) == 2 - assert result[0][0]["path"] == "/a.jpg" - assert result[0][0]["type"] == "image" - assert result[0][1]["similarity"] == 95.0 - - def test_single_item_group_filtered(self) -> None: - infos = { - "/a.jpg": ImageInfo( - path="/a.jpg", - width=100, - height=100, - format="JPEG", - file_size=1000, - ), - } - groups = [[("/a.jpg", 1.0)]] - result = _format_image_groups(groups, infos) - assert result == [] - - def test_missing_info_skipped(self) -> None: - infos = { - "/a.jpg": ImageInfo( - path="/a.jpg", - width=100, - height=100, - format="JPEG", - file_size=1000, - ), - } - groups = [[("/a.jpg", 1.0), ("/b.jpg", 0.95)]] - result = _format_image_groups(groups, infos) - assert result == [] - - def test_three_items(self) -> None: - infos = { - f"/{n}.jpg": ImageInfo( - path=f"/{n}.jpg", - width=100, - height=100, - format="JPEG", - file_size=i * 1000, - ) - for i, n in enumerate(["a", "b", "c"], 1) - } - groups = [[("/a.jpg", 1.0), ("/b.jpg", 0.95), ("/c.jpg", 0.92)]] - result = _format_image_groups(groups, infos) - assert len(result) == 1 - assert len(result[0]) == 3 - - -# ── _format_video_groups ─────────────────────────────────────────────────── - - -class TestFormatVideoGroups: - def test_empty_groups(self) -> None: - result = _format_video_groups([], {}) - assert result 
== [] - - def test_formats_group(self) -> None: - infos = { - "/a.mp4": VideoInfo( - path="/a.mp4", - width=1920, - height=1080, - duration=120.0, - fps=30.0, - file_size=5000000, - ), - "/b.mp4": VideoInfo( - path="/b.mp4", - width=1920, - height=1080, - duration=120.0, - fps=30.0, - file_size=3000000, - ), - } - groups = [[("/a.mp4", 1.0), ("/b.mp4", 0.88)]] - result = _format_video_groups(groups, infos) - assert len(result) == 1 - assert len(result[0]) == 2 - assert result[0][0]["path"] == "/a.mp4" - assert result[0][0]["type"] == "video" - assert "duration_formatted" in result[0][0] - - def test_missing_info_skipped(self) -> None: - infos = { - "/a.mp4": VideoInfo( - path="/a.mp4", - width=1920, - height=1080, - duration=60.0, - fps=30.0, - file_size=5000000, - ), - } - groups = [[("/a.mp4", 1.0), ("/b.mp4", 0.88)]] - result = _format_video_groups(groups, infos) - assert result == [] - - -# ── _calculate_space_savings ─────────────────────────────────────────────── - - -class TestCalculateSpaceSavings: - def test_no_groups(self) -> None: - job = ScanJob(id="x", folder="/tmp", scan_type="both") - assert _calculate_space_savings(job) == 0 - - def test_with_groups(self) -> None: - job = ScanJob(id="x", folder="/tmp", scan_type="both") - job.image_groups = [ - [ - {"file_size": 100000, "path": "/a.jpg"}, - {"file_size": 50000, "path": "/b.jpg"}, - ] - ] - savings = _calculate_space_savings(job) - assert savings == 50000 - - def test_multiple_groups(self) -> None: - job = ScanJob(id="x", folder="/tmp", scan_type="both") - job.image_groups = [ - [ - {"file_size": 10000, "path": "/a.jpg"}, - {"file_size": 5000, "path": "/b.jpg"}, - ], - ] - job.video_groups = [ - [ - {"file_size": 20000, "path": "/a.mp4"}, - {"file_size": 15000, "path": "/b.mp4"}, - ], - ] - savings = _calculate_space_savings(job) - assert savings == 5000 + 15000 - - def test_three_files_in_group(self) -> None: - job = ScanJob(id="x", folder="/tmp", scan_type="both") - job.image_groups = [ - [ - 
{"file_size": 10000, "path": "/a.jpg"}, - {"file_size": 8000, "path": "/b.jpg"}, - {"file_size": 5000, "path": "/c.jpg"}, - ], - ] - savings = _calculate_space_savings(job) - assert savings == 13000 diff --git a/tests/dupfinder/test_videos.py b/tests/dupfinder/test_videos.py deleted file mode 100644 index c2ac8eb..0000000 --- a/tests/dupfinder/test_videos.py +++ /dev/null @@ -1,553 +0,0 @@ -"""Tests for morphic.dupfinder.videos.""" - -from __future__ import annotations - -import os -from contextlib import contextmanager -from unittest.mock import MagicMock, patch - -import numpy as np -import pytest - -from morphic.dupfinder.videos import ( - VideoDuplicateFinder, - VideoHasher, - VideoInfo, -) - - -# ── VideoInfo ────────────────────────────────────────────────────────────── - - -class TestVideoInfoToDict: - def test_to_dict_keys(self) -> None: - info = VideoInfo( - path="/v.mp4", - duration=120.5, - fps=30.0, - frame_count=3615, - width=1920, - height=1080, - file_size=50000000, - average_hash="abc123", - ) - d = info.to_dict() - assert d["path"] == "/v.mp4" - assert d["duration"] == 120.5 - assert d["fps"] == 30.0 - assert d["frame_count"] == 3615 - assert d["width"] == 1920 - assert d["height"] == 1080 - assert d["file_size"] == 50000000 - assert d["average_hash"] == "abc123" - - def test_to_dict_defaults(self) -> None: - info = VideoInfo(path="/x.avi") - d = info.to_dict() - assert d["duration"] == 0.0 - assert d["average_hash"] is None - - def test_frame_hashes_default_empty(self) -> None: - info = VideoInfo(path="/v.mp4") - assert info.frame_hashes == [] - - def test_defaults(self) -> None: - info = VideoInfo(path="/test.mp4") - assert info.path == "/test.mp4" - assert info.width == 0 - assert info.height == 0 - assert info.duration == 0.0 - assert info.fps == 0.0 - - def test_custom_values(self) -> None: - info = VideoInfo( - path="/v.mp4", - width=3840, - height=2160, - duration=120.5, - fps=60.0, - file_size=10000000, - ) - assert info.duration == 120.5 - 
assert info.fps == 60.0 - - -# ── VideoHasher ──────────────────────────────────────────────────────────── - - -class TestVideoHasher: - def test_default_params(self) -> None: - hasher = VideoHasher() - assert hasher.num_frames == 10 - assert hasher.hash_size == 16 - - def test_custom_params(self) -> None: - hasher = VideoHasher(num_frames=5, hash_size=8) - assert hasher.num_frames == 5 - assert hasher.hash_size == 8 - - def test_compute_frame_hash(self) -> None: - hasher = VideoHasher(hash_size=8) - frame = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) - result = hasher.compute_frame_hash(frame) - assert isinstance(result, str) - assert len(result) > 0 - - def test_compute_frame_hash_invalid(self) -> None: - hasher = VideoHasher(hash_size=8) - result = hasher.compute_frame_hash(np.array([])) - assert result == "" or isinstance(result, str) - - def test_extract_frames_nonexistent(self) -> None: - hasher = VideoHasher(hash_size=8) - frames, info = hasher.extract_frames("/nonexistent/video.mp4") - assert frames == [] - assert info.path == "/nonexistent/video.mp4" - - def test_compute_video_hashes_nonexistent(self) -> None: - hasher = VideoHasher(hash_size=8) - info = hasher.compute_video_hashes("/nonexistent/video.mp4") - assert info.frame_hashes == [] - assert info.average_hash is None - - def test_compute_video_hashes_with_mock_frames(self) -> None: - hasher = VideoHasher(num_frames=3, hash_size=8) - frames = [ - np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) - for _ in range(3) - ] - hashes = [hasher.compute_frame_hash(f) for f in frames] - assert all(isinstance(h, str) and len(h) > 0 for h in hashes) - - def test_manual_frame_hash_building(self) -> None: - hasher = VideoHasher(num_frames=3, hash_size=8) - frames = [ - np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) - for _ in range(3) - ] - info = VideoInfo(path="/test.mp4", file_size=1000) - for frame in frames: - h = hasher.compute_frame_hash(frame) - if h: - 
info.frame_hashes.append(h) - assert len(info.frame_hashes) == 3 - - -# ── VideoDuplicateFinder ─────────────────────────────────────────────────── - - -class TestVideoDuplicateFinder: - def test_init_defaults(self) -> None: - finder = VideoDuplicateFinder(use_gpu=False) - assert finder.similarity_threshold == 0.85 - assert finder.use_gpu is False - - def test_find_videos(self, tmp_path) -> None: - (tmp_path / "a.mp4").write_bytes(b"\x00" * 100) - (tmp_path / "b.avi").write_bytes(b"\x00" * 100) - (tmp_path / "c.txt").write_text("hello") - - finder = VideoDuplicateFinder(use_gpu=False) - files = finder.find_videos(str(tmp_path)) - exts = {os.path.splitext(f)[1].lower() for f in files} - assert ".txt" not in exts - - def test_find_videos_empty_folder(self, tmp_path) -> None: - finder = VideoDuplicateFinder(use_gpu=False) - result = finder.find_videos(str(tmp_path)) - assert result == [] - - def test_compute_similarity_no_hashes(self) -> None: - finder = VideoDuplicateFinder(use_gpu=False) - info1 = VideoInfo(path="/a.mp4") - info2 = VideoInfo(path="/b.mp4") - assert finder.compute_similarity(info1, info2) == 0.0 - - def test_compute_similarity_identical_hashes(self) -> None: - finder = VideoDuplicateFinder(use_gpu=False, hash_size=8) - hasher = VideoHasher(hash_size=8) - - frame = np.zeros((100, 100, 3), dtype=np.uint8) - h = hasher.compute_frame_hash(frame) - - info1 = VideoInfo(path="/a.mp4", frame_hashes=[h, h]) - info2 = VideoInfo(path="/b.mp4", frame_hashes=[h, h]) - similarity = finder.compute_similarity(info1, info2) - assert similarity == pytest.approx(1.0) - - def test_compute_similarity_different_frames(self) -> None: - hasher = VideoHasher(hash_size=8) - finder = VideoDuplicateFinder(use_gpu=False, hash_size=8) - - frame1 = np.zeros((64, 64, 3), dtype=np.uint8) - frame2 = np.ones((64, 64, 3), dtype=np.uint8) * 255 - h1 = hasher.compute_frame_hash(frame1) - h2 = hasher.compute_frame_hash(frame2) - - info1 = VideoInfo(path="/a.mp4", frame_hashes=[h1]) - 
info2 = VideoInfo(path="/b.mp4", frame_hashes=[h2]) - - sim = finder.compute_similarity(info1, info2) - assert 0.0 <= sim <= 1.0 - - def test_find_duplicates_empty(self) -> None: - finder = VideoDuplicateFinder(use_gpu=False) - groups = finder.find_duplicates() - assert groups == [] - - def test_find_duplicates_cpu_with_infos(self) -> None: - finder = VideoDuplicateFinder(use_gpu=False, hash_size=8) - hasher = VideoHasher(hash_size=8) - - frame = np.zeros((100, 100, 3), dtype=np.uint8) - h = hasher.compute_frame_hash(frame) - - finder.video_infos = { - "/a.mp4": VideoInfo( - path="/a.mp4", - frame_hashes=[h], - file_size=1000, - ), - "/b.mp4": VideoInfo( - path="/b.mp4", - frame_hashes=[h], - file_size=1000, - ), - } - groups = finder.find_duplicates() - assert len(groups) >= 1 - - def test_find_duplicates_cpu(self) -> None: - hasher = VideoHasher(hash_size=8) - finder = VideoDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.9, - ) - - frame = np.zeros((64, 64, 3), dtype=np.uint8) - h = hasher.compute_frame_hash(frame) - - finder.video_infos = { - "/a.mp4": VideoInfo( - path="/a.mp4", - frame_hashes=[h], - file_size=1000, - ), - "/b.mp4": VideoInfo( - path="/b.mp4", - frame_hashes=[h], - file_size=1000, - ), - "/c.mp4": VideoInfo( - path="/c.mp4", - frame_hashes=[h], - file_size=1000, - ), - } - - groups = finder._find_duplicates_cpu(list(finder.video_infos.keys())) - assert len(groups) >= 1 - - def test_find_duplicates_cpu_no_match(self) -> None: - hasher = VideoHasher(hash_size=8) - finder = VideoDuplicateFinder( - use_gpu=False, - hash_size=8, - similarity_threshold=0.99, - ) - - frames = [ - np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) - for _ in range(3) - ] - - for i, frame in enumerate(frames): - h = hasher.compute_frame_hash(frame) - finder.video_infos[f"/v{i}.mp4"] = VideoInfo( - path=f"/v{i}.mp4", - frame_hashes=[h], - file_size=1000, - ) - - groups = finder._find_duplicates_cpu(list(finder.video_infos.keys())) - assert 
isinstance(groups, list) - - def test_process_videos_empty(self) -> None: - finder = VideoDuplicateFinder(use_gpu=False) - result = finder.process_videos([]) - assert result == {} - - @patch("morphic.dupfinder.videos._compute_similarity_matrix_gpu") - @patch("morphic.dupfinder.videos._gpu_available", True) - def test_find_duplicates_gpu_path(self, mock_sim) -> None: - hasher = VideoHasher(hash_size=8) - finder = VideoDuplicateFinder(use_gpu=False, hash_size=8) - - frame = np.zeros((64, 64, 3), dtype=np.uint8) - h = hasher.compute_frame_hash(frame) - - finder.video_infos = { - "/a.mp4": VideoInfo(path="/a.mp4", frame_hashes=[h]), - "/b.mp4": VideoInfo(path="/b.mp4", frame_hashes=[h]), - } - - finder.use_gpu = True - sim_matrix = np.ones((2, 2), dtype=np.float32) - mock_sim.return_value = sim_matrix - - result = finder._find_duplicates_gpu(list(finder.video_infos.keys())) - assert isinstance(result, list) - - @patch("morphic.dupfinder.videos._compute_similarity_matrix_gpu") - @patch("morphic.dupfinder.videos._gpu_available", True) - def test_find_duplicates_gpu_fallback(self, mock_sim) -> None: - hasher = VideoHasher(hash_size=8) - finder = VideoDuplicateFinder(use_gpu=False, hash_size=8) - - frame = np.zeros((64, 64, 3), dtype=np.uint8) - h = hasher.compute_frame_hash(frame) - - finder.video_infos = { - "/a.mp4": VideoInfo(path="/a.mp4", frame_hashes=[h]), - "/b.mp4": VideoInfo(path="/b.mp4", frame_hashes=[h]), - } - finder.use_gpu = True - - mock_sim.side_effect = RuntimeError("GPU failed") - result = finder._find_duplicates_gpu(list(finder.video_infos.keys())) - assert isinstance(result, list) - - -# ── Video extraction (cv2) ───────────────────────────────────────────────── - - -@contextmanager -def _noop_ctx(): - yield - - -_PATCH_GETSIZE = patch( - "morphic.dupfinder.videos.os.path.getsize", return_value=1024 -) -_PATCH_SUPPRESS = patch( - "morphic.dupfinder.videos.suppress_stderr", - side_effect=_noop_ctx, -) - - -class TestVideoExtraction: - @_PATCH_SUPPRESS 
- @_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.cvtColor") - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_extract_frames_success( - self, - mock_vc_cls, - mock_cvt, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = True - - def get_side_effect(prop): - mapping = {5: 30.0, 7: 300, 3: 640, 4: 480} - return mapping.get(prop, 0) - - mock_cap.get.side_effect = get_side_effect - - frame = np.zeros((480, 640, 3), dtype=np.uint8) - mock_cap.read.return_value = (True, frame) - mock_cvt.return_value = frame - - hasher = VideoHasher(num_frames=3, hash_size=8) - frames, info = hasher.extract_frames("/test/video.mp4") - - assert info.fps == 30.0 - assert info.frame_count == 300 - assert info.width == 640 - assert info.height == 480 - assert len(frames) > 0 - mock_cap.release.assert_called_once() - - @_PATCH_SUPPRESS - @_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_extract_frames_not_opened( - self, - mock_vc_cls, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = False - - hasher = VideoHasher(hash_size=8) - frames, info = hasher.extract_frames("/test/video.mp4") - assert frames == [] - - @_PATCH_SUPPRESS - @_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_extract_frames_zero_frame_count( - self, - mock_vc_cls, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = True - - def get_side_effect(prop): - mapping = {5: 30.0, 7: 0, 3: 640, 4: 480} - return mapping.get(prop, 0) - - mock_cap.get.side_effect = get_side_effect - - hasher = VideoHasher(hash_size=8) - frames, info = hasher.extract_frames("/test/short.mp4") - assert frames == [] - mock_cap.release.assert_called_once() - - @_PATCH_SUPPRESS - @_PATCH_GETSIZE - 
@patch("morphic.dupfinder.videos.cv2.cvtColor") - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_extract_frames_read_fails( - self, - mock_vc_cls, - mock_cvt, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = True - - def get_side_effect(prop): - mapping = {5: 30.0, 7: 100, 3: 320, 4: 240} - return mapping.get(prop, 0) - - mock_cap.get.side_effect = get_side_effect - mock_cap.read.return_value = (False, None) - - hasher = VideoHasher(num_frames=3, hash_size=8) - frames, info = hasher.extract_frames("/test/video.mp4") - assert frames == [] - - -class TestComputeVideoHashes: - @_PATCH_SUPPRESS - @_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.cvtColor") - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_full_pipeline( - self, - mock_vc_cls, - mock_cvt, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = True - - def get_side_effect(prop): - return {5: 30.0, 7: 300, 3: 64, 4: 64}.get(prop, 0) - - mock_cap.get.side_effect = get_side_effect - - frame = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) - mock_cap.read.return_value = (True, frame) - mock_cvt.return_value = frame - - hasher = VideoHasher(num_frames=3, hash_size=8) - info = hasher.compute_video_hashes("/test/video.mp4") - - assert len(info.frame_hashes) > 0 - assert info.average_hash is not None - - @_PATCH_SUPPRESS - @_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_no_frames_extracted( - self, - mock_vc_cls, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = False - - hasher = VideoHasher(hash_size=8) - info = hasher.compute_video_hashes("/test/bad_video.mp4") - assert info.frame_hashes == [] - assert info.average_hash is None - - -class TestVideoProcessing: - @_PATCH_SUPPRESS - 
@_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.cvtColor") - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_process_videos_with_mocked_cv2( - self, - mock_vc_cls, - mock_cvt, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = True - - def get_side_effect(prop): - return {5: 30.0, 7: 100, 3: 64, 4: 64}.get(prop, 0) - - mock_cap.get.side_effect = get_side_effect - - frame = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) - mock_cap.read.return_value = (True, frame) - mock_cvt.return_value = frame - - finder = VideoDuplicateFinder( - use_gpu=False, - hash_size=8, - num_workers=1, - ) - result = finder.process_videos(["/test/a.mp4", "/test/b.mp4"]) - assert len(result) >= 0 - - @_PATCH_SUPPRESS - @_PATCH_GETSIZE - @patch("morphic.dupfinder.videos.cv2.cvtColor") - @patch("morphic.dupfinder.videos.cv2.VideoCapture") - def test_duration_calculation( - self, - mock_vc_cls, - mock_cvt, - _mock_gs, - _mock_ss, - ) -> None: - mock_cap = MagicMock() - mock_vc_cls.return_value = mock_cap - mock_cap.isOpened.return_value = True - - def get_side_effect(prop): - return {5: 25.0, 7: 250, 3: 320, 4: 240}.get(prop, 0) - - mock_cap.get.side_effect = get_side_effect - - frame = np.zeros((240, 320, 3), dtype=np.uint8) - mock_cap.read.return_value = (True, frame) - mock_cvt.return_value = frame - - hasher = VideoHasher(num_frames=2, hash_size=8) - frames, info = hasher.extract_frames("/test/video.avi") - - assert info.duration == pytest.approx(10.0) diff --git a/tests/frontend/__init__.py b/tests/frontend/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/frontend/test_app.py b/tests/frontend/test_app.py deleted file mode 100644 index 9616b82..0000000 --- a/tests/frontend/test_app.py +++ /dev/null @@ -1,1023 +0,0 @@ -"""Tests for morphic.frontend — app factory and routes.""" - -from __future__ import annotations - -import importlib -import json 
-import os -import time - -from PIL import Image - -from morphic.frontend.app import create_app - - -# ── App factory ──────────────────────────────────────────────────────────── - - -class TestCreateApp: - def test_creates_flask_app(self) -> None: - app = create_app() - assert app is not None - assert app.name == "morphic.frontend.app" - - def test_initial_folder_config(self) -> None: - app = create_app(initial_folder="/test/path") - assert app.config["INITIAL_FOLDER"] == "/test/path" - - def test_no_initial_folder(self) -> None: - app = create_app() - assert app.config["INITIAL_FOLDER"] == "" - - -# ── __main__.py ──────────────────────────────────────────────────────────── - - -class TestMain: - def test_main_module_exists(self) -> None: - spec = importlib.util.find_spec("morphic.frontend.__main__") - assert spec is not None - - -# ── Index ────────────────────────────────────────────────────────────────── - - -class TestIndexRoute: - def test_returns_html(self, client) -> None: - resp = client.get("/") - assert resp.status_code == 200 - assert b"Morphic" in resp.data - - def test_has_tabs(self, client) -> None: - resp = client.get("/") - assert b"Converter" in resp.data - assert b"Dupfinder" in resp.data - assert b"Inspector" in resp.data - assert b"Resizer" in resp.data - assert b"Organizer" in resp.data - - def test_no_cache_headers(self, client) -> None: - resp = client.get("/") - assert "no-cache" in resp.headers.get("Cache-Control", "") - - -# ── Browse ───────────────────────────────────────────────────────────────── - - -class TestBrowseRoute: - def test_browse_home(self, client) -> None: - resp = client.get("/api/browse") - assert resp.status_code == 200 - data = resp.get_json() - assert "current" in data - assert "entries" in data - - def test_browse_specific_dir(self, client, tmp_path) -> None: - sub = tmp_path / "testdir" - sub.mkdir() - resp = client.get(f"/api/browse?path={tmp_path}") - assert resp.status_code == 200 - data = resp.get_json() - 
assert data["current"] == str(tmp_path) - names = [e["name"] for e in data["entries"]] - assert "testdir" in names - - def test_browse_invalid_dir(self, client) -> None: - resp = client.get("/api/browse?path=/nonexistent_xyz_path") - assert resp.status_code == 400 - - def test_browse_parent(self, client, tmp_path) -> None: - resp = client.get(f"/api/browse?path={tmp_path}") - data = resp.get_json() - assert data["parent"] is not None or data["parent"] is None - - def test_native_browse_returns_json(self, client) -> None: - resp = client.post( - "/api/browse/native", - data=json.dumps({}), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert "folder" in data - - def test_system_info(self, client) -> None: - resp = client.get("/api/system_info") - assert resp.status_code == 200 - data = resp.get_json() - assert "ffmpeg" in data - - def test_browse_hidden_dirs_excluded(self, client, tmp_path) -> None: - hidden = tmp_path / ".hidden" - hidden.mkdir() - visible = tmp_path / "visible" - visible.mkdir() - - resp = client.get(f"/api/browse?path={tmp_path}") - data = resp.get_json() - names = [e["name"] for e in data["entries"]] - assert "visible" in names - assert ".hidden" not in names - - def test_browse_permission_error(self, client) -> None: - resp = client.get("/api/browse?path=/root") - assert resp.status_code in (200, 400, 500) - - def test_browse_tilde_expansion(self, client) -> None: - resp = client.get("/api/browse?path=~") - assert resp.status_code == 200 - data = resp.get_json() - assert "current" in data - - -# ── Thumbnail ────────────────────────────────────────────────────────────── - - -class TestThumbnailRoute: - def test_nonexistent_file(self, client) -> None: - resp = client.get("/api/thumbnail?path=/nonexistent/file.jpg") - assert resp.status_code == 404 - - def test_no_path_param(self, client) -> None: - resp = client.get("/api/thumbnail") - assert resp.status_code == 404 - - def 
test_valid_image(self, client, test_image) -> None: - resp = client.get(f"/api/thumbnail?path={test_image}") - assert resp.status_code == 200 - assert resp.content_type == "image/jpeg" - - def test_forbidden_extension(self, client, tmp_path) -> None: - txt = tmp_path / "test.txt" - txt.write_text("hello") - resp = client.get(f"/api/thumbnail?path={txt}") - assert resp.status_code in (403, 500) - - def test_rgba_image_thumbnail(self, client, tmp_path) -> None: - img_path = tmp_path / "rgba.png" - Image.new("RGBA", (100, 100), (255, 0, 0, 128)).save(str(img_path)) - - resp = client.get(f"/api/thumbnail?path={img_path}") - assert resp.status_code == 200 - assert resp.content_type == "image/jpeg" - - def test_palette_image_thumbnail(self, client, tmp_path) -> None: - img_path = tmp_path / "palette.gif" - Image.new("P", (100, 100)).save(str(img_path)) - - resp = client.get(f"/api/thumbnail?path={img_path}") - assert resp.status_code == 200 - - def test_thumbnail_video_file(self, client, tmp_path) -> None: - vid = tmp_path / "test.mp4" - vid.write_bytes(b"\x00" * 100) - - resp = client.get(f"/api/thumbnail?path={vid}") - assert resp.status_code in (200, 404, 500) - - -# ── Media ────────────────────────────────────────────────────────────────── - - -class TestMediaRoute: - def test_nonexistent_file(self, client) -> None: - resp = client.get("/api/media?path=/nonexistent/file.jpg") - assert resp.status_code == 404 - - def test_valid_image(self, client, test_image) -> None: - resp = client.get(f"/api/media?path={test_image}") - assert resp.status_code == 200 - - def test_forbidden_extension(self, client, tmp_path) -> None: - txt = tmp_path / "test.txt" - txt.write_text("hello") - resp = client.get(f"/api/media?path={txt}") - assert resp.status_code == 403 - - def test_media_no_path(self, client) -> None: - resp = client.get("/api/media") - assert resp.status_code == 404 - - def test_media_empty_path(self, client) -> None: - resp = client.get("/api/media?path=") - assert 
resp.status_code == 404 - - def test_media_video_file(self, client, tmp_path) -> None: - vid = tmp_path / "test.mp4" - vid.write_bytes(b"\x00" * 100) - - resp = client.get(f"/api/media?path={vid}") - assert resp.status_code == 200 - - -class TestInspectorRoute: - def test_inspector_scan(self, client, tmp_path) -> None: - resp = client.post("/api/inspector/scan", json={}) - assert resp.status_code == 400 - - resp = client.post( - "/api/inspector/scan", - json={"folder": str(tmp_path), "mode": "exif"}, - ) - assert resp.status_code == 202 - job_id = resp.get_json()["job_id"] - - status = client.get(f"/api/inspector/scan/{job_id}/status") - assert status.status_code == 200 - - results = client.get(f"/api/inspector/scan/{job_id}/results") - assert results.status_code in (200, 409) - - def test_exif_edit_strip(self, client, tmp_path) -> None: - resp = client.post("/api/inspector/exif/edit", json={}) - assert resp.status_code == 400 - - resp = client.post("/api/inspector/exif/strip", json={}) - assert resp.status_code == 400 - - -class TestOrganizerRoute: - def test_organizer_plan_invalid(self, client, tmp_path) -> None: - resp = client.post("/api/organizer/plan", json={}) - assert resp.status_code == 400 - - resp = client.post( - "/api/organizer/plan", - json={ - "folder": str(tmp_path), - "mode": "sort", - "operation": "copy", - }, - ) - assert resp.status_code == 202 - - def test_organizer_status_not_found(self, client) -> None: - resp = client.get("/api/organizer/status/notfound") - assert resp.status_code == 404 - - -class TestResizerRoute: - def test_resizer_scan_invalid(self, client, tmp_path) -> None: - resp = client.post("/api/resizer/scan", json={}) - assert resp.status_code == 400 - - resp = client.post( - "/api/resizer/scan", - json={"folder": str(tmp_path), "width": 100, "height": 100}, - ) - assert resp.status_code == 202 - - def test_resizer_status_results(self, client) -> None: - resp = client.get("/api/resizer/scan/notfound/status") - assert 
resp.status_code == 404 - - resp = client.get("/api/resizer/scan/notfound/results") - assert resp.status_code == 404 - - -# ── Converter — scan ─────────────────────────────────────────────────────── - - -class TestConverterScanRoute: - def test_missing_folder(self, client) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps({"folder": ""}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_invalid_folder(self, client) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps({"folder": "/nonexistent_xyz"}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_valid_scan(self, client, tmp_media) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps({"folder": str(tmp_media)}), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert data["total"] > 0 - - def test_no_body(self, client) -> None: - resp = client.post("/api/converter/scan") - assert resp.status_code in (400, 415) - - def test_scan_images_only(self, client, tmp_media) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps( - { - "folder": str(tmp_media), - "filter_type": "images", - } - ), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - for f in data.get("files", []): - ext = os.path.splitext(f["name"])[1].lower() - assert ext not in {".mp4", ".mov", ".avi"} - - def test_scan_videos_only(self, client, tmp_media) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps( - { - "folder": str(tmp_media), - "filter_type": "videos", - } - ), - content_type="application/json", - ) - assert resp.status_code == 200 - - def test_scan_no_subfolders(self, client, tmp_media) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps( - { - "folder": str(tmp_media), - "include_subfolders": False, - } - ), - content_type="application/json", - ) - 
assert resp.status_code == 200 - data = resp.get_json() - for f in data.get("files", []): - assert "/sub/" not in f["path"] - - def test_scan_invalid_filter_type(self, client, tmp_media) -> None: - resp = client.post( - "/api/converter/scan", - data=json.dumps( - { - "folder": str(tmp_media), - "filter_type": "invalid", - } - ), - content_type="application/json", - ) - assert resp.status_code == 200 - - -# ── Converter — formats ──────────────────────────────────────────────────── - - -class TestConverterFormatsRoute: - def test_returns_json(self, client) -> None: - resp = client.get("/api/converter/formats") - assert resp.status_code == 200 - data = resp.get_json() - assert "image" in data - assert "video" in data - - def test_formats_structure(self, client) -> None: - resp = client.get("/api/converter/formats") - data = resp.get_json() - assert isinstance(data["image"], dict) - assert isinstance(data["video"], dict) - for targets in data["image"].values(): - assert isinstance(targets, list) - - -# ── Converter — convert ──────────────────────────────────────────────────── - - -class TestConverterConvertRoute: - def test_no_files(self, client) -> None: - resp = client.post( - "/api/converter/convert", - data=json.dumps({"files": [], "target_ext": ".png"}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_no_body(self, client) -> None: - resp = client.post("/api/converter/convert") - assert resp.status_code in (400, 415) - - def test_convert_single_file(self, client, test_image) -> None: - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": [test_image], - "target_ext": ".png", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - data = resp.get_json() - assert "job_id" in data - - def test_convert_progress(self, client, test_image) -> None: - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": [test_image], - "target_ext": ".png", - } - ), 
- content_type="application/json", - ) - job_id = resp.get_json()["job_id"] - - time.sleep(0.5) - resp = client.get(f"/api/converter/progress/{job_id}") - assert resp.status_code == 200 - data = resp.get_json() - assert "status" in data - assert "completed" in data - - def test_missing_target_ext(self, client, test_image) -> None: - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": [test_image], - "target_ext": "", - } - ), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_convert_with_delete(self, client, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": [str(src)], - "target_ext": ".png", - "delete_original": True, - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - - job_id = resp.get_json()["job_id"] - time.sleep(1) - - resp = client.get(f"/api/converter/progress/{job_id}") - data = resp.get_json() - assert data["status"] == "done" - - def test_progress_poll(self, client, test_image) -> None: - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": [test_image], - "target_ext": ".png", - } - ), - content_type="application/json", - ) - job_id = resp.get_json()["job_id"] - time.sleep(0.5) - - resp = client.get( - f"/api/converter/progress/{job_id}/poll?last=0", - ) - assert resp.status_code == 200 - - def test_progress_poll_nonexistent(self, client) -> None: - resp = client.get("/api/converter/progress/nonexistent/poll") - assert resp.status_code == 404 - - def test_convert_nonexistent_source(self, client) -> None: - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": ["/nonexistent/file.jpg"], - "target_ext": ".png", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - job_id = resp.get_json()["job_id"] - - time.sleep(1) - resp = 
client.get(f"/api/converter/progress/{job_id}") - data = resp.get_json() - assert data["status"] == "done" - assert data["results"][0]["status"] == "error" - - def test_convert_multiple_files(self, client, tmp_path) -> None: - files = [] - for i in range(3): - src = tmp_path / f"img{i}.jpg" - Image.new("RGB", (50, 50), "red").save(str(src)) - files.append(str(src)) - - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": files, - "target_ext": ".png", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - job_id = resp.get_json()["job_id"] - - for _ in range(20): - time.sleep(0.3) - resp = client.get(f"/api/converter/progress/{job_id}") - data = resp.get_json() - if data["status"] == "done": - break - - assert data["completed"] == 3 - assert all(r["status"] == "ok" for r in data["results"]) - - def test_convert_with_size_info(self, client, tmp_path) -> None: - src = tmp_path / "test.jpg" - Image.new("RGB", (100, 100), "blue").save(str(src)) - - resp = client.post( - "/api/converter/convert", - data=json.dumps( - { - "files": [str(src)], - "target_ext": ".png", - } - ), - content_type="application/json", - ) - job_id = resp.get_json()["job_id"] - - time.sleep(1) - resp = client.get(f"/api/converter/progress/{job_id}") - data = resp.get_json() - result = data["results"][0] - assert "original_size" in result - assert "new_size" in result - assert "original_size_fmt" in result - assert "new_size_fmt" in result - - -# ── Converter — delete ───────────────────────────────────────────────────── - - -class TestConverterDeleteRoute: - def test_no_files(self, client) -> None: - resp = client.post( - "/api/converter/delete", - data=json.dumps({"files": []}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_delete_nonexistent(self, client) -> None: - resp = client.post( - "/api/converter/delete", - data=json.dumps({"files": ["/nonexistent/file.jpg"]}), - 
content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert data["results"][0]["status"] == "not_found" - - def test_delete_real_file(self, client, tmp_path) -> None: - f = tmp_path / "deleteme.jpg" - Image.new("RGB", (10, 10), "red").save(str(f)) - assert f.exists() - - resp = client.post( - "/api/converter/delete", - data=json.dumps({"files": [str(f)]}), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert data["results"][0]["status"] == "deleted" - assert not f.exists() - - def test_delete_no_body(self, client) -> None: - resp = client.post("/api/converter/delete") - assert resp.status_code in (400, 415) - - def test_delete_without_files_key(self, client) -> None: - resp = client.post( - "/api/converter/delete", - data=json.dumps({}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_delete_mixed_results(self, client, tmp_path) -> None: - real = tmp_path / "real.jpg" - Image.new("RGB", (10, 10), "red").save(str(real)) - - resp = client.post( - "/api/converter/delete", - data=json.dumps( - { - "files": [str(real), "/nonexistent/file.jpg"], - } - ), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert data["results"][0]["status"] == "deleted" - assert data["results"][1]["status"] == "not_found" - assert data["total_freed"] > 0 - - -# ── Converter — progress ─────────────────────────────────────────────────── - - -class TestConverterProgressRoute: - def test_nonexistent_job(self, client) -> None: - resp = client.get("/api/converter/progress/nonexistent") - assert resp.status_code == 404 - - -# ── Dupfinder — scan ─────────────────────────────────────────────────────── - - -class TestDupfinderScanRoute: - def test_missing_folder(self, client) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps({"folder": ""}), - content_type="application/json", - ) - assert 
resp.status_code == 400 - - def test_invalid_scan_type(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "invalid", - } - ), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_no_body(self, client) -> None: - resp = client.post("/api/dupfinder/scan") - assert resp.status_code in (400, 415) - - def test_valid_scan_start(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "images", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - data = resp.get_json() - assert "job_id" in data - - def test_scan_with_thresholds(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "both", - "image_threshold": 0.95, - "video_threshold": 0.80, - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - - def test_scan_status_after_start(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "images", - } - ), - content_type="application/json", - ) - job_id = resp.get_json()["job_id"] - - resp = client.get(f"/api/dupfinder/scan/{job_id}/status") - assert resp.status_code == 200 - data = resp.get_json() - assert "status" in data - assert "progress" in data - - def test_scan_results_not_done(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "images", - } - ), - content_type="application/json", - ) - job_id = resp.get_json()["job_id"] - - resp = client.get(f"/api/dupfinder/scan/{job_id}/results") - assert resp.status_code in (200, 409) - - def test_scan_results_after_completion(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - 
data=json.dumps( - { - "folder": str(tmp_path), - "type": "images", - } - ), - content_type="application/json", - ) - job_id = resp.get_json()["job_id"] - - for _ in range(20): - time.sleep(0.5) - resp = client.get(f"/api/dupfinder/scan/{job_id}/status") - data = resp.get_json() - if data["status"] in ("done", "error"): - break - - resp = client.get(f"/api/dupfinder/scan/{job_id}/results") - assert resp.status_code == 200 - data = resp.get_json() - assert "image_groups" in data - assert "space_savings" in data - - def test_scan_videos_only(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "videos", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - - def test_scan_both_types(self, client, tmp_path) -> None: - resp = client.post( - "/api/dupfinder/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "type": "both", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - - -# ── Dupfinder — status / results ─────────────────────────────────────────── - - -class TestDupfinderStatusRoute: - def test_nonexistent_job(self, client) -> None: - resp = client.get("/api/dupfinder/scan/nonexistent/status") - assert resp.status_code == 404 - - -class TestDupfinderResultsRoute: - def test_nonexistent_job(self, client) -> None: - resp = client.get("/api/dupfinder/scan/nonexistent/results") - assert resp.status_code == 404 - - -# ── Dupfinder — delete ───────────────────────────────────────────────────── - - -class TestDupfinderDeleteRoute: - def test_no_files(self, client) -> None: - resp = client.post( - "/api/dupfinder/delete", - data=json.dumps({"files": []}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_delete_nonexistent(self, client) -> None: - resp = client.post( - "/api/dupfinder/delete", - data=json.dumps({"files": ["/nonexistent/file.jpg"]}), - content_type="application/json", - 
) - assert resp.status_code == 200 - data = resp.get_json() - assert data["results"][0]["status"] == "not_found" - - def test_no_body(self, client) -> None: - resp = client.post("/api/dupfinder/delete") - assert resp.status_code in (400, 415) - - def test_delete_without_files_key(self, client) -> None: - resp = client.post( - "/api/dupfinder/delete", - data=json.dumps({}), - content_type="application/json", - ) - assert resp.status_code == 400 - - def test_delete_real_file(self, client, tmp_path) -> None: - f = tmp_path / "dup.jpg" - Image.new("RGB", (10, 10), "red").save(str(f)) - - resp = client.post( - "/api/dupfinder/delete", - data=json.dumps({"files": [str(f)]}), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert data["results"][0]["status"] == "deleted" - assert not f.exists() - - def test_delete_mixed(self, client, tmp_path) -> None: - f = tmp_path / "real.jpg" - Image.new("RGB", (10, 10), "red").save(str(f)) - - resp = client.post( - "/api/dupfinder/delete", - data=json.dumps( - { - "files": [str(f), "/nonexistent.jpg"], - } - ), - content_type="application/json", - ) - assert resp.status_code == 200 - data = resp.get_json() - assert len(data["results"]) == 2 - - -# ── Inspector Routes ────────────────────────────────────────────────────── - - -class TestInspectorRoutes: - def test_scan_requires_folder(self, client) -> None: - resp = client.post( - "/api/inspector/scan", - data=json.dumps({"mode": "exif"}), - content_type="application/json", - ) - assert resp.status_code == 400 - data = resp.get_json() - assert "error" in data - - def test_scan_exif(self, client, tmp_path) -> None: - Image.new("RGB", (10, 10)).save(str(tmp_path / "a.jpg"), "JPEG") - resp = client.post( - "/api/inspector/scan", - data=json.dumps({"folder": str(tmp_path), "mode": "exif"}), - content_type="application/json", - ) - assert resp.status_code == 202 - data = resp.get_json() - assert "job_id" in data - - def 
test_scan_integrity(self, client, tmp_path) -> None: - Image.new("RGB", (10, 10)).save(str(tmp_path / "a.jpg"), "JPEG") - resp = client.post( - "/api/inspector/scan", - data=json.dumps({"folder": str(tmp_path), "mode": "integrity"}), - content_type="application/json", - ) - assert resp.status_code == 202 - data = resp.get_json() - assert "job_id" in data - - def test_status_unknown_job(self, client) -> None: - resp = client.get("/api/inspector/scan/fakeid/status") - assert resp.status_code == 404 - data = resp.get_json() - assert "error" in data - - def test_strip_requires_files(self, client) -> None: - resp = client.post( - "/api/inspector/exif/strip", - data=json.dumps({}), - content_type="application/json", - ) - assert resp.status_code == 400 - data = resp.get_json() - assert "error" in data - - -# ── Resizer Routes ──────────────────────────────────────────────────────── - - -class TestResizerRoutes: - def test_scan_requires_folder(self, client) -> None: - resp = client.post( - "/api/resizer/scan", - data=json.dumps({"width": 100, "height": 100}), - content_type="application/json", - ) - assert resp.status_code == 400 - data = resp.get_json() - assert "error" in data - - def test_scan_starts_job(self, client, tmp_path) -> None: - Image.new("RGB", (100, 100)).save(str(tmp_path / "img.png")) - resp = client.post( - "/api/resizer/scan", - data=json.dumps( - { - "folder": str(tmp_path), - "width": 50, - "height": 50, - "mode": "fit", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - data = resp.get_json() - assert "job_id" in data - - def test_status_unknown_job(self, client) -> None: - resp = client.get("/api/resizer/scan/fakeid/status") - assert resp.status_code == 404 - data = resp.get_json() - assert "error" in data - - -# ── Organizer Routes ────────────────────────────────────────────────────── - - -class TestOrganizerRoutes: - def test_plan_requires_folder(self, client) -> None: - resp = client.post( - "/api/organizer/plan", 
- data=json.dumps({"mode": "sort"}), - content_type="application/json", - ) - assert resp.status_code == 400 - data = resp.get_json() - assert "error" in data - - def test_plan_sort(self, client, tmp_path) -> None: - Image.new("RGB", (10, 10)).save(str(tmp_path / "photo.jpg"), "JPEG") - resp = client.post( - "/api/organizer/plan", - data=json.dumps( - { - "folder": str(tmp_path), - "mode": "sort", - "operation": "copy", - "template": "{year}/{month}", - } - ), - content_type="application/json", - ) - assert resp.status_code == 202 - data = resp.get_json() - assert "job_id" in data - - def test_execute_requires_job_id(self, client) -> None: - resp = client.post( - "/api/organizer/execute", - data=json.dumps({}), - content_type="application/json", - ) - assert resp.status_code == 400 - data = resp.get_json() - assert "error" in data - - def test_status_unknown_job(self, client) -> None: - resp = client.get("/api/organizer/status/fakeid") - assert resp.status_code == 404 - data = resp.get_json() - assert "error" in data diff --git a/tests/inspector/__init__.py b/tests/inspector/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/inspector/test_exif.py b/tests/inspector/test_exif.py deleted file mode 100644 index f9d36c4..0000000 --- a/tests/inspector/test_exif.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Tests for morphic.inspector.exif.""" - -from __future__ import annotations - - -import pytest -from PIL import Image - -from morphic.inspector.exif import ( - edit_exif, - read_exif, - strip_exif, - strip_exif_batch, -) - - -def _make_jpeg(path: str, size: tuple[int, int] = (50, 50)) -> str: - """Create a minimal JPEG file.""" - img = Image.new("RGB", size, "red") - img.save(path, "JPEG") - return path - - -class TestReadExif: - def test_returns_dict(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "photo.jpg")) - result = read_exif(path) - assert isinstance(result, dict) - - def test_nonexistent_file_raises(self, tmp_path) -> None: - 
with pytest.raises((FileNotFoundError, Exception)): - read_exif(str(tmp_path / "nope.jpg")) - - def test_png_returns_empty_or_dict(self, tmp_path) -> None: - """PNG files may have no EXIF — should not crash.""" - p = tmp_path / "test.png" - Image.new("RGB", (10, 10), "blue").save(str(p)) - result = read_exif(str(p)) - assert isinstance(result, dict) - - -class TestEditExif: - def test_edit_roundtrip(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "edit.jpg")) - # Write some EXIF first so piexif can work with it - edit_exif(path, {"ImageDescription": "hello world"}) - data = read_exif(path) - assert data.get("ImageDescription") == "hello world" - - def test_edit_nonexistent_key_is_ignored(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "edit2.jpg")) - # Unknown key should be silently ignored - edit_exif(path, {"TotallyFakeTag12345": "value"}) - - -class TestStripExif: - def test_strip_removes_data(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "strip.jpg")) - edit_exif(path, {"ImageDescription": "to be removed"}) - strip_exif(path) - data = read_exif(path) - assert data.get("ImageDescription") in (None, "") - - def test_strip_preserves_image(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "strip2.jpg")) - strip_exif(path) - img = Image.open(path) - assert img.size == (50, 50) - - -class TestStripExifBatch: - def test_batch_returns_dict(self, tmp_path) -> None: - paths = [_make_jpeg(str(tmp_path / f"img{i}.jpg")) for i in range(3)] - results = strip_exif_batch(paths) - assert isinstance(results, dict) - assert len(results) == 3 - for path, info in results.items(): - assert "success" in info - assert info["success"] is True - - def test_batch_with_bad_file(self, tmp_path) -> None: - good = _make_jpeg(str(tmp_path / "good.jpg")) - bad = str(tmp_path / "nonexistent.jpg") - results = strip_exif_batch([good, bad]) - assert len(results) == 2 - assert results[good]["success"] is True - assert results[bad]["success"] is False 
- assert "error" in results[bad] diff --git a/tests/inspector/test_integrity.py b/tests/inspector/test_integrity.py deleted file mode 100644 index e6c91d0..0000000 --- a/tests/inspector/test_integrity.py +++ /dev/null @@ -1,71 +0,0 @@ -"""Tests for morphic.inspector.integrity.""" - -from __future__ import annotations - - -from PIL import Image - -from morphic.inspector.integrity import check_files, check_image, check_video - - -def _make_jpeg(path: str) -> str: - Image.new("RGB", (20, 20), "green").save(path, "JPEG") - return path - - -class TestCheckImage: - def test_valid_image_ok(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "ok.jpg")) - result = check_image(path) - assert result["valid"] is True - assert result["path"] == path - - def test_truncated_image(self, tmp_path) -> None: - path = str(tmp_path / "bad.jpg") - _make_jpeg(path) - # Truncate the file - with open(path, "r+b") as f: - f.truncate(10) - result = check_image(path) - assert result["valid"] is False - assert result["error"] is not None - - def test_zero_byte(self, tmp_path) -> None: - path = str(tmp_path / "empty.jpg") - open(path, "w").close() - result = check_image(path) - assert result["valid"] is False - - def test_nonexistent(self, tmp_path) -> None: - result = check_image(str(tmp_path / "nope.jpg")) - assert result["valid"] is False - - -class TestCheckVideo: - def test_fake_video_fails(self, tmp_path) -> None: - path = str(tmp_path / "fake.mp4") - with open(path, "wb") as f: - f.write(b"\x00" * 100) - result = check_video(path) - # Fake video should fail ffprobe (or return valid=False if no ffprobe) - assert isinstance(result["valid"], bool) - assert "path" in result - - -class TestCheckFiles: - def test_scans_folder(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "a.jpg")) - _make_jpeg(str(tmp_path / "b.png")) - # Non-media file should be ignored - (tmp_path / "readme.txt").write_text("hello") - - results = check_files(str(tmp_path)) - # Should have at least the 2 images 
- image_results = [ - r for r in results if r["path"].endswith((".jpg", ".png")) - ] - assert len(image_results) >= 2 - - def test_empty_folder(self, tmp_path) -> None: - results = check_files(str(tmp_path)) - assert results == [] diff --git a/tests/inspector/test_scanner.py b/tests/inspector/test_scanner.py deleted file mode 100644 index c7382a1..0000000 --- a/tests/inspector/test_scanner.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Tests for morphic.inspector.scanner.""" - -from __future__ import annotations - -from PIL import Image - -from morphic.inspector.scanner import get_job, start_job - - -def _make_jpeg(path, size=(20, 20)): - Image.new("RGB", size, "red").save(str(path), "JPEG") - - -class TestInspectorScanner: - def test_start_exif_scan(self, tmp_path) -> None: - _make_jpeg(tmp_path / "a.jpg") - _make_jpeg(tmp_path / "b.jpg") - - job_id = start_job(str(tmp_path), mode="exif") - assert isinstance(job_id, str) - - # Poll until done - import time - - for _ in range(50): - job = get_job(job_id) - if job and job.status in ("done", "error"): - break - time.sleep(0.1) - - job = get_job(job_id) - assert job is not None - assert job.status == "done" - assert len(job.results) == 2 - - def test_start_integrity_scan(self, tmp_path) -> None: - _make_jpeg(tmp_path / "ok.jpg") - # Truncated file - bad = tmp_path / "bad.jpg" - bad.write_bytes(b"\xff\xd8" + b"\x00" * 5) - - job_id = start_job(str(tmp_path), mode="integrity") - - import time - - for _ in range(50): - job = get_job(job_id) - if job and job.status in ("done", "error"): - break - time.sleep(0.1) - - job = get_job(job_id) - assert job is not None - assert job.status == "done" - assert len(job.results) >= 2 - - def test_get_nonexistent_job(self) -> None: - assert get_job("nonexistent-id") is None diff --git a/tests/organizer/__init__.py b/tests/organizer/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/organizer/test_date_sorter.py b/tests/organizer/test_date_sorter.py deleted file mode 
100644 index 480a74a..0000000 --- a/tests/organizer/test_date_sorter.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Tests for morphic.organizer.date_sorter.""" - -from __future__ import annotations - -import os - -import pytest -from PIL import Image - -from morphic.organizer.date_sorter import ( - execute_sort, - get_file_date, - plan_sort, -) - - -def _make_jpeg(path: str) -> str: - Image.new("RGB", (10, 10), "red").save(path, "JPEG") - return path - - -class TestGetFileDate: - def test_returns_datetime(self, tmp_path) -> None: - path = _make_jpeg(str(tmp_path / "a.jpg")) - dt = get_file_date(path) - assert dt is not None - assert dt.year >= 2020 - - def test_fallback_to_mtime(self, tmp_path) -> None: - # PNG has no EXIF — should fall back to mtime - p = str(tmp_path / "test.png") - Image.new("RGB", (10, 10)).save(p) - dt = get_file_date(p) - assert dt is not None - - -class TestPlanSort: - def test_plan_returns_list(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "a.jpg")) - _make_jpeg(str(tmp_path / "b.jpg")) - plan = plan_sort(str(tmp_path)) - assert isinstance(plan, list) - assert len(plan) == 2 - for entry in plan: - assert "source" in entry - assert "destination" in entry - assert "date" in entry - - def test_plan_with_template(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "photo.jpg")) - plan = plan_sort(str(tmp_path), template="{year}/{month}") - assert len(plan) == 1 - # Destination should contain year/month path - dest = plan[0]["destination"] - parts = dest.replace("\\", "/").split("/") - # Should have numeric year and month somewhere in path - assert any(p.isdigit() and len(p) == 4 for p in parts) - - def test_plan_with_destination(self, tmp_path) -> None: - src = tmp_path / "src" - src.mkdir() - _make_jpeg(str(src / "a.jpg")) - - dest = str(tmp_path / "dest") - plan = plan_sort(str(src), destination=dest) - assert len(plan) == 1 - assert plan[0]["destination"].startswith(dest) - - -class TestExecuteSort: - def test_copy(self, tmp_path) -> None: - 
_make_jpeg(str(tmp_path / "orig.jpg")) - plan = plan_sort(str(tmp_path), destination=str(tmp_path / "sorted")) - result = execute_sort(plan, operation="copy") - assert result["completed"] == 1 - assert result["errors"] == 0 - # Original still exists (copy) - assert os.path.isfile(str(tmp_path / "orig.jpg")) - # Destination exists - assert os.path.isfile(plan[0]["destination"]) - - def test_move(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "orig.jpg")) - plan = plan_sort(str(tmp_path), destination=str(tmp_path / "sorted")) - result = execute_sort(plan, operation="move") - assert result["completed"] == 1 - # Original should be gone - assert not os.path.isfile(str(tmp_path / "orig.jpg")) - - def test_invalid_operation(self, tmp_path) -> None: - with pytest.raises(ValueError, match="move.*copy"): - execute_sort([], operation="bad") diff --git a/tests/organizer/test_renamer.py b/tests/organizer/test_renamer.py deleted file mode 100644 index 8c2dfe8..0000000 --- a/tests/organizer/test_renamer.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Tests for morphic.organizer.renamer.""" - -from __future__ import annotations - -import os - -from PIL import Image - -from morphic.organizer.renamer import execute_rename, plan_rename - - -def _make_jpeg(path: str) -> str: - Image.new("RGB", (10, 10), "red").save(path, "JPEG") - return path - - -class TestPlanRename: - def test_basic_plan(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "a.jpg")) - _make_jpeg(str(tmp_path / "b.jpg")) - plan = plan_rename(str(tmp_path), template="{seq:3}{ext}") - assert len(plan) == 2 - for entry in plan: - assert "source" in entry - assert "new_name" in entry - assert "destination" in entry - assert "conflict" in entry - - def test_seq_token(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "photo.jpg")) - plan = plan_rename(str(tmp_path), template="{seq:4}{ext}", start_seq=1) - assert plan[0]["new_name"] == "0001.jpg" - - def test_original_token(self, tmp_path) -> None: - _make_jpeg(str(tmp_path 
/ "nice_photo.jpg")) - plan = plan_rename(str(tmp_path), template="{original}_renamed{ext}") - assert "nice_photo_renamed.jpg" in plan[0]["new_name"] - - def test_date_token(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "photo.jpg")) - plan = plan_rename(str(tmp_path), template="{date}_{seq}{ext}") - # Should have date-like prefix - name = plan[0]["new_name"] - assert name.count("-") >= 2 # YYYY-MM-DD has 2 dashes - - def test_conflict_detection(self, tmp_path) -> None: - # Create 2 files that would get the same name - _make_jpeg(str(tmp_path / "a.jpg")) - _make_jpeg(str(tmp_path / "b.jpg")) - # Template without seq = all get same name - plan = plan_rename(str(tmp_path), template="same{ext}") - conflicts = [p for p in plan if p["conflict"]] - # At least one conflict expected - assert len(conflicts) >= 1 - - def test_output_folder(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "photo.jpg")) - out = str(tmp_path / "renamed") - plan = plan_rename( - str(tmp_path), template="{seq}{ext}", output_folder=out - ) - assert plan[0]["destination"].startswith(out) - - -class TestExecuteRename: - def test_move(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "orig.jpg")) - plan = plan_rename( - str(tmp_path), - template="renamed_{seq:2}{ext}", - output_folder=str(tmp_path / "out"), - ) - result = execute_rename(plan, operation="move") - assert result["completed"] == 1 - assert result["errors"] == 0 - assert not os.path.isfile(str(tmp_path / "orig.jpg")) - - def test_copy(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "orig.jpg")) - plan = plan_rename( - str(tmp_path), - template="renamed{ext}", - output_folder=str(tmp_path / "out"), - ) - result = execute_rename(plan, operation="copy") - assert result["completed"] == 1 - # Original still exists - assert os.path.isfile(str(tmp_path / "orig.jpg")) - - def test_skips_conflicts(self, tmp_path) -> None: - _make_jpeg(str(tmp_path / "a.jpg")) - _make_jpeg(str(tmp_path / "b.jpg")) - plan = 
plan_rename(str(tmp_path), template="same{ext}") - result = execute_rename(plan, operation="copy") - # Should skip at least 1 conflict - assert result["skipped"] >= 1 diff --git a/tests/resizer/__init__.py b/tests/resizer/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/resizer/test_operations.py b/tests/resizer/test_operations.py deleted file mode 100644 index a9859a2..0000000 --- a/tests/resizer/test_operations.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Tests for morphic.resizer.operations.""" - -from __future__ import annotations - -import os - -import pytest -from PIL import Image - -from morphic.resizer.operations import resize_image - - -def _make_image(path: str, size: tuple[int, int] = (200, 100)) -> str: - Image.new("RGB", size, "blue").save(path) - return path - - -class TestResizeModes: - def test_fit(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png"), (400, 200)) - dest = resize_image( - src, 100, 100, mode="fit", output_folder=str(tmp_path / "out") - ) - img = Image.open(dest) - # fit keeps aspect ratio, so 100x50 - assert img.width <= 100 - assert img.height <= 100 - - def test_fill(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png"), (400, 200)) - dest = resize_image( - src, 100, 100, mode="fill", output_folder=str(tmp_path / "out") - ) - img = Image.open(dest) - assert img.size == (100, 100) - - def test_stretch(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png"), (400, 200)) - dest = resize_image( - src, 100, 50, mode="stretch", output_folder=str(tmp_path / "out") - ) - img = Image.open(dest) - assert img.size == (100, 50) - - def test_pad(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png"), (400, 200)) - dest = resize_image( - src, 100, 100, mode="pad", output_folder=str(tmp_path / "out") - ) - img = Image.open(dest) - assert img.size == (100, 100) - - -class TestResizeErrors: - def test_invalid_mode(self, tmp_path) -> None: - src = 
_make_image(str(tmp_path / "src.png")) - with pytest.raises(ValueError, match="Invalid mode"): - resize_image(src, 100, 100, mode="bad") - - def test_nonexistent_file(self, tmp_path) -> None: - with pytest.raises(FileNotFoundError): - resize_image(str(tmp_path / "nope.png"), 100, 100) - - def test_zero_dimensions(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png")) - with pytest.raises(ValueError, match="positive"): - resize_image(src, 0, 100) - - -class TestResizeOutput: - def test_output_folder_created(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png")) - out = str(tmp_path / "new_dir") - dest = resize_image(src, 50, 50, output_folder=out) - assert os.path.isdir(out) - assert os.path.isfile(dest) - - def test_format_override(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.png")) - dest = resize_image( - src, - 50, - 50, - output_format=".jpg", - output_folder=str(tmp_path / "out"), - ) - assert dest.endswith(".jpg") - img = Image.open(dest) - assert img.mode == "RGB" - - def test_quality_param(self, tmp_path) -> None: - src = _make_image(str(tmp_path / "src.jpg")) - dest_high = resize_image( - src, 50, 50, quality=95, output_folder=str(tmp_path / "hi") - ) - dest_low = resize_image( - src, 50, 50, quality=10, output_folder=str(tmp_path / "lo") - ) - # Lower quality should be smaller file - assert os.path.getsize(dest_low) <= os.path.getsize(dest_high) - - def test_rgba_to_jpg(self, tmp_path) -> None: - src = str(tmp_path / "rgba.png") - Image.new("RGBA", (50, 50), (255, 0, 0, 128)).save(src) - dest = resize_image( - src, - 30, - 30, - output_format=".jpg", - output_folder=str(tmp_path / "out"), - ) - img = Image.open(dest) - assert img.mode == "RGB" - - def test_palette_mode(self, tmp_path) -> None: - src = str(tmp_path / "pal.png") - Image.new("P", (50, 50)).save(src) - dest = resize_image(src, 30, 30, output_folder=str(tmp_path / "out")) - assert os.path.isfile(dest) diff --git 
a/tests/resizer/test_scanner.py b/tests/resizer/test_scanner.py deleted file mode 100644 index 5ea7229..0000000 --- a/tests/resizer/test_scanner.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Tests for morphic.resizer.scanner.""" - -from __future__ import annotations - -import time - -from PIL import Image - -from morphic.resizer.scanner import get_job, start_job - - -def _make_images(tmp_path, count=3): - for i in range(count): - Image.new("RGB", (200, 100), "green").save( - str(tmp_path / f"img{i}.png") - ) - - -class TestResizerScanner: - def test_start_job(self, tmp_path) -> None: - _make_images(tmp_path) - out = tmp_path / "output" - - job_id = start_job( - folder=str(tmp_path), - width=50, - height=50, - mode="fit", - output_folder=str(out), - ) - assert isinstance(job_id, str) - - for _ in range(50): - job = get_job(job_id) - if job and job.status in ("done", "error"): - break - time.sleep(0.1) - - job = get_job(job_id) - assert job is not None - assert job.status == "done" - assert len(job.results) == 3 - - def test_nonexistent_job(self) -> None: - assert get_job("fake-id") is None diff --git a/tests/shared/__init__.py b/tests/shared/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/shared/test_constants.py b/tests/shared/test_constants.py deleted file mode 100644 index 27f1fc0..0000000 --- a/tests/shared/test_constants.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Tests for morphic.shared.constants.""" - -from morphic.shared.constants import ( - ALIASES, - ALL_EXTENSIONS, - DEFAULT_BATCH_SIZE, - DEFAULT_HASH_SIZE, - DEFAULT_IMAGE_THRESHOLD, - DEFAULT_NUM_FRAMES, - DEFAULT_NUM_WORKERS, - DEFAULT_VIDEO_THRESHOLD, - EXCLUDED_FOLDERS, - IMAGE_EXTENSIONS, - VIDEO_EXTENSIONS, -) - - -class TestExtensionSets: - def test_image_extensions_not_empty(self) -> None: - assert len(IMAGE_EXTENSIONS) > 0 - - def test_video_extensions_not_empty(self) -> None: - assert len(VIDEO_EXTENSIONS) > 0 - - def test_all_extensions_is_union(self) -> None: - assert 
ALL_EXTENSIONS == IMAGE_EXTENSIONS | VIDEO_EXTENSIONS - - def test_no_overlap_between_image_and_video(self) -> None: - assert IMAGE_EXTENSIONS & VIDEO_EXTENSIONS == set() - - def test_all_extensions_start_with_dot(self) -> None: - for ext in ALL_EXTENSIONS: - assert ext.startswith("."), f"{ext} missing leading dot" - - def test_extensions_are_lowercase(self) -> None: - for ext in ALL_EXTENSIONS: - assert ext == ext.lower(), f"{ext} not lowercase" - - def test_image_extensions_are_frozenset(self) -> None: - assert isinstance(IMAGE_EXTENSIONS, frozenset) - - def test_video_extensions_are_frozenset(self) -> None: - assert isinstance(VIDEO_EXTENSIONS, frozenset) - - def test_common_image_formats_present(self) -> None: - for ext in [".jpg", ".png", ".gif", ".bmp", ".webp", ".tif"]: - assert ext in IMAGE_EXTENSIONS - - def test_common_video_formats_present(self) -> None: - for ext in [".mp4", ".mov", ".avi", ".mkv", ".webm"]: - assert ext in VIDEO_EXTENSIONS - - -class TestAliases: - def test_jpeg_alias(self) -> None: - assert ALIASES[".jpeg"] == ".jpg" - - def test_tiff_alias(self) -> None: - assert ALIASES[".tiff"] == ".tif" - - def test_mpg_alias(self) -> None: - assert ALIASES[".mpg"] == ".mpeg" - - def test_aliases_are_lowercase(self) -> None: - for key, val in ALIASES.items(): - assert key == key.lower() - assert val == val.lower() - - -class TestExcludedFolders: - def test_excluded_folders_not_empty(self) -> None: - assert len(EXCLUDED_FOLDERS) > 0 - - def test_common_exclusions_present(self) -> None: - for name in ["node_modules", ".git", "__pycache__"]: - assert name in EXCLUDED_FOLDERS - - def test_excluded_folders_are_frozenset(self) -> None: - assert isinstance(EXCLUDED_FOLDERS, frozenset) - - -class TestDefaults: - def test_image_threshold_range(self) -> None: - assert 0 < DEFAULT_IMAGE_THRESHOLD <= 1.0 - - def test_video_threshold_range(self) -> None: - assert 0 < DEFAULT_VIDEO_THRESHOLD <= 1.0 - - def test_hash_size_positive(self) -> None: - assert 
DEFAULT_HASH_SIZE > 0 - - def test_num_frames_positive(self) -> None: - assert DEFAULT_NUM_FRAMES > 0 - - def test_num_workers_positive(self) -> None: - assert DEFAULT_NUM_WORKERS > 0 - - def test_batch_size_positive(self) -> None: - assert DEFAULT_BATCH_SIZE > 0 diff --git a/tests/shared/test_file_browser.py b/tests/shared/test_file_browser.py deleted file mode 100644 index 7e3a8b3..0000000 --- a/tests/shared/test_file_browser.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Tests for morphic.shared.file_browser — all fallback paths.""" - -from __future__ import annotations - -from unittest.mock import MagicMock, patch - -from morphic.shared.file_browser import ( - _try_kdialog, - _try_osascript, - _try_powershell, - _try_tkinter, - _try_zenity, - open_native_folder_dialog, -) - - -class TestTryTkinter: - @patch("tkinter.filedialog.askdirectory", return_value="/selected/folder") - @patch("tkinter.Tk") - def test_success(self, mock_tk_cls, mock_askdir) -> None: - mock_root = MagicMock() - mock_tk_cls.return_value = mock_root - - result = _try_tkinter("/home/user") - assert result == "/selected/folder" - - @patch("tkinter.filedialog.askdirectory", return_value="") - @patch("tkinter.Tk") - def test_cancelled(self, mock_tk_cls, mock_askdir) -> None: - mock_root = MagicMock() - mock_tk_cls.return_value = mock_root - - result = _try_tkinter("/home/user") - assert result is None - - @patch("builtins.__import__", side_effect=ImportError("No tkinter")) - def test_import_error(self, mock_import) -> None: - result = _try_tkinter("/home/user") - assert result is None - - -class TestTryZenity: - @patch("morphic.shared.file_browser.subprocess.run") - def test_success(self, mock_run) -> None: - mock_run.return_value = MagicMock( - returncode=0, - stdout="/selected/folder\n", - ) - result = _try_zenity("/home/user") - assert result == "/selected/folder" - - @patch("morphic.shared.file_browser.subprocess.run") - def test_cancelled(self, mock_run) -> None: - mock_run.return_value = 
MagicMock(returncode=1, stdout="") - result = _try_zenity("/home/user") - assert result is None - - @patch( - "morphic.shared.file_browser.subprocess.run", - side_effect=FileNotFoundError, - ) - def test_not_found(self, mock_run) -> None: - result = _try_zenity("/home/user") - assert result is None - - -class TestTryKdialog: - @patch("morphic.shared.file_browser.subprocess.run") - def test_success(self, mock_run) -> None: - mock_run.return_value = MagicMock( - returncode=0, - stdout="/selected/folder\n", - ) - result = _try_kdialog("/home/user") - assert result == "/selected/folder" - - @patch("morphic.shared.file_browser.subprocess.run") - def test_cancelled(self, mock_run) -> None: - mock_run.return_value = MagicMock(returncode=1, stdout="") - result = _try_kdialog("/home/user") - assert result is None - - @patch( - "morphic.shared.file_browser.subprocess.run", - side_effect=FileNotFoundError, - ) - def test_not_found(self, mock_run) -> None: - result = _try_kdialog("/home/user") - assert result is None - - -class TestTryOsascript: - @patch("morphic.shared.file_browser.subprocess.run") - def test_success(self, mock_run) -> None: - mock_run.return_value = MagicMock( - returncode=0, - stdout="/Users/test/folder/\n", - ) - result = _try_osascript("/Users/test") - assert result == "/Users/test/folder" - - @patch("morphic.shared.file_browser.subprocess.run") - def test_cancelled(self, mock_run) -> None: - mock_run.return_value = MagicMock(returncode=1, stdout="") - result = _try_osascript("/Users/test") - assert result is None - - @patch( - "morphic.shared.file_browser.subprocess.run", - side_effect=FileNotFoundError, - ) - def test_not_found(self, mock_run) -> None: - result = _try_osascript("/Users/test") - assert result is None - - -class TestTryPowershell: - @patch("morphic.shared.file_browser.subprocess.run") - def test_success(self, mock_run) -> None: - mock_run.return_value = MagicMock( - returncode=0, - stdout="C:\\Users\\test\\folder\n", - ) - result = 
_try_powershell("C:\\Users\\test") - assert result == "C:\\Users\\test\\folder" - - @patch("morphic.shared.file_browser.subprocess.run") - def test_cancelled(self, mock_run) -> None: - mock_run.return_value = MagicMock(returncode=1, stdout="") - result = _try_powershell("C:\\Users\\test") - assert result is None - - @patch( - "morphic.shared.file_browser.subprocess.run", - side_effect=FileNotFoundError, - ) - def test_not_found(self, mock_run) -> None: - result = _try_powershell("C:\\Users\\test") - assert result is None - - -class TestOpenNativeFolderDialog: - @patch("morphic.shared.file_browser._try_tkinter", return_value="/chosen") - def test_tkinter_success(self, mock_tk) -> None: - result = open_native_folder_dialog() - assert result == "/chosen" - - @patch("morphic.shared.file_browser._try_tkinter", return_value=None) - @patch("morphic.shared.file_browser.platform.system", return_value="Linux") - @patch( - "morphic.shared.file_browser._try_zenity", - return_value="/zenity_dir", - ) - def test_linux_zenity_fallback(self, mock_z, mock_sys, mock_tk) -> None: - result = open_native_folder_dialog() - assert result == "/zenity_dir" - - @patch("morphic.shared.file_browser._try_tkinter", return_value=None) - @patch("morphic.shared.file_browser.platform.system", return_value="Linux") - @patch("morphic.shared.file_browser._try_zenity", return_value=None) - @patch( - "morphic.shared.file_browser._try_kdialog", - return_value="/kde_dir", - ) - def test_linux_kdialog_fallback( - self, - mock_k, - mock_z, - mock_sys, - mock_tk, - ) -> None: - result = open_native_folder_dialog() - assert result == "/kde_dir" - - @patch("morphic.shared.file_browser._try_tkinter", return_value=None) - @patch( - "morphic.shared.file_browser.platform.system", - return_value="Darwin", - ) - @patch( - "morphic.shared.file_browser._try_osascript", - return_value="/mac_dir", - ) - def test_macos_fallback(self, mock_osa, mock_sys, mock_tk) -> None: - result = open_native_folder_dialog() - assert 
result == "/mac_dir" - - @patch("morphic.shared.file_browser._try_tkinter", return_value=None) - @patch( - "morphic.shared.file_browser.platform.system", - return_value="Windows", - ) - @patch( - "morphic.shared.file_browser._try_powershell", - return_value="C:\\dir", - ) - def test_windows_fallback(self, mock_ps, mock_sys, mock_tk) -> None: - result = open_native_folder_dialog() - assert result == "C:\\dir" - - @patch("morphic.shared.file_browser._try_tkinter", return_value=None) - @patch( - "morphic.shared.file_browser.platform.system", - return_value="Linux", - ) - @patch("morphic.shared.file_browser._try_zenity", return_value=None) - @patch("morphic.shared.file_browser._try_kdialog", return_value=None) - def test_all_fail_returns_none( - self, - mock_k, - mock_z, - mock_sys, - mock_tk, - ) -> None: - result = open_native_folder_dialog() - assert result is None - - def test_default_initial_dir(self) -> None: - with patch( - "morphic.shared.file_browser._try_tkinter", - return_value="/chosen", - ) as mock_tk: - open_native_folder_dialog() - assert mock_tk.called diff --git a/tests/shared/test_thumbnails.py b/tests/shared/test_thumbnails.py deleted file mode 100644 index f26d1f9..0000000 --- a/tests/shared/test_thumbnails.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Tests for morphic.shared.thumbnails.""" - -from __future__ import annotations - -import io -from unittest.mock import MagicMock, patch - -import pytest -from PIL import Image - -from morphic.shared.thumbnails import ( - generate_image_thumbnail, - generate_video_thumbnail, -) - - -class TestGenerateImageThumbnail: - def test_basic(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (500, 500), "red").save(str(img_path)) - - buf = generate_image_thumbnail(str(img_path), size=100) - assert isinstance(buf, io.BytesIO) - - result = Image.open(buf) - assert result.format == "JPEG" - assert max(result.size) <= 100 - - def test_large_image(self, tmp_path) -> None: - img_path = tmp_path / 
"large.png" - Image.new("RGB", (3000, 2000), "blue").save(str(img_path)) - - buf = generate_image_thumbnail(str(img_path), size=300) - result = Image.open(buf) - assert max(result.size) <= 300 - - def test_rgba_converts_to_rgb(self, tmp_path) -> None: - img_path = tmp_path / "rgba.png" - Image.new("RGBA", (100, 100), (255, 0, 0, 128)).save(str(img_path)) - - buf = generate_image_thumbnail(str(img_path)) - result = Image.open(buf) - assert result.mode == "RGB" - - def test_palette_converts(self, tmp_path) -> None: - img_path = tmp_path / "palette.gif" - Image.new("P", (100, 100)).save(str(img_path)) - - buf = generate_image_thumbnail(str(img_path)) - result = Image.open(buf) - assert result.mode == "RGB" - - def test_la_mode_converts(self, tmp_path) -> None: - img_path = tmp_path / "la.png" - Image.new("LA", (100, 100)).save(str(img_path)) - - buf = generate_image_thumbnail(str(img_path)) - result = Image.open(buf) - assert result.mode == "RGB" - - def test_custom_size(self, tmp_path) -> None: - img_path = tmp_path / "test.jpg" - Image.new("RGB", (1000, 1000), "green").save(str(img_path)) - - buf = generate_image_thumbnail(str(img_path), size=150) - result = Image.open(buf) - assert max(result.size) <= 150 - - def test_nonexistent_file(self) -> None: - with pytest.raises(Exception): - generate_image_thumbnail("/nonexistent/file.jpg") - - -class TestGenerateVideoThumbnail: - @patch("morphic.shared.thumbnails.subprocess.run") - def test_success(self, mock_run) -> None: - img = Image.new("RGB", (100, 100), "red") - buf = io.BytesIO() - img.save(buf, format="JPEG") - jpeg_bytes = buf.getvalue() - - mock_run.return_value = MagicMock( - returncode=0, - stdout=jpeg_bytes, - ) - result = generate_video_thumbnail("/test/video.mp4") - assert result is not None - assert isinstance(result, io.BytesIO) - - @patch("morphic.shared.thumbnails.subprocess.run") - def test_failure(self, mock_run) -> None: - mock_run.return_value = MagicMock(returncode=1, stdout=b"") - result = 
generate_video_thumbnail("/test/video.mp4") - assert result is None - - @patch("morphic.shared.thumbnails.subprocess.run") - def test_retry_at_0s(self, mock_run) -> None: - img = Image.new("RGB", (100, 100), "blue") - buf = io.BytesIO() - img.save(buf, format="JPEG") - jpeg_bytes = buf.getvalue() - - mock_run.side_effect = [ - MagicMock(returncode=1, stdout=b""), - MagicMock(returncode=0, stdout=jpeg_bytes), - ] - result = generate_video_thumbnail("/test/short.mp4") - assert result is not None - assert mock_run.call_count == 2 - - @patch("morphic.shared.thumbnails.subprocess.run") - def test_custom_size(self, mock_run) -> None: - img = Image.new("RGB", (50, 50), "green") - buf = io.BytesIO() - img.save(buf, format="JPEG") - - mock_run.return_value = MagicMock( - returncode=0, - stdout=buf.getvalue(), - ) - result = generate_video_thumbnail("/test/video.mp4", size=150) - assert result is not None - - def test_nonexistent_file(self) -> None: - result = generate_video_thumbnail("/nonexistent/file.mp4") - assert result is None - - def test_invalid_video(self, tmp_path) -> None: - fake = tmp_path / "fake.mp4" - fake.write_bytes(b"\x00" * 10) - result = generate_video_thumbnail(str(fake)) - assert result is None diff --git a/tests/shared/test_utils.py b/tests/shared/test_utils.py deleted file mode 100644 index 7a9d4d9..0000000 --- a/tests/shared/test_utils.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Tests for morphic.shared.utils.""" - -from __future__ import annotations - -import os - -from morphic.shared.utils import ( - find_files_by_extension, - format_duration, - format_file_size, - is_excluded_path, - is_image, - is_video, - normalise_ext, - suppress_stderr, -) - - -class TestNormaliseExt: - def test_jpeg_to_jpg(self) -> None: - assert normalise_ext(".jpeg") == ".jpg" - assert normalise_ext(".JPEG") == ".jpg" - - def test_tiff_to_tif(self) -> None: - assert normalise_ext(".tiff") == ".tif" - assert normalise_ext(".TIFF") == ".tif" - - def test_mpg_to_mpeg(self) -> None: 
- assert normalise_ext(".mpg") == ".mpeg" - - def test_already_canonical(self) -> None: - assert normalise_ext(".png") == ".png" - assert normalise_ext(".mp4") == ".mp4" - - def test_case_insensitive(self) -> None: - assert normalise_ext(".PNG") == ".png" - assert normalise_ext(".Mp4") == ".mp4" - - def test_unknown_extension(self) -> None: - assert normalise_ext(".xyz") == ".xyz" - - def test_empty_string(self) -> None: - assert normalise_ext("") == "" - - -class TestIsImage: - def test_common_image_extensions(self) -> None: - for ext in ["jpg", "jpeg", "png", "tif", "tiff", "webp", "gif", "bmp"]: - assert is_image(f"photo.{ext}"), f".{ext} should be image" - - def test_case_insensitive(self) -> None: - assert is_image("photo.JPG") - assert is_image("photo.Png") - - def test_not_image(self) -> None: - assert not is_image("video.mp4") - assert not is_image("document.pdf") - assert not is_image("noext") - - def test_full_path(self) -> None: - assert is_image("/home/user/photos/test.png") - - -class TestIsVideo: - def test_common_video_extensions(self) -> None: - for ext in ["mp4", "mov", "mkv", "avi", "webm"]: - assert is_video(f"clip.{ext}"), f".{ext} should be video" - - def test_case_insensitive(self) -> None: - assert is_video("clip.MP4") - assert is_video("clip.Mov") - - def test_not_video(self) -> None: - assert not is_video("photo.jpg") - assert not is_video("document.pdf") - - def test_full_path(self) -> None: - assert is_video("/home/user/videos/test.mp4") - - -class TestFormatFileSize: - def test_bytes(self) -> None: - assert format_file_size(512) == "512.00 B" - - def test_kilobytes(self) -> None: - result = format_file_size(1536) - assert "KB" in result - - def test_megabytes(self) -> None: - result = format_file_size(2 * 1024 * 1024) - assert "MB" in result - - def test_gigabytes(self) -> None: - result = format_file_size(3 * 1024**3) - assert "GB" in result - - def test_terabytes(self) -> None: - result = format_file_size(2 * 1024**4) - assert "TB" in 
result - - def test_zero(self) -> None: - assert format_file_size(0) == "0.00 B" - - -class TestFormatDuration: - def test_seconds_only(self) -> None: - assert format_duration(45) == "45s" - - def test_minutes_and_seconds(self) -> None: - assert format_duration(125) == "2m 5s" - - def test_hours_minutes_seconds(self) -> None: - assert format_duration(3661) == "1h 1m 1s" - - def test_zero(self) -> None: - assert format_duration(0) == "0s" - - -class TestIsExcludedPath: - def test_excluded_folder(self) -> None: - assert is_excluded_path("/home/user/node_modules/lib/file.jpg") - - def test_git_folder(self) -> None: - assert is_excluded_path("/repo/.git/objects/file") - - def test_pycache(self) -> None: - assert is_excluded_path("/project/__pycache__/module.pyc") - - def test_normal_path(self) -> None: - assert not is_excluded_path("/home/user/photos/vacation.jpg") - - def test_custom_exclusions(self) -> None: - assert is_excluded_path( - "/project/custom/file.jpg", - excluded_folders=frozenset({"custom"}), - ) - - -class TestFindFilesByExtension: - def test_finds_images(self, tmp_path) -> None: - (tmp_path / "a.jpg").touch() - (tmp_path / "b.png").touch() - (tmp_path / "c.txt").touch() - - result = find_files_by_extension( - str(tmp_path), - frozenset({".jpg", ".png"}), - ) - assert len(result) == 2 - - def test_recursive(self, tmp_path) -> None: - (tmp_path / "a.jpg").touch() - sub = tmp_path / "sub" - sub.mkdir() - (sub / "b.jpg").touch() - - result = find_files_by_extension( - str(tmp_path), - frozenset({".jpg"}), - ) - assert len(result) == 2 - - def test_excludes_folders(self, tmp_path) -> None: - (tmp_path / "a.jpg").touch() - excluded = tmp_path / "node_modules" - excluded.mkdir() - (excluded / "b.jpg").touch() - - result = find_files_by_extension( - str(tmp_path), - frozenset({".jpg"}), - ) - assert len(result) == 1 - - def test_empty_folder(self, tmp_path) -> None: - result = find_files_by_extension( - str(tmp_path), - frozenset({".jpg"}), - ) - assert result 
== [] - - def test_returns_sorted(self, tmp_path) -> None: - (tmp_path / "c.jpg").touch() - (tmp_path / "a.jpg").touch() - (tmp_path / "b.jpg").touch() - - result = find_files_by_extension( - str(tmp_path), - frozenset({".jpg"}), - ) - names = [os.path.basename(f) for f in result] - assert names == sorted(names) - - -class TestSuppressStderr: - def test_suppresses(self) -> None: - import sys - - with suppress_stderr(): - sys.stderr.write("suppressed\n") diff --git a/web/routes_converter.go b/web/routes_converter.go new file mode 100644 index 0000000..cf53fed --- /dev/null +++ b/web/routes_converter.go @@ -0,0 +1,300 @@ +package web + +import ( + "net/http" + "os" + "time" + + "github.com/exterex/morphic/internal/converter" + "github.com/exterex/morphic/internal/shared" + "github.com/gin-gonic/gin" +) + +var conversionStore = shared.NewJobStore[conversionJob]() + +type conversionJob struct { + shared.Job + Total int `json:"total"` + Completed int `json:"completed"` + CurrentFile string `json:"current_file"` + Results []map[string]interface{} `json:"results"` +} + +func init() { + conversionStore.StartCleanup(30*time.Minute, func(j *conversionJob) time.Time { + return j.DoneAt + }) +} + +func registerConverterRoutes(r *gin.Engine) { + g := r.Group("/api/converter") + { + g.POST("/scan", handleConverterScan) + g.GET("/formats", handleConverterFormats) + g.POST("/convert", handleConverterConvert) + g.GET("/progress/:id", handleConverterProgress) + g.GET("/progress/:id/poll", handleConverterPoll) + g.POST("/progress/:id/cancel", handleConverterCancel) + g.POST("/delete", handleConverterDelete) + } +} + +func handleConverterScan(c *gin.Context) { + var req struct { + Folder string `json:"folder"` + IncludeSubfolders *bool `json:"include_subfolders"` + FilterType string `json:"filter_type"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + if req.Folder == "" || !isDir(req.Folder) { + 
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid folder: " + req.Folder}) + return + } + includeSub := true + if req.IncludeSubfolders != nil { + includeSub = *req.IncludeSubfolders + } + filterType := req.FilterType + if filterType != "images" && filterType != "videos" && filterType != "both" { + filterType = "both" + } + + result, err := converter.ScanFolder(req.Folder, includeSub, filterType) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + c.JSON(http.StatusOK, result) +} + +func handleConverterFormats(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{ + "image": converter.ImageConversions, + "video": gin.H{ + "containers": converter.VideoContainers, + }, + }) +} + +func handleConverterConvert(c *gin.Context) { + var req struct { + Files []string `json:"files"` + TargetExt string `json:"target_ext"` + Codec string `json:"codec"` + DeleteOriginal bool `json:"delete_original"` + AV1CRF *int `json:"av1_crf"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + if len(req.Files) == 0 || req.TargetExt == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "files and target_ext required"}) + return + } + + av1CRF := 0 + if req.AV1CRF != nil { + av1CRF = *req.AV1CRF + } + + job := &conversionJob{ + Job: shared.NewJob(), + Total: len(req.Files), + } + job.Status = shared.JobStatusRunning + conversionStore.Set(job.ID, job) + + go runConversion(job, req.Files, req.TargetExt, req.Codec, req.DeleteOriginal, av1CRF) + + c.JSON(http.StatusAccepted, gin.H{"job_id": job.ID}) +} + +func runConversion(job *conversionJob, files []string, targetExt, codec string, deleteOriginal bool, av1CRF int) { + for i, source := range files { + // Check for cancellation before each file + select { + case <-job.Ctx().Done(): + job.Status = shared.JobStatusCancelled + job.CurrentFile = "" + job.DoneAt = time.Now() + return + default: + } + + job.CurrentFile = 
source + + result := map[string]interface{}{ + "source": source, + "source_deleted": false, + } + + origSize := int64(0) + if info, err := os.Stat(source); err == nil { + origSize = info.Size() + } + + dest, err := converter.ConvertFile(source, targetExt, codec, "", av1CRF) + if err != nil { + result["destination"] = nil + result["status"] = "error" + result["error"] = err.Error() + } else { + newSize := int64(0) + if info, err := os.Stat(dest); err == nil { + newSize = info.Size() + } + + result["destination"] = dest + result["status"] = "ok" + result["original_size"] = origSize + result["new_size"] = newSize + result["original_size_fmt"] = shared.FormatFileSize(origSize) + result["new_size_fmt"] = shared.FormatFileSize(newSize) + + // Delete original only if explicitly requested and safe + if deleteOriginal && dest != "" { + absSrc, _ := absPath(source) + absDest, _ := absPath(dest) + if absSrc != absDest && newSize > 0 { + if err := os.Remove(source); err == nil { + result["source_deleted"] = true + } + } + } + } + + job.Results = append(job.Results, result) + job.Completed = i + 1 + } + + job.Status = shared.JobStatusDone + job.CurrentFile = "" + job.DoneAt = time.Now() +} + +func handleConverterProgress(c *gin.Context) { + id := c.Param("id") + job, ok := conversionStore.Get(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"}) + return + } + c.JSON(http.StatusOK, gin.H{ + "id": job.ID, + "status": job.Status, + "total": job.Total, + "completed": job.Completed, + "current_file": job.CurrentFile, + "results": job.Results, + "error": job.Error, + }) +} + +func handleConverterPoll(c *gin.Context) { + id := c.Param("id") + job, ok := conversionStore.Get(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"}) + return + } + + lastStr := c.Query("last") + last := -1 + if lastStr != "" { + for i := 0; i < len(lastStr); i++ { + if lastStr[i] >= '0' && lastStr[i] <= '9' { + last = last*10 + int(lastStr[i]-'0') + } + } + } + 
+ deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + if job.Completed != last || job.Status == shared.JobStatusDone { + break + } + time.Sleep(300 * time.Millisecond) + } + + c.JSON(http.StatusOK, gin.H{ + "id": job.ID, + "status": job.Status, + "total": job.Total, + "completed": job.Completed, + "current_file": job.CurrentFile, + "results": job.Results, + "error": job.Error, + }) +} + +func handleConverterDelete(c *gin.Context) { + var req struct { + Files []string `json:"files"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + if len(req.Files) == 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "No files specified"}) + return + } + + var results []map[string]interface{} + totalFreed := int64(0) + + for _, fp := range req.Files { + info, err := os.Stat(fp) + if err != nil { + results = append(results, map[string]interface{}{"path": fp, "status": "not_found"}) + continue + } + if info.IsDir() { + results = append(results, map[string]interface{}{"path": fp, "status": "not_found"}) + continue + } + size := info.Size() + if err := os.Remove(fp); err != nil { + if os.IsPermission(err) { + results = append(results, map[string]interface{}{"path": fp, "status": "permission_denied"}) + } else { + results = append(results, map[string]interface{}{"path": fp, "status": "error", "error": err.Error()}) + } + } else { + totalFreed += size + results = append(results, map[string]interface{}{"path": fp, "status": "deleted", "size_freed": size}) + } + } + + c.JSON(http.StatusOK, gin.H{ + "results": results, + "total_freed": totalFreed, + "total_freed_formatted": shared.FormatFileSize(totalFreed), + }) +} + +func handleConverterCancel(c *gin.Context) { + id := c.Param("id") + job, ok := conversionStore.Get(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"}) + return + } + job.Cancel() + c.JSON(http.StatusOK, gin.H{"status": "cancelling"}) +} + 
+func absPath(p string) (string, error) { + abs, err := os.Getwd() + if err != nil { + return p, err + } + if len(p) > 0 && p[0] == '/' { + return p, nil + } + return abs + "/" + p, nil +} diff --git a/web/routes_dupfinder.go b/web/routes_dupfinder.go new file mode 100644 index 0000000..3b2d0b9 --- /dev/null +++ b/web/routes_dupfinder.go @@ -0,0 +1,162 @@ +package web + +import ( + "net/http" + "os" + "time" + + "github.com/exterex/morphic/internal/dupfinder" + "github.com/exterex/morphic/internal/shared" + "github.com/gin-gonic/gin" +) + +func registerDupfinderRoutes(r *gin.Engine) { + g := r.Group("/api/dupfinder") + { + g.POST("/scan", handleDupfinderScan) + g.GET("/scan/:id/status", handleDupfinderStatus) + g.GET("/scan/:id/results", handleDupfinderResults) + g.POST("/scan/:id/cancel", handleDupfinderCancel) + g.POST("/delete", handleDupfinderDelete) + } +} + +func handleDupfinderScan(c *gin.Context) { + var req struct { + Folder string `json:"folder"` + Type string `json:"type"` + ImageThreshold float64 `json:"image_threshold"` + VideoThreshold float64 `json:"video_threshold"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + if req.Folder == "" || !isDir(req.Folder) { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid folder: " + req.Folder}) + return + } + if req.Type == "" { + req.Type = "both" + } + if req.Type != "images" && req.Type != "videos" && req.Type != "both" { + c.JSON(http.StatusBadRequest, gin.H{"error": "type must be images, videos, or both"}) + return + } + if req.ImageThreshold == 0 { + req.ImageThreshold = shared.DefaultImageThreshold + } + if req.VideoThreshold == 0 { + req.VideoThreshold = shared.DefaultVideoThreshold + } + + jobID := dupfinder.StartJob(req.Folder, req.Type, req.ImageThreshold, req.VideoThreshold) + c.JSON(http.StatusAccepted, gin.H{"job_id": jobID}) +} + +func handleDupfinderStatus(c *gin.Context) { + id := c.Param("id") + job, ok := 
dupfinder.GetJob(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"}) + return + } + + elapsed := 0.0 + if !job.StartedAt.IsZero() { + end := job.DoneAt + if end.IsZero() { + end = time.Now() + } + elapsed = end.Sub(job.StartedAt).Seconds() + } + + c.JSON(http.StatusOK, gin.H{ + "id": job.ID, + "status": job.Status, + "progress": job.Progress, + "message": job.Message, + "error": job.Error, + "total_files_found": job.TotalFound, + "total_files_processed": job.TotalProcessed, + "elapsed_seconds": round1(elapsed), + }) +} + +func handleDupfinderResults(c *gin.Context) { + id := c.Param("id") + job, ok := dupfinder.GetJob(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"}) + return + } + + if job.Status != "done" && job.Status != "failed" { + c.JSON(http.StatusConflict, gin.H{"error": "Scan not finished yet"}) + return + } + + c.JSON(http.StatusOK, gin.H{ + "image_groups": job.ImageGroups, + "video_groups": job.VideoGroups, + "space_savings": job.SpaceSavings, + "space_savings_formatted": shared.FormatFileSize(job.SpaceSavings), + }) +} + +func handleDupfinderCancel(c *gin.Context) { + id := c.Param("id") + job, ok := dupfinder.GetJob(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "Job not found"}) + return + } + job.Cancel() + c.JSON(http.StatusOK, gin.H{"status": "cancelling"}) +} + +func handleDupfinderDelete(c *gin.Context) { + var req struct { + Files []string `json:"files"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + if len(req.Files) == 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "No files specified"}) + return + } + + var results []map[string]interface{} + totalFreed := int64(0) + + for _, fp := range req.Files { + info, err := os.Stat(fp) + if err != nil { + results = append(results, map[string]interface{}{"path": fp, "status": "not_found"}) + continue + } + if info.IsDir() { + results = append(results, 
map[string]interface{}{"path": fp, "status": "not_found"}) + continue + } + size := info.Size() + if err := os.Remove(fp); err != nil { + if os.IsPermission(err) { + results = append(results, map[string]interface{}{"path": fp, "status": "permission_denied"}) + } else { + results = append(results, map[string]interface{}{"path": fp, "status": "error", "error": err.Error()}) + } + } else { + totalFreed += size + results = append(results, map[string]interface{}{"path": fp, "status": "deleted", "size_freed": size}) + } + } + + c.JSON(http.StatusOK, gin.H{ + "results": results, + "total_freed": totalFreed, + "total_freed_formatted": shared.FormatFileSize(totalFreed), + }) +} diff --git a/web/routes_organizer.go b/web/routes_organizer.go new file mode 100644 index 0000000..1b1f29f --- /dev/null +++ b/web/routes_organizer.go @@ -0,0 +1,121 @@ +package web + +import ( + "net/http" + + "github.com/exterex/morphic/internal/organizer" + "github.com/gin-gonic/gin" +) + +func registerOrganizerRoutes(r *gin.Engine) { + g := r.Group("/api/organizer") + { + g.POST("/plan", handleOrganizerPlan) + g.POST("/execute", handleOrganizerExecute) + g.GET("/status/:id", handleOrganizerStatus) + g.POST("/cancel/:id", handleOrganizerCancel) + } +} + +func handleOrganizerPlan(c *gin.Context) { + var req struct { + Folder string `json:"folder"` + Mode string `json:"mode"` + Template string `json:"template"` + Destination string `json:"destination"` + Operation string `json:"operation"` + StartSeq int `json:"start_seq"` + } + + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if req.Folder == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "folder is required"}) + return + } + if req.StartSeq <= 0 { + req.StartSeq = 1 + } + + jobID := organizer.StartPlanJob( + req.Folder, req.Mode, req.Template, + req.Destination, req.Operation, req.StartSeq, + ) + + c.JSON(http.StatusAccepted, gin.H{"job_id": jobID}) +} + +func 
handleOrganizerExecute(c *gin.Context) { + var req struct { + JobID string `json:"job_id"` + } + if err := c.ShouldBindJSON(&req); err != nil || req.JobID == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "job_id required"}) + return + } + + if !organizer.ExecuteJob(req.JobID) { + c.JSON(http.StatusNotFound, gin.H{"error": "job not found or not in planned state"}) + return + } + + c.JSON(http.StatusAccepted, gin.H{"status": "executing", "job_id": req.JobID}) +} + +func handleOrganizerStatus(c *gin.Context) { + id := c.Param("id") + job, ok := organizer.GetJob(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "job not found"}) + return + } + + resp := gin.H{ + "id": job.ID, + "status": job.Status, + "phase": job.Phase, + "mode": job.Mode, + "operation": job.Operation, + "progress": job.Progress, + "message": job.Message, + "error": job.Error, + } + + // Include plan when planning is done (matches Python's response) + if job.Phase == "planned" || job.Phase == "executing" || job.Phase == "done" { + plan := organizer.GetUnifiedPlan(job) + resp["plan"] = plan + resp["plan_count"] = len(plan) + + conflicts := 0 + for _, entry := range plan { + if _, ok := entry["conflict"]; ok { + if entry["conflict"] == true { + conflicts++ + } + } + } + resp["conflicts"] = conflicts + } + + // Include execution results when done + if job.Phase == "done" { + resp["execution"] = organizer.GetExecutionResult(job) + } + + c.JSON(http.StatusOK, resp) +} + +func handleOrganizerCancel(c *gin.Context) { + id := c.Param("id") + job, ok := organizer.GetJob(id) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "job not found"}) + return + } + job.Cancel() + c.JSON(http.StatusOK, gin.H{"status": "cancelling"}) +} diff --git a/web/routes_shared.go b/web/routes_shared.go new file mode 100644 index 0000000..845934b --- /dev/null +++ b/web/routes_shared.go @@ -0,0 +1,205 @@ +package web + +import ( + "math" + "mime" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + 
"sort" + "strings" + + "github.com/exterex/morphic/internal/shared" + "github.com/gin-gonic/gin" +) + +func registerSharedRoutes(r *gin.Engine) { + r.GET("/api/browse", handleBrowseDirectory) + r.POST("/api/browse/native", handleBrowseNative) + r.GET("/api/thumbnail", handleThumbnail) + r.GET("/api/system_info", handleSystemInfo) + r.GET("/api/media", handleMedia) +} + +// handleBrowseDirectory lists directories for the in-page folder browser. +func handleBrowseDirectory(c *gin.Context) { + path := c.Query("path") + if path == "" { + home, _ := os.UserHomeDir() + path = home + } + + path = filepath.Clean(path) + info, err := os.Stat(path) + if err != nil || !info.IsDir() { + c.JSON(http.StatusBadRequest, gin.H{"error": "Not a directory"}) + return + } + + entries, _ := os.ReadDir(path) + type dirEntry struct { + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` + } + var dirs []dirEntry + for _, e := range entries { + if strings.HasPrefix(e.Name(), ".") { + continue + } + if e.IsDir() { + dirs = append(dirs, dirEntry{ + Name: e.Name(), + Path: filepath.Join(path, e.Name()), + Type: "directory", + }) + } + } + sort.Slice(dirs, func(i, j int) bool { + return strings.ToLower(dirs[i].Name) < strings.ToLower(dirs[j].Name) + }) + + parent := filepath.Dir(path) + var parentPtr interface{} = parent + if parent == path { + parentPtr = nil + } + + c.JSON(http.StatusOK, gin.H{ + "current": path, + "parent": parentPtr, + "entries": dirs, + }) +} + +// handleBrowseNative opens the OS-native folder picker dialog. 
+func handleBrowseNative(c *gin.Context) { + folder, available, err := shared.OpenNativeFolderDialog() + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + if !available { + c.JSON(http.StatusOK, gin.H{ + "folder": nil, + "available": false, + }) + return + } + if folder == "" { + c.JSON(http.StatusOK, gin.H{ + "folder": nil, + "available": true, + "cancelled": true, + }) + return + } + c.JSON(http.StatusOK, gin.H{"folder": folder, "available": true}) +} + +func handleThumbnail(c *gin.Context) { + path := c.Query("path") + if path == "" { + c.Status(http.StatusBadRequest) + return + } + + var data []byte + var err error + + if shared.IsVideoFile(path) { + data, err = shared.GenerateVideoThumbnail(path, shared.DefaultThumbnailSize) + } else { + data, err = shared.GenerateImageThumbnail(path, shared.DefaultThumbnailSize) + } + + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "thumbnail generation failed", "detail": err.Error()}) + return + } + + c.Data(http.StatusOK, "image/jpeg", data) +} + +func handleSystemInfo(c *gin.Context) { + ffmpegInfo := gin.H{ + "installed": false, + "hwaccels": []string{}, + "encoders": []string{}, + "nvenc_available": false, + } + + if _, err := exec.LookPath("ffmpeg"); err == nil { + ffmpegInfo["installed"] = true + + if out, err := exec.Command("ffmpeg", "-hide_banner", "-encoders"). 
+ CombinedOutput(); err == nil { + var encoders []string + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + if len(line) > 0 && (line[0] == 'V' || line[0] == 'A') { + encoders = append(encoders, line) + } + } + ffmpegInfo["encoders"] = encoders + for _, e := range encoders { + if strings.Contains(e, "nvenc") { + ffmpegInfo["nvenc_available"] = true + break + } + } + } + } + + c.JSON(http.StatusOK, gin.H{ + "version": shared.Version, + "platform": runtime.GOOS, + "arch": runtime.GOARCH, + "go": runtime.Version(), + "cpus": runtime.NumCPU(), + "ffmpeg": ffmpegInfo, + }) +} + +// handleMedia serves a media file for full-size preview. +func handleMedia(c *gin.Context) { + filePath := c.Query("path") + if filePath == "" { + c.Status(http.StatusBadRequest) + return + } + + filePath = filepath.Clean(filePath) + info, err := os.Stat(filePath) + if err != nil || info.IsDir() { + c.Status(http.StatusNotFound) + return + } + + ext := shared.NormaliseExt(filepath.Ext(filePath)) + _, isImg := shared.ImageExtensions[ext] + _, isVid := shared.VideoExtensions[ext] + if !isImg && !isVid { + c.Status(http.StatusForbidden) + return + } + + contentType := mime.TypeByExtension(filepath.Ext(filePath)) + if contentType == "" { + contentType = "application/octet-stream" + } + c.File(filePath) +} + +// isDir returns true when path exists and is a directory. +func isDir(path string) bool { + info, err := os.Stat(path) + return err == nil && info.IsDir() +} + +// round1 rounds f to one decimal place. +func round1(f float64) float64 { + return math.Round(f*10) / 10 +} diff --git a/web/server.go b/web/server.go new file mode 100644 index 0000000..1645bca --- /dev/null +++ b/web/server.go @@ -0,0 +1,48 @@ +package web + +import ( + "embed" + "html/template" + "io/fs" + "net/http" + + "github.com/gin-gonic/gin" +) + +//go:embed templates static +var webFS embed.FS + +// SetupRouter creates and configures the gin router with all routes. 
+func SetupRouter() *gin.Engine { + r := gin.Default() + + // Parse templates from embedded FS + tmpl := template.Must(template.ParseFS(webFS, "templates/*.html")) + r.SetHTMLTemplate(tmpl) + + // Serve static files from embedded FS + staticFS, _ := fs.Sub(webFS, "static") + r.StaticFS("/static", http.FS(staticFS)) + + // No-cache middleware (mirrors Python's @app.after_request) + r.Use(func(c *gin.Context) { + c.Header("Cache-Control", "no-cache, no-store, must-revalidate") + c.Header("Pragma", "no-cache") + c.Header("Expires", "0") + c.Next() + }) + + // Index route + r.GET("/", func(c *gin.Context) { + initialFolder := "" + c.HTML(http.StatusOK, "index.html", gin.H{"initial_folder": initialFolder}) + }) + + // Register API route groups + registerSharedRoutes(r) + registerOrganizerRoutes(r) + registerConverterRoutes(r) + registerDupfinderRoutes(r) + + return r +} diff --git a/src/morphic/frontend/static/app.js b/web/static/app.js similarity index 66% rename from src/morphic/frontend/static/app.js rename to web/static/app.js index 5261a06..6cedd60 100644 --- a/src/morphic/frontend/static/app.js +++ b/web/static/app.js @@ -12,13 +12,21 @@ let convScanData = null; // last scan result let convFilterType = 'both'; // images|videos|both let convFilterExt = null; // filter by specific extension let convSelectedFiles = []; // files selected for conversion -let convAvailableTargets = []; // all targets currently available for batch conversion +let convAvailableTargets = []; // image format targets for the image dropdown let convAv1Available = false; // AV1 support from ffmpeg -let convLastFailedFiles = new Set(); // set of last conversion failures +let convVideoFormats = []; // VideoContainerConfig[] from backend +let convCodecLabels = { // codec ID → display label + h264: 'H.264 (AVC)', h265: 'H.265 (HEVC)', + av1: 'AV1', vp8: 'VP8', vp9: 'VP9', +}; +let convFileResults = new Map(); // path → {status:'ok'|'error'|'converting', ...} let convBatchMode = 'intersection'; // 
union|intersection -let convAv1Crf = 32; +const convAv1Crf = 35; let convJobId = null; let convPollTimer = null; +let convScanning = false; // true while folder scan is in flight +let convScanController = null; // AbortController for active scan request +let convScanElapsedTimer = null; // interval for scan elapsed counter let showFullPaths = false; async function convLoadFormats() { @@ -28,22 +36,11 @@ async function convLoadFormats() { convBatchMode = savedMode; } - const savedAv1Crf = Number(localStorage.getItem('convAv1Crf')); - if (!Number.isNaN(savedAv1Crf) && savedAv1Crf >= 10 && savedAv1Crf <= 63) { - convAv1Crf = savedAv1Crf; - } - const modeSelect = document.getElementById('convBatchMode'); if (modeSelect) { modeSelect.value = convBatchMode; } - const av1CrfInput = document.getElementById('convAv1Crf'); - if (av1CrfInput) { - av1CrfInput.value = convAv1Crf; - document.getElementById('convAv1CrfValue').textContent = convAv1Crf; - } - const resp = await fetch('/api/converter/formats'); const data = await resp.json(); @@ -57,44 +54,182 @@ async function convLoadFormats() { convAv1Available = false; } - const targets = new Set(); - if (data.image) { - Object.values(data.image).flat().forEach(t => targets.add(t)); - } - if (data.video) { - Object.values(data.video).flat().forEach(t => targets.add(t)); - } + // Parse structured video container config + convVideoFormats = (data.video && data.video.containers) || []; - if (convAv1Available) { - targets.add('.mp4-av1'); - targets.add('.mkv-av1'); - targets.add('.webm-av1'); + // Collect image format targets for the image dropdown + const imagetargets = new Set(); + if (data.image) { + Object.values(data.image).flat().forEach(t => imagetargets.add(t)); } + convAvailableTargets = [...imagetargets].sort(); - convAvailableTargets = [...targets].sort(); + convInitBatchVideoDropdowns(); convSetBatchTargets(convAvailableTargets); } catch (e) { console.error('Failed to load converter formats:', e); } } +function 
convInitBatchVideoDropdowns() { + const containerSel = document.getElementById('convBatchContainer'); + if (!containerSel || convVideoFormats.length === 0) return; + containerSel.innerHTML = convVideoFormats.map(c => + `` + ).join(''); + convOnBatchContainerChange(); +} + +function convOnBatchContainerChange() { + const containerSel = document.getElementById('convBatchContainer'); + const codecSel = document.getElementById('convBatchCodec'); + const extSel = document.getElementById('convBatchExt'); + if (containerSel && codecSel && extSel) { + convFilterCodecExt(containerSel.value, codecSel, extSel); + } +} + +function convFilterCodecExt(containerName, codecSel, extSel) { + const container = convVideoFormats.find(c => c.name === containerName); + if (!container) return; + + const prevCodec = codecSel.value; + const prevExt = extSel.value; + + codecSel.innerHTML = container.codecs.map(codec => { + const label = convCodecLabels[codec] || codec.toUpperCase(); + const disabled = codec === 'av1' && !convAv1Available; + return ``; + }).join(''); + + if (container.codecs.includes(prevCodec) && !(prevCodec === 'av1' && !convAv1Available)) { + codecSel.value = prevCodec; + } + + extSel.innerHTML = container.extensions.map(ext => + `` + ).join(''); + + if (container.extensions.includes(prevExt)) { + extSel.value = prevExt; + } +} + +function convGetSelectedByType() { + if (!convScanData || !convScanData.files) return { videos: [], images: [] }; + const selectedSet = new Set(convSelectedFiles); + const videos = [], images = []; + for (const f of convScanData.files) { + if (!selectedSet.has(f.path)) continue; + if (f.type === 'video') videos.push(f.path); + else images.push(f.path); + } + return { videos, images }; +} + +function convUpdateBatchDropdowns() { + const { videos, images } = convGetSelectedByType(); + const videoDiv = document.getElementById('convVideoDropdowns'); + const imageDiv = document.getElementById('convImageDropdown'); + + if (videoDiv) 
videoDiv.style.display = videos.length > 0 ? 'flex' : 'none'; + if (imageDiv) imageDiv.style.display = images.length > 0 ? 'flex' : 'none'; + + if (images.length > 0) { + const targets = convGetBatchTargets(); + convSetBatchTargets(targets); + } + + const batchBtn = document.getElementById('convBatchBtn'); + if (batchBtn) { + batchBtn.disabled = videos.length === 0 && images.length === 0; + } +} + +function convOnRowContainerChange(containerSel) { + const group = containerSel.closest('.conv-video-format-group'); + const codecSel = group.querySelector('.conv-vid-codec'); + const extSel = group.querySelector('.conv-vid-ext'); + convFilterCodecExt(containerSel.value, codecSel, extSel); +} + +function convBuildVideoSelectsHtml() { + if (convVideoFormats.length === 0) { + return ``; + } + const first = convVideoFormats[0]; + const selStyle = 'font-size:12px;padding:3px 6px;background:var(--surface2);color:var(--text);border:1px solid var(--border);border-radius:4px;'; + const containerOpts = convVideoFormats.map(c => + `` + ).join(''); + const codecOpts = first.codecs.map(codec => { + const label = convCodecLabels[codec] || codec.toUpperCase(); + const disabled = codec === 'av1' && !convAv1Available; + return ``; + }).join(''); + const extOpts = first.extensions.map(ext => ``).join(''); + return `
+ + + +
// Thumbnail lazy-loading helper
let lazyThumbnailObserver = null;

/**
 * Create the shared IntersectionObserver on first use.
 * Does nothing when it already exists or when the browser does not provide
 * IntersectionObserver (lazyThumbnailObserver then stays null).
 */
function initLazyThumbnailObserver() {
    if (lazyThumbnailObserver) {
        return;
    }
    if (!('IntersectionObserver' in window)) {
        return;
    }

    lazyThumbnailObserver = new IntersectionObserver((entries) => {
        for (const entry of entries) {
            if (!entry.isIntersecting) {
                continue;
            }
            const img = entry.target;
            const dataSrc = img.dataset.src;
            if (dataSrc) {
                img.src = dataSrc;
                img.removeAttribute('data-src');
            }
            // Each image only needs to be loaded once.
            lazyThumbnailObserver.unobserve(img);
        }
    }, {
        rootMargin: '400px',
        threshold: 0.01,
    });
}

/**
 * Arrange for every `img[data-src]` inside `container` to get its real `src`.
 * With IntersectionObserver the image is loaded when it approaches the
 * viewport. Without it the images are loaded immediately, because otherwise
 * they would never receive a `src` at all.
 *
 * @param {Element} container element whose descendant images are handled
 */
function observeThumbnails(container) {
    if (!lazyThumbnailObserver) {
        initLazyThumbnailObserver();
    }
    const images = container.querySelectorAll('img[data-src]');
    if (!lazyThumbnailObserver) {
        // Fallback for browsers without IntersectionObserver: load eagerly.
        images.forEach((img) => {
            img.src = img.dataset.src;
            img.removeAttribute('data-src');
        });
        return;
    }
    images.forEach(img => lazyThumbnailObserver.observe(img));
}
openNativeFolderExplorer(tab) { }); const data = await resp.json(); + if (data.available === false) { + // No native dialog tool — fall back to in-page browser + browseTo(initialDir, tab); + return; + } if (data.folder) { if (input) { input.value = data.folder; @@ -227,7 +366,7 @@ async function openNativeFolderExplorer(tab) { } showToast('Folder selected: ' + data.folder, 'success'); } else { - showToast(data.message || 'Native folder dialog was canceled', 'info'); + showToast('Native folder dialog was cancelled', 'info'); } } catch (e) { showToast('Native folder open failed: ' + e.message, 'error'); @@ -287,15 +426,22 @@ async function browseTo(path, tab) { // ===================================================================== async function convScan() { + // Toggle: if scan already running, abort it + if (convScanning) { + if (convScanController) convScanController.abort(); + return; + } + const folder = document.getElementById('convFolder').value.trim(); if (!folder) { showToast('Enter a folder path', 'error'); return; } _storeFolder('converter', folder); - convLastFailedFiles = new Set(); + convFileResults = new Map(); const includeSubfolders = document.getElementById('convSubfolders').checked; const filterType = document.getElementById('convFilterType').value; - document.getElementById('convScanBtn').disabled = true; + convScanController = new AbortController(); + convSetScanStopMode(); document.getElementById('convResults').style.display = 'none'; try { @@ -305,22 +451,67 @@ async function convScan() { body: JSON.stringify({ folder, include_subfolders: includeSubfolders, filter_type: filterType, }), + signal: convScanController.signal, }); const data = await resp.json(); - if (data.error) { showToast(data.error, 'error'); return; } + if (data.error) { showToast(data.error, 'error'); convRestoreScanBtn(); return; } convScanData = data; convFilterType = filterType; convFilterExt = null; convSelectedFiles = []; + convRestoreScanBtn(); renderConvResults(); } 
catch (e) { - showToast('Scan failed: ' + e.message, 'error'); - } finally { - document.getElementById('convScanBtn').disabled = false; + if (e.name === 'AbortError') { + convShowScanInterrupted(); + } else { + showToast('Scan failed: ' + e.message, 'error'); + } + convRestoreScanBtn(); } } +function convSetScanStopMode() { + convScanning = true; + const btn = document.getElementById('convScanBtn'); + btn.textContent = '⏹ Stop Scan'; + btn.classList.remove('btn-primary'); + btn.classList.add('btn-stop'); + btn.disabled = false; + + // Show scan progress panel with elapsed timer + let elapsed = 0; + document.getElementById('convScanElapsed').textContent = '0s'; + document.getElementById('convScanProgressMsg').textContent = 'Walking folder tree...'; + document.getElementById('convScanProgress').classList.add('active'); + convScanElapsedTimer = setInterval(() => { + elapsed++; + document.getElementById('convScanElapsed').textContent = formatDuration(elapsed); + }, 1000); +} + +function convRestoreScanBtn() { + convScanning = false; + convScanController = null; + clearInterval(convScanElapsedTimer); + convScanElapsedTimer = null; + document.getElementById('convScanProgress').classList.remove('active'); + const btn = document.getElementById('convScanBtn'); + btn.disabled = false; + btn.textContent = '🔍 Scan Folder'; + btn.classList.remove('btn-stop'); + btn.classList.add('btn-primary'); +} + +function convShowScanInterrupted() { + document.getElementById('convResults').style.display = 'block'; + document.getElementById('convSummary').innerHTML = + '
⏹ Scan was interrupted — no results to display.
'; + document.getElementById('convFileTable').innerHTML = ''; + document.getElementById('convBulkBar').style.display = 'none'; +} + function renderConvResults() { if (!convScanData) return; const section = document.getElementById('convResults'); @@ -363,6 +554,7 @@ function renderConvResults() { Type Size + Result Convert to `; @@ -370,35 +562,45 @@ function renderConvResults() { for (const f of files) { const thumbUrl = `/api/thumbnail?path=${encodeURIComponent(f.path)}`; const displayName = showFullPaths ? f.path : f.name; - const failed = convLastFailedFiles.has(f.path); + const result = convFileResults.get(f.path); + const hasError = result?.status === 'error'; + + let resultCell = ''; + if (result) { + if (result.status === 'converting') { + resultCell = 'Converting…'; + } else if (result.status === 'ok') { + const pct = result.original_size > 0 + ? Math.round((1 - result.new_size / result.original_size) * 100) : 0; + const sign = pct >= 0 ? '−' : '+'; + resultCell = ` ${result.original_size_fmt} → ${result.new_size_fmt} (${sign}${Math.abs(pct)}%)`; + } else if (result.status === 'error') { + const short = escapeHtml((result.error || 'unknown').slice(0, 80)); + resultCell = `✗ ${short}`; + } + } - thtml += ` - + thtml += ` + - ${failed ? 'Failed' : ''} ${f.ext} ${formatBytes(f.size)} + ${resultCell} - + ${f.type === 'video' + ? convBuildVideoSelectsHtml() + : ``} - ${failed ? `` : ''} + ${hasError ? `` : ''} `; @@ -406,11 +608,13 @@ function renderConvResults() { thtml += ''; table.innerHTML = thtml; - convUpdateBatchTargets(); + observeThumbnails(table); + convUpdateBatchDropdowns(); } function convSetBatchTargets(targets) { const select = document.getElementById('convBatchTarget'); + if (!select) return; const prevValue = select.value; const targetArray = Array.isArray(targets) ? 
targets.filter(Boolean) : []; @@ -421,7 +625,7 @@ function convSetBatchTargets(targets) { select.disabled = true; const opt = document.createElement('option'); opt.value = ''; - opt.textContent = 'No compatible target formats'; + opt.textContent = 'No compatible image formats'; opt.disabled = true; select.appendChild(opt); return; @@ -446,73 +650,49 @@ function convGetBatchTargets() { const selectedFilePaths = new Set(convSelectedFiles || []); const modeSelect = document.getElementById('convBatchMode'); if (modeSelect) { - convBatchMode = modeSelect.value || 'union'; + convBatchMode = modeSelect.value || 'intersection'; localStorage.setItem('convBatchMode', convBatchMode); } - const av1CrfInput = document.getElementById('convAv1Crf'); - if (av1CrfInput) { - const val = Number(av1CrfInput.value); - if (!Number.isNaN(val) && val >= 10 && val <= 63) { - convAv1Crf = val; - localStorage.setItem('convAv1Crf', convAv1Crf); - const av1CrfValue = document.getElementById('convAv1CrfValue'); - if (av1CrfValue) { - av1CrfValue.textContent = String(convAv1Crf); - } - } - } - const modeHint = document.getElementById('convBatchModeHint'); if (modeHint) { modeHint.textContent = convBatchMode === 'intersection' - ? 'Intersection = formats supported by each selected file; Union = any selected file' - : 'Union = formats supported by at least one selected file; Intersection = common to all'; + ? 'Image formats: common to all selected images' + : 'Image formats: supported by any selected image'; } - // If no scan data yet, show global format list from converter/formats. 
+ // Only image files contribute to the image format dropdown if (!convScanData || !convScanData.files || convScanData.files.length === 0) { return [...new Set(convAvailableTargets)].sort(); } - const files = convScanData.files.filter(f => selectedFilePaths.size === 0 || selectedFilePaths.has(f.path)); - if (files.length === 0) { + const imageFiles = convScanData.files.filter(f => + f.type === 'image' && (selectedFilePaths.size === 0 || selectedFilePaths.has(f.path)) + ); + + if (imageFiles.length === 0) { return [...new Set(convAvailableTargets)].sort(); } - const hasVideo = files.some(f => f.type === 'video'); - const fileTargets = files.map(f => new Set((f.targets || []).map(t => t.toLowerCase()))); + const fileTargets = imageFiles.map(f => new Set((f.targets || []).map(t => t.toLowerCase()))); if (convBatchMode === 'intersection') { let intersection = new Set(fileTargets[0]); for (let i = 1; i < fileTargets.length; i++) { intersection = new Set([...intersection].filter(t => fileTargets[i].has(t))); } - if (hasVideo) { - ['.mp4-av1', '.mkv-av1', '.webm-av1'].forEach(v => intersection.add(v)); - } return [...intersection].sort(); } - // union const union = new Set(); for (const targetSet of fileTargets) { for (const t of targetSet) union.add(t); } - if (hasVideo) { - ['.mp4-av1', '.mkv-av1', '.webm-av1'].forEach(v => union.add(v)); - } return [...union].sort(); } function convUpdateBatchTargets() { - const targets = convGetBatchTargets(); - convSetBatchTargets(targets); - - const batchBtn = document.getElementById('convBatchBtn'); - if (batchBtn) { - batchBtn.disabled = !targets || targets.length === 0; - } + convUpdateBatchDropdowns(); } function convSetExtFilter(ext) { @@ -539,7 +719,7 @@ function convUpdateSelection() { bar.style.display = 'none'; } - convUpdateBatchTargets(); + convUpdateBatchDropdowns(); } function toggleFullPaths() { @@ -553,30 +733,41 @@ function toggleFullPaths() { async function convConvertSingle(filePath, btnEl) { const row = 
btnEl.closest('tr'); - const targetSelect = row.querySelector('.conv-target'); - const targetExt = targetSelect.value; const deleteOrig = document.getElementById('convDeleteOrig').checked; + let targetExt, codec; + const videoContainer = row.querySelector('.conv-vid-container'); + if (videoContainer) { + targetExt = row.querySelector('.conv-vid-ext').value; + codec = row.querySelector('.conv-vid-codec').value; + } else { + targetExt = row.querySelector('.conv-target').value; + } + btnEl.disabled = true; btnEl.textContent = '...'; try { + const body = { + files: [filePath], + target_ext: targetExt, + delete_original: deleteOrig, + av1_crf: convAv1Crf, + }; + if (codec) body.codec = codec; + const resp = await fetch('/api/converter/convert', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - files: [filePath], - target_ext: targetExt, - delete_original: deleteOrig, - av1_crf: convAv1Crf, - }), + body: JSON.stringify(body), }); const data = await resp.json(); if (data.job_id) { await convWaitForJob(data.job_id); } } catch (e) { - showToast('Convert failed: ' + e.message, 'error'); + convFileResults.set(filePath, { status: 'error', error: e.message }); + renderConvResults(); } finally { btnEl.disabled = false; btnEl.textContent = 'Convert'; @@ -585,64 +776,144 @@ async function convConvertSingle(filePath, btnEl) { async function convConvertBatch() { if (convSelectedFiles.length === 0) return; - const targetExt = document.getElementById('convBatchTarget').value; - if (!targetExt) { - showToast('No valid target format available for selected files', 'error'); - return; - } + const { videos, images } = convGetSelectedByType(); const deleteOrig = document.getElementById('convDeleteOrig').checked; document.getElementById('convBatchBtn').disabled = true; try { - const resp = await fetch('/api/converter/convert', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - files: convSelectedFiles, 
- target_ext: targetExt, - delete_original: deleteOrig, - av1_crf: convAv1Crf, - }), - }); - const data = await resp.json(); - if (data.job_id) { - convShowProgress(); - convPollProgress(data.job_id); + if (videos.length > 0) { + const targetExt = document.getElementById('convBatchExt').value; + const codec = document.getElementById('convBatchCodec').value; + if (!targetExt || !codec) { + showToast('Please select a video format', 'error'); + return; + } + const resp = await fetch('/api/converter/convert', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + files: videos, + target_ext: targetExt, + codec, + delete_original: deleteOrig, + av1_crf: convAv1Crf, + }), + }); + const data = await resp.json(); + if (data.job_id) { + convConvertJobId = data.job_id; + convShowProgress(); + await convPollProgress(data.job_id); + } + } + + if (images.length > 0) { + const targetExt = document.getElementById('convBatchTarget').value; + if (!targetExt) { + showToast('No valid image target format selected', 'error'); + } else { + const resp = await fetch('/api/converter/convert', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + files: images, + target_ext: targetExt, + delete_original: deleteOrig, + }), + }); + const data = await resp.json(); + if (data.job_id) { + convConvertJobId = data.job_id; + convShowProgress(); + await convPollProgress(data.job_id); + } + } } } catch (e) { showToast('Batch convert failed: ' + e.message, 'error'); + } finally { document.getElementById('convBatchBtn').disabled = false; } } function convShowProgress() { + const stopBtn = document.getElementById('convStopBtn'); + if (stopBtn) { stopBtn.disabled = false; stopBtn.textContent = '\u23f9 Stop'; } document.getElementById('convProgress').classList.add('active'); } function convPollProgress(jobId) { - let lastCompleted = -1; - convPollTimer = setInterval(async () => { - try { - const resp = await 
fetch(`/api/converter/progress/${jobId}/poll?last=${lastCompleted}`); - const data = await resp.json(); - if (data.error) return; + return new Promise(resolve => { + let lastCompleted = -1; + convPollTimer = setInterval(async () => { + try { + const resp = await fetch(`/api/converter/progress/${jobId}/poll?last=${lastCompleted}`); + const data = await resp.json(); + if (data.error) return; + + lastCompleted = data.completed; + const pct = data.total > 0 ? Math.round((data.completed / data.total) * 100) : 0; + document.getElementById('convProgressBar').style.width = pct + '%'; + document.getElementById('convProgressPct').textContent = pct + '%'; + document.getElementById('convProgressMsg').textContent = + data.current_file ? `Converting: ${data.current_file}` : 'Processing...'; + + _convSyncResults(data); + + if (data.status === 'done') { + clearInterval(convPollTimer); + document.getElementById('convProgress').classList.remove('active'); + convConvertJobId = null; + resolve('done'); + } else if (data.status === 'cancelled') { + clearInterval(convPollTimer); + document.getElementById('convProgress').classList.remove('active'); + convConvertJobId = null; + showToast('Conversion was stopped', 'warning'); + resolve('cancelled'); + } + } catch (e) { /* retry */ } + }, 500); + }); +} - lastCompleted = data.completed; - const pct = data.total > 0 ? Math.round((data.completed / data.total) * 100) : 0; - document.getElementById('convProgressBar').style.width = pct + '%'; - document.getElementById('convProgressPct').textContent = pct + '%'; - document.getElementById('convProgressMsg').textContent = - data.current_file ? `Converting: ${data.current_file}` : 'Processing...'; +// Sync convFileResults from a job poll/progress response and re-render the table. 
+function _convSyncResults(data) { + for (const r of data.results || []) { + const existing = convFileResults.get(r.source); + if (existing && existing.status !== 'converting') continue; // already finalised + if (r.status === 'ok') { + convFileResults.set(r.source, { + status: 'ok', + original_size_fmt: r.original_size_fmt, + new_size_fmt: r.new_size_fmt, + original_size: r.original_size, + new_size: r.new_size, + }); + } else { + convFileResults.set(r.source, { status: 'error', error: r.error || 'unknown' }); + } + } + // Keep at most one 'converting' marker (the current file) + for (const [path, res] of convFileResults) { + if (res.status === 'converting') convFileResults.delete(path); + } + if (data.current_file && !convFileResults.has(data.current_file)) { + convFileResults.set(data.current_file, { status: 'converting' }); + } + renderConvResults(); +} - if (data.status === 'done') { - clearInterval(convPollTimer); - convShowConversionResults(data); - document.getElementById('convProgress').classList.remove('active'); - document.getElementById('convBatchBtn').disabled = false; - } - } catch (e) { /* retry */ } - }, 500); +async function convStopConvert() { + if (!convConvertJobId) return; + const btn = document.getElementById('convStopBtn'); + btn.disabled = true; + btn.textContent = 'Stopping…'; + try { + await fetch(`/api/converter/progress/${convConvertJobId}/cancel`, { method: 'POST' }); + } catch (e) { /* ignore */ } + // Poll loop will detect 'cancelled' } async function convWaitForJob(jobId) { @@ -653,12 +924,15 @@ async function convWaitForJob(jobId) { if (data.status === 'done') { const r = data.results[0]; if (r.status === 'ok') { - const sizeInfo = r.original_size_fmt + ' → ' + r.new_size_fmt; - showToast(`Converted! 
(${sizeInfo})`, 'success'); - convLastFailedFiles.delete(r.source); + convFileResults.set(r.source, { + status: 'ok', + original_size_fmt: r.original_size_fmt, + new_size_fmt: r.new_size_fmt, + original_size: r.original_size, + new_size: r.new_size, + }); } else { - showToast('Error: ' + (r.error || 'unknown'), 'error'); - convLastFailedFiles.add(r.source); + convFileResults.set(r.source, { status: 'error', error: r.error || 'unknown' }); } renderConvResults(); return; @@ -669,10 +943,17 @@ async function convWaitForJob(jobId) { async function convRetrySingle(filePath, btnEl) { const row = btnEl.closest('tr'); - const targetSelect = row.querySelector('.conv-target'); - const targetExt = targetSelect?.value; const deleteOrig = document.getElementById('convDeleteOrig').checked; + let targetExt, codec; + const videoContainer = row.querySelector('.conv-vid-container'); + if (videoContainer) { + targetExt = row.querySelector('.conv-vid-ext').value; + codec = row.querySelector('.conv-vid-codec').value; + } else { + targetExt = row.querySelector('.conv-target')?.value; + } + if (!targetExt) { showToast('No target selected for retry', 'error'); return; @@ -682,17 +963,21 @@ async function convRetrySingle(filePath, btnEl) { btnEl.textContent = 'Retrying...'; try { + const body = { files: [filePath], target_ext: targetExt, delete_original: deleteOrig, av1_crf: convAv1Crf }; + if (codec) body.codec = codec; + const resp = await fetch('/api/converter/convert', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ files: [filePath], target_ext: targetExt, delete_original: deleteOrig, av1_crf: convAv1Crf }), + body: JSON.stringify(body), }); const data = await resp.json(); if (data.job_id) { await convWaitForJob(data.job_id); } } catch (e) { - showToast('Retry failed: ' + e.message, 'error'); + convFileResults.set(filePath, { status: 'error', error: e.message }); + renderConvResults(); } finally { btnEl.disabled = false; btnEl.textContent = 
'Retry'; @@ -700,29 +985,8 @@ async function convRetrySingle(filePath, btnEl) { } function convShowConversionResults(data) { - let ok = 0, fail = 0; - const failedFiles = []; - convLastFailedFiles = new Set(); - - for (const r of data.results) { - if (r.status === 'ok') { - ok++; - } else { - fail++; - failedFiles.push(`${r.source} (${r.error || 'unknown error'})`); - } - } - - if (fail > 0) { - const feed = failedFiles.slice(0, 5).join(', '); - const rest = failedFiles.length > 5 ? `, +${failedFiles.length - 5} more` : ''; - showToast(`Done! ${ok} converted, ${fail} failed: ${feed}${rest}`, 'error'); - } else { - showToast(`Done! ${ok} converted`, 'success'); - } - - // Refresh list with failure markers - renderConvResults(); + // results are already synced live via _convSyncResults during polling; + // this is kept as a no-op hook for any future post-completion logic. } async function convDeleteSingle(filePath) { @@ -800,6 +1064,12 @@ function copyPath(path) { // ===================================================================== async function dupStartScan() { + // If a scan is already running, act as stop + if (dupRunning) { + await dupStopScan(); + return; + } + const folder = document.getElementById('dupFolder').value.trim(); if (!folder) { showToast('Enter a folder path', 'error'); return; } _storeFolder('dupfinder', folder); @@ -808,7 +1078,7 @@ async function dupStartScan() { const imgThreshold = parseInt(document.getElementById('dupImgThreshold').value) / 100; const vidThreshold = parseInt(document.getElementById('dupVidThreshold').value) / 100; - document.getElementById('dupScanBtn').disabled = true; + dupSetStopMode(); document.getElementById('dupResults').classList.remove('active'); document.getElementById('dupProgress').classList.add('active'); document.getElementById('dupProgressBar').style.width = '0%'; @@ -828,17 +1098,56 @@ async function dupStartScan() { const data = await resp.json(); if (data.error) { showToast(data.error, 'error'); - 
dupResetUI(); + dupRestoreBtn(); return; } dupJobId = data.job_id; dupPollProgress(); } catch (e) { showToast('Scan failed: ' + e.message, 'error'); - dupResetUI(); + dupRestoreBtn(); } } +async function dupStopScan() { + if (!dupJobId) { dupRestoreBtn(); return; } + const btn = document.getElementById('dupScanBtn'); + btn.disabled = true; + btn.textContent = 'Stopping…'; + try { + await fetch(`/api/dupfinder/scan/${dupJobId}/cancel`, { method: 'POST' }); + } catch (e) { /* ignore */ } + // Poll loop will detect 'cancelled' and call dupRestoreBtn +} + +function dupSetStopMode() { + dupRunning = true; + const btn = document.getElementById('dupScanBtn'); + btn.textContent = '⏹ Stop Scan'; + btn.classList.remove('btn-primary'); + btn.classList.add('btn-stop'); + btn.disabled = false; +} + +function dupRestoreBtn() { + dupRunning = false; + dupJobId = null; + const btn = document.getElementById('dupScanBtn'); + btn.disabled = false; + btn.textContent = '🔍 Start Scan'; + btn.classList.remove('btn-stop'); + btn.classList.add('btn-primary'); +} + +function dupShowInterrupted() { + document.getElementById('dupResults').classList.add('active'); + document.getElementById('dupGroups').innerHTML = ''; + document.getElementById('dupNoResults').style.display = 'none'; + document.getElementById('dupTitle').textContent = ''; + document.getElementById('dupSummary').innerHTML = + '
⏹ Scan was interrupted — no results to display.
'; +} + function dupPollProgress() { if (!dupJobId) return; dupPollTimer = setInterval(async () => { @@ -855,10 +1164,16 @@ function dupPollProgress() { if (data.status === 'done') { clearInterval(dupPollTimer); await dupLoadResults(); - } else if (data.status === 'error') { + } else if (data.status === 'cancelled') { + clearInterval(dupPollTimer); + document.getElementById('dupProgress').classList.remove('active'); + dupShowInterrupted(); + dupRestoreBtn(); + } else if (data.status === 'failed') { clearInterval(dupPollTimer); showToast('Scan failed: ' + (data.error || 'Unknown'), 'error'); - dupResetUI(); + document.getElementById('dupProgress').classList.remove('active'); + dupRestoreBtn(); } } catch (e) { /* retry */ } }, 500); @@ -877,18 +1192,18 @@ async function dupLoadResults() { dupRenderResults(data); document.getElementById('dupProgress').classList.remove('active'); document.getElementById('dupResults').classList.add('active'); - dupResetUI(); + dupRestoreBtn(); document.getElementById('dupNoResults').style.display = dupAllGroups.length === 0 ? 
'block' : 'none'; } catch (e) { showToast('Failed to load results', 'error'); - dupResetUI(); + dupRestoreBtn(); } } function dupResetUI() { - document.getElementById('dupScanBtn').disabled = false; + dupRestoreBtn(); } // ===================================================================== @@ -913,6 +1228,7 @@ function dupRenderResults(data) { for (const group of dupAllGroups) { container.appendChild(dupCreateGroupCard(group)); } + observeThumbnails(container); } function dupCreateGroupCard(group) { @@ -948,7 +1264,7 @@ function dupCreateFileCard(item, isBest) { if (item.type === 'video') { thumb = ``; } else { - thumb = `${escapeAttr(item.filename)}`; + thumb = `${escapeAttr(item.filename)}`; } const badges = []; @@ -1145,255 +1461,6 @@ async function dupExecuteDelete() { } } -// ===================================================================== -// Inspector Tab -// ===================================================================== - -async function inspStartScan() { - const folder = document.getElementById('inspFolder').value.trim(); - if (!folder) { showToast('Enter a folder path', 'error'); return; } - _storeFolder('inspector', folder); - const mode = document.getElementById('inspMode').value; - - document.getElementById('inspScanBtn').disabled = true; - document.getElementById('inspResults').style.display = 'none'; - document.getElementById('inspProgress').style.display = 'block'; - document.getElementById('inspProgressBar').style.width = '0%'; - document.getElementById('inspProgressPct').textContent = '0%'; - document.getElementById('inspProgressMsg').textContent = 'Starting...'; - - try { - const resp = await fetch('/api/inspector/scan', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ folder, mode }), - }); - const data = await resp.json(); - if (data.error) { showToast(data.error, 'error'); return; } - inspJobId = data.job_id; - inspPollTimer = setInterval(inspPollProgress, 800); - } catch (e) { - 
showToast('Scan failed: ' + e.message, 'error'); - document.getElementById('inspProgress').style.display = 'none'; - } finally { - document.getElementById('inspScanBtn').disabled = false; - } -} - -async function inspPollProgress() { - try { - const resp = await fetch(`/api/inspector/scan/${inspJobId}/status`); - const data = await resp.json(); - const total = data.total_files || 0; - const processed = data.processed_files || 0; - const pct = total > 0 ? Math.round((processed / total) * 100) : 0; - document.getElementById('inspProgressBar').style.width = pct + '%'; - document.getElementById('inspProgressPct').textContent = pct + '%'; - document.getElementById('inspProgressMsg').textContent = `${processed} / ${total} files`; - - if (data.status === 'done' || data.status === 'error') { - clearInterval(inspPollTimer); - document.getElementById('inspProgress').style.display = 'none'; - if (data.status === 'error') { - showToast('Scan error: ' + (data.error || 'Unknown'), 'error'); - } else { - inspLoadResults(); - } - } - } catch (e) { - clearInterval(inspPollTimer); - showToast('Poll error: ' + e.message, 'error'); - } -} - -async function inspLoadResults() { - try { - const resp = await fetch(`/api/inspector/scan/${inspJobId}/results`); - const data = await resp.json(); - const container = document.getElementById('inspResultsContent'); - const section = document.getElementById('inspResults'); - section.style.display = 'block'; - - if (!data.results || data.results.length === 0) { - container.innerHTML = '
📂

No files found

'; - return; - } - - const mode = document.getElementById('inspMode').value; - if (mode === 'exif') { - inspRenderExif(data.results, container); - } else { - inspRenderIntegrity(data.results, container); - } - } catch (e) { - showToast('Failed to load results: ' + e.message, 'error'); - } -} - -function inspRenderExif(results, container) { - let html = `
📊 EXIF Results — ${results.length} file(s)
`; - for (const item of results) { - const fname = item.file.split('/').pop(); - const exif = item.exif || {}; - const keys = Object.keys(exif).filter(k => !k.startsWith('_')); - html += `
-
- 📄 ${escapeHtml(fname)} ${keys.length} tags -
-
-
- -
- `; - for (const k of keys) { - const val = typeof exif[k] === 'object' ? JSON.stringify(exif[k]) : String(exif[k]); - html += ``; - } - if (exif._gps_lat && exif._gps_lng) { - html += ``; - } - html += `
${escapeHtml(k)}${escapeHtml(val.substring(0, 200))}
📍 GPS${exif._gps_lat.toFixed(6)}, ${exif._gps_lng.toFixed(6)}
`; - } - container.innerHTML = html; -} - -function inspRenderIntegrity(results, container) { - let ok = 0, bad = 0; - for (const r of results) { if (r.ok) ok++; else bad++; } - - let html = `
-
📊 Integrity Results
-
-
${ok}
OK
-
${bad}
Errors
-
${results.length}
Total
-
`; - - if (bad > 0) { - html += '
⚠️ Failed Files
'; - for (const r of results.filter(x => !x.ok)) { - html += `
${escapeHtml(r.file.split('/').pop())}${escapeHtml(r.error || 'Unknown error')}
`; - } - html += '
'; - } - container.innerHTML = html; -} - -async function inspStripOne(filePath) { - try { - const resp = await fetch('/api/inspector/exif/strip', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ files: [filePath] }), - }); - const data = await resp.json(); - if (data.error) { showToast(data.error, 'error'); return; } - showToast('EXIF stripped from 1 file', 'success'); - } catch (e) { - showToast('Strip failed: ' + e.message, 'error'); - } -} - -// ===================================================================== -// Resizer Tab -// ===================================================================== - -async function resStartResize() { - const folder = document.getElementById('resFolder').value.trim(); - if (!folder) { showToast('Enter a folder path', 'error'); return; } - _storeFolder('resizer', folder); - - const width = parseInt(document.getElementById('resWidth').value) || 1920; - const height = parseInt(document.getElementById('resHeight').value) || 1080; - const mode = document.getElementById('resMode').value; - const quality = parseInt(document.getElementById('resQuality').value) || 90; - const bgColor = document.getElementById('resBgColor').value; - const outputFolder = document.getElementById('resOutput').value.trim() || null; - - document.getElementById('resScanBtn').disabled = true; - document.getElementById('resResults').style.display = 'none'; - document.getElementById('resProgress').style.display = 'block'; - document.getElementById('resProgressBar').style.width = '0%'; - document.getElementById('resProgressPct').textContent = '0%'; - document.getElementById('resProgressMsg').textContent = 'Starting...'; - - try { - const resp = await fetch('/api/resizer/scan', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ folder, width, height, mode, quality, bg_color: bgColor, output_folder: outputFolder }), - }); - const data = await resp.json(); - if 
(data.error) { showToast(data.error, 'error'); return; } - resJobId = data.job_id; - resPollTimer = setInterval(resPollProgress, 800); - } catch (e) { - showToast('Resize failed: ' + e.message, 'error'); - document.getElementById('resProgress').style.display = 'none'; - } finally { - document.getElementById('resScanBtn').disabled = false; - } -} - -async function resPollProgress() { - try { - const resp = await fetch(`/api/resizer/scan/${resJobId}/status`); - const data = await resp.json(); - const total = data.total_files || 0; - const processed = data.processed_files || 0; - const pct = total > 0 ? Math.round((processed / total) * 100) : 0; - document.getElementById('resProgressBar').style.width = pct + '%'; - document.getElementById('resProgressPct').textContent = pct + '%'; - document.getElementById('resProgressMsg').textContent = `${processed} / ${total} images`; - - if (data.status === 'done' || data.status === 'error') { - clearInterval(resPollTimer); - document.getElementById('resProgress').style.display = 'none'; - if (data.status === 'error') { - showToast('Resize error: ' + (data.error || 'Unknown'), 'error'); - } else { - resLoadResults(); - } - } - } catch (e) { - clearInterval(resPollTimer); - showToast('Poll error: ' + e.message, 'error'); - } -} - -async function resLoadResults() { - try { - const resp = await fetch(`/api/resizer/scan/${resJobId}/results`); - const data = await resp.json(); - const container = document.getElementById('resResultsContent'); - const section = document.getElementById('resResults'); - section.style.display = 'block'; - - const results = data.results || []; - let ok = 0, errors = 0; - for (const r of results) { if (r.status === 'ok') ok++; else errors++; } - - let html = `
-
📊 Resize Results
-
-
${ok}
Resized
-
${errors}
Errors
-
${results.length}
Total
-
`; - - if (errors > 0) { - html += '
⚠️ Errors
'; - for (const r of results.filter(x => x.status !== 'ok')) { - html += `
${escapeHtml(r.input.split('/').pop())}${escapeHtml(r.error || 'Unknown')}
`; - } - html += '
'; - } - container.innerHTML = html; - } catch (e) { - showToast('Failed to load results: ' + e.message, 'error'); - } -} - // ===================================================================== // Organizer Tab // ===================================================================== @@ -1408,6 +1475,12 @@ function orgModeChanged() { } async function orgStartPlan() { + // If planning is running, act as stop + if (orgRunning) { + await orgStopScan(); + return; + } + const folder = document.getElementById('orgFolder').value.trim(); if (!folder) { showToast('Enter a folder path', 'error'); return; } _storeFolder('organizer', folder); @@ -1424,7 +1497,7 @@ async function orgStartPlan() { body.start_seq = parseInt(document.getElementById('orgStartSeq').value) || 1; } - document.getElementById('orgPlanBtn').disabled = true; + orgSetStopMode(); document.getElementById('orgExecBtn').style.display = 'none'; document.getElementById('orgPlanResults').style.display = 'none'; document.getElementById('orgExecResults').style.display = 'none'; @@ -1439,17 +1512,52 @@ async function orgStartPlan() { body: JSON.stringify(body), }); const data = await resp.json(); - if (data.error) { showToast(data.error, 'error'); return; } + if (data.error) { showToast(data.error, 'error'); orgRestoreBtn(); return; } orgJobId = data.job_id; orgPollTimer = setInterval(orgPollStatus, 600); } catch (e) { showToast('Plan failed: ' + e.message, 'error'); document.getElementById('orgProgress').style.display = 'none'; - } finally { - document.getElementById('orgPlanBtn').disabled = false; + orgRestoreBtn(); } } +async function orgStopScan() { + if (!orgJobId) { orgRestoreBtn(); return; } + const btn = document.getElementById('orgPlanBtn'); + btn.disabled = true; + btn.textContent = 'Stopping…'; + try { + await fetch(`/api/organizer/cancel/${orgJobId}`, { method: 'POST' }); + } catch (e) { /* ignore */ } + // Poll loop will detect 'cancelled' and call orgRestoreBtn +} + +function orgSetStopMode() { + 
orgRunning = true; + const btn = document.getElementById('orgPlanBtn'); + btn.textContent = '⏹ Stop Scan'; + btn.classList.remove('btn-primary'); + btn.classList.add('btn-stop'); + btn.disabled = false; +} + +function orgRestoreBtn() { + orgRunning = false; + const btn = document.getElementById('orgPlanBtn'); + btn.disabled = false; + btn.textContent = '📋 Preview Plan'; + btn.classList.remove('btn-stop'); + btn.classList.add('btn-primary'); +} + +function orgShowInterrupted() { + document.getElementById('orgPlanResults').style.display = 'block'; + document.getElementById('orgPlanContent').innerHTML = + '
⏹ Scan was interrupted — no results to display.
'; + document.getElementById('orgExecBtn').style.display = 'none'; +} + async function orgPollStatus() { try { const resp = await fetch(`/api/organizer/status/${orgJobId}`); @@ -1459,18 +1567,25 @@ async function orgPollStatus() { document.getElementById('orgProgressBar').style.width = pct + '%'; document.getElementById('orgProgressPct').textContent = pct + '%'; - if (data.phase === 'planned') { + if (data.status === 'cancelled') { + clearInterval(orgPollTimer); + document.getElementById('orgProgress').style.display = 'none'; + orgShowInterrupted(); + orgRestoreBtn(); + } else if (data.phase === 'planned') { clearInterval(orgPollTimer); document.getElementById('orgProgress').style.display = 'none'; + orgRestoreBtn(); orgRenderPlan(data); } else if (data.phase === 'done') { clearInterval(orgPollTimer); document.getElementById('orgProgress').style.display = 'none'; orgRenderExecResults(data); - } else if (data.phase === 'error' || data.status === 'error') { + } else if (data.phase === 'error' || data.status === 'failed') { clearInterval(orgPollTimer); document.getElementById('orgProgress').style.display = 'none'; showToast('Error: ' + (data.error || 'Unknown'), 'error'); + orgRestoreBtn(); } else { document.getElementById('orgProgressMsg').textContent = data.phase === 'executing' ? 
`Executing...` : `Planning...`; @@ -1478,6 +1593,7 @@ async function orgPollStatus() { } catch (e) { clearInterval(orgPollTimer); showToast('Poll error: ' + e.message, 'error'); + orgRestoreBtn(); } } diff --git a/src/morphic/frontend/static/style.css b/web/static/style.css similarity index 92% rename from src/morphic/frontend/static/style.css rename to web/static/style.css index 39ecaac..06ab1e0 100644 --- a/src/morphic/frontend/static/style.css +++ b/web/static/style.css @@ -297,6 +297,17 @@ input[type="range"] { width: 0%; } +.progress-bar-fill.indeterminate { + width: 35%; + transition: none; + animation: progress-indeterminate 1.4s cubic-bezier(0.4, 0, 0.6, 1) infinite; +} + +@keyframes progress-indeterminate { + 0% { transform: translateX(-100%); } + 100% { transform: translateX(390%); } +} + .progress-info { display: flex; justify-content: space-between; @@ -407,6 +418,17 @@ input[type="range"] { /* ── Converter Results ───────────────────────────────────────────── */ +.conv-video-format-group { + display: flex; + gap: 4px; + align-items: center; + flex-wrap: wrap; +} + +.conv-video-format-group select { + min-width: 70px; +} + .conv-result { display: flex; align-items: center; @@ -416,14 +438,27 @@ input[type="range"] { font-size: 13px; } +/* Inside table cells the list-style padding/border is not needed */ +td .conv-result { + padding: 0; + border-bottom: none; + font-size: 12px; + white-space: nowrap; +} + .conv-result .status-ok { color: var(--success); } -.conv-result .status-err { color: var(--danger); } +.conv-result .status-err { color: var(--danger); cursor: help; } .conv-result .size-change { font-size: 12px; color: var(--text-dim); } +.result-converting { + color: var(--text-dim); + font-style: italic; +} + /* ── Dupfinder Results ───────────────────────────────────────────── */ .results-section { display: none; } @@ -812,6 +847,30 @@ input[type="range"] { .stat-value { font-size: 28px; font-weight: 700; } .stat-label { font-size: 12px; color: 
var(--text-dim); margin-top: 4px; } +/* ── Stop Scan Button ─────────────────────────────────────────────── */ +.btn-stop { + background: var(--danger); + color: #fff; +} +.btn-stop:hover:not(:disabled) { + background: var(--danger-hover, #b91c1c); +} + +/* ── Scan Interrupted Notice ─────────────────────────────────────── */ +.scan-interrupted { + display: flex; + align-items: center; + gap: 10px; + padding: 16px 20px; + background: rgba(251, 191, 36, 0.1); + border: 1px solid rgba(251, 191, 36, 0.3); + border-radius: var(--radius); + color: var(--warning, #f59e0b); + font-size: 14px; + font-weight: 500; + margin-bottom: 16px; +} + /* ── Responsive ──────────────────────────────────────────────────── */ @media (max-width: 768px) { diff --git a/src/morphic/frontend/templates/index.html b/web/templates/index.html similarity index 68% rename from src/morphic/frontend/templates/index.html rename to web/templates/index.html index eaef53d..92685b8 100644 --- a/src/morphic/frontend/templates/index.html +++ b/web/templates/index.html @@ -4,7 +4,7 @@ Morphic - + @@ -14,8 +14,6 @@

⚡ Morphic

@@ -37,7 +35,7 @@

⚡ Morphic

- +
@@ -83,6 +81,19 @@

⚡ Morphic

+ +
+
Scanning folder...
+
+
+
+
+ + 0s +
+
Walking folder tree...
+
+
Converting...
@@ -93,6 +104,9 @@

⚡ Morphic

0%
Starting...
+
+ +
@@ -110,12 +124,22 @@

⚡ Morphic

0 file(s) selected
-
@@ -151,7 +170,7 @@

⚡ Morphic

- +
@@ -231,160 +250,6 @@

No duplicates found!

- - - -
-
- -
-
🏷️ Inspect Files
- -
-
- -
- - -
-
-
- ~ - -
- -
- -
-
-
- -
-
- - -
-
- - -
- - -
-
Scanning...
-
-
-
-
- 0% -
-
Initializing...
-
- - - - -
-
- - - - -
-
- -
-
📐 Batch Resize
- -
-
- -
- - -
-
-
- ~ - -
- -
- -
-
-
- -
-
- - -
-
- - -
-
- - -
-
- -
-
- - -
-
- - -
-
- - -
-
- - -
- - -
-
Resizing...
-
-
-
-
- 0% -
-
Starting...
-
- - - - -
-
@@ -399,7 +264,7 @@

No duplicates found!

- +
@@ -526,6 +391,6 @@

⚠️ Confirm Deletion

- +