From e7d0d9f21ea9d97223b21a5185ff868ed4111f6c Mon Sep 17 00:00:00 2001 From: Nelson Osacky Date: Thu, 19 Jun 2025 11:13:44 +0200 Subject: [PATCH] Add duplicate file analysis for Android --- src/launchpad/analyzers/android.py | 35 +++++++++++++++++--- src/launchpad/insights/insight.py | 11 +++---- src/launchpad/models/android.py | 14 ++++++-- tests/unit/test_android_analyzer.py | 51 +++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 14 deletions(-) create mode 100644 tests/unit/test_android_analyzer.py diff --git a/src/launchpad/analyzers/android.py b/src/launchpad/analyzers/android.py index 156e9c82..9d0344e9 100644 --- a/src/launchpad/analyzers/android.py +++ b/src/launchpad/analyzers/android.py @@ -1,14 +1,19 @@ from __future__ import annotations from datetime import datetime, timezone -from typing import Any from ..artifacts.android.aab import AAB from ..artifacts.android.apk import APK from ..artifacts.android.zipped_aab import ZippedAAB from ..artifacts.android.zipped_apk import ZippedAPK from ..artifacts.artifact import AndroidArtifact -from ..models.android import AndroidAnalysisResults, AndroidAppInfo +from ..insights.common import DuplicateFilesInsight +from ..insights.insight import InsightsInput +from ..models.android import ( + AndroidAnalysisResults, + AndroidAppInfo, + AndroidInsightResults, +) from ..models.common import FileAnalysis, FileInfo from ..models.treemap import FILE_TYPE_TO_TREEMAP_TYPE, TreemapType from ..utils.file_utils import calculate_file_hash @@ -25,8 +30,14 @@ class AndroidAnalyzer: """Analyzer for Android apps (.apk, .aab files).""" - def __init__(self, **_: Any) -> None: - pass + def __init__( + self, + skip_insights: bool = False, + ) -> None: + """Args: + skip_insights: Skip insights generation for faster analysis + """ + self.skip_insights = skip_insights def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults: manifest_dict = artifact.get_manifest().model_dump() @@ -63,11 +74,25 @@ def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults: treemap = treemap_builder.build_file_treemap(file_analysis) + insights: AndroidInsightResults | None = None + if not self.skip_insights: + logger.info("Generating insights from analysis results") + insights_input = InsightsInput( + app_info=app_info, + file_analysis=file_analysis, + treemap=treemap, + binary_analysis=[], + ) + insights = AndroidInsightResults( + duplicate_files=DuplicateFilesInsight().generate(insights_input), + ) + return AndroidAnalysisResults( generated_at=datetime.now(timezone.utc), app_info=app_info, treemap=treemap, file_analysis=file_analysis, + insights=insights, analysis_duration=None, ) @@ -115,7 +140,7 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis: size=merged_size, file_type=file_type, treemap_type=treemap_type, - # Intentionally igoring hash of merged file + # Intentionally ignoring hash of merged file hash_md5="", ) path_to_file_info[relative_path] = merged_file_info diff --git a/src/launchpad/insights/insight.py b/src/launchpad/insights/insight.py index 58ba53c1..50b4a68d 100644 --- a/src/launchpad/insights/insight.py +++ b/src/launchpad/insights/insight.py @@ -1,20 +1,19 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Protocol, TypeVar +from typing import List, Protocol, TypeVar -from ..models.apple import AppleAppInfo, MachOBinaryAnalysis -from ..models.common import FileAnalysis -from ..models.treemap import TreemapResults +from launchpad.models.common import BaseAppInfo, BaseBinaryAnalysis, FileAnalysis +from launchpad.models.treemap import TreemapResults T_co = TypeVar("T_co", covariant=True) @dataclass class InsightsInput: - app_info: AppleAppInfo + app_info: BaseAppInfo file_analysis: FileAnalysis treemap: TreemapResults | None - binary_analysis: list[MachOBinaryAnalysis] + binary_analysis: List[BaseBinaryAnalysis] class Insight(Protocol[T_co]): diff --git a/src/launchpad/models/android.py b/src/launchpad/models/android.py index a24dd754..aeb88b69 100644 --- a/src/launchpad/models/android.py +++ b/src/launchpad/models/android.py @@ -1,6 +1,7 @@ -from pydantic import ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field from .common import BaseAnalysisResults, BaseAppInfo +from .insights import DuplicateFilesInsightResult class AndroidAppInfo(BaseAppInfo): @@ -8,8 +9,15 @@ class AndroidAppInfo(BaseAppInfo): package_name: str = Field(..., description="Android package name") -class AndroidAnalysisResults(BaseAnalysisResults): - """Complete Android analysis results.""" +class AndroidInsightResults(BaseModel): + model_config = ConfigDict(frozen=True) + + duplicate_files: DuplicateFilesInsightResult | None = Field(None, description="Duplicate files analysis") + +class AndroidAnalysisResults(BaseAnalysisResults): model_config = ConfigDict(frozen=True) app_info: AndroidAppInfo = Field(..., description="Android app information") + insights: AndroidInsightResults | None = Field( + description="Generated insights from the analysis", + ) diff --git a/tests/unit/test_android_analyzer.py b/tests/unit/test_android_analyzer.py new file mode 100644 index 00000000..d74084f7 --- /dev/null +++ b/tests/unit/test_android_analyzer.py @@ -0,0 +1,51 @@ +"""Tests for Android analyzer with duplicate file detection.""" + +from pathlib import Path + +import pytest + +from launchpad.analyzers.android import AndroidAnalyzer +from launchpad.artifacts.artifact_factory import ArtifactFactory + + +@pytest.fixture +def test_apk_path() -> Path: + return Path("tests/_fixtures/android/hn.apk") + + +@pytest.fixture +def android_analyzer() -> AndroidAnalyzer: + return AndroidAnalyzer() + + +class TestAndroidAnalyzer: + def test_analyze_with_duplicate_detection(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None: + """Test that Android analyzer includes duplicate file detection.""" + artifact = ArtifactFactory.from_path(test_apk_path) + results = android_analyzer.analyze(artifact) + + assert results.app_info.name == "Hacker News" + assert results.app_info.package_name == "com.emergetools.hackernews" + assert results.file_analysis is not None + assert len(results.file_analysis.files) > 0 + + assert results.insights is not None + assert results.insights.duplicate_files is not None + + duplicate_insight = results.insights.duplicate_files + assert hasattr(duplicate_insight, "files") + assert hasattr(duplicate_insight, "total_savings") + assert hasattr(duplicate_insight, "duplicate_count") + assert isinstance(duplicate_insight.total_savings, int) + assert isinstance(duplicate_insight.duplicate_count, int) + assert duplicate_insight.total_savings == 51709 + assert duplicate_insight.duplicate_count == 52 + + def test_duplicate_files_have_hashes(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None: + """Test that all files have MD5 hashes for duplicate detection.""" + artifact = ArtifactFactory.from_path(test_apk_path) + results = android_analyzer.analyze(artifact) + + for file_info in results.file_analysis.files: + assert file_info.hash_md5 is not None + assert len(file_info.hash_md5) > 0