Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions src/launchpad/analyzers/android.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
from __future__ import annotations

from datetime import datetime, timezone
from typing import Any

from ..artifacts.android.aab import AAB
from ..artifacts.android.apk import APK
from ..artifacts.android.zipped_aab import ZippedAAB
from ..artifacts.android.zipped_apk import ZippedAPK
from ..artifacts.artifact import AndroidArtifact
from ..models.android import AndroidAnalysisResults, AndroidAppInfo
from ..insights.common import DuplicateFilesInsight
from ..insights.insight import InsightsInput
from ..models.android import (
AndroidAnalysisResults,
AndroidAppInfo,
AndroidInsightResults,
)
from ..models.common import FileAnalysis, FileInfo
from ..models.treemap import FILE_TYPE_TO_TREEMAP_TYPE, TreemapType
from ..utils.file_utils import calculate_file_hash
Expand All @@ -25,8 +30,14 @@
class AndroidAnalyzer:
"""Analyzer for Android apps (.apk, .aab files)."""

def __init__(self, **_: Any) -> None:
pass
def __init__(
self,
skip_insights: bool = False,
) -> None:
"""Args:
skip_insights: Skip insights generation for faster analysis
"""
self.skip_insights = skip_insights

def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:
manifest_dict = artifact.get_manifest().model_dump()
Expand Down Expand Up @@ -63,11 +74,25 @@ def analyze(self, artifact: AndroidArtifact) -> AndroidAnalysisResults:

treemap = treemap_builder.build_file_treemap(file_analysis)

insights: AndroidInsightResults | None = None
if not self.skip_insights:
logger.info("Generating insights from analysis results")
insights_input = InsightsInput(
app_info=app_info,
file_analysis=file_analysis,
treemap=treemap,
binary_analysis=[],
)
insights = AndroidInsightResults(
duplicate_files=DuplicateFilesInsight().generate(insights_input),
)

return AndroidAnalysisResults(
generated_at=datetime.now(timezone.utc),
app_info=app_info,
treemap=treemap,
file_analysis=file_analysis,
insights=insights,
analysis_duration=None,
)

Expand Down Expand Up @@ -115,7 +140,7 @@ def _get_file_analysis(self, apks: list[APK]) -> FileAnalysis:
size=merged_size,
file_type=file_type,
treemap_type=treemap_type,
# Intentionally igoring hash of merged file
# Intentionally ignoring hash of merged file
hash_md5="",
)
path_to_file_info[relative_path] = merged_file_info
Expand Down
11 changes: 5 additions & 6 deletions src/launchpad/insights/insight.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
from abc import abstractmethod
from dataclasses import dataclass
from typing import Protocol, TypeVar
from typing import List, Protocol, TypeVar

from ..models.apple import AppleAppInfo, MachOBinaryAnalysis
from ..models.common import FileAnalysis
from ..models.treemap import TreemapResults
from launchpad.models.common import BaseAppInfo, BaseBinaryAnalysis, FileAnalysis
from launchpad.models.treemap import TreemapResults

T_co = TypeVar("T_co", covariant=True)


@dataclass
class InsightsInput:
    """Bundle of analysis data handed to an insight's ``generate`` call.

    The fields are typed against the shared base models (``BaseAppInfo``,
    ``BaseBinaryAnalysis``) rather than the Apple-specific ones, so the same
    input type can be built by any platform's analyzer.
    """

    # Metadata describing the analyzed app.
    app_info: BaseAppInfo
    # Per-file analysis of the artifact's contents.
    file_analysis: FileAnalysis
    # Treemap built from the file analysis; may be None.
    treemap: TreemapResults | None
    # Binary analyses for the app; may be an empty list.
    binary_analysis: List[BaseBinaryAnalysis]


class Insight(Protocol[T_co]):
Expand Down
14 changes: 11 additions & 3 deletions src/launchpad/models/android.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
from pydantic import ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field

from .common import BaseAnalysisResults, BaseAppInfo
from .insights import DuplicateFilesInsightResult


class AndroidAppInfo(BaseAppInfo):
    """Frozen app metadata for an Android build.

    Adds the required ``package_name`` field on top of ``BaseAppInfo``.
    """

    model_config = ConfigDict(frozen=True)

    package_name: str = Field(
        default=...,
        description="Android package name",
    )


class AndroidAnalysisResults(BaseAnalysisResults):
"""Complete Android analysis results."""
class AndroidInsightResults(BaseModel):
    """Frozen container for the insights generated for an Android app."""

    model_config = ConfigDict(frozen=True)

    # Defaults to None when no duplicate-files result is supplied.
    duplicate_files: DuplicateFilesInsightResult | None = Field(
        default=None,
        description="Duplicate files analysis",
    )


class AndroidAnalysisResults(BaseAnalysisResults):
    """Complete Android analysis results."""

    model_config = ConfigDict(frozen=True)

    app_info: AndroidAppInfo = Field(
        default=...,
        description="Android app information",
    )
    # Nullable but declared without a default, so it must be passed
    # explicitly (None is an accepted value).
    insights: AndroidInsightResults | None = Field(description="Generated insights from the analysis")
51 changes: 51 additions & 0 deletions tests/unit/test_android_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Tests for Android analyzer with duplicate file detection."""

from pathlib import Path

import pytest

from launchpad.analyzers.android import AndroidAnalyzer
from launchpad.artifacts.artifact_factory import ArtifactFactory


@pytest.fixture
def test_apk_path() -> Path:
    """Return the path to the ``hn.apk`` fixture used by these tests.

    The path is resolved from this file's location instead of the current
    working directory, so the fixture is found no matter which directory
    pytest is launched from.
    """
    # This module lives in tests/unit/, so parents[1] is the tests/ directory.
    tests_dir = Path(__file__).resolve().parents[1]
    return tests_dir / "_fixtures" / "android" / "hn.apk"


@pytest.fixture
def android_analyzer() -> AndroidAnalyzer:
    """Provide an ``AndroidAnalyzer`` constructed with its default arguments."""
    analyzer = AndroidAnalyzer()
    return analyzer


class TestAndroidAnalyzer:
def test_analyze_with_duplicate_detection(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
    """Analyzing the sample APK yields app info, files, and a duplicate-files insight."""
    analysis = android_analyzer.analyze(ArtifactFactory.from_path(test_apk_path))

    # Basic app metadata read from the artifact.
    assert analysis.app_info.name == "Hacker News"
    assert analysis.app_info.package_name == "com.emergetools.hackernews"

    # The file analysis must be present and non-empty.
    assert analysis.file_analysis is not None
    assert len(analysis.file_analysis.files) > 0

    # Insights are generated because the analyzer fixture does not skip them.
    assert analysis.insights is not None
    assert analysis.insights.duplicate_files is not None
    dupes = analysis.insights.duplicate_files

    # Shape of the duplicate-files result.
    for attribute in ("files", "total_savings", "duplicate_count"):
        assert hasattr(dupes, attribute)
    for numeric_value in (dupes.total_savings, dupes.duplicate_count):
        assert isinstance(numeric_value, int)

    # Exact values are pinned to the contents of the hn.apk fixture.
    assert dupes.total_savings == 51709
    assert dupes.duplicate_count == 52
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out there were some duplicates in hn.apk; they are mostly duplicated META-INF files.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be curious how users can remove those. If they can't remove or mitigate them, we should try to add ignores on our end so they aren't shown. Any idea what is behind these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup! Here’s how you can remove them: EmergeTools/hackernews#489

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears this would filter out all of the META-INF/ files, not just the duplicates? I don't think that's what we'd want to do here, as I know some tools package files in there for tooling and runtime analysis.

Copy link
Contributor Author

@runningcode runningcode Jun 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, you're right. The Kotlin .module files are important.

Copy link
Contributor Author

@runningcode runningcode Jun 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated the PR to remove only the duplicate .version files and the duplicate LICENSE.txt files. This way it keeps the .module files, which are needed (and the .module files were not duplicates anyway).


def test_duplicate_files_have_hashes(self, test_apk_path: Path, android_analyzer: AndroidAnalyzer) -> None:
    """Every analyzed file carries a non-empty MD5 hash, which duplicate detection needs."""
    analysis = android_analyzer.analyze(ArtifactFactory.from_path(test_apk_path))

    for entry in analysis.file_analysis.files:
        # Check presence first, then that the hash is not an empty string.
        assert entry.hash_md5 is not None
        assert len(entry.hash_md5) > 0
Loading