Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 128 additions & 60 deletions src/launchpad/utils/apple/cwl_demangle.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
import multiprocessing
import os
import shutil
import subprocess
import tempfile
import uuid

from dataclasses import dataclass
from typing import Dict, List
from typing import Dict, List, Tuple

from launchpad.utils.logging import get_logger

Expand All @@ -29,7 +31,11 @@ class CwlDemangleResult:
class CwlDemangler:
"""A class to demangle Swift symbol names using the cwl-demangle tool."""

def __init__(self, is_type: bool = False, continue_on_error: bool = True):
def __init__(
self,
is_type: bool = False,
continue_on_error: bool = True,
):
"""
Initialize the CwlDemangler.

Expand All @@ -40,7 +46,11 @@ def __init__(self, is_type: bool = False, continue_on_error: bool = True):
self.is_type = is_type
self.queue: List[str] = []
self.continue_on_error = continue_on_error
self.uuid = uuid.uuid4()
self.uuid = str(uuid.uuid4())

# Disable parallel processing if LAUNCHPAD_NO_PARALLEL_DEMANGLE=true
env_disable = os.environ.get("LAUNCHPAD_NO_PARALLEL_DEMANGLE", "").lower() == "true"
self.use_parallel = not env_disable

def add_name(self, name: str) -> None:
"""
Expand All @@ -63,73 +73,131 @@ def demangle_all(self) -> Dict[str, CwlDemangleResult]:

names = self.queue.copy()
self.queue.clear()
results: Dict[str, CwlDemangleResult] = {}

# Process in chunks to avoid potential issues with large inputs
chunk_size = 500
chunk_size = 5000
total_chunks = (len(names) + chunk_size - 1) // chunk_size

chunks: List[Tuple[List[str], int]] = []
for i in range(0, len(names), chunk_size):
chunk = names[i : i + chunk_size]
chunk_results = self._demangle_chunk(chunk, i)
chunk_idx = i // chunk_size
chunks.append((chunk, chunk_idx))

# Only use parallel processing if workload justifies multiprocessing overhead (≥4 chunks = ≥20K symbols)
do_in_parallel = self.use_parallel and total_chunks >= 4

logger.debug(
f"Starting Swift demangling: {len(names)} symbols in {total_chunks} chunks "
f"of {chunk_size} ({'parallel' if do_in_parallel else 'sequential'} mode)"
)

return self._demangle_parallel(chunks) if do_in_parallel else self._demangle_sequential(chunks)

def _demangle_parallel(self, chunks: List[Tuple[List[str], int]]) -> Dict[str, CwlDemangleResult]:
    """Demangle the chunks in a pool of worker processes and merge the results.

    Every ``(names, index)`` pair in ``chunks`` is passed to
    ``_demangle_chunk_worker`` together with this instance's ``is_type``,
    ``continue_on_error`` and ``uuid`` values. If anything in the parallel
    path raises, the failure is logged and all chunks are processed again
    through ``_demangle_sequential``.
    """
    try:
        # starmap pickles the worker function and each argument tuple before
        # sending them to the worker processes, so only plain values are
        # passed: a list of str, an int, two bools and the uuid string.
        job_args = []
        for names, position in chunks:
            job_args.append((names, position, self.is_type, self.continue_on_error, self.uuid))

        with multiprocessing.Pool(processes=4) as pool:
            per_chunk = pool.starmap(_demangle_chunk_worker, job_args)

        merged: Dict[str, CwlDemangleResult] = {}
        for partial in per_chunk:
            merged.update(partial)
        return merged
    except Exception:
        logger.exception("Parallel demangling failed, falling back to sequential")
        return self._demangle_sequential(chunks)

def _demangle_sequential(self, chunks: List[Tuple[List[str], int]]) -> Dict[str, CwlDemangleResult]:
"""Demangle chunks sequentially"""
results: Dict[str, CwlDemangleResult] = {}

for chunk, chunk_idx in chunks:
chunk_results = self._demangle_chunk(chunk, chunk_idx)
results.update(chunk_results)

return results

def _demangle_chunk(self, names: List[str], i: int) -> Dict[str, CwlDemangleResult]:
    """Demangle a single chunk of names using this instance's settings.

    Delegates to the module-level ``_demangle_chunk_worker``, forwarding
    ``self.is_type``, ``self.continue_on_error`` and ``self.uuid``.

    Args:
        names: Mangled names that make up the chunk.
        i: Index of the chunk, forwarded to the worker as its chunk index.

    Returns:
        Whatever ``_demangle_chunk_worker`` returns for this chunk.
    """
    # NOTE(review): this span was recovered from a diff view; the empty-input
    # warning below may belong to the previous implementation - confirm.
    if not names:
        logger.warning("No names to demangle")
    return _demangle_chunk_worker(names, i, self.is_type, self.continue_on_error, self.uuid)


def _demangle_chunk_worker(
chunk: List[str],
chunk_idx: int,
is_type: bool,
continue_on_error: bool,
demangle_uuid: str,
) -> Dict[str, CwlDemangleResult]:
"""Demangle a chunk of symbols. Arguments must be picklable for multiprocessing."""
if not chunk:
return {}

binary_path = shutil.which("cwl-demangle")
if binary_path is None:
logger.error("cwl-demangle binary not found in PATH")
return {}

chunk_set = set(chunk)
results: Dict[str, CwlDemangleResult] = {}

with tempfile.NamedTemporaryFile(
mode="w", prefix=f"cwl-demangle-{demangle_uuid}-chunk-{chunk_idx}-", suffix=".txt"
) as temp_file:
temp_file.write("\n".join(chunk))
temp_file.flush()

command_parts = [
binary_path,
"batch",
"--input",
temp_file.name,
"--json",
]

if is_type:
command_parts.append("--isType")

if continue_on_error:
command_parts.append("--continue-on-error")

try:
result = subprocess.run(command_parts, capture_output=True, text=True, check=True)
except subprocess.CalledProcessError:
logger.exception(f"cwl-demangle failed for chunk {chunk_idx}")
return {}

binary_path = self._get_binary_path()
results: Dict[str, CwlDemangleResult] = {}
batch_result = json.loads(result.stdout)

for symbol_result in batch_result.get("results", []):
mangled = symbol_result.get("mangled", "")
if mangled in chunk_set:
demangle_result = CwlDemangleResult(
name=symbol_result["name"],
type=symbol_result["type"],
identifier=symbol_result["identifier"],
module=symbol_result["module"],
testName=symbol_result["testName"],
typeName=symbol_result["typeName"],
description=symbol_result["description"],
mangled=mangled,
)
results[mangled] = demangle_result

with tempfile.NamedTemporaryFile(
mode="w", prefix=f"cwl-demangle-{self.uuid}-chunk-{i}-", suffix=".txt"
) as temp_file:
temp_file.write("\n".join(names))
temp_file.flush()

command_parts = [
binary_path,
"batch",
"--input",
temp_file.name,
"--json",
]

if self.is_type:
command_parts.append("--isType")

if self.continue_on_error:
command_parts.append("--continue-on-error")

try:
result = subprocess.run(command_parts, capture_output=True, text=True, check=True)
except subprocess.CalledProcessError:
logger.exception("cwl-demangle failed")
return {}

batch_result = json.loads(result.stdout)

for symbol_result in batch_result.get("results", []):
mangled = symbol_result.get("mangled", "")
if mangled in names:
demangle_result = CwlDemangleResult(
name=symbol_result["name"],
type=symbol_result["type"],
identifier=symbol_result["identifier"],
module=symbol_result["module"],
testName=symbol_result["testName"],
typeName=symbol_result["typeName"],
description=symbol_result["description"],
mangled=mangled,
)
results[mangled] = demangle_result

return results

def _get_binary_path(self) -> str:
"""Get the path to the cwl-demangle binary."""
path = shutil.which("cwl-demangle")
assert path is not None
return path
return results
57 changes: 36 additions & 21 deletions tests/integration/test_cwl_demangle.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import os

from unittest import mock

from launchpad.utils.apple.cwl_demangle import CwlDemangler, CwlDemangleResult


Expand Down Expand Up @@ -65,36 +69,47 @@ def test_demangle_all_success(self):
== "_$s6Sentry0A18UserFeedbackWidgetC18RootViewControllerC6config6buttonAeA0abC13ConfigurationC_AA0abcd6ButtonF0Ctcfc"
)

def test_parallel_processing(self):
    """Test demangling with 20k+ symbols (covers chunking and parallel mode)."""
    demangler = CwlDemangler(continue_on_error=True)

    # 20k symbols split into 4 chunks of 5k each, which is the smallest
    # workload for which demangle_all selects the parallel path (unless
    # LAUNCHPAD_NO_PARALLEL_DEMANGLE=true is set in the environment).
    symbols_needed = 20000
    symbols = self._generate_symbols(symbols_needed)
    for symbol in symbols:
        demangler.add_name(symbol)

    result = demangler.demangle_all()

    assert len(result) == symbols_needed
    # Spot check every 1000th symbol instead of all 20k.
    for symbol in symbols[::1000]:
        assert symbol in result
        assert isinstance(result[symbol], CwlDemangleResult)
        assert result[symbol].mangled == symbol

def test_environment_variable_disables_parallel(self):
    """Test LAUNCHPAD_NO_PARALLEL_DEMANGLE env var disables parallel."""
    env_var = "LAUNCHPAD_NO_PARALLEL_DEMANGLE"

    # Unset: parallel mode stays enabled. patch.dict restores os.environ on
    # exit, so removing the variable here does not leak into other tests.
    with mock.patch.dict(os.environ, {}, clear=False):
        os.environ.pop(env_var, None)
        assert CwlDemangler().use_parallel is True

    # The value is compared case-insensitively against "true".
    for value in ("true", "TRUE", "True"):
        with mock.patch.dict(os.environ, {env_var: value}):
            assert CwlDemangler().use_parallel is False

    # Any other value leaves parallel mode enabled.
    for value in ("", "false", "0"):
        with mock.patch.dict(os.environ, {env_var: value}):
            assert CwlDemangler().use_parallel is True

def _generate_symbols(self, count: int) -> list[str]:
"""Generate valid Swift mangled symbols."""
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
symbols = []
for i in range(count):
letter1 = letters[i % len(letters)]
letter2 = letters[(i // len(letters)) % len(letters)]
letter3 = letters[(i // (len(letters) * len(letters))) % len(letters)]

module_name = f"Test{letter1}{letter2}"
symbol_name = f"Symbol{letter3}{i % 100}"
mangled_name = f"_$s{len(module_name)}{module_name}{len(symbol_name)}{symbol_name}"

assert mangled_name in result
# Check that each result is a CwlDemangleResult instance
assert isinstance(result[mangled_name], CwlDemangleResult)
assert result[mangled_name].mangled == mangled_name
symbols.append(mangled_name)
return symbols
Loading