Skip to content

Commit

Permalink
Android: Add target graph to dependency analysis
Browse files Browse the repository at this point in the history
Added a new target graph alongside class and package graphs. This new
graph allows easy access to dependency relationships between build
targets (e.g. targetA depends on targetB because class A1 depends on B3
and class A2 depends on B8). This target graph will later be used by
dep_operations.py and associated dependency management commands to
better understand actual relationships between build targets and speed
up the process of identifying unused dependencies (by not attempting to
test removal of already known necessary deps).

Some other improvements include:
- Speed up calls to jdeps by caching jdeps output in the output dir and
  invalidating the cache when the jar file is updated.
- Use string matching for identifying package/class/nested names. This
  cuts the time used by these methods from 3s to 1.5s.
- Fix all type issues identified by Pylance in VS Code, including using
  generics for graph.Graph.
- Add automatic output dir detection, so the only required arg is now
  just the output file (-o output.json).
- Add support for specifying which packages are interesting as well as
  support for generating graphs that include all known classes via
  `--prefixes`.
- Add logging support and `-v` for full logs.
- Remove some no-longer needed pylint comments.
- Ensure that all missing jars are expected, namely those of either
  _bundle_module targets or the targets listed in
  _TARGETS_WITH_NO_SOURCE_FILES at the top of the file.
- Add automatic re-building of sub-targets when jars are missing.
- Add unit tests for the new code paths.

Bug: 1258168
Change-Id: I3e92fd0de3d14d2f50dabdae7bea7644d5c08e37
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4003034
Commit-Queue: Peter Wen <wnwen@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Auto-Submit: Peter Wen <wnwen@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1069778}
  • Loading branch information
Peter Wen authored and Chromium LUCI CQ committed Nov 10, 2022
1 parent 69b93e4 commit d3eff6d
Show file tree
Hide file tree
Showing 16 changed files with 679 additions and 266 deletions.
37 changes: 18 additions & 19 deletions tools/android/dependency_analysis/class_dependency.py
Expand Up @@ -4,29 +4,28 @@
# found in the LICENSE file.
"""Implementation of the graph module for a [Java class] dependency graph."""

import re
from typing import Set, Tuple
from typing import Optional, Set, Tuple

import graph
import class_json_consts

# Matches w/o parens: (some.package.name).(class)$($optional$nested$class)
JAVA_CLASS_FULL_NAME_REGEX = re.compile(
r'^(?P<package>.*)\.(?P<class_name>.*?)(\$(?P<nested>.*))?$')


def java_class_params_to_key(package: str, class_name: str) -> str:
  """Builds the unique graph key for a class.

  The key is the package and the class name joined by a single period,
  e.g. ('org.chromium.base', 'Log') => 'org.chromium.base.Log'.
  """
  return '{}.{}'.format(package, class_name)


def split_nested_class_from_key(key: str) -> Tuple[str, Optional[str]]:
  """Separates a jdeps class name into its outer-class key and nested part.

  The split happens at the first '$'; everything after it (including any
  further '$') is the nested class name. Without a '$' the nested part is
  None.

  E.g. package.class => 'package.class', None
       package.class$nested => 'package.class', 'nested'
  """
  outer_key, dollar_sign, nested = key.partition('$')
  if not dollar_sign:
    # No '$' present: the whole name is the key and nothing is nested.
    return key, None
  return outer_key, nested


class JavaClass(graph.Node):
Expand Down Expand Up @@ -113,14 +112,14 @@ def get_node_metadata(self):
}


class JavaClassDependencyGraph(graph.Graph[JavaClass]):
  """A graph representation of the dependencies between Java classes.

  A directed edge A -> B indicates that A depends on B.
  """

  def create_node_from_key(self, key: str):
    """Creates the JavaClass node identified by |key|.

    Any nested-class suffix ('$...') is dropped first, then the remainder is
    split at its last period into package and class name.

    Args:
      key: A class name of the form 'some.package.Class' or
        'some.package.Class$Nested'.

    Returns:
      A JavaClass built from the package and the outer class name.
    """
    key_without_nested_class, _ = split_nested_class_from_key(key)
    # rpartition yields an empty package when the key has no period. Slicing
    # at rfind()'s -1 would instead chop the last character off the name and
    # use that as the package.
    package, _, class_name = key_without_nested_class.rpartition('.')
    return JavaClass(package=package, class_name=class_name)
Expand Up @@ -4,6 +4,7 @@
# found in the LICENSE file.
"""Unit tests for dependency_analysis.class_dependency."""

import unittest
import unittest.mock

import class_dependency
Expand Down
181 changes: 140 additions & 41 deletions tools/android/dependency_analysis/generate_json_dependency_graph.py
Expand Up @@ -6,33 +6,50 @@

import argparse
import functools
import logging
import math
import multiprocessing
import pathlib
import os
import subprocess
import sys

from typing import List, Tuple
from typing import List, Tuple, Union

import class_dependency
import git_utils
import package_dependency
import serialization
import subprocess_utils
import target_dependency

DEFAULT_ROOT_TARGET = 'chrome/android:monochrome_public_bundle'
_SRC_PATH = pathlib.Path(__file__).parents[3].resolve()
sys.path.append(str(_SRC_PATH / 'build' / 'android'))
from pylib import constants

_DEFAULT_ROOT_TARGET = 'chrome/android:monochrome_public_bundle'
_DEFAULT_PREFIX = 'org.chromium.'
_TARGETS_WITH_NO_SOURCE_FILES = set([
'//components/module_installer/android:module_interface_java',
'//base:jni_java'
])

def class_is_interesting(name: str):

def _relsrc(path: Union[str, pathlib.Path]) -> pathlib.Path:
  """Returns |path| relative to the source root, for concise log messages.

  Args:
    path: A filesystem path, as a string or pathlib.Path.

  Returns:
    The path relative to _SRC_PATH when it lies under it, otherwise the path
    unchanged (as a pathlib.Path).
  """
  path = pathlib.Path(path)
  try:
    return path.relative_to(_SRC_PATH)
  except ValueError:
    # relative_to() raises when the path is not under _SRC_PATH (e.g. an
    # output directory outside the checkout). This helper only prettifies log
    # output, so fall back to the original path rather than crash.
    return path


def class_is_interesting(name: str, prefixes: Tuple[str, ...]) -> bool:
  """Checks if a jdeps class is a class we are actually interested in.

  Args:
    name: The fully qualified class name reported by jdeps.
    prefixes: Package prefixes to keep. An empty tuple disables filtering, so
      every class is considered interesting.

  Returns:
    True if filtering is disabled or |name| starts with any of the prefixes.
  """
  # str.startswith accepts a tuple and matches if any element is a prefix.
  return not prefixes or name.startswith(prefixes)


# pylint: disable=useless-object-inheritance
class JavaClassJdepsParser(object):
class JavaClassJdepsParser:
"""A parser for jdeps class-level dependency output."""
def __init__(self): # pylint: disable=missing-function-docstring

def __init__(self):
self._graph = class_dependency.JavaClassDependencyGraph()

@property
Expand All @@ -43,12 +60,16 @@ def graph(self):
"""
return self._graph

def parse_raw_jdeps_output(self, build_target: str, jdeps_output: str):
def parse_raw_jdeps_output(self, build_target: str, jdeps_output: str,
prefixes: Tuple[str]):
"""Parses the entirety of the jdeps output."""
for line in jdeps_output.split('\n'):
self.parse_line(build_target, line)
self.parse_line(build_target, line, prefixes)

def parse_line(self, build_target: str, line: str):
def parse_line(self,
build_target: str,
line: str,
prefixes: Tuple[str] = (_DEFAULT_PREFIX, )):
"""Parses a line of jdeps output.
The assumed format of the line starts with 'name_1 -> name_2'.
Expand All @@ -63,7 +84,7 @@ def parse_line(self, build_target: str, line: str):

dep_from = parsed[0]
dep_to = parsed[2]
if not class_is_interesting(dep_from):
if not class_is_interesting(dep_from, prefixes):
return

key_from, nested_from = class_dependency.split_nested_class_from_key(
Expand All @@ -72,7 +93,7 @@ def parse_line(self, build_target: str, line: str):
key_from)
from_node.add_build_target(build_target)

if not class_is_interesting(dep_to):
if not class_is_interesting(dep_to, prefixes):
return

key_to, nested_to = class_dependency.split_nested_class_from_key(
Expand All @@ -87,11 +108,36 @@ def parse_line(self, build_target: str, line: str):
from_node.add_nested_class(nested_to)


def _run_jdeps(jdeps_path: pathlib.Path, filepath: pathlib.Path) -> str:
  """Runs jdeps on the given filepath and returns the output.

  Uses a simple file cache for the output of jdeps. If the jar file's mtime is
  older than the jdeps cache then just use the cached content instead.
  Otherwise jdeps is run again and the output used to update the file cache.

  Tested Nov 2nd, 2022:
  - With all cache hits, script takes 13 seconds.
  - Without the cache, script takes 1 minute 14 seconds.

  Args:
    jdeps_path: Path to the jdeps executable.
    filepath: Path to the jar file to analyze; it must exist.

  Returns:
    The jdeps output for the jar, either freshly generated or read from the
    cache file next to the jar.
  """
  assert filepath.exists(), (
      f'Jar file missing for jdeps {filepath}, perhaps some targets need to '
      'be added to _TARGETS_WITH_NO_SOURCE_FILES?')

  cache_path = filepath.with_suffix('.jdeps_cache')
  if (cache_path.exists()
      and cache_path.stat().st_mtime > filepath.stat().st_mtime):
    logging.debug('Found valid jdeps cache at %s', _relsrc(cache_path))
    with cache_path.open() as f:
      return f.read()

  # Cache either doesn't exist or is older than the jar file.
  logging.debug('Running jdeps and parsing output for %s', _relsrc(filepath))
  output = subprocess_utils.run_command(
      [str(jdeps_path), '-R', '-verbose:class',
       str(filepath)])

  # Write to a temporary sibling and rename it into place. Writing the cache
  # in place could leave a truncated file with a fresh mtime if the run is
  # interrupted, and later runs would then trust that partial output.
  tmp_path = cache_path.with_name(f'{cache_path.name}.{os.getpid()}.tmp')
  with tmp_path.open('w') as f:
    f.write(output)
  tmp_path.replace(cache_path)
  return output


def _run_gn_desc_list_dependencies(build_output_dir: str, target: str,
Expand Down Expand Up @@ -124,19 +170,25 @@ def list_original_targets_and_jars(gn_desc_output: str, build_output_dir: str,
original_build_target = build_target.replace('__compile_java', '')
jar_path = _get_jar_path_for_target(build_output_dir, build_target,
cr_position)
# Bundle module targets have no javac jars.
if (original_build_target.endswith('_bundle_module')
or original_build_target in _TARGETS_WITH_NO_SOURCE_FILES):
assert not jar_path.exists(), (
f'Perhaps a source file was added to {original_build_target}?')
continue
jar_tuples.append((original_build_target, jar_path))
return jar_tuples


def _get_jar_path_for_target(build_output_dir: str, build_target: str,
cr_position: int) -> str:
cr_position: int) -> pathlib.Path:
"""Calculates the output location of a jar for a java build target."""
if cr_position == 0: # Not running on main branch, use current convention.
subdirectory = 'obj'
elif cr_position < 761560: # crrev.com/c/2161205
subdirectory = 'gen'
else:
subdirectory = 'obj'
"""Calculates the output location of a jar for a java build target."""
target_path, target_name = build_target.split(':')
assert target_path.startswith('//'), \
f'Build target should start with "//" but is: "{build_target}"'
Expand All @@ -153,22 +205,30 @@ def main():
description='Runs jdeps (dependency analysis tool) on all JARs a root '
'build target depends on and writes the resulting dependency graph '
'into a JSON file. The default root build target is '
'chrome/android:monochrome_public_bundle.')
'chrome/android:monochrome_public_bundle and the default prefix is '
'"org.chromium.".')
required_arg_group = arg_parser.add_argument_group('required arguments')
required_arg_group.add_argument('-C',
'--build_output_dir',
required=True,
help='Build output directory.')
required_arg_group.add_argument(
'-o',
'--output',
required=True,
help='Path to the file to write JSON output to. Will be created '
'if it does not yet exist and overwrite existing '
'content if it does.')
'if it does not yet exist and overwrite existing content if it does.')
arg_parser.add_argument(
'-C',
'--build_output_dir',
help='Build output directory, will guess if not provided.')
arg_parser.add_argument(
'-p',
'--prefixes',
default=_DEFAULT_PREFIX,
help='A comma-separated list of prefixes to filter '
'classes. Class paths that do not match any of the '
'prefixes are ignored in the graph. Pass in an '
'empty string to turn off filtering.')
arg_parser.add_argument('-t',
'--target',
default=DEFAULT_ROOT_TARGET,
default=_DEFAULT_ROOT_TARGET,
help='Root build target.')
arg_parser.add_argument('-d',
'--checkout-dir',
Expand All @@ -180,8 +240,19 @@ def main():
'--gn-path',
default='gn',
help='Path to the gn executable.')
arg_parser.add_argument('-v',
'--verbose',
action='store_true',
help='Used to display detailed logging.')
arguments = arg_parser.parse_args()

if arguments.verbose:
level = logging.DEBUG
else:
level = logging.INFO
logging.basicConfig(
level=level, format='%(levelname).1s %(relativeCreated)6d %(message)s')

if arguments.checkout_dir:
src_path = pathlib.Path(arguments.checkout_dir)
else:
Expand All @@ -198,39 +269,67 @@ def main():
cr_position_str = git_utils.get_last_commit_cr_position()
cr_position = int(cr_position_str) if cr_position_str else 0

print('Getting list of dependency jars...')
if arguments.build_output_dir:
constants.SetOutputDirectory(arguments.build_output_dir)
constants.CheckOutputDirectory()
arguments.build_output_dir = constants.GetOutDirectory()
logging.info(f'Using output dir: {_relsrc(arguments.build_output_dir)}')

logging.info('Getting list of dependency jars...')
gn_desc_output = _run_gn_desc_list_dependencies(arguments.build_output_dir,
arguments.target,
arguments.gn_path)
target_jars: JarTargetList = list_original_targets_and_jars(
gn_desc_output, arguments.build_output_dir, cr_position)

print('Running jdeps...')
# Need to trim off leading // to convert gn target to ninja target.
missing_targets = [
target_name[2:] for target_name, path in target_jars
if not path.exists()
]
if missing_targets:
logging.warning(
f'Missing {len(missing_targets)} jars, re-building the targets.')
subprocess.run(['autoninja', '-C', arguments.build_output_dir] +
missing_targets,
check=True)

logging.info('Running jdeps...')
# jdeps already has some parallelism
jdeps_process_number = math.ceil(multiprocessing.cpu_count() / 2)
with multiprocessing.Pool(jdeps_process_number) as pool:
jar_paths = [target_jar for _, target_jar in target_jars]
jdeps_outputs = pool.map(functools.partial(_run_jdeps, jdeps_path),
jar_paths)

print('Parsing jdeps output...')
logging.info('Parsing jdeps output...')
prefixes = tuple(arguments.prefixes.split(','))
jdeps_parser = JavaClassJdepsParser()
for raw_jdeps_output, (build_target, _) in zip(jdeps_outputs, target_jars):
jdeps_parser.parse_raw_jdeps_output(build_target, raw_jdeps_output)
logging.debug(f'Parsing jdeps for {build_target}')
jdeps_parser.parse_raw_jdeps_output(build_target,
raw_jdeps_output,
prefixes=prefixes)

class_graph = jdeps_parser.graph
print(f'Parsed class-level dependency graph, '
f'got {class_graph.num_nodes} nodes '
f'and {class_graph.num_edges} edges.')
logging.info(f'Parsed class-level dependency graph, '
f'got {class_graph.num_nodes} nodes '
f'and {class_graph.num_edges} edges.')

package_graph = package_dependency.JavaPackageDependencyGraph(class_graph)
print(f'Created package-level dependency graph, '
f'got {package_graph.num_nodes} nodes '
f'and {package_graph.num_edges} edges.')

print(f'Dumping JSON representation to {arguments.output}.')
serialization.dump_class_and_package_graphs_to_file(
class_graph, package_graph, arguments.output)
logging.info(f'Created package-level dependency graph, '
f'got {package_graph.num_nodes} nodes '
f'and {package_graph.num_edges} edges.')

target_graph = target_dependency.JavaTargetDependencyGraph(class_graph)
logging.info(f'Created target-level dependency graph, '
f'got {target_graph.num_nodes} nodes '
f'and {target_graph.num_edges} edges.')

logging.info(f'Dumping JSON representation to {arguments.output}.')
serialization.dump_class_and_package_and_target_graphs_to_file(
class_graph, package_graph, target_graph, arguments.output)
logging.info('Done')


if __name__ == '__main__':
Expand Down

0 comments on commit d3eff6d

Please sign in to comment.