Skip to content

Commit

Permalink
cov: segment tree for querying individual points
Browse files Browse the repository at this point in the history
Get the list of code coverage elements matching a given point.
  • Loading branch information
haxscramper committed Apr 22, 2024
1 parent 46955d3 commit d981f4e
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 42 deletions.
8 changes: 4 additions & 4 deletions scripts/cxx_codegen/profdata_merger/profdata_merger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,10 +767,10 @@ struct queries {
"CovSegment",
{
"Id", // 1
"StartLine", // 2
"StartCol", // 3
"EndLine", // 4
"EndCol", // 5
"LineStart", // 2
"ColStart", // 3
"LineEnd", // 4
"ColEnd", // 5
"StartCount", // 6
"EndCount", // 7
"HasCount", // 8
Expand Down
8 changes: 4 additions & 4 deletions scripts/cxx_codegen/profdata_merger/profdata_merger.sql
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ CREATE TABLE "CovFunctionInstantiation" (

CREATE TABLE "CovSegment" (
"Id" INTEGER NOT NULL,
"StartLine" INTEGER NOT NULL,
"StartCol" INTEGER NOT NULL,
"EndLine" INTEGER NOT NULL,
"EndCol" INTEGER NOT NULL,
"LineStart" INTEGER NOT NULL,
"ColStart" INTEGER NOT NULL,
"LineEnd" INTEGER NOT NULL,
"ColEnd" INTEGER NOT NULL,
"StartCount" INTEGER NOT NULL,
"EndCount" INTEGER NOT NULL,
"HasCount" BOOLEAN NOT NULL,
Expand Down
88 changes: 81 additions & 7 deletions scripts/py_repository/py_repository/gen_coverage_cxx.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
#!/usr/bin/env python
from beartype.typing import Optional, Any, List, Tuple
from beartype.typing import Optional, Any, List, Tuple, Iterable
from pydantic import Field, BaseModel

from sqlalchemy import create_engine, Column
from sqlalchemy import create_engine, Column, select, Select
from sqlalchemy import Enum as SqlEnum
from sqlalchemy.schema import CreateTable
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import declarative_base, Session
from py_scriptutils.sqlalchemy_utils import IdColumn, ForeignId, IntColumn, StrColumn, BoolColumn
from py_scriptutils.repo_files import get_haxorg_repo_root_path
from sqlalchemy.types import JSON
import enum
from beartype import beartype
from pathlib import Path

CoverageSchema = declarative_base()

Expand Down Expand Up @@ -100,10 +101,10 @@ class CovSegmentFlat(CoverageSchema):
class CovSegment(CoverageSchema):
__tablename__ = "CovSegment"
Id = IdColumn()
StartLine = IntColumn()
StartCol = IntColumn()
EndLine = IntColumn()
EndCol = IntColumn()
LineStart = IntColumn()
ColStart = IntColumn()
LineEnd = IntColumn()
ColEnd = IntColumn()
StartCount = IntColumn()
EndCount = IntColumn()
HasCount = BoolColumn()
Expand All @@ -113,6 +114,10 @@ class CovSegment(CoverageSchema):
NestedIn = ForeignId("CovSegment.Id", nullable=True)
IsLeaf = BoolColumn()

def intersects(self, line: int, col: int) -> bool:
    """Check whether the point ``(line, col)`` lies inside this segment.

    The segment spans from ``(LineStart, ColStart)`` to
    ``(LineEnd, ColEnd)``, both ends inclusive. Positions are compared
    as ``(line, column)`` tuples so that the column bounds only apply on
    the first and last line of the segment: any column on an interior
    line of a multi-line segment is considered inside it. For segments
    that start and end on the same line this is equivalent to checking
    the line range and the column range independently.

    Args:
        line: Line number of the queried point.
        col: Column number of the queried point.

    Returns:
        ``True`` if the point is within the segment, ``False`` otherwise.
    """
    return ((self.LineStart, self.ColStart) <= (line, col) <=
            (self.LineEnd, self.ColEnd))


class CovInstantiationGroup(CoverageSchema):
__tablename__ = "CovInstantiationGroup"
Expand Down Expand Up @@ -171,6 +176,75 @@ def extract_text(lines: List[str], start: Tuple[int, int], end: Tuple[int, int])
return "\n".join(extracted_lines)


@beartype
class CoverageSegmentTree:
    """Balanced binary tree over coverage segments for point queries.

    Segments are sorted by their start position ``(LineStart, ColStart)``
    and split recursively in halves. Every node records the smallest
    start position and the largest end position of the segments it
    covers, which lets :meth:`query` skip whole subtrees that cannot
    contain the requested point.
    """

    def __init__(self, segments: Iterable[CovSegment]):
        """Sort ``segments`` by start position and build the tree.

        Args:
            segments: Coverage segments to index. May be empty, in which
                case ``root`` stays ``None`` and every query returns an
                empty list.
        """
        self.root: Optional['CoverageSegmentTree.Node'] = None
        self.segments = sorted(segments, key=lambda x: (x.LineStart, x.ColStart))
        if self.segments:
            self.root = self.build_tree(0, len(self.segments) - 1)

    @beartype
    class Node:
        """Tree node covering ``self.segments[start:end + 1]`` of the tree."""

        def __init__(self, start: int, end: int, segments: Iterable[CovSegment]):
            # `start` and `end` are inclusive indices into the sorted
            # segment list of the owning tree, NOT source line numbers.
            self.start = start
            self.end = end
            self.segments = segments
            # Smallest (LineStart, ColStart) and largest (LineEnd, ColEnd)
            # among the covered segments; assigned by `build_tree`.
            self.lo: Optional[Tuple[int, int]] = None
            self.hi: Optional[Tuple[int, int]] = None
            self.left: Optional['CoverageSegmentTree.Node'] = None
            self.right: Optional['CoverageSegmentTree.Node'] = None

    def build_tree(self, start: int, end: int) -> Optional[Node]:
        """Build the subtree for the sorted segments ``start..end``.

        Args:
            start: Index of the first covered segment (inclusive).
            end: Index of the last covered segment (inclusive).

        Returns:
            Root node of the subtree, or ``None`` if the range is empty.
        """
        if start > end:
            return None

        if start == end:
            seg = self.segments[start]
            leaf = self.Node(start, end, [seg])
            leaf.lo = (seg.LineStart, seg.ColStart)
            leaf.hi = (seg.LineEnd, seg.ColEnd)
            return leaf

        mid = (start + end) // 2
        node = self.Node(start, end, self.segments[start:end + 1])
        # With `start < end` both halves are non-empty, so both children
        # exist and carry their own bounds.
        node.left = self.build_tree(start, mid)
        node.right = self.build_tree(mid + 1, end)
        node.lo = min(node.left.lo, node.right.lo)
        # A nested segment can end before an earlier, enclosing one, so
        # the upper bound must be the maximum over both children.
        node.hi = max(node.left.hi, node.right.hi)
        return node

    def query(self,
              line: int,
              col: int,
              node: Optional[Node] = None) -> List[CovSegment]:
        """Collect all segments that contain the point ``(line, col)``.

        Args:
            line: Line number of the queried point.
            col: Column number of the queried point.
            node: Subtree to search; defaults to the root of the tree.

        Returns:
            Matching segments, each reported once, ordered by start
            position. Empty list if the tree is empty or nothing matches.
        """
        if node is None:
            node = self.root
        if node is None:
            return []

        point = (line, col)
        result: List[CovSegment] = []
        pending = [node]
        while pending:
            current = pending.pop()

            # Prune subtrees whose position bounds exclude the point.
            # Nodes without bounds (not produced by `build_tree`) are
            # never pruned.
            if current.lo is not None and current.hi is not None:
                if point < current.lo or current.hi < point:
                    continue

            if current.left is None and current.right is None:
                # Only leaves are matched, so each segment is tested (and
                # reported) at most once.
                result.extend(
                    seg for seg in current.segments if seg.intersects(line, col))
                continue

            # Push right first so the left subtree is visited first and
            # the result keeps the sorted segment order.
            if current.right is not None:
                pending.append(current.right)
            if current.left is not None:
                pending.append(current.left)

        return result


@beartype
def get_coverage_of(session: Session, path: Path) -> Select[Tuple[CovSegment]]:
    """Build a query selecting the coverage segments of a single file.

    Looks up the ``CovFile`` row whose ``Path`` equals ``str(path)`` and
    returns a ``SELECT`` over ``CovSegment`` filtered by that file's
    ``Id``. The returned statement is not executed here.

    Args:
        session: Session used to look up the file record.
        path: Path of the file, compared as a string against
            ``CovFile.Path``.

    Raises:
        ValueError: If the number of matching files is not exactly one.
    """
    file_query = select(CovFile).where(CovFile.Path == str(path))
    matches = session.execute(file_query).fetchall()

    match_count = len(matches)
    if match_count != 1:
        raise ValueError(
            f"{match_count} files matched for given path '{path}', expected exactly one match"
        )

    # Each result row holds the selected `CovFile` entity in its first
    # (and only) position.
    (file_row,) = matches
    return select(CovSegment).where(CovSegment.File == file_row[0].Id)


if __name__ == "__main__":
sql_url = "sqlite:///:memory:"
db_engine = create_engine(sql_url)
Expand Down
3 changes: 2 additions & 1 deletion scripts/py_scriptutils/py_scriptutils/json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def get_path(value: Json) -> Json:
def assert_subset(main: Json, subset: Json, message: Optional[str] = None):
diff = get_subset_diff(main_set=main, expected_subset=subset)

compare = "\n".join([
compare = "Could not find expected subset of values in the main set\n\n"
compare += "\n".join([
"[{}]{}".format(
idx,
describe_diff(
Expand Down
5 changes: 4 additions & 1 deletion scripts/py_scriptutils/py_scriptutils/sqlalchemy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,13 @@ def format_rich_table(engine: Engine,

@beartype
def format_rich_query(
engine: Engine,
engine: Union[Engine, Session],
query: Executable,
column_labels: List[str] = [],
) -> Table:

if isinstance(engine, Session):
engine = engine.get_bind()

rich_table = Table(show_header=True, header_style="bold blue")
with engine.connect() as connection:
Expand Down
82 changes: 57 additions & 25 deletions tests/python/repo/test_code_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,8 @@ def test_file_coverage_filter():

assert len(df) == 2
assert_frame(df, [
dict(StartLine=1, EndLine=1, Path="file1.cpp"),
dict(StartLine=5, EndLine=8, Path="main.cpp"),
dict(LineStart=1, LineEnd=1, Path="file1.cpp"),
dict(LineStart=5, LineEnd=8, Path="main.cpp"),
])


Expand All @@ -302,7 +302,47 @@ def cleanup_test_code(code: str) -> str:
return re.sub(r"\s+", " ", code.replace("\n", " "))


def test_file_segmentation():
@beartype
def add_cov_segment_text(df: pd.DataFrame, lines: List[str]):
    """Add a ``Text`` column with the source text of every segment row.

    For each row the span from ``(LineStart, ColStart)`` to
    ``(LineEnd, ColEnd)`` is extracted from ``lines`` and passed through
    ``cleanup_test_code``. The frame is modified in place.

    Args:
        df: Frame with ``LineStart``, ``ColStart``, ``LineEnd`` and
            ``ColEnd`` columns.
        lines: Source file content split into lines.
    """

    def segment_text(row) -> str:
        begin = (row["LineStart"], row["ColStart"])
        finish = (row["LineEnd"], row["ColEnd"])
        return cleanup_test_code(cov.extract_text(lines, start=begin, end=finish))

    df["Text"] = df.apply(segment_text, axis=1)


def test_file_segmentation_1():
    """Check segment lookup and segment text for a minimal source file.

    Runs the profile pipeline on ``test_file_segmentation_1.cpp``, loads
    the coverage segments recorded for ``main.cpp`` and verifies both the
    point query of the segment tree and the content of the segment that
    starts on the first line.
    """
    with TemporaryDirectory() as tmp:
        # Use the per-test temporary directory instead of a fixed shared
        # `/tmp` path so that runs do not interfere with each other. The
        # whole test stays inside the `with` block because the generated
        # database is located through `cmd` and may live in this directory.
        workdir = Path(tmp)
        code = corpus_base.joinpath("test_file_segmentation_1.cpp").read_text()
        cmd = ProfileRunParams(dir=workdir, main="main.cpp", files={"main.cpp": code})
        cmd.run()

        session = open_sqlite_session(cmd.get_sqlite(), cov.CoverageSchema)
        main_cov = cov.get_coverage_of(session, cmd.get_code("main.cpp"))
        lines = code.split("\n")

        segtree = cov.CoverageSegmentTree(it[0] for it in session.execute(main_cov))
        df = pd.read_sql(main_cov, session.get_bind())
        add_cov_segment_text(df, lines)

        # Coverage segments only overlay executable blocks and do not
        # account for extraneous elements such as function headers etc.
        assert segtree.query(line=1, col=15)
        assert not segtree.query(line=1, col=14)
        assert_frame(df[df["LineStart"] == 1], [
            dict(IsLeaf=True, Text="{}", ColStart=15, ColEnd=17),
        ])


def test_file_segmentation_2():
with TemporaryDirectory() as tmp:
dir = Path(tmp)
dir = Path("/tmp/test_base_run_coverage")
Expand All @@ -317,15 +357,7 @@ def test_file_segmentation():
lines = code.split("\n")

df = pd.read_sql(select(cov.CovSegment), session.get_bind())
df["Text"] = df.apply(
lambda row: cleanup_test_code(
cov.extract_text(
lines,
start=(row["StartLine"], row["StartCol"]),
end=(row["EndLine"], row["EndCol"]),
)),
axis=1,
)
add_cov_segment_text(df, lines)

table = dataframe_to_rich_table(df)
table.show_lines = True
Expand All @@ -339,42 +371,42 @@ def test_file_segmentation():

assert_frame(df, [
dict(
StartLine=1,
EndLine=1,
LineStart=1,
LineEnd=1,
SegmentIndex=0,
Text="{}",
IsLeaf=True,
),
dict(
StartLine=3,
EndLine=5,
LineStart=3,
LineEnd=5,
SegmentIndex=1,
Id=2,
Text="{ if (true || false) { action(); } }",
IsLeaf=False,
),
dict(
StartLine=4,
EndLine=4,
LineStart=4,
LineEnd=4,
SegmentIndex=2,
Text="true",
StartCol=9,
EndCol=13,
ColStart=9,
ColEnd=13,
NestedIn=2,
IsLeaf=True,
),
dict(
StartLine=4,
EndLine=4,
LineStart=4,
LineEnd=4,
SegmentIndex=3,
Text="false",
StartCol=17,
EndCol=22,
ColStart=17,
ColEnd=22,
NestedIn=2,
IsLeaf=True,
),
dict(
StartLine=4,
LineStart=4,
SegmentIndex=4,
Text="{ action(); }",
NestedIn=2,
Expand Down

0 comments on commit d981f4e

Please sign in to comment.