Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph - reuse parse content dto, skip empty packs #4154

Merged
merged 24 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions .changelog/4154.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
changes:
- description: Fixes an issue in **graph** commands where empty pack folders were not being ignored.
type: fix
pr_number: 4154
3 changes: 3 additions & 0 deletions TestSuite/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from demisto_sdk.commands.content_graph.interface import (
ContentGraphInterface,
)
from demisto_sdk.commands.content_graph.objects.repository import from_path
from TestSuite.conf_json import ConfJSON
from TestSuite.docker_native_image_config import DockerNativeImageConfiguration
from TestSuite.global_secrets import GlobalSecrets
Expand All @@ -35,6 +36,8 @@ class Repo:
"""

def __init__(self, tmpdir: Path, init_git: bool = False):
# clear the cache of the content DTO if we create a repo parser
from_path.cache_clear()
self.packs: List[Pack] = list()
self._tmpdir = tmpdir
self._packs_path: Path = tmpdir / "Packs"
Expand Down
2 changes: 2 additions & 0 deletions demisto_sdk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,8 @@ def validate(ctx, config, file_paths: str, **kwargs):
kwargs["post_commit"] = True
exit_code = 0
if not kwargs["skip_old_validate"]:
if kwargs["run_new_validate"]:
kwargs["graph"] = False
validator = OldValidateManager(
is_backward_check=not kwargs["no_backward_comp"],
only_committed_files=kwargs["post_commit"],
Expand Down
3 changes: 2 additions & 1 deletion demisto_sdk/commands/content_graph/commands/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

from neo4j.exceptions import (
ClientError,
DatabaseError,
ServiceUnavailable,
TransactionError,
Expand All @@ -14,7 +15,7 @@ def recover_if_fails(func):
def func_wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except (ServiceUnavailable, DatabaseError, TransactionError) as e:
except (ServiceUnavailable, DatabaseError, TransactionError, ClientError) as e:
if os.getenv("CI"):
logger.error(
"Failed to communicate with Neo4j in CI environment", exc_info=True
Expand Down
9 changes: 7 additions & 2 deletions demisto_sdk/commands/content_graph/commands/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from demisto_sdk.commands.common.tools import (
get_all_repo_pack_ids,
is_external_repository,
string_to_bool,
)
from demisto_sdk.commands.content_graph.commands.common import recover_if_fails
from demisto_sdk.commands.content_graph.commands.create import create_content_graph
Expand Down Expand Up @@ -72,7 +73,10 @@ def update_content_graph(
dependencies (bool): Whether to create the dependencies.
output_path (Path): The path to export the graph zip to.
"""
if os.getenv("DEMISTO_SDK_GRAPH_FORCE_CREATE"):
force_create_graph = os.getenv("DEMISTO_SDK_GRAPH_FORCE_CREATE")
logger.debug(f"DEMISTO_SDK_GRAPH_FORCE_CREATE = {force_create_graph}")

if string_to_bool(force_create_graph, False):
logger.info("DEMISTO_SDK_GRAPH_FORCE_CREATE is set. Will create a new graph")
create_content_graph(
content_graph_interface, marketplace, dependencies, output_path
Expand Down Expand Up @@ -134,7 +138,8 @@ def update_content_graph(

packs_str = "\n".join([f"- {p}" for p in packs_to_update])
logger.info(f"Updating the following packs:\n{packs_str}")
builder.update_graph(packs_to_update)

builder.update_graph(tuple(packs_to_update) if packs_to_update else None)

if dependencies:
content_graph_interface.create_pack_dependencies()
Expand Down
41 changes: 10 additions & 31 deletions demisto_sdk/commands/content_graph/content_graph_builder.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import gc
from typing import List, Optional

import more_itertools
import tqdm
from typing import Optional, Tuple

from demisto_sdk.commands.content_graph.common import Nodes, Relationships
from demisto_sdk.commands.content_graph.interface.graph import ContentGraphInterface
from demisto_sdk.commands.content_graph.objects.repository import ContentDTO
from demisto_sdk.commands.content_graph.parsers.repository import RepositoryParser
from demisto_sdk.commands.content_graph.objects.repository import (
ContentDTO,
)

PACKS_PER_BATCH = 600

Expand All @@ -27,7 +25,7 @@ def __init__(self, content_graph: ContentGraphInterface) -> None:

def update_graph(
self,
packs_to_update: Optional[List[str]] = None,
packs_to_update: Optional[Tuple[str, ...]] = None,
) -> None:
"""Imports a content graph from files and updates the given pack nodes.

Expand All @@ -44,38 +42,19 @@ def init_database(self) -> None:
self.content_graph.create_indexes_and_constraints()

def _parse_and_model_content(
self, packs_to_parse: Optional[List[str]] = None
self, packs_to_parse: Optional[Tuple[str, ...]] = None
) -> None:
content_dto: ContentDTO = self._create_content_dto(packs_to_parse)
self._collect_nodes_and_relationships_from_model(content_dto)

content_dtos: List[ContentDTO] = self._create_content_dtos(packs_to_parse)
for content_dto in content_dtos:
self._collect_nodes_and_relationships_from_model(content_dto)

def _create_content_dtos(self, packs: Optional[List[str]]) -> List[ContentDTO]:
def _create_content_dto(self, packs: Optional[Tuple[str, ...]]) -> ContentDTO:
"""Parses the repository, then creates and returns a repository model.

Args:
path (Path): The repository path.
packs_to_parse (Optional[List[str]]): A list of packs to parse. If not provided, parses all packs.
"""
content_dtos = []
repository_parser = RepositoryParser(self.content_graph.repo_path)
packs_to_parse = tuple(repository_parser.iter_packs(packs))
# parse the content packs with a progress bar
with tqdm.tqdm(
total=len(packs_to_parse),
unit="packs",
desc="Parsing packs",
position=0,
leave=True,
) as progress_bar:
for packs_batch in more_itertools.chunked(packs_to_parse, PACKS_PER_BATCH):
repository_parser.parse(packs_batch, progress_bar)
content_dtos.append(ContentDTO.from_orm(repository_parser))

repository_parser.clear()
gc.collect()
return content_dtos
return ContentDTO.from_path(packs_to_parse=packs)

def _collect_nodes_and_relationships_from_model(
self, content_dto: ContentDTO
Expand Down
40 changes: 28 additions & 12 deletions demisto_sdk/commands/content_graph/objects/repository.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import shutil
import time
from functools import lru_cache
from multiprocessing.pool import Pool
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Tuple

import tqdm
from pydantic import BaseModel, DirectoryPath
Expand All @@ -17,25 +18,40 @@
USE_MULTIPROCESSING = False # toggle this for better debugging


@lru_cache
def from_path(path: Path = CONTENT_PATH, packs_to_parse: Optional[Tuple[str]] = None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why isn't it a class method of ContentDTO?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I documented, for some reason I get cache miss whin it's in the class.

"""
Returns a ContentDTO object with all the packs of the content repository.

This function is outside of the class for better caching.
The class function uses this function so the behavior is the same.
"""
repo_parser = RepositoryParser(path)
with tqdm.tqdm(
total=len(tuple(repo_parser.iter_packs()))
if not packs_to_parse
else len(packs_to_parse),
unit="packs",
desc="Parsing packs",
position=0,
leave=True,
) as progress_bar:
repo_parser.parse(progress_bar=progress_bar)
return ContentDTO.from_orm(repo_parser)


class ContentDTO(BaseModel):
path: DirectoryPath = Path(CONTENT_PATH) # type: ignore
packs: List[Pack]

@staticmethod
def from_path(path: Path = CONTENT_PATH):
def from_path(
path: Path = CONTENT_PATH, packs_to_parse: Optional[Tuple[str, ...]] = None
):
"""
Returns a ContentDTO object with all the packs of the content repository.
"""
repo_parser = RepositoryParser(path)
with tqdm.tqdm(
total=len(tuple(repo_parser.iter_packs())),
unit="packs",
desc="Parsing packs",
position=0,
leave=True,
) as progress_bar:
repo_parser.parse(progress_bar=progress_bar)
return ContentDTO.from_orm(repo_parser)
return from_path(path, packs_to_parse)

def dump(
self,
Expand Down
22 changes: 18 additions & 4 deletions demisto_sdk/commands/content_graph/parsers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from demisto_sdk.commands.common.constants import PACKS_FOLDER
from demisto_sdk.commands.common.cpu_count import cpu_count
from demisto_sdk.commands.common.logger import logger
from demisto_sdk.commands.content_graph.parsers.content_item import (
NotAContentItemException,
)
from demisto_sdk.commands.content_graph.parsers.pack import PackParser

IGNORED_PACKS_FOR_PARSING = ["NonSupported"]
Expand Down Expand Up @@ -42,14 +45,25 @@ def parse(
try:
logger.debug("Parsing packs...")
with multiprocessing.Pool(processes=cpu_count()) as pool:
for pack in pool.imap_unordered(PackParser, packs_to_parse):
self.packs.append(pack)
if progress_bar:
progress_bar.update(1)
for pack in pool.imap_unordered(
RepositoryParser.parse_pack, packs_to_parse
):
if pack:
self.packs.append(pack)
if progress_bar:
progress_bar.update(1)
except Exception:
logger.error(traceback.format_exc())
raise

@staticmethod
def parse_pack(pack_path: Path) -> Optional[PackParser]:
try:
return PackParser(pack_path)
except NotAContentItemException:
logger.error(f"Pack {pack_path.name} is not a valid pack. Skipping")
return None

@staticmethod
def should_parse_pack(path: Path) -> bool:
return (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def repository(mocker):
packs=[],
)
mocker.patch(
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dtos",
return_value=[repository],
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dto",
return_value=repository,
)
return repository

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,15 +290,15 @@ def repository(mocker, repo) -> ContentDTO:
)
repository.packs.extend([pack1, pack2])

def mock__create_content_dto(packs_to_update: List[str]) -> List[ContentDTO]:
def mock__create_content_dto(packs_to_update: List[str]) -> ContentDTO:
if not packs_to_update:
return [repository]
return repository
repo_copy = repository.copy()
repo_copy.packs = [p for p in repo_copy.packs if p.object_id in packs_to_update]
return [repo_copy]
return repo_copy

mocker.patch(
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dtos",
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dto",
side_effect=mock__create_content_dto,
)
return repository
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ def repository(mocker, repo) -> ContentDTO:
pack.content_items.playbook.append(mock_playbook(USED_BY_PLAYBOOK))
repository.packs.extend([pack])

def mock__create_content_dto(packs_to_update: List[str]) -> List[ContentDTO]:
def mock__create_content_dto(packs_to_update: List[str]) -> ContentDTO:
if not packs_to_update:
return [repository]
return repository
repo_copy = repository.copy()
repo_copy.packs = [p for p in repo_copy.packs if p.object_id in packs_to_update]
return [repo_copy]
return repo_copy

mocker.patch(
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dtos",
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dto",
side_effect=mock__create_content_dto,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,8 @@ def repository(mocker) -> ContentDTO:
pack4.content_items.playbook.append(mock_playbook("SamplePlaybook"))
repository.packs.extend([pack1, pack2, pack3, pack4])
mocker.patch(
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dtos",
return_value=[repository],
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dto",
return_value=repository,
)
return repository

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def repository(mocker):
return repository


def test_pack_metadata_xsoar(repo: Repo, tmp_path: Path, mocker):
def test_pack_metadata_xsoar(git_repo: Repo, tmp_path: Path, mocker):
"""
Given:
- A repository with a pack TestPack, containing multiple content items.
Expand All @@ -65,7 +65,7 @@ def test_pack_metadata_xsoar(repo: Repo, tmp_path: Path, mocker):
)
mocker.patch.object(PackMetadata, "_get_tags_from_landing_page", return_value=set())

pack = repo.create_pack("TestPack")
pack = git_repo.create_pack("TestPack")
pack.pack_metadata.write_json(load_json("pack_metadata.json"))

integration = pack.create_integration()
Expand Down Expand Up @@ -105,12 +105,11 @@ def test_pack_metadata_xsoar(repo: Repo, tmp_path: Path, mocker):
test_playbook = pack.create_test_playbook()
test_playbook.create_default_test_playbook(name="TestTestPlaybook")
test_playbook.yml.update({"fromversion": "6.2.0"})
with ChangeCWD(git_repo.path):
with ContentGraphInterface() as interface:
create_content_graph(interface, output_path=tmp_path)
content_cto = interface.marshal_graph(MarketplaceVersions.XSOAR)

with ContentGraphInterface() as interface:
create_content_graph(interface, output_path=tmp_path)
content_cto = interface.marshal_graph(MarketplaceVersions.XSOAR)

with ChangeCWD(repo.path):
content_cto.dump(tmp_path, MarketplaceVersions.XSOAR, zip=False)

assert (tmp_path / "TestPack" / "metadata.json").exists()
Expand Down Expand Up @@ -177,7 +176,7 @@ def test_pack_metadata_xsoar(repo: Repo, tmp_path: Path, mocker):
assert metadata_playbook.get("name") == "MyPlaybook"


def test_pack_metadata_marketplacev2(repo: Repo, tmp_path: Path, mocker):
def test_pack_metadata_marketplacev2(git_repo: Repo, tmp_path: Path, mocker):
"""
Given:
- A repository with a pack TestPack, containing multiple content items.
Expand All @@ -193,7 +192,7 @@ def test_pack_metadata_marketplacev2(repo: Repo, tmp_path: Path, mocker):
)
mocker.patch.object(PackMetadata, "_get_tags_from_landing_page", return_value=set())

pack = repo.create_pack("TestPack")
pack = git_repo.create_pack("TestPack")
pack.pack_metadata.write_json(load_json("pack_metadata2.json"))

integration = pack.create_integration()
Expand Down Expand Up @@ -236,12 +235,12 @@ def test_pack_metadata_marketplacev2(repo: Repo, tmp_path: Path, mocker):
"fromversion": "6.10.0",
},
)
with ChangeCWD(git_repo.path):

with ContentGraphInterface() as interface:
create_content_graph(interface, output_path=tmp_path)
content_cto = interface.marshal_graph(MarketplaceVersions.MarketplaceV2)
with ContentGraphInterface() as interface:
create_content_graph(interface, output_path=tmp_path)
content_cto = interface.marshal_graph(MarketplaceVersions.MarketplaceV2)

with ChangeCWD(repo.path):
content_cto.dump(tmp_path, MarketplaceVersions.MarketplaceV2, zip=False)

assert (tmp_path / "TestPack" / "metadata.json").exists()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ def repository(mocker):
packs=[],
)
mocker.patch(
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dtos",
return_value=[repository],
"demisto_sdk.commands.content_graph.content_graph_builder.ContentGraphBuilder._create_content_dto",
return_value=repository,
)
return repository

Expand Down