diff --git a/minecode_pipelines/__init__.py b/minecode_pipelines/__init__.py index 01a59395..1d0ca7c8 100644 --- a/minecode_pipelines/__init__.py +++ b/minecode_pipelines/__init__.py @@ -8,4 +8,4 @@ # -VERSION = "0.0.1b22" +VERSION = "0.0.1b23" diff --git a/minecode_pipelines/pipes/composer.py b/minecode_pipelines/pipes/composer.py index fa7575e9..fcc19e5c 100644 --- a/minecode_pipelines/pipes/composer.py +++ b/minecode_pipelines/pipes/composer.py @@ -32,9 +32,7 @@ get_checkpoint_from_file, update_checkpoints_in_github, ) -from scanpipe.pipes.federatedcode import commit_changes -from scanpipe.pipes.federatedcode import push_changes -from minecode_pipelines import VERSION +from scanpipe.pipes.federatedcode import commit_and_push_changes from minecode_pipelines.utils import cycle_from_index, grouper PACKAGE_BATCH_SIZE = 100 @@ -89,23 +87,15 @@ def mine_and_publish_composer_purls(packages, cloned_data_repo, cloned_config_re purl_files.append(purl_file_full_path) purls.append(str(base_purl)) - if purl_files: - commit_changes( - repo=cloned_data_repo, - files_to_commit=purl_files, - purls=purls, - mine_type="packageURL", - tool_name="pkg:composer/minecode-pipelines", - tool_version=VERSION, - ) - push_changes(repo=cloned_data_repo) - - settings_data = { - "date": str(datetime.now()), - "start_index": start_index + (batch_index + 1) * PACKAGE_BATCH_SIZE, - } - update_checkpoints_in_github( - checkpoint=settings_data, - cloned_repo=cloned_config_repo, - path=COMPOSER_CHECKPOINT_PATH, - ) + if purls and purl_files: + commit_and_push_changes(repo=cloned_data_repo, files_to_commit=purl_files, purls=purls) + + settings_data = { + "date": str(datetime.now()), + "start_index": start_index + (batch_index + 1) * PACKAGE_BATCH_SIZE, + } + update_checkpoints_in_github( + checkpoint=settings_data, + cloned_repo=cloned_config_repo, + path=COMPOSER_CHECKPOINT_PATH, + ) diff --git a/minecode_pipelines/pipes/cran.py b/minecode_pipelines/pipes/cran.py index 0ec65a6f..a71288fe 100644 --- a/minecode_pipelines/pipes/cran.py +++ b/minecode_pipelines/pipes/cran.py @@ -20,15 +20,14 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/aboutcode-org/scancode.io for support and download. -from aboutcode.hashid import get_package_purls_yml_file_path, get_core_purl -from scanpipe.pipes.federatedcode import commit_changes -from scanpipe.pipes.federatedcode import push_changes -from minecode_pipelines import VERSION +from aboutcode.hashid import get_package_purls_yml_file_path +from aboutcode.hashid import get_core_purl +from scanpipe.pipes.federatedcode import commit_and_push_changes from minecode_pipelines.miners.cran import extract_cran_packages from minecode_pipelines.pipes import write_data_to_yaml_file from minecode_pipelines.utils import grouper -PACKAGE_BATCH_SIZE = 1000 +PACKAGE_BATCH_SIZE = 100 def mine_and_publish_cran_packageurls(cloned_data_repo, db_path, logger): @@ -38,7 +37,7 @@ def mine_and_publish_cran_packageurls(cloned_data_repo, db_path, logger): """ packages_to_sync = list(extract_cran_packages(db_path)) - for package_batch in grouper(packages_to_sync, PACKAGE_BATCH_SIZE): + for package_batch in grouper(n=PACKAGE_BATCH_SIZE, iterable=packages_to_sync): purl_files = [] base_purls = [] @@ -62,12 +61,8 @@ def mine_and_publish_cran_packageurls(cloned_data_repo, db_path, logger): # After finishing the batch, commit & push if there’s something to save if purl_files and base_purls: - commit_changes( + commit_and_push_changes( repo=cloned_data_repo, files_to_commit=purl_files, purls=base_purls, - mine_type="packageURL", - tool_name="pkg:pypi/minecode-pipelines", - tool_version=VERSION, ) - push_changes(repo=cloned_data_repo) diff --git a/minecode_pipelines/pipes/swift.py b/minecode_pipelines/pipes/swift.py index d8769069..2703c529 100644 --- a/minecode_pipelines/pipes/swift.py +++ b/minecode_pipelines/pipes/swift.py @@ -28,7 +28,8 @@ from aboutcode.hashid import get_core_purl from packageurl import PackageURL -from minecode_pipelines.miners.swift import fetch_git_tags_raw, get_tags_and_commits_from_git_output +from minecode_pipelines.miners.swift import fetch_git_tags_raw +from minecode_pipelines.miners.swift import get_tags_and_commits_from_git_output from minecode_pipelines.miners.swift import split_org_repo from minecode_pipelines.pipes import update_checkpoints_in_github @@ -38,9 +39,7 @@ from minecode_pipelines.pipes import get_checkpoint_from_file from scanpipe.pipes.federatedcode import clone_repository -from scanpipe.pipes.federatedcode import commit_changes -from scanpipe.pipes.federatedcode import push_changes -from minecode_pipelines import VERSION +from scanpipe.pipes.federatedcode import commit_and_push_changes from minecode_pipelines.utils import cycle_from_index PACKAGE_BATCH_SIZE = 100 @@ -49,7 +48,7 @@ MINECODE_DATA_SWIFT_REPO = os.environ.get( "MINECODE_DATA_SWIFT_REPO", "https://github.com/aboutcode-data/minecode-data-swift-test" ) -MINECODE_SWIFT_INDEX_REPO = "https://github.com/SwiftPackageIndex/" +MINECODE_SWIFT_INDEX_REPO = "https://github.com/SwiftPackageIndex/PackageList" def store_swift_packages(package_repo_url, tags_and_commits, cloned_data_repo): @@ -133,16 +132,13 @@ def mine_and_publish_swift_packageurls(logger): counter += 1 if counter >= PACKAGE_BATCH_SIZE: - commit_changes( - repo=cloned_data_repo, - files_to_commit=purl_files, - purls=purls, - mine_type="packageURL", - tool_name="pkg:pypi/minecode-pipelines", - tool_version=VERSION, - ) + if purls and purl_files: + commit_and_push_changes( + repo=cloned_data_repo, + files_to_commit=purl_files, + purls=purls, + ) - push_changes(repo=cloned_data_repo) purl_files = [] purls = [] counter = 0 @@ -161,14 +157,11 @@ def mine_and_publish_swift_packageurls(logger): path=SWIFT_CHECKPOINT_PATH, ) - commit_changes( - repo=cloned_data_repo, - files_to_commit=purl_files, - purls=purls, - mine_type="packageURL", - tool_name="pkg:pypi/minecode-pipelines", - tool_version=VERSION, - ) + if purls and purl_files: + commit_and_push_changes( + repo=cloned_data_repo, + files_to_commit=purl_files, + purls=purls, + ) - push_changes(repo=cloned_data_repo) return [swift_index_repo, cloned_data_repo, cloned_config_repo] diff --git a/pyproject-minecode_pipelines.toml b/pyproject-minecode_pipelines.toml index 93f34f1d..1db10685 100644 --- a/pyproject-minecode_pipelines.toml +++ b/pyproject-minecode_pipelines.toml @@ -4,7 +4,7 @@ build-backend = "flot.buildapi" [project] name = "minecode_pipelines" -version = "0.0.1b22" +version = "0.0.1b23" description = "A library for mining packageURLs and package metadata from ecosystem repositories." readme = "minecode_pipelines/README.rst" license = { text = "Apache-2.0" } @@ -60,7 +60,7 @@ mine_swift = "minecode_pipelines.pipelines.mine_swift:MineSwift" mine_composer = "minecode_pipelines.pipelines.mine_composer:MineComposer" [tool.bumpversion] -current_version = "0.0.1b22" +current_version = "0.0.1b23" allow_dirty = true files = [