Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Changelog
Unreleased
----------

- Include layers in docker image data.
https://github.com/nexB/scancode.io/issues/175

- Fix a server error on resource details view when the compliance alert is "missing".
https://github.com/nexB/scancode.io/issues/344

Expand Down
71 changes: 58 additions & 13 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,48 +36,93 @@

def extract_images_from_inputs(project):
"""
Collects all the tarballs from the `project` input/ work directory, extracts each
tarball to the tmp/ work directory and collects the images.
Returns the `images` and `errors` that may have happened during the extraction.
Collects all the tarballs from the `project` input/ work directory, extracts
each tarball to the tmp/ work directory and collects the images.

Returns the `images` and an `errors` list of error messages that may have
happened during the extraction.
"""
target_path = project.tmp_path
images = []
errors = []

for input_tarball in project.inputs(pattern="*.tar*"):
extract_target = target_path / f"{input_tarball.name}-extract"
extract_errors = extract_archive(input_tarball, extract_target)
images.extend(Image.get_images_from_dir(extract_target))
errors.extend(extract_errors)
imgs, errs = extract_image_from_tarball(input_tarball, extract_target)
images.extend(imgs)
errors.extend(errs)

return images, errors


def extract_image_from_tarball(input_tarball, extract_target, verify=True):
    """
    Extract the ``input_tarball`` archive into the ``extract_target`` directory
    Path object, then collect the Docker images found there.

    The ``verify`` flag is forwarded to ``Image.get_images_from_dir``
    unchanged — presumably it toggles image integrity checks in
    container_inspector; confirm against that library's documentation.

    Returns a two-tuple of the collected `images` and an `errors` list of
    error messages that may have happened during the extraction.
    """
    extraction_errors = [
        error for error in extract_archive(location=input_tarball, target=extract_target)
    ]
    collected_images = Image.get_images_from_dir(
        extracted_location=str(extract_target),
        verify=verify,
    )
    return collected_images, extraction_errors


def extract_layers_from_images(project, images):
"""
Extracts all layers from the provided `images` into the `project` codebase/ work
Extracts all layers from the provided `images` into the `project` codebase
work directory.

Returns an `errors` list of error messages that may occur during the
extraction.
"""
return extract_layers_from_images_to_base_path(
base_path=project.codebase_path,
images=images,
)


def extract_layers_from_images_to_base_path(base_path, images):
"""
Extracts all layers from the provided `images` into the `base_path` work
directory.
Returns the `errors` that may happen during the extraction.

Returns an `errors` list of error messages that may occur during the
extraction.
"""
errors = []
base_path = Path(base_path)

for image in images:
image_dirname = Path(image.extracted_location).name
target_path = project.codebase_path / image_dirname
target_path = base_path / image_dirname

for layer in image.layers:
extract_target = target_path / layer.layer_id
extract_errors = extract_archive(layer.archive_location, extract_target)
extract_errors = extract_archive(
location=layer.archive_location,
target=extract_target,
)
errors.extend(extract_errors)
layer.extracted_location = str(extract_target)

return errors


def get_image_data(image):
def get_image_data(image, layer_path_segments=2):
"""
Returns a mapping of image-related data given an `image`.
Keep only ``layer_path_segments`` trailing layer location segments (or keep
the locations unmodified if ``layer_path_segments`` is 0).
"""
exclude = ["extracted_location", "archive_location", "layers"]
exclude_from_img = ["extracted_location", "archive_location"]
image_data = {
key: value for key, value in image.to_dict().items() if key not in exclude
key: value
for key, value in image.to_dict(layer_path_segments=layer_path_segments).items()
if key not in exclude_from_img
}
return image_data

Expand Down
Binary file added scanpipe/tests/data/docker-images.tar.gz
Binary file not shown.
394 changes: 394 additions & 0 deletions scanpipe/tests/data/docker-images.tar.gz-expected-data-1.json

Large diffs are not rendered by default.

394 changes: 394 additions & 0 deletions scanpipe/tests/data/docker-images.tar.gz-expected-data-2.json

Large diffs are not rendered by default.

12 changes: 0 additions & 12 deletions scanpipe/tests/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
from scanpipe.models import DiscoveredPackage
from scanpipe.models import Project
from scanpipe.pipes import codebase
from scanpipe.pipes import docker
from scanpipe.pipes import fetch
from scanpipe.pipes import filename_now
from scanpipe.pipes import make_codebase_resource
Expand Down Expand Up @@ -744,17 +743,6 @@ def test_scanpipe_pipes_fetch_fetch_urls(self, mock_get):
self.assertEqual(2, len(errors))
self.assertEqual(urls, errors)

def test_scanpipe_pipes_docker_tag_whiteout_codebase_resources(self):
p1 = Project.objects.create(name="Analysis")
resource1 = CodebaseResource.objects.create(project=p1, path="filename.ext")
resource2 = CodebaseResource.objects.create(project=p1, name=".wh.filename2")

docker.tag_whiteout_codebase_resources(p1)
resource1.refresh_from_db()
resource2.refresh_from_db()
self.assertEqual("", resource1.status)
self.assertEqual("ignored-whiteout", resource2.status)

def test_scanpipe_pipes_rootfs_from_project_codebase_class_method(self):
p1 = Project.objects.create(name="Analysis")
root_filesystems = list(rootfs.RootFs.from_project_codebase(p1))
Expand Down
88 changes: 88 additions & 0 deletions scanpipe/tests/test_pipes_docker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

import json
import tempfile
from pathlib import Path

from django.apps import apps
from django.test import TestCase

from scanpipe.models import CodebaseResource
from scanpipe.models import Project
from scanpipe.pipes import docker

scanpipe_app = apps.get_app_config("scanpipe")


class ScanPipeDockerPipesTest(TestCase):
    # Directory holding the test fixtures: the input image tarball and the
    # expected JSON result files.
    data_path = Path(__file__).parent / "data"

    def assertResultsEqual(self, expected_file, results, regen=False):
        """
        Assert that the ``results`` string is equal to the content of the
        ``expected_file`` Path.

        Set `regen` to True to regenerate the expected results.
        """
        if regen:
            expected_file.write_text(results)

        expected_data = expected_file.read_text()
        self.assertEqual(expected_data, results)

    def test_pipes_docker_get_image_data_contains_layers_with_relative_paths(self):
        """
        Check get_image_data() output against JSON fixtures, both before the
        layers are extracted (expected-data-1) and after (expected-data-2).
        """
        extract_target = str(Path(tempfile.mkdtemp()) / "tempdir")
        input_tarball = str(self.data_path / "docker-images.tar.gz")

        # Extract the image first
        # verify=False for the test fixture tarball — presumably this skips
        # container_inspector's image verification; confirm against its docs.
        images, errors = docker.extract_image_from_tarball(
            input_tarball,
            extract_target,
            verify=False,
        )
        self.assertEqual([], errors)

        images_data = [docker.get_image_data(i) for i in images]
        results = json.dumps(images_data, indent=2)
        expected_location = self.data_path / "docker-images.tar.gz-expected-data-1.json"
        self.assertResultsEqual(expected_location, results, regen=False)

        # Extract the layers second
        errors = docker.extract_layers_from_images_to_base_path(
            base_path=extract_target,
            images=images,
        )
        self.assertEqual([], errors)

        images_data = [docker.get_image_data(i) for i in images]
        results = json.dumps(images_data, indent=2)
        expected_location = self.data_path / "docker-images.tar.gz-expected-data-2.json"
        self.assertResultsEqual(expected_location, results, regen=False)

    def test_pipes_docker_tag_whiteout_codebase_resources(self):
        """
        Resources whose name carries the Docker whiteout prefix ".wh." get the
        "ignored-whiteout" status; other resources keep an empty status.
        """
        p1 = Project.objects.create(name="Analysis")
        resource1 = CodebaseResource.objects.create(project=p1, path="filename.ext")
        resource2 = CodebaseResource.objects.create(project=p1, name=".wh.filename2")

        docker.tag_whiteout_codebase_resources(p1)
        resource1.refresh_from_db()
        resource2.refresh_from_db()
        self.assertEqual("", resource1.status)
        self.assertEqual("ignored-whiteout", resource2.status)
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ install_requires =
# WSGI server
gunicorn==20.1.0
# Docker
container_inspector==21.6.10
container_inspector==30.0.0
# ScanCode-toolkit
scancode-toolkit[packages]==30.1.0
extractcode[full]==30.0.0
Expand Down