From 3aeb2ec68d313b75430539d9e4d2e57c53ef6998 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 31 May 2021 11:24:39 +0200 Subject: [PATCH 01/13] Update format Signed-off-by: Philippe Ombredanne --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index f791084..f192f22 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,7 +20,7 @@ classifiers = Programming Language :: Python :: 3 :: Only Topic :: Software Development Topic :: Utilities -keywords = +keywords = utilities [options] @@ -43,4 +43,4 @@ testing = docs= Sphinx>=3.3.1 sphinx-rtd-theme>=0.5.0 - doc8>=0.8.1 \ No newline at end of file + doc8>=0.8.1 From 2c412e8222d4d615384a24e2ddc472b0c9703916 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 31 May 2021 11:24:57 +0200 Subject: [PATCH 02/13] Add Python 3.9 to Travis Signed-off-by: Philippe Ombredanne --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 1b52eb2..1a90a38 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ python: - "3.6" - "3.7" - "3.8" + - "3.9" # Scripts to run at install stage install: ./configure --dev From 69eec23792d59dbdc3a3acb1711884560cf27073 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 31 May 2021 11:27:35 +0200 Subject: [PATCH 03/13] Format and remove spurious spaces From https://github.com/nexB/typecode/pull/20 Reported-by: Pierre Tardy Signed-off-by: Philippe Ombredanne --- configure.bat | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure.bat b/configure.bat index 8c497ba..80d0a43 100644 --- a/configure.bat +++ b/configure.bat @@ -9,7 +9,7 @@ @rem ################################ -@rem # A configuration script to set things up: +@rem # A configuration script to set things up: @rem # create a virtualenv and install or update thirdparty packages. @rem # Source this script for initial configuration @rem # Use configure --help for details @@ -48,7 +48,7 @@ set "CFG_BIN_DIR=%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" @rem ################################ -@rem # Set the quiet flag to empty if not defined +@rem # Set the quiet flag to empty if not defined if not defined CFG_QUIET ( set "CFG_QUIET= " ) @@ -65,8 +65,8 @@ if "%1" EQU "--dev" ( set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" set CFG_DEV_MODE=1 ) -if "%1" EQU "--python" ( - echo "The --python is now DEPRECATED. Use the PYTHON_EXECUTABLE environment +if "%1" EQU "--python"( + echo "The --python option is now DEPRECATED. Use the PYTHON_EXECUTABLE environment" echo "variable instead. Run configure --help for details." exit /b 0 ) @@ -76,7 +76,7 @@ if "%1" EQU "--python" ( @rem # Use environment variables or a file if available. @rem # Otherwise the latest Python by default. if not defined PYTHON_EXECUTABLE ( - @rem # check for a file named PYTHON_EXECUTABLE + @rem # check for a file named PYTHON_EXECUTABLE if exist ""%CFG_ROOT_DIR%\PYTHON_EXECUTABLE"" ( set /p PYTHON_EXECUTABLE=<""%CFG_ROOT_DIR%\PYTHON_EXECUTABLE"" ) else ( From 0e09ad9eb77ca0b580d71baa428955a0a56d19f1 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 31 May 2021 19:17:43 +0200 Subject: [PATCH 04/13] Bump to more modern version of setuptools_scm And remove v prefix from fallback version Signed-off-by: Philippe Ombredanne --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8eebe91..852f0fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [build-system] -requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 4"] +requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] # this is used populated when creating a git archive # and when there is .git dir and/or there is no git installed -fallback_version = "v9999.$Format:%h-%cs$" +fallback_version = "9999.$Format:%h-%cs$" [tool.pytest.ini_options] norecursedirs = [ From e339a70e1a46b613fa73b9d0a9273fe7640acb8d Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 31 May 2021 19:18:09 +0200 Subject: [PATCH 05/13] Add space for correct syntax Signed-off-by: Philippe Ombredanne --- configure.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.bat b/configure.bat index 80d0a43..c12f937 100644 --- a/configure.bat +++ b/configure.bat @@ -65,7 +65,7 @@ if "%1" EQU "--dev" ( set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" set CFG_DEV_MODE=1 ) -if "%1" EQU "--python"( +if "%1" EQU "--python" ( echo "The --python option is now DEPRECATED. Use the PYTHON_EXECUTABLE environment" echo "variable instead. Run configure --help for details." exit /b 0 From c4017677f3c84b7d3b531e0282a12a9a9fdc6c67 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 4 Jun 2021 15:07:53 +0200 Subject: [PATCH 06/13] Ensure Distro.from_rootfs() works as expected. - Accept empty and non-existing location when creating a Distro; return None - Properly merge base_distro if provided. Accept no base_distro. Raise Exception on inconsistent OS - Do not use "linux" as Distro() default - Do not return "windows" if no distro is found. This was found in https://github.com/nexB/scancode.io Reported-by: tdruez Signed-off-by: Philippe Ombredanne --- CHANGELOG.rst | 13 +++ src/container_inspector/distro.py | 77 +++++++++++++----- src/container_inspector/image.py | 1 + .../data/distro/windows-container-rootfs.tar | Bin 0 -> 18944 bytes .../windows-mini-image.tar.gz.expected.json | 4 +- tests/test_distro.py | 59 ++++++++++++++ tests/test_image.py | 1 + 7 files changed, 131 insertions(+), 24 deletions(-) create mode 100644 tests/data/distro/windows-container-rootfs.tar diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ad1571b..97c5ccd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,19 @@ Changelog ========= +v21.6.4 +-------- + +This is a minor release with bug fixes and minor API changes. + +API changes +~~~~~~~~~~~ + +The Distro.from_rootfs() now works as expected. It can handle empty location +and works correctly with a base_distro. When a base_distro is provided it +will raise an Exception if the found Distro.os does not match the base Distro.os + + v21.5.25 -------- diff --git a/src/container_inspector/distro.py b/src/container_inspector/distro.py index 4d38a66..9ff6225 100755 --- a/src/container_inspector/distro.py +++ b/src/container_inspector/distro.py @@ -12,11 +12,14 @@ # specific language governing permissions and limitations under the License. import logging -from os import path +import os import shlex +from os import path import attr +from container_inspector import rootfs + logger = logging.getLogger(__name__) # un-comment these lines to enable logging # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) @@ -46,19 +49,21 @@ class Distro(object): """ os = attr.attrib( - default='linux', + default=None, metadata=dict( - doc='Operating system, default to linux. ' + doc='Operating system. ' 'One of: {}'.format(', '.join(os_choices))) ) architecture = attr.attrib( default=None, - metadata=dict(doc='Processor architecture such as x86, x86_64, arm or amd64.') + metadata=dict( + doc='Processor architecture such as x86, x86_64, arm or amd64.' + ) ) name = attr.attrib( - default='linux', + default=None, metadata=dict(doc='''Based on os-release: https://www.freedesktop.org/software/systemd/man/os-release.html NAME= A string identifying the operating system, without a version @@ -82,7 +87,7 @@ class Distro(object): ) identifier = attr.attrib( - default='linux', + default=None, metadata=dict(doc='''Based on os-release: https://www.freedesktop.org/software/systemd/man/os-release.html ID= A lower-case string (no spaces or other characters outside of @@ -291,7 +296,13 @@ def to_dict(self): def from_os_release_file(cls, location): """ Return a Distro built from a Linux os-release file. + Return None if ``location`` is empty or missing. + Raise an Exception if the os-release file is invalid and cannot be + parsed """ + if not location or not os.path.exists(location): + return + data = parse_os_release(location) or {} new_data = dict( # This idiom looks a tad wierd but we want to always get a linux as @@ -333,13 +344,23 @@ def from_os_release_file(cls, location): @classmethod def from_rootfs(cls, location, base_distro=None): """ - Return a Distro discovered from the rootfs at `location`. - Return None if no OS was found. + Return a Distro discovered from the rootfs at ``location``. Return None + if no OS is found or if ``location`` is empty or missing. Use the optional ``base_distro`` Distro object attributes as a base and - to guide discovery. If provided ``base_distro`` may be returned as-is - if no extra OS details were found. + to guide discovery. + + Raise an Exception if the ``base_distro`` OS does not match the found + distro. + + Providing a ``base_distro`` Distro is useful when the distro information + are already known ahead of time (for instance from a Docker image + manifest) and may be missing from the rootfs proper (for instance of an + /etc/os-release is missing in the rootfs for a Linux-based image). """ + if not location or not os.path.exists(location): + return + finders = { 'linux': cls.find_linux_details, 'windows': cls.find_windows_details, @@ -347,21 +368,27 @@ def from_rootfs(cls, location, base_distro=None): } for finder_os, finder in finders.items(): - if base_distro and base_distro.os != finder_os: - continue found = finder(location) if found: - return base_distro.merge(found) + if base_distro: + if base_distro.os != finder_os: + raise Exception( + f'Inconsistent base distro OS: {base_distro.os} ' + f'and found distro OS : {found.os}' + ) - if base_distro: - return base_distro + return base_distro.merge(found) + else: + return found @classmethod def find_linux_details(cls, location): """ - Find a linux distro details using the os-release file and return a - Distro object or None. + Find a linux distro details using the os-release file at ``location`` + and return a Distro object or None. + + Raise an Exception if an os-release file is found that cannot be parsed. """ # note: /etc/os-release has precedence over /usr/lib/os-release. for candidate_path in ('etc/os-release', 'usr/lib/os-release',): @@ -374,10 +401,12 @@ def find_windows_details(cls, location): """ Find a Windows installation details and return a Distro object or None. """ - return cls( - os='windows', - identifier='identifier', - ) + if rootfs.find_root( + location, + max_depth=2, + root_paths=rootfs.WINDOWS_PATHS, + ): + return cls(os='windows', identifier='windows',) @classmethod def find_freebsd_details(cls, location): @@ -422,8 +451,12 @@ def merge(self, other_distro): """ existing = self.to_dict() if other_distro: - other_non_empty = {k: v for k, v in other_distro.to_dict().items() if v} + other_non_empty = { + k: v for k, v in other_distro.to_dict().items() + if v + } existing.update(other_non_empty) + return type(self)(**existing) diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index 2f9c973..0f36fcc 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -230,6 +230,7 @@ class Image(ArchiveMixin, ConfigMixin, ToDictMixin): Image objects can be created from these inputs: - an image tarball in docker format (e.g. "docker save"). - a directory that contains an extracted image tarball in these layouts. + OCI format is not yet supported. """ diff --git a/tests/data/distro/windows-container-rootfs.tar b/tests/data/distro/windows-container-rootfs.tar new file mode 100644 index 0000000000000000000000000000000000000000..a3b02f08ed7f778900adad08e368014e03bf5c8f GIT binary patch literal 18944 zcmeHO+in^`63ug@{6ovy2YW>bcW?4W3{H@2;{_ZmTJ1wK(?Bc33_BMb@7M3C?w%VL z2W)2dVJC``VJ_9xbxxhC?tyFRh~U$^LjTm%ANFqd|N3IRRckda8<(w4o7L*IMyK|U zwF~vi7>|OG`*7{8V{K0Et@i#)&tEJ2Z=@yMKvcuuVFBYJG@Vv!AO3Y<-C=qB8?9#j z9eXn-hs@>w9RJ(F)o3^#UQZbP^Vl8r+4$l9{-%H1zngS#*v;Uoe>d)bcz-<{vB$w( zZ}>Q7S3_JI-1SH7Yj@mdgWK*m-0|W4WFZ4Kj#ev2EQK3#hZ$lqoS%6<3m1YtO4s(D z0(Rx$4(SS?edU2*gC(B}c7B$gR%gyGs*DY0?AeQ$q{557e4!LV{-)4wK90zl$$3u=J#GF{*&B7<{BN~?HIKm3$T3~=P zbLV$22-(=&b9PaIF&gStTE0h*CYFaQ;qy?sb2fesLa}7$@F9?%%j&g@%qVA9j>O1p zkKx$SdW{Xd3G0)OD_`{Ty;krvcHEBlDgs>}esb zg^Ekc?S=4#bagIirjjH2QGE?Qx-(n~HL+Ddj3X5aqWeOc1q<#tZ0=!Vu6~?m3+56A zJMyl|zW!w0tE>L~WN`P5O}-DtJ7tGKbRNd~mVQQdl;Tv_&*1~>ejD}sY%*lsn;SN~ zhOIs%Y0@9vj%l{n>Zab1-3=#ZMAp6Q!Pf2l%}*%Wgo0&J(SF&8U> z8hP(fN*K3>ss5w(PsYY1BcNI1{MSXywY|gTbV9QQ@~PAonL%`!K0`Mh5hBaLP8kLa z0wR48f`nkFFIEoJ%xEMLz!z&M269qaL0Se2?+H)3?Cjyb*PZlxXNmrr6JHW-*$V3J znX+@#n5FQcmA_oFj$z_(B!HNxgY2gIgu+_5p5x7*$(xPf>QstC>BxTz5{0$&!w5;| zCE6NjZ51vqc`nsGBK?9y+H(P`&z7W#8-^2q&I77&eU6NceN7kwhtQq6mMJ(#nk9N+ znt`+d7)lPDImCO#F;n(+9udE@a!_RxAeucB!%F%?vq#zWl|UL*OJ-}zyizdtISERF z2BN_%9~qJ5&gRn@eUky|8)SwI24clg?*yCq-ZBH#60t>}q2~2~sx^}HMHnFHG>c8! zB%^VG$3(O*FmKWpZkZT@4{7mxnGFjb0!hn>Ajb^Hj^KtIB-qkYY5 z9#%GjiaM&5Ja<|3R;n1Pn|4V(`V7^)QDA-XF$w4n@B=nQCrC`CfJe%ty@`;5ag-^C za50VqdE$#D%Fqk7%J!^CW$6q{AI+7W6)I;{xO|Gc$v6ncElUJRL^2=}eZhT3@gptgtg7h?e=;P_H1Mu5(aW_!Y=5wZU+m`aWjMfCqtCl9?T| zQ1|ce*Agk8Itwhg-6 ztIv3tScq)5Ov8eVa6bl+0f-kl4E1qQB^1d`j+vCm6p5tFG1g7{n_TaMh?1*0@kv@A z#f@&Sr^+%GFRBY5_1e`>BGQl4)c`A(Cc!tEMm*g0U`l78%%Ng#w49=C-=lR}!$YYw z4HYRa1PLe1(g>7Rhzd0O-byia75OVKK<*Jr8_Y0SkEri3h$wN$h-XK->Sv47o>{5b zZFScN1!?Q4|5=3M)KRS7r;9HI$29M{YoGf9SzvT8ZI-UQC7KLNq6?VBE6@kvKnvU$ z3#^9JN@x_JEn1q@EpW^)fwxywJ+w*4m>a#8#Bw+`NHNX~+$G9T4sK79o+J=m=GOs!4p+#|kaf5b!|PMs88_=m>kR0*tdD1=75d>|iKS{HR4S z=cy4kT^=N9GKdJ>rlP+H!_}8hpZG6cT?U_&31z{IP1K;c;;%(|i2+HJ;;U4vs;s|; zUcs9nqV-+cI{-I?N9(cHZ4V;pGaj=9lOWYZwXhrxKBA5|aw$XAd62y5siQ795p58t zT@{(V)6~Jaw6?8wj$ubJ2 z(a0P(XnVz{77nAt1tX<~2Arx7E~;@1YXZG4O-hmtRMuQNe2U&4)ybY+TB|_@zof_^ zcm$s#)#79#__C5&yh8a_9_>64uj-i#j~rPv5;lfHBl_57pZ`{CH!4`-cdP84+F}tm z%8GI>hjgB$GmXV$_a0+be^WA1OUckr&&8Qef@~lt*s3#OYA9-m;=LJD)D6Z@JCj*p(`ne;bp!&OSmu(^rSYqkK@Bn)y}5-=B`z68xYWTU#6d-VSQz3CT{~JgZ^YKOe z^_$u^bKrRVJM~(-)c;}-&s#kN|Dh|!BBU3|uluIZ(fBuN&DLd!e;%n~J{^MpV}JA} z4LBPAdZ*oLl=v5ec;4!v_>TmZ^kLV>tF*N^k_|`W->5h1W&Y=ZD(KU3`2Rf;(Xr^i zqW^2)3!r8G7le4;?4kHi@CxzBv`+>)I{)jJ*a#~1KM7!g2iy4jVlM4!{qyJg$6BX~ z9n$JD{$}AT3GfR18|D6AfuLthYy9#0(tllVpE0%f@F4un_x~>&HSGV^o0qLJ|Mv#; zgcmga=Ed+U^ZQTk9EX2{@W(GfO8oy6*Hc{2o^*w&{+(E}aRSgYt{;Sd(*INc*KC%*|IGxjz{5TG zn=hcAr2HTG{+IA?*PG@0{{m4@P5%jhS45%Doi_=>LHVEb|Ac=BFMvz^Pf;rF=urHB zcn*Fw^Ew(Fjenz7YnT0BaiC``ABn&IC1(0t^@9j;JpRpQqs;#dO66f0fiePR1Wq4; F{{X#hDU$#I literal 0 HcmV?d00001 diff --git a/tests/data/image/windows-mini-image.tar.gz.expected.json b/tests/data/image/windows-mini-image.tar.gz.expected.json index 50afebe..df10647 100644 --- a/tests/data/image/windows-mini-image.tar.gz.expected.json +++ b/tests/data/image/windows-mini-image.tar.gz.expected.json @@ -18,9 +18,9 @@ "distro": { "os": "windows", "architecture": "amd64", - "name": "linux", + "name": null, "version": "10.0.19042.985", - "identifier": "identifier", + "identifier": "windows", "id_like": [], "version_codename": null, "version_id": null, diff --git a/tests/test_distro.py b/tests/test_distro.py index 5bfc351..280c3ec 100644 --- a/tests/test_distro.py +++ b/tests/test_distro.py @@ -44,3 +44,62 @@ def test_distro_from_os_release_file(self): expected = test_file + '-distro-expected.json' result = Distro.from_os_release_file(test_file).to_dict() check_expected(result, expected, regen=False) + + def test_distro_from_os_release_returns_None_on_empty_or_missing_location(self): + assert Distro.from_os_release_file('') is None + assert Distro.from_os_release_file(None) is None + assert Distro.from_os_release_file('THIS/dir/does/exists') is None + try: + assert Distro.from_os_release_file(__file__) is None + self.fail('An exception should be raised.') + except: + pass + + def test_distro_from_rootfs_returns_None_on_empty_or_missing_location(self): + assert Distro.from_rootfs('') is None + assert Distro.from_rootfs(None) is None + assert Distro.from_rootfs('THIS/dir/does/exists') is None + + def test_distro_from_rootfs_returns_a_distro_even_if_not_found(self): + not_a_rootfs = os.path.dirname(__file__) + distro = Distro.from_rootfs(not_a_rootfs) + # all distro attributes should be empty + assert not distro + + def test_distro_from_rootfs_return_None_if_base_distro_not_found(self): + base = Distro(os='freebsd', architecture='amd64') + not_a_rootfs = os.path.dirname(__file__) + distro = Distro.from_rootfs(not_a_rootfs, base_distro=base) + assert distro is None + + def test_distro_does_not_default_to_linux(self): + # we want to ensure that no attributes values contains linux by default + distro = repr(Distro().to_dict().values()).lower() + assert 'linux' not in distro + + def test_distro_from_rootfs_detects_windows(self): + test_dir = self.extract_test_tar('distro/windows-container-rootfs.tar') + distro = Distro.from_rootfs(test_dir) + expected = {'identifier': 'windows', 'os': 'windows'} + results = {k: v for k, v in sorted(distro.to_dict().items()) if v} + assert results == expected + + def test_distro_from_rootfs_has_base_distro_merged(self): + base = Distro(os='windows', architecture='amd64') + test_dir = self.extract_test_tar('distro/windows-container-rootfs.tar') + distro = Distro.from_rootfs(test_dir, base_distro=base) + expected = { + 'architecture': 'amd64', + 'identifier': 'windows', + 'os': 'windows', + } + results = {k: v for k, v in sorted(distro.to_dict().items()) if v} + assert results == expected + + def test_distro_from_rootfs_raise_exception_if_different_base_distro_os(self): + base = Distro(os='freebsd') + test_dir = self.extract_test_tar('distro/windows-container-rootfs.tar') + try: + Distro.from_rootfs(test_dir, base_distro=base) + except Exception as e: + assert str(e) == 'Inconsistent base distro OS: freebsd and found distro OS : windows' diff --git a/tests/test_image.py b/tests/test_image.py index 0af5663..170bb02 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -65,6 +65,7 @@ def test_Image_get_images_from_tarball_windows(self): extracted_location=extract_dir, verify=False, )[0] + layer_extracted_location = self.get_temp_dir() image.extract_layers(extracted_location=layer_extracted_location) image.get_and_set_distro() From 3053fae078062d410eca2780dfd879cdc2bf91cc Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 4 Jun 2021 15:13:10 +0200 Subject: [PATCH 07/13] Use standardized comment headers and notices Signed-off-by: Philippe Ombredanne --- NOTICE | 3 ++- src/container_inspector/__init__.py | 15 +++++---------- src/container_inspector/cli.py | 15 +++++---------- src/container_inspector/distro.py | 15 +++++---------- src/container_inspector/dockerfile.py | 15 +++++---------- src/container_inspector/image.py | 15 +++++---------- src/container_inspector/rootfs.py | 15 +++++---------- src/container_inspector/utils.py | 15 +++++---------- tests/test_cli.py | 15 +++++---------- tests/test_distro.py | 15 +++++---------- tests/test_dockerfile.py | 15 +++++---------- tests/test_image.py | 15 +++++---------- tests/test_rootfs.py | 15 +++++---------- tests/utilities.py | 15 +++++---------- 14 files changed, 67 insertions(+), 131 deletions(-) diff --git a/NOTICE b/NOTICE index 65936b2..12de513 100644 --- a/NOTICE +++ b/NOTICE @@ -2,7 +2,8 @@ # Copyright (c) nexB Inc. and others. # SPDX-License-Identifier: Apache-2.0 # -# Visit https://aboutcode.org and https://github.com/nexB/ for support and download. +# Visit https://aboutcode.org and https://github.com/nexB/container-inspector +# for support and download. # ScanCode is a trademark of nexB Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/container_inspector/__init__.py b/src/container_inspector/__init__.py index deffe27..632f0d3 100644 --- a/src/container_inspector/__init__.py +++ b/src/container_inspector/__init__.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import re diff --git a/src/container_inspector/cli.py b/src/container_inspector/cli.py index 9385485..916c636 100755 --- a/src/container_inspector/cli.py +++ b/src/container_inspector/cli.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import csv as csv_module import json as json_module diff --git a/src/container_inspector/distro.py b/src/container_inspector/distro.py index 9ff6225..8defda8 100755 --- a/src/container_inspector/distro.py +++ b/src/container_inspector/distro.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import logging import os diff --git a/src/container_inspector/dockerfile.py b/src/container_inspector/dockerfile.py index 26260d6..23c3b0d 100755 --- a/src/container_inspector/dockerfile.py +++ b/src/container_inspector/dockerfile.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import logging import operator diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index 0f36fcc..4d6d2c0 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import logging import os diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index 593d75e..c35a0ca 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import logging import os diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index f496175..32aa4b2 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import json import logging diff --git a/tests/test_cli.py b/tests/test_cli.py index 4e52b1c..237f38f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import os import json diff --git a/tests/test_distro.py b/tests/test_distro.py index 280c3ec..d512f81 100644 --- a/tests/test_distro.py +++ b/tests/test_distro.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import os diff --git a/tests/test_dockerfile.py b/tests/test_dockerfile.py index 644d30b..e5937c9 100644 --- a/tests/test_dockerfile.py +++ b/tests/test_dockerfile.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import os diff --git a/tests/test_image.py b/tests/test_image.py index 170bb02..f355dbe 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. from os import path diff --git a/tests/test_rootfs.py b/tests/test_rootfs.py index d3a7a5d..0bad7d3 100644 --- a/tests/test_rootfs.py +++ b/tests/test_rootfs.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import os diff --git a/tests/utilities.py b/tests/utilities.py index 534197d..24a20f8 100644 --- a/tests/utilities.py +++ b/tests/utilities.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import json import os From 2626f6b648e9045fd6f9673ed9c08ba861beea12 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 4 Jun 2021 15:13:21 +0200 Subject: [PATCH 08/13] Improved README Signed-off-by: Philippe Ombredanne --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index f13fc70..685bcd1 100644 --- a/README.rst +++ b/README.rst @@ -77,4 +77,5 @@ Related tools ------------- - Fetching Image from remote registry is available in ScanCode.io - Extracting VM Image filesystems as archives is available in ExtractCode + - Scanning for application and system packages is available in ScanCode Toolkit From 1d19406a231be4c3faf0c3da4692f6be928ac3aa Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 4 Jun 2021 15:18:06 +0200 Subject: [PATCH 09/13] Correct setuptools alias Signed-off-by: Philippe Ombredanne --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 789e4d2..c0eb6b3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,4 +62,4 @@ docs= [aliases] -release = register clean --all sdist bdist_wheel \ No newline at end of file +release = clean --all sdist bdist_wheel \ No newline at end of file From 988aa8d1aee18e6771ca29f7c88339a3821933d1 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 4 Jun 2021 15:19:41 +0200 Subject: [PATCH 10/13] Add base dev deps to setup Signed-off-by: Philippe Ombredanne --- requirements_dev.txt | 4 ---- setup.cfg | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 requirements_dev.txt diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index fa87180..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,4 +0,0 @@ -pytest -# used for its tests classes -commoncode --e . diff --git a/setup.cfg b/setup.cfg index c0eb6b3..40e7104 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,6 +55,9 @@ testing = # upstream pytest >= 6 pytest-xdist >= 2 + twine + restview + docs= Sphinx>=3.3.1 sphinx-rtd-theme>=0.5.0 From d7fc37bc9e5ae18848923bd2f27d099b608f36af Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 4 Jun 2021 23:51:02 +0200 Subject: [PATCH 11/13] Add debug tracing to diagnose CI failures Signed-off-by: Philippe Ombredanne --- .travis.yml | 2 +- src/container_inspector/distro.py | 36 +++++++++++++++++++++----- src/container_inspector/image.py | 42 +++++++++++++++++++++++-------- src/container_inspector/rootfs.py | 14 ++++++++--- src/container_inspector/utils.py | 12 ++++++--- 5 files changed, 82 insertions(+), 24 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1a90a38..02a1161 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,4 @@ python: install: ./configure --dev # Scripts to run at script stage -script: tmp/bin/pytest +script: tmp/bin/pytest -vvs -n2 diff --git a/src/container_inspector/distro.py b/src/container_inspector/distro.py index 8defda8..b0e8e99 100755 --- a/src/container_inspector/distro.py +++ b/src/container_inspector/distro.py @@ -17,8 +17,9 @@ logger = logging.getLogger(__name__) # un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +import sys +logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) +logger.setLevel(logging.DEBUG) def logger_debug(*args): @@ -296,6 +297,7 @@ def from_os_release_file(cls, location): parsed """ if not location or not os.path.exists(location): + logger.debug(f'from_os_release_file: {location!r} does not exists') return data = parse_os_release(location) or {} @@ -332,6 +334,8 @@ def from_os_release_file(cls, location): if data: new_data['extra_data'] = data + logger.debug(f'from_os_release_file: new_data: {new_data!r}') + return cls(**new_data) from_file = from_os_release_file @@ -353,7 +357,10 @@ def from_rootfs(cls, location, base_distro=None): manifest) and may be missing from the rootfs proper (for instance of an /etc/os-release is missing in the rootfs for a Linux-based image). """ + logger.debug(f'from_rootfs: {location!r} base_distro: {base_distro!r}') + if not location or not os.path.exists(location): + logger.debug(f'from_rootfs: {location!r} does not exists') return finders = { @@ -363,8 +370,10 @@ def from_rootfs(cls, location, base_distro=None): } for finder_os, finder in finders.items(): + logger.debug(f'from_rootfs: trying finder_os: {finder_os!r}') found = finder(location) + logger.debug(f'from_rootfs: trying found: {found!r}') if found: if base_distro: if base_distro.os != finder_os: @@ -373,8 +382,12 @@ def from_rootfs(cls, location, base_distro=None): f'and found distro OS : {found.os}' ) - return base_distro.merge(found) + merged = base_distro.merge(found) + logger.debug(f'from_rootfs: returning merged: {merged!r}') + return merged + else: + logger.debug(f'from_rootfs: returning found: {found!r}') return found @classmethod @@ -398,7 +411,7 @@ def find_windows_details(cls, location): """ if rootfs.find_root( location, - max_depth=2, + max_depth=3, root_paths=rootfs.WINDOWS_PATHS, ): return cls(os='windows', identifier='windows',) @@ -444,6 +457,8 @@ def merge(self, other_distro): Return a new distro based on this Distro data updated with non-empty values from the ``other_distro`` Distro object. """ + logger.debug(f'merge: {self!r} with: {other_distro!r}') + existing = self.to_dict() if other_distro: other_non_empty = { @@ -451,6 +466,9 @@ def merge(self, other_distro): if v } existing.update(other_non_empty) + logger.debug(f'merge: updated data: {existing!r}') + + logger.debug(f'merge: merged data: {existing!r}') return type(self)(**existing) @@ -478,8 +496,14 @@ def parse_os_release(location): """ with open(location) as osrl: lines = (line.strip() for line in osrl) - lines = (line.partition('=') for line in lines if line and not line.startswith('#')) - return {key.strip(): ''.join(shlex.split(value)) for key, _, value in lines} + lines = ( + line.partition('=') for line in lines + if line and not line.startswith('#') + ) + return { + key.strip(): ''.join(shlex.split(value)) + for key, _, value in lines + } def get_debian_details(): diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index 4d6d2c0..2387c1d 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -23,8 +23,9 @@ logger = logging.getLogger(__name__) # un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +import sys +logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) +logger.setLevel(logging.DEBUG) def logger_debug(*args): @@ -418,6 +419,8 @@ def get_images_from_tarball( If `verify` is True, perform extra checks on the config data and layers checksums. """ + logger.debug(f'get_images_from_tarball: {archive_location} , extracting to: {extracted_location}') + Image.extract( archive_location=archive_location, extracted_location=extracted_location, @@ -442,10 +445,15 @@ def get_images_from_dir( If `verify` is True, perform extra checks on the config data and layers checksums. """ + logger.debug(f'get_images_from_dir: from {extracted_location} and archive_location: {archive_location}') + if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') image_format = Image.find_format(extracted_location) + + logger.debug(f'get_images_from_dir: image_format: {image_format}') + if image_format == 'docker': return Image.get_docker_images_from_dir( extracted_location=extracted_location, @@ -453,7 +461,7 @@ def get_images_from_dir( verify=verify, ) - if image_format == 'docker': + if image_format == 'oci': return Image.get_oci_images_from_dir( extracted_location=extracted_location, archive_location=archive_location, @@ -502,6 +510,8 @@ def get_docker_images_from_dir( .... ] """ + logger.debug(f'get_docker_images_from_dir: {extracted_location}') + if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') @@ -513,15 +523,21 @@ def get_docker_images_from_dir( manifest = load_json(manifest_loc) + logger.debug(f'get_docker_images_from_dir: manifest: {manifest}') + images = [] for manifest_config in manifest: - images.append( - Image.from_docker_manifest_config( - extracted_location=extracted_location, - archive_location=archive_location, - manifest_config=manifest_config, - verify=verify, - )) + logger.debug(f'get_docker_images_from_dir: manifest_config: {manifest_config}') + img = Image.from_docker_manifest_config( + extracted_location=extracted_location, + archive_location=archive_location, + manifest_config=manifest_config, + verify=verify, + + ) + logger.debug(f'get_docker_images_from_dir: img: {img!r}') + + images.append(img) return images @@ -596,6 +612,8 @@ def from_docker_manifest_config( } } """ + logger.debug(f'from_docker_manifest_config: manifest_config: {manifest_config!r}') + manifest_config = utils.lower_keys(manifest_config) config_file = manifest_config.get('config') or '' @@ -623,7 +641,9 @@ def from_docker_manifest_config( layer_paths = manifest_config.get('layers') or [] layers_archive_locs = [ - os.path.join(extracted_location, lp) for lp in layer_paths] + os.path.join(extracted_location, lp) + for lp in layer_paths + ] tags = manifest_config.get('repotags') or [] diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index c35a0ca..d3fd441 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -15,9 +15,9 @@ logger = logging.getLogger(__name__) # un-comment these lines to enable logging -# import sys -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +import sys +logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) +logger.setLevel(logging.DEBUG) """ Utilities to handle image and layer archives and recreate proper rootfs @@ -206,9 +206,17 @@ def find_root( `walker` is a callable that behaves the same as `os.walk() and is used for testing` """ + logger.debug( + f'find_root: {location} max_depth: {max_depth} ' + f'root_paths: {root_paths}, min_paths: {min_paths}' + ) for depth, (top, dirs, files) in enumerate(walker(location), 1): + logger.debug(f'find_root: depth={depth!r}, top={top!r} dirs={dirs!r} files={files!r}') matches = len(set(dirs + files) & root_paths) + logger.debug(f'find_root: top {top!r} matches: {matches}') if matches >= min_paths: + logger.debug(f'find_root: matches >= min_paths: returning {top!r}') return top if max_depth and depth == max_depth: + logger.debug(f'find_root: max_depth={max_depth!r}, depth={depth!r} returning None') return diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index 32aa4b2..3e3ef6f 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -17,8 +17,9 @@ logger = logging.getLogger(__name__) # un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +import sys +logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) +logger.setLevel(logging.DEBUG) def load_json(location): @@ -104,14 +105,19 @@ def extract_tar_keeping_symlinks(location, target_dir): Do not preserve the permissions and owners. Raise exceptions on possible problematic relative paths. """ - fileutils.create_dir(target_dir) import tarfile + logger.debug(f'extract_tar_keeping_symlinks: {location} to {target_dir}') + + fileutils.create_dir(target_dir) + + with tarfile.open(location) as tarball: # never extract character device, block and fifo files: # we extract dirs, files and links only for tinfo in tarball: if tinfo.isdev(): continue + logger.debug(f'extract_tar_keeping_symlinks: {tinfo}') tarball.extract( member=tinfo, path=target_dir, From 858189ee945efb481f3b7c75df9c417cc0099804 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sat, 5 Jun 2021 10:34:43 +0200 Subject: [PATCH 12/13] Improve path depth computation Add separate function and tests Signed-off-by: Philippe Ombredanne --- src/container_inspector/rootfs.py | 63 +++++++++++++++++++++++-------- tests/test_rootfs.py | 15 +++++++- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index d3fd441..4681c52 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -12,6 +12,7 @@ from commoncode.fileutils import copytree from commoncode.fileutils import delete +from commoncode.paths import split logger = logging.getLogger(__name__) # un-comment these lines to enable logging @@ -188,6 +189,24 @@ def find_whiteouts(root_location, walker=os.walk): ]) +def compute_path_depth(root_path, dir_path): + """ + Compute the depth of ``dir_path`` below ``root_path`` as the number of paths + segments that extend below the root. + """ + if not dir_path: + return 0 + dir_path = dir_path.strip('/') + + if not root_path: + return len(split(dir_path)) + + root_path = root_path.strip('/') + + suffix = dir_path[len(root_path):] + return len(split(suffix)) + + def find_root( location, max_depth=3, @@ -197,26 +216,38 @@ def find_root( ): """ Return the first likely location of the root of a filesystem found in the - `location` directory and looking down up to `max_depth` directory levels - deep below the location directory. If `max_depth` == 0, look at full depth. - Search for well known directories listed in the `root_paths` set. A root - directory is return as found if at least `min_paths` exists as filenames or - directories under it. + ``location`` directory and below up and including to ``max_depth`` directory + levels deep below the ``location`` root directory. - `walker` is a callable that behaves the same as `os.walk() and is used - for testing` + If ``max_depth`` == 0, look at full depth. + + Search for well known directories listed in the ``root_paths`` set. A root + directory is returned as found if at least ``min_paths`` exists as filenames + or directories under it. + + ``walker`` is a callable behaving like ``os.walk()`` and is used for testing. """ logger.debug( - f'find_root: {location} max_depth: {max_depth} ' - f'root_paths: {root_paths}, min_paths: {min_paths}' + f'find_root: location={location!r}, max_depth={max_depth!r}, ' + f'root_paths={root_paths!r}, min_paths={min_paths!r}' ) - for depth, (top, dirs, files) in enumerate(walker(location), 1): - logger.debug(f'find_root: depth={depth!r}, top={top!r} dirs={dirs!r} files={files!r}') + depth = 0 + for top, dirs, files in walker(location): + logger.debug(f' find_root: top={top!r}, dirs={dirs!r}, files={files!r}') + if max_depth: + depth = compute_path_depth(location, top) + logger.debug(f' find_root: top depth={depth!r}') + if depth > max_depth: + logger.debug( + f' find_root: max_depth={max_depth!r}, ' + f'depth={depth!r} returning None') + return + matches = len(set(dirs + files) & root_paths) - logger.debug(f'find_root: top {top!r} matches: {matches}') + logger.debug(f' find_root: top={top!r}, matches={matches!r}') + if matches >= min_paths: - logger.debug(f'find_root: matches >= min_paths: returning {top!r}') + logger.debug(f' find_root: matches >= min_paths: returning {top!r}') return top - if max_depth and depth == max_depth: - logger.debug(f'find_root: max_depth={max_depth!r}, depth={depth!r} returning None') - return + + logger.debug(f'find_root: noting found: returning None') diff --git a/tests/test_rootfs.py b/tests/test_rootfs.py index 0bad7d3..ee8e6af 100644 --- a/tests/test_rootfs.py +++ b/tests/test_rootfs.py @@ -205,7 +205,7 @@ def test_rootfs_does_respects_max_depth(self): assert not rootfs.find_root(test_dir, max_depth=1) assert not rootfs.find_root(test_dir, max_depth=2) assert not rootfs.find_root(test_dir, max_depth=3) - assert not rootfs.find_root(test_dir, max_depth=4) + assert rootfs.find_root(test_dir, max_depth=4).endswith('level1/level2/level3') expected = '/find_root/level1/level2/level3' found = rootfs.find_root(test_dir, max_depth=5) @@ -216,3 +216,16 @@ def test_rootfs_does_respects_max_depth(self): found = rootfs.find_root(os.path.join(test_dir, 'find_root'), max_depth=4) assert found.replace(test_dir, '') == expected + + def test_rootfs_compute_path_depth(self): + assert rootfs.compute_path_depth(None, None) == 0 + assert rootfs.compute_path_depth('', '') == 0 + assert rootfs.compute_path_depth(None, 'foo') == 1 + assert rootfs.compute_path_depth('foo', None) == 0 + assert rootfs.compute_path_depth('/root', '/root/find_root') == 1 + assert rootfs.compute_path_depth('/root', '/root/one/2/') == 2 + assert rootfs.compute_path_depth('/root/', '/root/one/2/') == 2 + assert rootfs.compute_path_depth('root/', '/root/one/2') == 2 + assert rootfs.compute_path_depth('root/', '/root/') == 0 + assert rootfs.compute_path_depth('root/', '/root/') == 0 + assert rootfs.compute_path_depth('root/', '/root/1/2/3/4') == 4 From 1a43d8105245a9081f0eb006dd5e09bc95d42a65 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sat, 5 Jun 2021 11:46:05 +0200 Subject: [PATCH 13/13] Guard debug tracing with a TRACE flag Signed-off-by: Philippe Ombredanne --- src/container_inspector/cli.py | 13 +++++----- src/container_inspector/distro.py | 34 ++++++++++++------------ src/container_inspector/dockerfile.py | 16 +++++++----- src/container_inspector/image.py | 29 ++++++++++----------- src/container_inspector/rootfs.py | 37 ++++++++++++++------------- src/container_inspector/utils.py | 13 +++++----- 6 files changed, 72 insertions(+), 70 deletions(-) diff --git a/src/container_inspector/cli.py b/src/container_inspector/cli.py index 916c636..56e2c5a 100755 --- a/src/container_inspector/cli.py +++ b/src/container_inspector/cli.py @@ -6,13 +6,13 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import csv as csv_module -import json as json_module import logging import os -from os import path import sys import tempfile +import csv as csv_module +import json as json_module +from os import path import click @@ -20,10 +20,11 @@ from container_inspector import dockerfile from container_inspector import rootfs +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +if TRACE: + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) @click.command() diff --git a/src/container_inspector/distro.py b/src/container_inspector/distro.py index b0e8e99..a5e0ee9 100755 --- a/src/container_inspector/distro.py +++ b/src/container_inspector/distro.py @@ -15,15 +15,15 @@ from container_inspector import rootfs +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -import sys -logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -logger.setLevel(logging.DEBUG) -def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Utilities to detect the "distro" of a root filesystem (be it a VM or rootfs @@ -297,7 +297,7 @@ def from_os_release_file(cls, location): parsed """ if not location or not os.path.exists(location): - logger.debug(f'from_os_release_file: {location!r} does not exists') + if TRACE: logger.debug(f'from_os_release_file: {location!r} does not exists') return data = parse_os_release(location) or {} @@ -334,7 +334,7 @@ def from_os_release_file(cls, location): if data: new_data['extra_data'] = data - logger.debug(f'from_os_release_file: new_data: {new_data!r}') + if TRACE: logger.debug(f'from_os_release_file: new_data: {new_data!r}') return cls(**new_data) @@ -357,10 +357,10 @@ def from_rootfs(cls, location, base_distro=None): manifest) and may be missing from the rootfs proper (for instance of an /etc/os-release is missing in the rootfs for a Linux-based image). """ - logger.debug(f'from_rootfs: {location!r} base_distro: {base_distro!r}') + if TRACE: logger.debug(f'from_rootfs: {location!r} base_distro: {base_distro!r}') if not location or not os.path.exists(location): - logger.debug(f'from_rootfs: {location!r} does not exists') + if TRACE: logger.debug(f'from_rootfs: {location!r} does not exists') return finders = { @@ -370,10 +370,10 @@ def from_rootfs(cls, location, base_distro=None): } for finder_os, finder in finders.items(): - logger.debug(f'from_rootfs: trying finder_os: {finder_os!r}') + if TRACE: logger.debug(f'from_rootfs: trying finder_os: {finder_os!r}') found = finder(location) - logger.debug(f'from_rootfs: trying found: {found!r}') + if TRACE: logger.debug(f'from_rootfs: trying found: {found!r}') if found: if base_distro: if base_distro.os != finder_os: @@ -383,11 +383,11 @@ def from_rootfs(cls, location, base_distro=None): ) merged = base_distro.merge(found) - logger.debug(f'from_rootfs: returning merged: {merged!r}') + if TRACE: logger.debug(f'from_rootfs: returning merged: {merged!r}') return merged else: - logger.debug(f'from_rootfs: returning found: {found!r}') + if TRACE: logger.debug(f'from_rootfs: returning found: {found!r}') return found @classmethod @@ -457,7 +457,7 @@ def merge(self, other_distro): Return a new distro based on this Distro data updated with non-empty values from the ``other_distro`` Distro object. """ - logger.debug(f'merge: {self!r} with: {other_distro!r}') + if TRACE: logger.debug(f'merge: {self!r} with: {other_distro!r}') existing = self.to_dict() if other_distro: @@ -466,9 +466,9 @@ def merge(self, other_distro): if v } existing.update(other_non_empty) - logger.debug(f'merge: updated data: {existing!r}') + if TRACE: logger.debug(f'merge: updated data: {existing!r}') - logger.debug(f'merge: merged data: {existing!r}') + if TRACE: logger.debug(f'merge: merged data: {existing!r}') return type(self)(**existing) diff --git a/src/container_inspector/dockerfile.py b/src/container_inspector/dockerfile.py index 23c3b0d..7b6a851 100755 --- a/src/container_inspector/dockerfile.py +++ b/src/container_inspector/dockerfile.py @@ -8,15 +8,17 @@ import logging import operator +import os from os import path import dockerfile_parse -import os +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Analysis helper for Docker Dockerfiles. @@ -32,7 +34,7 @@ def get_dockerfile(location): if not 'Dockerfile' in fn: return {} - logger.debug('Found Dockerfile at: %(location)r' % locals()) + if TRACE: logger.debug('Found Dockerfile at: %(location)r' % locals()) try: # TODO: keep comments instead of ignoring them: @@ -52,7 +54,7 @@ def get_dockerfile(location): df_data['instructions'].append(entry) return {location: df_data} except: - logger.debug('Error parsing Dockerfile at: %(location)r' % locals()) + if TRACE: logger.debug('Error parsing Dockerfile at: %(location)r' % locals()) return {} @@ -80,7 +82,7 @@ def collect_dockerfiles(location): for top, dirs, files in os.walk(location): for f in files: dfiles.update(get_dockerfile(path.join(top, f))) - logger.debug('collect_dockerfiles: %(dfiles)r' % locals()) + if TRACE: logger.debug('collect_dockerfiles: %(dfiles)r' % locals()) return dfiles diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index 2387c1d..9be77eb 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -21,15 +21,12 @@ from container_inspector.utils import load_json from container_inspector.utils import sha256_digest +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -import sys -logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -logger.setLevel(logging.DEBUG) - - -def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Objects to handle Docker and OCI images and Layers. @@ -419,7 +416,7 @@ def get_images_from_tarball( If `verify` is True, perform extra checks on the config data and layers checksums. """ - logger.debug(f'get_images_from_tarball: {archive_location} , extracting to: {extracted_location}') + if TRACE: logger.debug(f'get_images_from_tarball: {archive_location} , extracting to: {extracted_location}') Image.extract( archive_location=archive_location, @@ -445,14 +442,14 @@ def get_images_from_dir( If `verify` is True, perform extra checks on the config data and layers checksums. """ - logger.debug(f'get_images_from_dir: from {extracted_location} and archive_location: {archive_location}') + if TRACE: logger.debug(f'get_images_from_dir: from {extracted_location} and archive_location: {archive_location}') if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') image_format = Image.find_format(extracted_location) - logger.debug(f'get_images_from_dir: image_format: {image_format}') + if TRACE: logger.debug(f'get_images_from_dir: image_format: {image_format}') if image_format == 'docker': return Image.get_docker_images_from_dir( @@ -510,7 +507,7 @@ def get_docker_images_from_dir( .... ] """ - logger.debug(f'get_docker_images_from_dir: {extracted_location}') + if TRACE: logger.debug(f'get_docker_images_from_dir: {extracted_location}') if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') @@ -523,11 +520,11 @@ def get_docker_images_from_dir( manifest = load_json(manifest_loc) - logger.debug(f'get_docker_images_from_dir: manifest: {manifest}') + if TRACE: logger.debug(f'get_docker_images_from_dir: manifest: {manifest}') images = [] for manifest_config in manifest: - logger.debug(f'get_docker_images_from_dir: manifest_config: {manifest_config}') + if TRACE: logger.debug(f'get_docker_images_from_dir: manifest_config: {manifest_config}') img = Image.from_docker_manifest_config( extracted_location=extracted_location, archive_location=archive_location, @@ -535,7 +532,7 @@ def get_docker_images_from_dir( verify=verify, ) - logger.debug(f'get_docker_images_from_dir: img: {img!r}') + if TRACE: logger.debug(f'get_docker_images_from_dir: img: {img!r}') images.append(img) @@ -612,7 +609,7 @@ def from_docker_manifest_config( } } """ - logger.debug(f'from_docker_manifest_config: manifest_config: {manifest_config!r}') + if TRACE: logger.debug(f'from_docker_manifest_config: manifest_config: {manifest_config!r}') manifest_config = utils.lower_keys(manifest_config) diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index 4681c52..6fdbb55 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -14,11 +14,12 @@ from commoncode.fileutils import delete from commoncode.paths import split +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -import sys -logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -logger.setLevel(logging.DEBUG) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Utilities to handle image and layer archives and recreate proper rootfs @@ -64,7 +65,7 @@ def rebuild_rootfs(img, target_dir): deletions = [] for layer_num, layer in enumerate(img.layers): - logger.debug( + if TRACE: logger.debug( f'Extracting layer {layer_num} - {layer.layer_id} ' f'tarball: {layer.archive_location}' ) @@ -73,16 +74,16 @@ def rebuild_rootfs(img, target_dir): # Note that we are not preserving any special file and any file permission extracted_loc = tempfile.mkdtemp('container_inspector-docker') layer.extract(extracted_location=extracted_loc) - logger.debug(f' Extracted layer to: {extracted_loc}') + if TRACE: logger.debug(f' Extracted layer to: {extracted_loc}') # 2. find whiteouts in that layer. whiteouts = list(find_whiteouts(extracted_loc)) - logger.debug(' Merging extracted layers and applying unionfs whiteouts') - logger.debug(' Whiteouts:\n' + ' \n'.join(map(repr, whiteouts))) + if TRACE: logger.debug(' Merging extracted layers and applying unionfs whiteouts') + if TRACE: logger.debug(' Whiteouts:\n' + ' \n'.join(map(repr, whiteouts))) # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs) for whiteout_marker_loc, whiteable_path in whiteouts: - logger.debug(f' Deleting dir or file with whiteout marker: {whiteout_marker_loc}') + if TRACE: logger.debug(f' Deleting dir or file with whiteout marker: {whiteout_marker_loc}') whiteable_loc = os.path.join(target_dir, whiteable_path) delete(whiteable_loc) # also delete the whiteout marker file @@ -90,9 +91,9 @@ def rebuild_rootfs(img, target_dir): deletions.append(whiteable_loc) # 4. finall copy/overwrite the extracted layer over the WIP rootfs - logger.debug(f' Moving extracted layer from: {extracted_loc} to: {target_dir}') + if TRACE: logger.debug(f' Moving extracted layer from: {extracted_loc} to: {target_dir}') copytree(extracted_loc, target_dir) - logger.debug(f' Moved layer to: {target_dir}') + if TRACE: logger.debug(f' Moved layer to: {target_dir}') delete(extracted_loc) return deletions @@ -227,27 +228,27 @@ def find_root( ``walker`` is a callable behaving like ``os.walk()`` and is used for testing. """ - logger.debug( + if TRACE: logger.debug( f'find_root: location={location!r}, max_depth={max_depth!r}, ' f'root_paths={root_paths!r}, min_paths={min_paths!r}' ) depth = 0 for top, dirs, files in walker(location): - logger.debug(f' find_root: top={top!r}, dirs={dirs!r}, files={files!r}') + if TRACE: logger.debug(f' find_root: top={top!r}, dirs={dirs!r}, files={files!r}') if max_depth: depth = compute_path_depth(location, top) - logger.debug(f' find_root: top depth={depth!r}') + if TRACE: logger.debug(f' find_root: top depth={depth!r}') if depth > max_depth: - logger.debug( + if TRACE: logger.debug( f' find_root: max_depth={max_depth!r}, ' f'depth={depth!r} returning None') return matches = len(set(dirs + files) & root_paths) - logger.debug(f' find_root: top={top!r}, matches={matches!r}') + if TRACE: logger.debug(f' find_root: top={top!r}, matches={matches!r}') if matches >= min_paths: - logger.debug(f' find_root: matches >= min_paths: returning {top!r}') + if TRACE: logger.debug(f' find_root: matches >= min_paths: returning {top!r}') return top - logger.debug(f'find_root: noting found: returning None') + if TRACE: logger.debug(f'find_root: noting found: returning None') diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index 3e3ef6f..f54dd71 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -15,11 +15,12 @@ from extractcode.extract import extract_file +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -import sys -logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -logger.setLevel(logging.DEBUG) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) def load_json(location): @@ -106,7 +107,7 @@ def extract_tar_keeping_symlinks(location, target_dir): Raise exceptions on possible problematic relative paths. """ import tarfile - logger.debug(f'extract_tar_keeping_symlinks: {location} to {target_dir}') + if TRACE: logger.debug(f'extract_tar_keeping_symlinks: {location} to {target_dir}') fileutils.create_dir(target_dir) @@ -117,7 +118,7 @@ def extract_tar_keeping_symlinks(location, target_dir): for tinfo in tarball: if tinfo.isdev(): continue - logger.debug(f'extract_tar_keeping_symlinks: {tinfo}') + if TRACE: logger.debug(f'extract_tar_keeping_symlinks: {tinfo}') tarball.extract( member=tinfo, path=target_dir,