diff --git a/.gitignore b/.gitignore
index 38db911..78558e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 /Lib
 /pip-selfcheck.json
 /tmp
+/venv
 .Python
 /include
 /Include
diff --git a/.travis.yml b/.travis.yml
index 02a1161..18ae646 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,4 +19,4 @@ python:
 install: ./configure --dev
 
 # Scripts to run at script stage
-script: tmp/bin/pytest -vvs -n2
+script: venv/bin/pytest -vvs -n2
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 4e543a1..021ad92 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,19 @@
 Changelog
 =========
 
+
+v30.0.0
+--------
+
+This is a maintenance release with bug fixes and minor updates.
+
+- Switched back to semver from calver like other AboutCode projects.
+- Adopted the latest skeleton. With this change, the virtualenv is created
+  under venv.
+- Added a new "layer_path_segments" argument to image.Image.to_dict() to
+  report the Layer extracted locations as trimmed paths, keeping only that
+  many trailing path segments.
+
+
 v21.6.10
 --------
diff --git a/README.rst b/README.rst
index 685bcd1..c5d12d0 100644
--- a/README.rst
+++ b/README.rst
@@ -25,8 +25,8 @@ Quick start
 
 - Only runs on POSIX OSes
 - Get Python 3.6+
-- Check out a clone or download of container-inspector, then run: `./configure`.
-- Then run `tmp/bin/container-inspector -h` for help.
+- Check out a clone or download of container-inspector, then run: `./configure --dev`.
+- Then run `venv/bin/container-inspector -h` for help.
 
 
 Container image formats
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 61517b9..b811992 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -7,21 +7,13 @@
 jobs:
 
-    - template: etc/ci/azure-posix.yml
-      parameters:
-          job_name: ubuntu16_cpython
-          image_name: ubuntu-16.04
-          python_versions: ['3.6', '3.7', '3.8', '3.9']
-          test_suites:
-              all: tmp/bin/pytest -vvs
-
     - template: etc/ci/azure-posix.yml
       parameters:
           job_name: ubuntu18_cpython
           image_name: ubuntu-18.04
           python_versions: ['3.6', '3.7', '3.8', '3.9']
          test_suites:
-              all: tmp/bin/pytest -n 2 -vvs
+              all: venv/bin/pytest -n 2 -vvs
 
     - template: etc/ci/azure-posix.yml
       parameters:
@@ -29,7 +21,7 @@ jobs:
           image_name: ubuntu-20.04
           python_versions: ['3.6', '3.7', '3.8', '3.9']
           test_suites:
-              all: tmp/bin/pytest -n 2 -vvs
+              all: venv/bin/pytest -n 2 -vvs
 
     - template: etc/ci/azure-posix.yml
       parameters:
@@ -37,7 +29,7 @@ jobs:
           image_name: macos-10.14
           python_versions: ['3.6', '3.7', '3.8', '3.9']
           test_suites:
-              all: tmp/bin/pytest -n 2 -vvs
+              all: venv/bin/pytest -n 2 -vvs
 
     - template: etc/ci/azure-posix.yml
      parameters:
@@ -45,4 +37,4 @@
           image_name: macos-10.15
           python_versions: ['3.6', '3.7', '3.8', '3.9']
           test_suites:
-              all: tmp/bin/pytest -n 2 -vvs
+              all: venv/bin/pytest -n 2 -vvs
diff --git a/configure b/configure
index 25ab0ce..fdfdc85 100755
--- a/configure
+++ b/configure
@@ -11,7 +11,7 @@ set -e
 #set -x
 
 ################################
-# A configuration script to set things up: 
+# A configuration script to set things up:
 # create a virtualenv and install or update thirdparty packages.
 # Source this script for initial configuration
 # Use configure --help for details
@@ -26,16 +26,16 @@ CLI_ARGS=$1
 ################################
 # Requirement arguments passed to pip and used by default or with --dev.
-REQUIREMENTS="--editable ."
-DEV_REQUIREMENTS="--editable .[testing]"
+REQUIREMENTS="--editable . --constraint requirements.txt"
+DEV_REQUIREMENTS="--editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt"
 
 # where we create a virtualenv
-VIRTUALENV_DIR=tmp
+VIRTUALENV_DIR=venv
 
 # Cleanable files and directories with the --clean option
 CLEANABLE="
   build
-  tmp"
+  venv"
 
 # extra arguments passed to pip
 PIP_EXTRA_ARGS=" "
@@ -50,9 +50,15 @@
 VIRTUALENV_PYZ_URL=https://bootstrap.pypa.io/virtualenv.pyz
 
 CFG_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 CFG_BIN_DIR=$CFG_ROOT_DIR/$VIRTUALENV_DIR/bin
 
+# Find packages from the local thirdparty directory or from thirdparty.aboutcode.org
+if [ -d "$CFG_ROOT_DIR/thirdparty" ]; then
+    PIP_EXTRA_ARGS="--find-links $CFG_ROOT_DIR/thirdparty "
+fi
+PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS --find-links https://thirdparty.aboutcode.org/pypi"
+
 ################################
-# Set the quiet flag to empty if not defined 
+# Set the quiet flag to empty if not defined
 if [[ "$CFG_QUIET" == "" ]]; then
     CFG_QUIET=" "
 fi
@@ -63,7 +69,7 @@ fi
 # Use environment variables or a file if available.
 # Otherwise the latest Python by default.
 if [[ "$PYTHON_EXECUTABLE" == "" ]]; then
-    # check for a file named PYTHON_EXECUTABLE 
+    # check for a file named PYTHON_EXECUTABLE
     if [ -f "$CFG_ROOT_DIR/PYTHON_EXECUTABLE" ]; then
         PYTHON_EXECUTABLE=$(cat "$CFG_ROOT_DIR/PYTHON_EXECUTABLE")
     else
@@ -78,10 +84,14 @@ cli_help() {
     echo "  usage: ./configure [options]"
     echo
     echo The default is to configure for regular use. Use --dev for development.
+    echo Use the --init option if starting a new project and the project
+    echo dependencies are not available on thirdparty.aboutcode.org/pypi/
+    echo and requirements.txt and/or requirements-dev.txt have not been generated.
     echo
     echo The options are:
     echo " --clean: clean built and installed files and exit."
     echo " --dev: configure the environment for development."
+    echo " --init: pull dependencies from PyPI. Used when first setting up a project."
     echo " --help: display this help message and exit."
     echo
     echo By default, the python interpreter version found in the path is used.
@@ -120,7 +130,7 @@ create_virtualenv() {
         VIRTUALENV_PYZ="$CFG_ROOT_DIR/etc/thirdparty/virtualenv.pyz"
     else
         VIRTUALENV_PYZ="$CFG_ROOT_DIR/$VENV_DIR/virtualenv.pyz"
-        wget -O "$VIRTUALENV_PYZ" "$VIRTUALENV_PYZ_URL"
+        wget -O "$VIRTUALENV_PYZ" "$VIRTUALENV_PYZ_URL" 2>/dev/null || curl -o "$VIRTUALENV_PYZ" "$VIRTUALENV_PYZ_URL"
     fi
 
     $PYTHON_EXECUTABLE "$VIRTUALENV_PYZ" \
@@ -155,12 +165,22 @@ install_packages() {
 # Main command line entry point
 CFG_DEV_MODE=0
 CFG_REQUIREMENTS=$REQUIREMENTS
-
-case "$CLI_ARGS" in
-    --help) cli_help;;
-    --clean) clean;;
-    --dev) CFG_REQUIREMENTS="$DEV_REQUIREMENTS" && CFG_DEV_MODE=1;;
-esac
+NO_INDEX="--no-index"
+
+# We are using getopts to parse option arguments that start with "-"
+while getopts :-: optchar; do
+    case "${optchar}" in
+        -)
+            case "${OPTARG}" in
+                help ) cli_help;;
+                clean ) clean;;
+                dev ) CFG_REQUIREMENTS="$DEV_REQUIREMENTS" && CFG_DEV_MODE=1;;
+                init ) NO_INDEX="";;
+            esac;;
+    esac
+done
+
+PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS $NO_INDEX"
 
 create_virtualenv "$VIRTUALENV_DIR"
 install_packages "$CFG_REQUIREMENTS"
diff --git a/configure.bat b/configure.bat
new file mode 100644
index 0000000..46ed4b3
--- /dev/null
+++ b/configure.bat
@@ -0,0 +1,204 @@
+@echo OFF
+@setlocal
+
+@rem Copyright (c) nexB Inc. and others. All rights reserved.
+@rem SPDX-License-Identifier: Apache-2.0
+@rem See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+@rem See https://github.com/nexB/ for support or download.
+@rem See https://aboutcode.org for more information about nexB OSS projects.
+
+
+@rem ################################
+@rem # A configuration script to set things up:
+@rem # create a virtualenv and install or update thirdparty packages.
+@rem # Source this script for initial configuration
+@rem # Use configure --help for details
+
+@rem # This script will search for a virtualenv.pyz app in etc\thirdparty\virtualenv.pyz
+@rem # Otherwise it will download the latest from the VIRTUALENV_PYZ_URL default
+@rem ################################
+
+
+@rem ################################
+@rem # Defaults. Change these variables to customize this script
+@rem ################################
+
+@rem # Requirement arguments passed to pip and used by default or with --dev.
+set "REQUIREMENTS=--editable . --constraint requirements.txt"
+set "DEV_REQUIREMENTS=--editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt"
+
+@rem # where we create a virtualenv
+set "VIRTUALENV_DIR=venv"
+
+@rem # Cleanable files and directories to delete with the --clean option
+set "CLEANABLE=build venv"
+
+@rem # extra arguments passed to pip
+set "PIP_EXTRA_ARGS= "
+
+@rem # the URL to download virtualenv.pyz if needed
+set VIRTUALENV_PYZ_URL=https://bootstrap.pypa.io/virtualenv.pyz
+@rem ################################
+
+
+@rem ################################
+@rem # Current directory where this script lives
+set CFG_ROOT_DIR=%~dp0
+set "CFG_BIN_DIR=%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts"
+
+
+@rem ################################
+@rem # Thirdparty package locations and index handling
+if exist "%CFG_ROOT_DIR%\thirdparty" (
+    set "PIP_EXTRA_ARGS=--find-links %CFG_ROOT_DIR%\thirdparty "
+)
+
+set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS% --find-links https://thirdparty.aboutcode.org/pypi"
+@rem ################################
+
+
+@rem ################################
+@rem # Set the quiet flag to empty if not defined
+if not defined CFG_QUIET (
+    set "CFG_QUIET= "
+)
+
+
+@rem ################################
+@rem # Main command line entry point
+set CFG_DEV_MODE=0
+set "CFG_REQUIREMENTS=%REQUIREMENTS%"
+set "NO_INDEX=--no-index"
+
+:again
+if not "%1" == "" (
+    if "%1" EQU "--help" (goto cli_help)
+    if "%1" EQU "--clean" (goto clean)
+    if "%1" EQU "--dev" (
+        set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%"
+        set CFG_DEV_MODE=1
+    )
+    if "%1" EQU "--init" (
+        set "NO_INDEX= "
+    )
+    shift
+    goto again
+)
+
+set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS% %NO_INDEX%"
+
+
+@rem ################################
+@rem # find a proper Python to run
+@rem # Use environment variables or a file if available.
+@rem # Otherwise the latest Python by default.
+if not defined PYTHON_EXECUTABLE (
+    @rem # check for a file named PYTHON_EXECUTABLE
+    if exist "%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" (
+        set /p PYTHON_EXECUTABLE=<"%CFG_ROOT_DIR%\PYTHON_EXECUTABLE"
+    ) else (
+        set "PYTHON_EXECUTABLE=py"
+    )
+)
+
+:create_virtualenv
+@rem # create a virtualenv for Python
+@rem # Note: we do not use the bundled Python 3 "venv" because its behavior and
+@rem # presence is not consistent across Linux distros and sometimes pip is not
+@rem # included either by default. The virtualenv.pyz app cures all these issues.
+
+if not exist "%CFG_BIN_DIR%\python.exe" (
+    if not exist "%CFG_BIN_DIR%" (
+        mkdir %CFG_BIN_DIR%
+    )
+
+    if exist "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" (
+        %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ^
+            --wheel embed --pip embed --setuptools embed ^
+            --seeder pip ^
+            --never-download ^
+            --no-periodic-update ^
+            --no-vcs-ignore ^
+            %CFG_QUIET% ^
+            %CFG_ROOT_DIR%\%VIRTUALENV_DIR%
+    ) else (
+        if not exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" (
+            curl -o "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" %VIRTUALENV_PYZ_URL%
+
+            if %ERRORLEVEL% neq 0 (
+                exit /b %ERRORLEVEL%
+            )
+        )
+        %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ^
+            --wheel embed --pip embed --setuptools embed ^
+            --seeder pip ^
+            --never-download ^
+            --no-periodic-update ^
+            --no-vcs-ignore ^
+            %CFG_QUIET% ^
+            %CFG_ROOT_DIR%\%VIRTUALENV_DIR%
+    )
+)
+
+if %ERRORLEVEL% neq 0 (
+    exit /b %ERRORLEVEL%
+)
+
+
+:install_packages
+@rem # install requirements in virtualenv
+@rem # note: --no-build-isolation means that pip/wheel/setuptools will not
+@rem # be reinstalled a second time and reused from the virtualenv and this
+@rem # speeds up the installation.
+@rem # We always have the PEP517 build dependencies installed already.
+
+%CFG_BIN_DIR%\pip install ^
+    --upgrade ^
+    --no-build-isolation ^
+    %CFG_QUIET% ^
+    %PIP_EXTRA_ARGS% ^
+    %CFG_REQUIREMENTS%
+
+@rem # Create a junction to Scripts named bin to have the same directory layout on Linux and Windows
+mklink /J %CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin %CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts
+
+if %ERRORLEVEL% neq 0 (
+    exit /b %ERRORLEVEL%
+)
+
+exit /b 0
+
+
+@rem ################################
+
+:cli_help
+    echo An initial configuration script
+    echo "  usage: configure [options]"
+    echo " "
+    echo The default is to configure for regular use. Use --dev for development.
+    echo Use the --init option if starting a new project and the project
+    echo dependencies are not available on thirdparty.aboutcode.org/pypi/
+    echo and requirements.txt and/or requirements-dev.txt have not been generated.
+    echo " "
+    echo The options are:
+    echo " --clean: clean built and installed files and exit."
+    echo " --dev: configure the environment for development."
+    echo " --init: pull dependencies from PyPI. Used when first setting up a project."
+    echo " --help: display this help message and exit."
+    echo " "
+    echo By default, the python interpreter version found in the path is used.
+    echo Alternatively, the PYTHON_EXECUTABLE environment variable can be set to
+    echo configure another Python executable interpreter to use. If this is not
+    echo set, a file named PYTHON_EXECUTABLE containing a single line with the
+    echo path of the Python executable to use will be checked last.
+    exit /b 0
+
+
+:clean
+@rem # Remove cleanable files and directories from the root dir.
+echo "* Cleaning ..."
+for %%F in (%CLEANABLE%) do (
+    rmdir /s /q "%CFG_ROOT_DIR%\%%F" >nul 2>&1
+    del /f /q "%CFG_ROOT_DIR%\%%F" >nul 2>&1
+)
+exit /b 0
diff --git a/docs/skeleton-usage.rst b/docs/skeleton-usage.rst
new file mode 100644
index 0000000..7d16259
--- /dev/null
+++ b/docs/skeleton-usage.rst
@@ -0,0 +1,157 @@
+Usage
+=====
+A brand new project
+-------------------
+.. code-block:: bash
+
+    git init my-new-repo
+    cd my-new-repo
+    git pull git@github.com:nexB/skeleton
+
+    # Create the new repo on GitHub, then update your remote
+    git remote set-url origin git@github.com:nexB/your-new-repo.git
+
+From here, you can make the appropriate changes to the files for your specific
+project.
+
+Update an existing project
+---------------------------
+.. code-block:: bash
+
+    cd my-existing-project
+    git remote add skeleton git@github.com:nexB/skeleton
+    git fetch skeleton
+    git merge skeleton/main --allow-unrelated-histories
+
+This is also the workflow to use when updating the skeleton files in any given
+repository.
+
+Customizing
+-----------
+
+You typically want to perform these customizations:
+
+- remove or update the src/README.rst and tests/README.rst files
+- set project info and dependencies in setup.cfg
+- check the configure and configure.bat defaults
+
+Initializing a project
+----------------------
+
+All projects using the skeleton will be expected to pull all of their
+dependencies from thirdparty.aboutcode.org/pypi or the local thirdparty
+directory, using requirements.txt and/or requirements-dev.txt to determine what
+version of a package to collect. By default, PyPI will not be used to find and
+collect packages.
+
+When starting a new project that does not yet have requirements.txt and
+requirements-dev.txt and whose dependencies are not yet on
+thirdparty.aboutcode.org/pypi, run the following command after adding and
+customizing the skeleton files in your project:
+
+.. code-block:: bash
+
+    ./configure --init
+
+This will initialize the virtual environment for the project, pull in the
+dependencies from PyPI and add them to the virtual environment.
+
+Generating requirements.txt and requirements-dev.txt
+----------------------------------------------------
+
+After the project has been initialized, we can generate the requirements.txt and
+requirements-dev.txt files.
+
+Ensure the virtual environment is activated.
+
+.. code-block:: bash
+
+    source venv/bin/activate
+
+To generate requirements.txt:
+
+.. code-block:: bash
+
+    python etc/scripts/gen_requirements.py -s venv/lib/python<version>/site-packages/
+
+Replace ``<version>`` with the version number of the Python being used, for
+example: ``venv/lib/python3.6/site-packages/``
+
+To generate requirements-dev.txt after requirements.txt has been generated:
+
+.. code-block:: bash
+
+    ./configure --init --dev
+    python etc/scripts/gen_requirements_dev.py -s venv/lib/python<version>/site-packages/
+
+Note: on Windows, the ``site-packages`` directory is located at
+``venv\Lib\site-packages\``:
+
+.. code-block:: bash
+
+    python .\\etc\\scripts\\gen_requirements.py -s .\\venv\\Lib\\site-packages\\
+    .\configure --init --dev
+    python .\\etc\\scripts\\gen_requirements_dev.py -s .\\venv\\Lib\\site-packages\\
+
+Collecting and generating ABOUT files for dependencies
+------------------------------------------------------
+
+Ensure that the dependencies used by ``etc/scripts/bootstrap.py`` are installed:
+
+.. code-block:: bash
+
+    pip install -r etc/scripts/requirements.txt
+
+Once we have requirements.txt and requirements-dev.txt, we can fetch the project
+dependencies as wheels and generate ABOUT files for them:
+
+.. code-block:: bash
+
+    python etc/scripts/bootstrap.py -r requirements.txt -r requirements-dev.txt --with-deps
+
+There may be issues with the generated ABOUT files, which will have to be
+corrected. You can check to see if your corrections are valid by running:
+.. code-block:: bash
+
+    python etc/scripts/check_thirdparty.py -d thirdparty
+
+Once the wheels are collected and the ABOUT files are generated and correct,
+upload them to thirdparty.aboutcode.org/pypi by placing the wheels and ABOUT
+files from the thirdparty directory into the pypi directory at
+https://github.com/nexB/thirdparty-packages
+
+
+Usage after project initialization
+----------------------------------
+
+Once the ``requirements.txt`` and ``requirements-dev.txt`` have been generated
+and the project dependencies and their ABOUT files have been uploaded to
+thirdparty.aboutcode.org/pypi, you can configure the project without using the
+``--init`` option.
+
+If the virtual env for the project becomes polluted, or you would like to remove
+it, use the ``--clean`` option:
+
+.. code-block:: bash
+
+    ./configure --clean
+
+Then you can run ``./configure`` again to set up the project virtual environment.
+
+To set up the project for development, use:
+
+.. code-block:: bash
+
+    ./configure --dev
+
+To update the project dependencies (adding, removing, updating packages, etc.),
+update the dependencies in ``setup.cfg``, then run:
+
+.. code-block:: bash
+
+    ./configure --clean       # Remove existing virtual environment
+    ./configure --init        # Create project virtual environment, pull in new dependencies
+    source venv/bin/activate  # Ensure virtual environment is activated
+    python etc/scripts/gen_requirements.py -s venv/lib/python<version>/site-packages/      # Regenerate requirements.txt
+    python etc/scripts/gen_requirements_dev.py -s venv/lib/python<version>/site-packages/  # Regenerate requirements-dev.txt
+    pip install -r etc/scripts/requirements.txt  # Install dependencies needed by etc/scripts/bootstrap.py
+    python etc/scripts/bootstrap.py -r requirements.txt -r requirements-dev.txt --with-deps  # Collect dependency wheels and their ABOUT files
+
+Ensure that the generated ABOUT files are valid, then take the dependency wheels
+and ABOUT files and upload them to thirdparty.aboutcode.org/pypi.
diff --git a/etc/scripts/README.rst b/etc/scripts/README.rst
new file mode 100755
index 0000000..d8b00f9
--- /dev/null
+++ b/etc/scripts/README.rst
@@ -0,0 +1,143 @@
+This directory contains the tools to manage a directory of thirdparty Python
+package sources, wheels and metadata: pin, build, update, document and publish
+to a PyPI-like repo (GitHub release).
+
+NOTE: These are tested to run ONLY on Linux.
+
+
+Thirdparty packages management scripts
+======================================
+
+Pre-requisites
+--------------
+
+* There are two run "modes":
+
+  * To generate or update pip requirement files, you need to start with a clean
+    virtualenv as instructed below (this is to avoid injecting requirements
+    specific to the tools here into the main requirements).
+
+  * For other usages, the tools here can run either in their own isolated
+    virtualenv (best) or in the main configured development virtualenv.
+    These requirements need to be installed::
+
+        pip install --requirement etc/scripts/requirements.txt
+
+TODO: we need to pin the versions of these tools
+
+
+
+Generate or update pip requirement files
+----------------------------------------
+
+Scripts
+~~~~~~~
+
+**gen_requirements.py**: create/update requirements files from currently
+installed requirements.
+
+**gen_requirements_dev.py** does the same but can subtract the main requirements
+to get the extra requirements used only in development, as sketched below.
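+
+Conceptually, that subtraction is a set difference on pinned package names. A
+minimal sketch, using hypothetical helper names (the real logic lives in
+utils_requirements and may differ)::
+
+    # Sketch only: dev pins = all installed "name==version" pins minus the
+    # names already pinned in the main requirements.txt.
+    def subtract_main_requirements(all_pins, main_requirements_file):
+        with open(main_requirements_file) as reqs:
+            main_names = {
+                line.partition('==')[0].strip().lower()
+                for line in reqs
+                if line.strip() and not line.startswith('#')
+            }
+        return [
+            pin for pin in all_pins
+            if pin.partition('==')[0].strip().lower() not in main_names
+        ]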
+
+
+Usage
+~~~~~
+
+The sequence of commands to run is:
+
+
+* Start with these to generate the main pip requirements file::
+
+    ./configure --clean
+    ./configure
+    python etc/scripts/gen_requirements.py --site-packages-dir <path to site-packages dir>
+
+* You can optionally install or update extra main requirements after the
+  ./configure step such that these are included in the generated main requirements.
+
+* Optionally, generate a development pip requirements file by running these::
+
+    ./configure --clean
+    ./configure --dev
+    python etc/scripts/gen_requirements_dev.py --site-packages-dir <path to site-packages dir>
+
+* You can optionally install or update extra dev requirements after the
+  ./configure step such that these are included in the generated dev
+  requirements.
+
+Note: we generate development requirements after the main ones because this step
+requires the main requirements.txt to be up-to-date first. See
+**gen_requirements.py and gen_requirements_dev.py** --help for details.
+
+Note: this does NOT hash requirements for now.
+
+Note: "conditional" requirements (e.g. requirements that apply only to a given
+OS or Python version) in setup.py/setup.cfg/requirements.txt are NOT yet
+supported.
+
+
+Populate a thirdparty directory with wheels, sources, .ABOUT and license files
+------------------------------------------------------------------------------
+
+Scripts
+~~~~~~~
+
+* **fetch_requirements.py** will fetch package wheels and their ABOUT, LICENSE and
+  NOTICE files to populate a local thirdparty directory strictly from our
+  remote repo, using only pinned packages listed in one or more pip
+  requirements file(s). It fetches only requirements for specific Python
+  versions and operating systems, and optionally fetches the corresponding
+  source distributions.
+
+* **publish_files.py** will upload/sync a thirdparty directory of files to our
+  remote repo. Requires a GitHub personal access token.
+
+* **build_wheels.py** will build package binary wheels for multiple OS and
+  Python versions. Optionally, wheels that contain native code are built
+  remotely. Dependent wheels are optionally included. Requires Azure credentials
+  and tokens if building wheels remotely on multiple operating systems.
+
+* **fix_thirdparty.py** will fix a thirdparty directory with a best effort to
+  add missing wheels and source archives, and to create, fetch or fix .ABOUT,
+  .NOTICE and .LICENSE files. Requires Azure credentials and tokens if
+  requesting the build of missing wheels remotely on multiple operating systems.
+
+* **check_thirdparty.py** will check a thirdparty directory for errors.
+
+* **bootstrap.py** will bootstrap a thirdparty directory from requirements
+  file(s) to add or build missing wheels and source archives and to create
+  .ABOUT, .NOTICE and .LICENSE files. Requires Azure credentials and tokens if
+  requesting the build of missing wheels remotely on multiple operating systems.
+
+
+
+Usage
+~~~~~
+
+See each command line --help option for details.
+
+* (TODO) **add_package.py** will add or update a Python package including wheels,
+  sources and ABOUT files, and this for multiple Python versions and OSes (for
+  use with upload_packages.py afterwards). You will need an Azure personal
+  access token for building binaries and an optional DejaCode API key to post
+  and fetch new package versions there.
+  TODO: explain how we use romp
+
+
+Upgrade virtualenv app
+----------------------
+
+The bundled virtualenv.pyz has to be upgraded by hand and is stored under
+etc/thirdparty.
+
+* Fetch https://github.com/pypa/get-virtualenv/raw/<version>/public/virtualenv.pyz
+  for instance https://github.com/pypa/get-virtualenv/raw/20.2.2/public/virtualenv.pyz
+  and save it to etc/thirdparty, then update the ABOUT and LICENSE files as
+  needed; see the example after this list.
+
+* This virtualenv app also contains bundled pip, wheel and setuptools that are
+  essential for the installation to work.
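+
+For example, fetching a pinned version could look like this (the 20.2.2 version
+above is only an illustration; substitute the version you actually want)::
+
+    wget -O etc/thirdparty/virtualenv.pyz \
+        https://github.com/pypa/get-virtualenv/raw/20.2.2/public/virtualenv.pyz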
+
+
+Other files
+===========
+
+The other files and scripts are test, support and utility modules used by the
+main scripts documented here.
diff --git a/etc/scripts/bootstrap.py b/etc/scripts/bootstrap.py
new file mode 100644
index 0000000..fde505b
--- /dev/null
+++ b/etc/scripts/bootstrap.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import itertools
+
+import click
+
+import utils_thirdparty
+from utils_thirdparty import Environment
+from utils_thirdparty import PypiPackage
+
+
+@click.command()
+
+@click.option('-r', '--requirements-file',
+    type=click.Path(exists=True, readable=True, path_type=str, dir_okay=False),
+    metavar='FILE',
+    multiple=True,
+    default=['requirements.txt'],
+    show_default=True,
+    help='Path to the requirements file(s) to use for thirdparty packages.',
+)
+@click.option('-d', '--thirdparty-dir',
+    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False),
+    metavar='DIR',
+    default=utils_thirdparty.THIRDPARTY_DIR,
+    show_default=True,
+    help='Path to the thirdparty directory where wheels are built and '
+         'sources, ABOUT and LICENSE files fetched.',
+)
+@click.option('-p', '--python-version',
+    type=click.Choice(utils_thirdparty.PYTHON_VERSIONS),
+    metavar='PYVER',
+    default=utils_thirdparty.PYTHON_VERSIONS,
+    show_default=True,
+    multiple=True,
+    help='Python version(s) to use for this build.',
+)
+@click.option('-o', '--operating-system',
+    type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS),
+    metavar='OS',
+    default=tuple(utils_thirdparty.PLATFORMS_BY_OS),
+    multiple=True,
+    show_default=True,
+    help='OS(ses) to use for this build: one of linux, mac or windows.',
+)
+@click.option('-l', '--latest-version',
+    is_flag=True,
+    help='Get the latest version of all packages, ignoring version specifiers.',
+)
+@click.option('--sync-dejacode',
+    is_flag=True,
+    help='Synchronize packages with DejaCode.',
+)
+@click.option('--with-deps',
+    is_flag=True,
+    help='Also include all dependent wheels.',
+)
+@click.help_option('-h', '--help')
+def bootstrap(
+    requirements_file,
+    thirdparty_dir,
+    python_version,
+    operating_system,
+    with_deps,
+    latest_version,
+    sync_dejacode,
+    build_remotely=False,
+):
+    """
+    Bootstrap a thirdparty Python packages directory from pip requirements.
+
+    Fetch or build to THIRDPARTY_DIR all the wheels and source distributions for
+    the pip ``--requirement-file`` requirements FILE(s). Build wheels compatible
+    with all the provided ``--python-version`` PYVER(s) and ``--operating-system``
+    OS(s), defaulting to all supported combinations. Create or fetch .ABOUT and
+    .LICENSE files.
+
+    Optionally ignore version specifiers and use the ``--latest-version``
+    of everything.
+
+    Sources and wheels are fetched by attempting first PyPI, then our remote
+    repository. If missing, wheels are built as needed.
+    """
+    # rename variables for clarity since these are lists
+    requirements_files = requirements_file
+    python_versions = python_version
+    operating_systems = operating_system
+
+    # create the environments we need
+    evts = itertools.product(python_versions, operating_systems)
+    environments = [Environment.from_pyver_and_os(pyv, os) for pyv, os in evts]
+
+    # collect all packages to process from requirements files
+    # this will fail with an exception if there are packages we cannot find
+
+    required_name_versions = set()
+
+    for req_file in requirements_files:
+        nvs = utils_thirdparty.load_requirements(
+            requirements_file=req_file, force_pinned=False)
+        required_name_versions.update(nvs)
+    if latest_version:
+        required_name_versions = set((name, None) for name, _ver in required_name_versions)
+
+    print(f'PROCESSING {len(required_name_versions)} REQUIREMENTS in {len(requirements_files)} FILES')
+
+    # fetch all available wheels, keep track of missing
+    # start with local, then remote, then PyPI
+
+    print('==> COLLECTING ALREADY LOCALLY AVAILABLE REQUIRED WHEELS')
+    # list of all the wheel filenames either pre-existing, fetched or built
+    # updated as we progress
+    available_wheel_filenames = []
+
+    local_packages_by_namever = {
+        (p.name, p.version): p
+        for p in utils_thirdparty.get_local_packages(directory=thirdparty_dir)
+    }
+
+    # list of (name, version, environment) not local and to fetch
+    name_version_envt_to_fetch = []
+
+    # start with a local check
+    for (name, version), envt in itertools.product(required_name_versions, environments):
+        local_pack = local_packages_by_namever.get((name, version,))
+        if local_pack:
+            supported_wheels = list(local_pack.get_supported_wheels(environment=envt))
+            if supported_wheels:
+                available_wheel_filenames.extend(w.filename for w in supported_wheels)
+                print(f'====> No fetch or build needed. '
+                      f'Local wheel already available for {name}=={version} '
+                      f'on os: {envt.operating_system} for Python: {envt.python_version}')
+                continue
+
+        name_version_envt_to_fetch.append((name, version, envt,))
+
+    print(f'==> TRYING TO FETCH #{len(name_version_envt_to_fetch)} REQUIRED WHEELS')
+
+    # list of (name, version, environment) not fetched and to build
+    name_version_envt_to_build = []
+
+    # then check if the wheel can be fetched without building, from the remote repo and PyPI
+    for name, version, envt in name_version_envt_to_fetch:
+
+        fetched_fwn = utils_thirdparty.fetch_package_wheel(
+            name=name,
+            version=version,
+            environment=envt,
+            dest_dir=thirdparty_dir,
+        )
+
+        if fetched_fwn:
+            available_wheel_filenames.append(fetched_fwn)
+        else:
+            name_version_envt_to_build.append((name, version, envt,))
+
+    # At this stage we have all the wheels we could obtain without building
+    for name, version, envt in name_version_envt_to_build:
+        print(f'====> Need to build wheels for {name}=={version} on os: '
+              f'{envt.operating_system} for Python: {envt.python_version}')
+
+    packages_and_envts_to_build = [
+        (PypiPackage(name, version), envt)
+        for name, version, envt in name_version_envt_to_build
+    ]
+
+    print(f'==> BUILDING #{len(packages_and_envts_to_build)} MISSING WHEELS')
+
+    package_envts_not_built, wheel_filenames_built = utils_thirdparty.build_missing_wheels(
+        packages_and_envts=packages_and_envts_to_build,
+        build_remotely=build_remotely,
+        with_deps=with_deps,
+        dest_dir=thirdparty_dir,
+    )
+    if wheel_filenames_built:
+        available_wheel_filenames.extend(wheel_filenames_built)
+
+    for pack, envt in package_envts_not_built:
+        print(
+            f'====> FAILED to build any wheel for {pack.name}=={pack.version} '
+            f'on os: {envt.operating_system} for Python: {envt.python_version}'
+        )
+
+    print('==> FETCHING SOURCE DISTRIBUTIONS')
+    # fetch all sources, keep track of missing
+    # This is a list of (name, version)
+    utils_thirdparty.fetch_missing_sources(dest_dir=thirdparty_dir)
+
+    print('==> FETCHING ABOUT AND LICENSE FILES')
+    utils_thirdparty.add_fetch_or_update_about_and_license_files(dest_dir=thirdparty_dir)
+
+    ############################################################################
+    if sync_dejacode:
+        print('==> SYNC WITH DEJACODE')
+        # TODO: try to fetch from DejaCode any missing ABOUT
+        # and create all missing DejaCode packages
+        pass
+
+    utils_thirdparty.find_problems(dest_dir=thirdparty_dir)
+
+
+if __name__ == '__main__':
+    bootstrap()
diff --git a/etc/scripts/build_wheels.py b/etc/scripts/build_wheels.py
new file mode 100644
index 0000000..352b705
--- /dev/null
+++ b/etc/scripts/build_wheels.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import click
+
+import utils_thirdparty
+
+
+@click.command()
+
+@click.option('-n', '--name',
+    type=str,
+    metavar='PACKAGE_NAME',
+    required=True,
+    help='Python package name to add or build.',
+)
+@click.option('-v', '--version',
+    type=str,
+    default=None,
+    metavar='VERSION',
+    help='Python package version to add or build.',
+)
+@click.option('-d', '--thirdparty-dir',
+    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False),
+    metavar='DIR',
+    default=utils_thirdparty.THIRDPARTY_DIR,
+    show_default=True,
+    help='Path to the thirdparty directory where wheels are built.',
+)
+@click.option('-p', '--python-version',
+    type=click.Choice(utils_thirdparty.PYTHON_VERSIONS),
+    metavar='PYVER',
+    default=utils_thirdparty.PYTHON_VERSIONS,
+    show_default=True,
+    multiple=True,
+    help='Python version to use for this build.',
+)
+@click.option('-o', '--operating-system',
+    type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS),
+    metavar='OS',
+    default=tuple(utils_thirdparty.PLATFORMS_BY_OS),
+    multiple=True,
+    show_default=True,
+    help='OS to use for this build: one of linux, mac or windows.',
+)
+@click.option('--build-remotely',
+    is_flag=True,
+    help='Build missing wheels remotely.',
+)
+@click.option('--with-deps',
+    is_flag=True,
+    help='Also include all dependent wheels.',
+)
+@click.option('--verbose',
+    is_flag=True,
+    help='Provide verbose output.',
+)
+@click.help_option('-h', '--help')
+def build_wheels(
+    name,
+    version,
+    thirdparty_dir,
+    python_version,
+    operating_system,
+    with_deps,
+    build_remotely,
+    verbose,
+):
+    """
+    Build to THIRDPARTY_DIR all the wheels for the Python PACKAGE_NAME and
+    optional VERSION. Build wheels compatible with all the `--python-version`
+    PYVER(s) and `--operating-system` OS(s).
+
+    Build native wheels remotely when `--build-remotely` is used, and include
+    all dependent wheels with `--with-deps`.
+    """
+    utils_thirdparty.add_or_upgrade_built_wheels(
+        name=name,
+        version=version,
+        python_versions=python_version,
+        operating_systems=operating_system,
+        dest_dir=thirdparty_dir,
+        build_remotely=build_remotely,
+        with_deps=with_deps,
+        verbose=verbose,
+    )
+
+
+if __name__ == '__main__':
+    build_wheels()
diff --git a/etc/scripts/check_thirdparty.py b/etc/scripts/check_thirdparty.py
new file mode 100644
index 0000000..e48cfce
--- /dev/null
+++ b/etc/scripts/check_thirdparty.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import click
+
+import utils_thirdparty
+
+
+@click.command()
+
+@click.option('-d', '--thirdparty-dir',
+    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False),
+    required=True,
+    help='Path to the thirdparty directory to check.',
+)
+@click.help_option('-h', '--help')
+def check_thirdparty_dir(thirdparty_dir):
+    """
+    Check a thirdparty directory for problems.
+    """
+    utils_thirdparty.find_problems(dest_dir=thirdparty_dir)
+
+
+if __name__ == '__main__':
+    check_thirdparty_dir()
diff --git a/etc/scripts/fetch_requirements.py b/etc/scripts/fetch_requirements.py
new file mode 100644
index 0000000..21de865
--- /dev/null
+++ b/etc/scripts/fetch_requirements.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import itertools
+
+import click
+
+import utils_thirdparty
+
+
+@click.command()
+
+@click.option('-r', '--requirements-file',
+    type=click.Path(exists=True, readable=True, path_type=str, dir_okay=False),
+    metavar='FILE',
+    multiple=True,
+    default=['requirements.txt'],
+    show_default=True,
+    help='Path to the requirements file to use for thirdparty packages.',
+)
+@click.option('-d', '--thirdparty-dir',
+    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False),
+    metavar='DIR',
+    default=utils_thirdparty.THIRDPARTY_DIR,
+    show_default=True,
+    help='Path to the thirdparty directory.',
+)
+@click.option('-p', '--python-version',
+    type=click.Choice(utils_thirdparty.PYTHON_VERSIONS),
+    metavar='INT',
+    multiple=True,
+    default=['36'],
+    show_default=True,
+    help='Python version to use for this build.',
+)
+@click.option('-o', '--operating-system',
+    type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS),
+    metavar='OS',
+    multiple=True,
+    default=['linux'],
+    show_default=True,
+    help='OS to use for this build: one of linux, mac or windows.',
+)
+@click.option('-s', '--with-sources',
+    is_flag=True,
+    help='Fetch the corresponding source distributions.',
+)
+@click.option('-a', '--with-about',
+    is_flag=True,
+    help='Fetch the corresponding ABOUT and LICENSE files.',
+)
+@click.option('--allow-unpinned',
+    is_flag=True,
+    help='Allow requirements without pinned versions.',
+)
+@click.option('--only-sources',
+    is_flag=True,
+    help='Fetch only the corresponding source distributions.',
+)
+@click.option('-u', '--remote-links-url',
+    type=str,
+    metavar='URL',
+    default=utils_thirdparty.REMOTE_LINKS_URL,
+    show_default=True,
+    help='URL to a PyPI-like links web site. '
+         'Or local path to a directory with wheels.',
+)
+
+@click.help_option('-h', '--help')
+def fetch_requirements(
+    requirements_file,
+    thirdparty_dir,
+    python_version,
+    operating_system,
+    with_sources,
+    with_about,
+    allow_unpinned,
+    only_sources,
+    remote_links_url=utils_thirdparty.REMOTE_LINKS_URL,
+):
+    """
+    Fetch and save to THIRDPARTY_DIR all the required wheels for pinned
+    dependencies found in the `--requirement` FILE requirements file(s). Only
+    fetch wheels compatible with the provided `--python-version` and
+    `--operating-system`.
+    Also fetch the corresponding .ABOUT, .LICENSE and .NOTICE files together
+    with a virtualenv.pyz app.
+
+    Use exclusively wheels not from PyPI but rather found in the PyPI-like
+    links repo ``remote_links_url`` if this is a URL. Treat ``remote_links_url``
+    as a local directory path to a wheels directory if it is not a URL.
+    """
+
+    # fetch wheels
+    python_versions = python_version
+    operating_systems = operating_system
+    requirements_files = requirements_file
+
+    if not only_sources:
+        envs = itertools.product(python_versions, operating_systems)
+        envs = (utils_thirdparty.Environment.from_pyver_and_os(pyv, os) for pyv, os in envs)
+
+        for env, reqf in itertools.product(envs, requirements_files):
+
+            for package, error in utils_thirdparty.fetch_wheels(
+                environment=env,
+                requirements_file=reqf,
+                allow_unpinned=allow_unpinned,
+                dest_dir=thirdparty_dir,
+                remote_links_url=remote_links_url,
+            ):
+                if error:
+                    print('Failed to fetch wheel:', package, ':', error)
+
+    # optionally fetch sources
+    if with_sources or only_sources:
+
+        for reqf in requirements_files:
+            for package, error in utils_thirdparty.fetch_sources(
+                requirements_file=reqf,
+                allow_unpinned=allow_unpinned,
+                dest_dir=thirdparty_dir,
+                remote_links_url=remote_links_url,
+            ):
+                if error:
+                    print('Failed to fetch source:', package, ':', error)
+
+    if with_about:
+        utils_thirdparty.add_fetch_or_update_about_and_license_files(dest_dir=thirdparty_dir)
+        utils_thirdparty.find_problems(
+            dest_dir=thirdparty_dir,
+            report_missing_sources=with_sources or only_sources,
+            report_missing_wheels=not only_sources,
+        )
+
+
+if __name__ == '__main__':
+    fetch_requirements()
diff --git a/etc/scripts/fix_thirdparty.py b/etc/scripts/fix_thirdparty.py
new file mode 100644
index 0000000..061d3fa
--- /dev/null
+++ b/etc/scripts/fix_thirdparty.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import click
+
+import utils_thirdparty
+
+
+@click.command()
+
+@click.option('-d', '--thirdparty-dir',
+    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False),
+    required=True,
+    help='Path to the thirdparty directory to fix.',
+)
+@click.option('--build-wheels',
+    is_flag=True,
+    help='Build all missing wheels.',
+)
+@click.option('--build-remotely',
+    is_flag=True,
+    help='Build missing wheels remotely.',
+)
+@click.help_option('-h', '--help')
+def fix_thirdparty_dir(
+    thirdparty_dir,
+    build_wheels,
+    build_remotely,
+):
+    """
+    Fix a thirdparty directory of dependent package wheels and sdists.
+
+    Multiple fixes are applied:
+    - fetch or build missing binary wheels
+    - fetch missing source distributions
+    - derive, fetch or add missing ABOUT files
+    - fetch missing .LICENSE and .NOTICE files
+    - remove outdated package versions and their ABOUT, .LICENSE and .NOTICE files
+
+    Optionally build missing binary wheels for all supported OS and Python
+    version combos locally or remotely.
+    """
+    print('***FETCH*** MISSING WHEELS')
+    package_envts_not_fetched = utils_thirdparty.fetch_missing_wheels(dest_dir=thirdparty_dir)
+
+    print('***FETCH*** MISSING SOURCES')
+    src_name_ver_not_fetched = utils_thirdparty.fetch_missing_sources(dest_dir=thirdparty_dir)
+
+    package_envts_not_built = []
+    if build_wheels:
+        print('***BUILD*** MISSING WHEELS')
+        package_envts_not_built, _wheel_filenames_built = utils_thirdparty.build_missing_wheels(
+            packages_and_envts=package_envts_not_fetched,
+            build_remotely=build_remotely,
+            dest_dir=thirdparty_dir,
+        )
+
+    print('***ADD*** ABOUT AND LICENSES')
+    utils_thirdparty.add_fetch_or_update_about_and_license_files(dest_dir=thirdparty_dir)
+
+    # report issues
+    for name, version in src_name_ver_not_fetched:
+        print(f'{name}=={version}: Failed to fetch source distribution.')
+
+    for package, envt in package_envts_not_built:
+        print(
+            f'{package.name}=={package.version}: Failed to build wheel '
+            f'on {envt.operating_system} for Python {envt.python_version}')
+
+    print('***FIND PROBLEMS***')
+    utils_thirdparty.find_problems(dest_dir=thirdparty_dir)
+
+
+if __name__ == '__main__':
+    fix_thirdparty_dir()
diff --git a/etc/scripts/gen_pypi_simple.py b/etc/scripts/gen_pypi_simple.py
new file mode 100644
index 0000000..887e407
--- /dev/null
+++ b/etc/scripts/gen_pypi_simple.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# SPDX-License-Identifier: BSD-2-Clause-Views AND MIT
+# Copyright (c) 2010 David Wolever <david@wolever.net>. All rights reserved.
+# originally from https://github.com/wolever/pip2pi
+
+import os
+import re
+import shutil
+
+from html import escape
+from pathlib import Path
+
+"""
+name: pip compatibility tags
+version: 20.3.1
+download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/models/wheel.py
+copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+license_expression: mit
+notes: the wheel name regex is copied from pip-20.3.1 pip/_internal/models/wheel.py
+
+Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+get_wheel_from_filename = re.compile(
+    r"""^(?P<namever>(?P<name>.+?)-(?P<version>.*?))
+    ((-(?P<build>\d[^-]*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)
+    \.whl)$""",
+    re.VERBOSE
+).match
+
+sdist_exts = ".tar.gz", ".tar.bz2", ".zip", ".tar.xz",
+wheel_ext = ".whl"
+app_ext = ".pyz"
+dist_exts = sdist_exts + (wheel_ext, app_ext)
+
+
+class InvalidDistributionFilename(Exception):
+    pass
+
+
+def get_package_name_from_filename(filename, normalize=True):
+    """
+    Return the package name extracted from a package ``filename``.
+    Optionally ``normalize`` the name according to distribution name rules.
+    Raise an ``InvalidDistributionFilename`` if the ``filename`` is invalid::
+
+    >>> get_package_name_from_filename("foo-1.2.3_rc1.tar.gz")
+    'foo'
+    >>> get_package_name_from_filename("foo-bar-1.2-py27-none-any.whl")
+    'foo-bar'
+    >>> get_package_name_from_filename("Cython-0.17.2-cp26-none-linux_x86_64.whl")
+    'cython'
+    >>> get_package_name_from_filename("python_ldap-2.4.19-cp27-none-macosx_10_10_x86_64.whl")
+    'python-ldap'
+    >>> get_package_name_from_filename("foo.whl")
+    Traceback (most recent call last):
+        ...
+    InvalidDistributionFilename: ...
+    >>> get_package_name_from_filename("foo.png")
+    Traceback (most recent call last):
+        ...
+    InvalidDistributionFilename: ...
+    """
+    if not filename or not filename.endswith(dist_exts):
+        raise InvalidDistributionFilename(filename)
+
+    filename = os.path.basename(filename)
+
+    if filename.endswith(sdist_exts):
+        name_ver = None
+        extension = None
+
+        for ext in sdist_exts:
+            if filename.endswith(ext):
+                name_ver, extension, _ = filename.rpartition(ext)
+                break
+
+        if not extension or not name_ver:
+            raise InvalidDistributionFilename(filename)
+
+        name, _, version = name_ver.rpartition('-')
+
+        if not (name and version):
+            raise InvalidDistributionFilename(filename)
+
+    elif filename.endswith(wheel_ext):
+
+        wheel_info = get_wheel_from_filename(filename)
+
+        if not wheel_info:
+            raise InvalidDistributionFilename(filename)
+
+        name = wheel_info.group('name')
+        version = wheel_info.group('version')
+
+        if not (name and version):
+            raise InvalidDistributionFilename(filename)
+
+    elif filename.endswith(app_ext):
+        name_ver, extension, _ = filename.rpartition(".pyz")
+
+        if "-" in filename:
+            name, _, version = name_ver.rpartition('-')
+        else:
+            name = name_ver
+
+        if not name:
+            raise InvalidDistributionFilename(filename)
+
+    if normalize:
+        name = name.lower().replace('_', '-')
+    return name
+
+
+def build_pypi_index(directory, write_index=False):
+    """
+    Using a ``directory`` of wheels and sdists, create a PyPI simple
+    directory index at ``directory``/simple/ populated with the proper PyPI
+    simple index directory structure crafted using symlinks.
+
+    WARNING: The ``directory``/simple/ directory is removed if it exists.
+    """
+
+    directory = Path(directory)
+
+    index_dir = directory / "simple"
+    if index_dir.exists():
+        shutil.rmtree(str(index_dir), ignore_errors=True)
+
+    index_dir.mkdir(parents=True)
+
+    if write_index:
+        simple_html_index = [
+            "<html><head><title>PyPI Simple Index</title>",
+            '<meta name="api-version" value="2" /></head><body>',
+        ]
+
+    package_names = set()
+    for pkg_file in directory.iterdir():
+
+        pkg_filename = pkg_file.name
+
+        if (
+            not pkg_file.is_file()
+            or not pkg_filename.endswith(dist_exts)
+            or pkg_filename.startswith(".")
+        ):
+            continue
+
+        pkg_name = get_package_name_from_filename(pkg_filename)
+        pkg_index_dir = index_dir / pkg_name
+        pkg_index_dir.mkdir(parents=True, exist_ok=True)
+        pkg_indexed_file = pkg_index_dir / pkg_filename
+        link_target = Path("../..") / pkg_filename
+        pkg_indexed_file.symlink_to(link_target)
+
+        if write_index and pkg_name not in package_names:
+            esc_name = escape(pkg_name)
+            simple_html_index.append(f'<a href="{pkg_name}">{esc_name}</a><br/>')
+            package_names.add(pkg_name)
+
+    if write_index:
+        simple_html_index.append("</body></html>")
+        index_html = index_dir / "index.html"
+        index_html.write_text("\n".join(simple_html_index))
+
+
+if __name__ == "__main__":
+    import sys
+    pkg_dir = sys.argv[1]
+    build_pypi_index(pkg_dir)
diff --git a/etc/scripts/gen_pypi_simple.py.ABOUT b/etc/scripts/gen_pypi_simple.py.ABOUT
new file mode 100644
index 0000000..4de5ded
--- /dev/null
+++ b/etc/scripts/gen_pypi_simple.py.ABOUT
@@ -0,0 +1,8 @@
+about_resource: gen_pypi_simple.py
+name: gen_pypi_simple.py
+license_expression: bsd-2-clause-views and mit
+copyright: Copyright (c) nexB Inc.
+    Copyright (c) 2010 David Wolever <david@wolever.net>
+    Copyright (c) The pip developers
+notes: Originally from https://github.com/wolever/pip2pi and modified extensively.
+    Also partially derived from pip code.
diff --git a/etc/scripts/gen_pypi_simple.py.NOTICE b/etc/scripts/gen_pypi_simple.py.NOTICE
new file mode 100644
index 0000000..6e0fbbc
--- /dev/null
+++ b/etc/scripts/gen_pypi_simple.py.NOTICE
@@ -0,0 +1,56 @@
+SPDX-License-Identifier: BSD-2-Clause-Views AND MIT
+
+Copyright (c) nexB Inc.
+Copyright (c) 2010 David Wolever <david@wolever.net>
+Copyright (c) The pip developers
+
+
+Original code: copyright 2010 David Wolever <david@wolever.net>. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the documentation
+       and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of David Wolever.
+
+
+Original code: Copyright (c) 2008-2020 The pip developers
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/etc/scripts/gen_requirements.py b/etc/scripts/gen_requirements.py
new file mode 100644
index 0000000..3be974c
--- /dev/null
+++ b/etc/scripts/gen_requirements.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import click
+import utils_requirements
+
+
+@click.command()
+
+@click.option('-s', '--site-packages-dir',
+    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False, resolve_path=True),
+    required=True,
+    metavar='DIR',
+    help='Path to the "site-packages" directory where wheels are installed such as lib/python3.6/site-packages',
+)
+@click.option('-r', '--requirements-file',
+    type=click.Path(path_type=str, dir_okay=False),
+    metavar='FILE',
+    default='requirements.txt',
+    show_default=True,
+    help='Path to the requirements file to update or create.',
+)
+@click.help_option('-h', '--help')
+def gen_requirements(site_packages_dir, requirements_file):
+    """
+    Create or replace the `--requirements-file` pip requirements FILE with all
+    Python packages found installed in `--site-packages-dir`.
+    """
+    utils_requirements.lock_requirements(
+        requirements_file=requirements_file,
+        site_packages_dir=site_packages_dir,
+    )
+
+
+if __name__ == '__main__':
+    gen_requirements()
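Conceptually, this lock step enumerates the distributions installed in the
given site-packages directory and writes one pinned "name==version" line per
package. A minimal sketch of that idea, assuming Python 3.8+ importlib.metadata
(the real implementation lives in utils_requirements.lock_requirements and may
differ)::

    # Sketch only: pin every distribution found in a site-packages directory.
    from importlib import metadata

    def lock_requirements_sketch(requirements_file, site_packages_dir):
        pins = sorted(
            f"{dist.metadata['Name']}=={dist.version}"
            for dist in metadata.distributions(path=[site_packages_dir])
        )
        with open(requirements_file, 'w') as out:
            out.write('\n'.join(pins) + '\n')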
Its requirements will be excluded ' + 'from the generated dev requirements.', +) +@click.help_option('-h', '--help') +def gen_dev_requirements(site_packages_dir, dev_requirements_file, main_requirements_file): + """ + Create or overwrite the `--dev-requirements-file` pip requirements FILE with + all Python packages found installed in `--site-packages-dir`. Exclude + package names also listed in the --main-requirements-file pip requirements + FILE (that are assume to the production requirements and therefore to always + be present in addition to the development requirements). + """ + utils_requirements.lock_dev_requirements( + dev_requirements_file=dev_requirements_file, + main_requirements_file=main_requirements_file, + site_packages_dir=site_packages_dir + ) + + +if __name__ == '__main__': + gen_dev_requirements() diff --git a/etc/scripts/publish_files.py b/etc/scripts/publish_files.py new file mode 100644 index 0000000..f343cb3 --- /dev/null +++ b/etc/scripts/publish_files.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import hashlib +import os +import sys + +from pathlib import Path + +import click +import requests +import utils_thirdparty + +from github_release_retry import github_release_retry as grr + +""" +Create GitHub releases and upload files there. +""" + + +def get_files(location): + """ + Return an iterable of (filename, Path, md5) tuples for files in the `location` + directory tree recursively. + """ + for top, _dirs, files in os.walk(location): + for filename in files: + pth = Path(os.path.join(top, filename)) + with open(pth, 'rb') as fi: + md5 = hashlib.md5(fi.read()).hexdigest() + yield filename, pth, md5 + + +def get_etag_md5(url): + """ + Return the cleaned etag of URL `url` or None. + """ + headers = utils_thirdparty.get_remote_headers(url) + headers = {k.lower(): v for k, v in headers.items()} + etag = headers .get('etag') + if etag: + etag = etag.strip('"').lower() + return etag + + +def create_or_update_release_and_upload_directory( + user, + repo, + tag_name, + token, + directory, + retry_limit=10, + description=None, +): + """ + Create or update a GitHub release at https://github.com// for + `tag_name` tag using the optional `description` for this release. + Use the provided `token` as a GitHub token for API calls authentication. + Upload all files found in the `directory` tree to that GitHub release. + Retry API calls up to `retry_limit` time to work around instability the + GitHub API. + + Remote files that are not the same as the local files are deleted and re- + uploaded. 
+ """ + release_homepage_url = f'https://github.com/{user}/{repo}/releases/{tag_name}' + + # scrape release page HTML for links + urls_by_filename = {os.path.basename(l): l + for l in utils_thirdparty.get_paths_or_urls(links_url=release_homepage_url) + } + + # compute what is new, modified or unchanged + print(f'Compute which files is new, modified or unchanged in {release_homepage_url}') + + new_to_upload = [] + unchanged_to_skip = [] + modified_to_delete_and_reupload = [] + for filename, pth, md5 in get_files(directory): + url = urls_by_filename.get(filename) + if not url: + print(f'{filename} content is NEW, will upload') + new_to_upload.append(pth) + continue + + out_of_date = get_etag_md5(url) != md5 + if out_of_date: + print(f'{url} content is CHANGED based on md5 etag, will re-upload') + modified_to_delete_and_reupload.append(pth) + else: + # print(f'{url} content is IDENTICAL, skipping upload based on Etag') + unchanged_to_skip.append(pth) + print('.') + + ghapi = grr.GithubApi( + github_api_url='https://api.github.com', + user=user, + repo=repo, + token=token, + retry_limit=retry_limit, + ) + + # yank modified + print( + f'Unpublishing {len(modified_to_delete_and_reupload)} published but ' + f'locally modified files in {release_homepage_url}') + + release = ghapi.get_release_by_tag(tag_name) + + for pth in modified_to_delete_and_reupload: + filename = os.path.basename(pth) + asset_id = ghapi.find_asset_id_by_file_name(filename, release) + print (f' Unpublishing file: {filename}).') + response = ghapi.delete_asset(asset_id) + if response.status_code != requests.codes.no_content: # NOQA + raise Exception(f'failed asset deletion: {response}') + + # finally upload new and modified + to_upload = new_to_upload + modified_to_delete_and_reupload + print(f'Publishing with {len(to_upload)} files to {release_homepage_url}') + release = grr.Release(tag_name=tag_name, body=description) + grr.make_release(ghapi, release, to_upload) + + +TOKEN_HELP = ( + 'The Github personal acess token is used to authenticate API calls. ' + 'Required unless you set the GITHUB_TOKEN environment variable as an alternative. ' + 'See for details: https://github.com/settings/tokens and ' + 'https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token' +) + + +@click.command() + +@click.option( + '--user-repo-tag', + help='The GitHub qualified repository user/name/tag in which ' + 'to create the release such as in nexB/thirdparty/pypi', + type=str, + required=True, +) +@click.option( + '-d', '--directory', + help='The directory that contains files to upload to the release.', + type=click.Path(exists=True, readable=True, path_type=str, file_okay=False, resolve_path=True), + required=True, +) +@click.option( + '--token', + help=TOKEN_HELP, + default=os.environ.get('GITHUB_TOKEN', None), + type=str, + required=False, +) +@click.option( + '--description', + help='Text description for the release. Ignored if the release exists.', + default=None, + type=str, + required=False, +) +@click.option( + '--retry_limit', + help='Number of retries when making failing GitHub API calls. ' + 'Retrying helps work around transient failures of the GitHub API.', + type=int, + default=10, +) +@click.help_option('-h', '--help') +def publish_files( + user_repo_tag, + directory, + retry_limit=10, token=None, description=None, +): + """ + Publish all the files in DIRECTORY as assets to a GitHub release. 
+    Either create or update/replace remote files.
+    """
+    if not token:
+        click.secho('--token required option is missing.')
+        click.secho(TOKEN_HELP)
+        sys.exit(1)
+
+    user, repo, tag_name = user_repo_tag.split('/')
+
+    create_or_update_release_and_upload_directory(
+        user=user,
+        repo=repo,
+        tag_name=tag_name,
+        description=description,
+        retry_limit=retry_limit,
+        token=token,
+        directory=directory,
+    )
+
+
+if __name__ == '__main__':
+    publish_files()
diff --git a/etc/scripts/requirements.txt b/etc/scripts/requirements.txt
new file mode 100644
index 0000000..6591e49
--- /dev/null
+++ b/etc/scripts/requirements.txt
@@ -0,0 +1,12 @@
+aboutcode_toolkit
+github-release-retry2
+attrs
+commoncode
+click
+requests
+saneyaml
+romp
+pip
+setuptools
+twine
+wheel
\ No newline at end of file
diff --git a/etc/scripts/test_utils_pip_compatibility_tags.py b/etc/scripts/test_utils_pip_compatibility_tags.py
new file mode 100644
index 0000000..30c4dda
--- /dev/null
+++ b/etc/scripts/test_utils_pip_compatibility_tags.py
@@ -0,0 +1,128 @@
+"""Generate and work with PEP 425 Compatibility Tags.
+
+copied from pip-20.3.1 pip/tests/unit/test_utils_compatibility_tags.py
+download_url: https://raw.githubusercontent.com/pypa/pip/20.3.1/tests/unit/test_utils_compatibility_tags.py
+
+Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from unittest.mock import patch
+import sysconfig
+
+import pytest
+
+import utils_pip_compatibility_tags
+
+
+@pytest.mark.parametrize('version_info, expected', [
+    ((2,), '2'),
+    ((2, 8), '28'),
+    ((3,), '3'),
+    ((3, 6), '36'),
+    # Test a tuple of length 3.
+    ((3, 6, 5), '36'),
+    # Test a 2-digit minor version.
+    ((3, 10), '310'),
+])
+def test_version_info_to_nodot(version_info, expected):
+    actual = utils_pip_compatibility_tags.version_info_to_nodot(version_info)
+    assert actual == expected
+
+
+class Testcompatibility_tags(object):
+
+    def mock_get_config_var(self, **kwd):
+        """
+        Patch sysconfig.get_config_var for arbitrary keys.
+        """
+        get_config_var = sysconfig.get_config_var
+
+        def _mock_get_config_var(var):
+            if var in kwd:
+                return kwd[var]
+            return get_config_var(var)
+
+        return _mock_get_config_var
+
+    def test_no_hyphen_tag(self):
+        """
+        Test that no tag contains a hyphen.
+ """ + import pip._internal.utils.compatibility_tags + + mock_gcf = self.mock_get_config_var(SOABI='cpython-35m-darwin') + + with patch('sysconfig.get_config_var', mock_gcf): + supported = pip._internal.utils.compatibility_tags.get_supported() + + for tag in supported: + assert '-' not in tag.interpreter + assert '-' not in tag.abi + assert '-' not in tag.platform + + +class TestManylinux2010Tags(object): + + @pytest.mark.parametrize("manylinux2010,manylinux1", [ + ("manylinux2010_x86_64", "manylinux1_x86_64"), + ("manylinux2010_i686", "manylinux1_i686"), + ]) + def test_manylinux2010_implies_manylinux1(self, manylinux2010, manylinux1): + """ + Specifying manylinux2010 implies manylinux1. + """ + groups = {} + supported = pip_compatibility_tags.get_supported(platforms=[manylinux2010]) + for tag in supported: + groups.setdefault( + (tag.interpreter, tag.abi), [] + ).append(tag.platform) + + for arches in groups.values(): + if arches == ['any']: + continue + assert arches[:2] == [manylinux2010, manylinux1] + + +class TestManylinux2014Tags(object): + + @pytest.mark.parametrize("manylinuxA,manylinuxB", [ + ("manylinux2014_x86_64", ["manylinux2010_x86_64", "manylinux1_x86_64"]), + ("manylinux2014_i686", ["manylinux2010_i686", "manylinux1_i686"]), + ]) + def test_manylinuxA_implies_manylinuxB(self, manylinuxA, manylinuxB): + """ + Specifying manylinux2014 implies manylinux2010/manylinux1. + """ + groups = {} + supported = pip_compatibility_tags.get_supported(platforms=[manylinuxA]) + for tag in supported: + groups.setdefault( + (tag.interpreter, tag.abi), [] + ).append(tag.platform) + + expected_arches = [manylinuxA] + expected_arches.extend(manylinuxB) + for arches in groups.values(): + if arches == ['any']: + continue + assert arches[:3] == expected_arches diff --git a/etc/scripts/test_utils_pip_compatibility_tags.py.ABOUT b/etc/scripts/test_utils_pip_compatibility_tags.py.ABOUT new file mode 100644 index 0000000..07eee35 --- /dev/null +++ b/etc/scripts/test_utils_pip_compatibility_tags.py.ABOUT @@ -0,0 +1,14 @@ +about_resource: test_utils_pip_compatibility_tags.py + +type: github +namespace: pypa +name: pip +version: 20.3.1 +subpath: tests/unit/test_utils_compatibility_tags.py + +package_url: pkg:github/pypa/pip@20.3.1#tests/unit/test_utils_compatibility_tags.py + +download_url: https://raw.githubusercontent.com/pypa/pip/20.3.1/tests/unit/test_utils_compatibility_tags.py +copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file) +license_expression: mit +notes: subset copied from pip for tag handling diff --git a/etc/scripts/test_utils_pypi_supported_tags.py b/etc/scripts/test_utils_pypi_supported_tags.py new file mode 100644 index 0000000..9ad68b2 --- /dev/null +++ b/etc/scripts/test_utils_pypi_supported_tags.py @@ -0,0 +1,91 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pytest
+
+from utils_pypi_supported_tags import validate_platforms_for_pypi
+
+"""
+Wheel platform checking tests
+
+Copied and modified on 2020-12-24 from
+https://github.com/pypa/warehouse/blob/37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d/tests/unit/forklift/test_legacy.py
+"""
+
+
+def validate_wheel_filename_for_pypi(filename):
+    """
+    Validate if the filename is a PyPI/warehouse-uploadable wheel file name
+    with supported platform tags. Return a list of unsupported platform tags or
+    an empty list if all tags are supported.
+    """
+    from utils_thirdparty import Wheel
+    wheel = Wheel.from_filename(filename)
+    return validate_platforms_for_pypi(wheel.platforms)
+
+
+@pytest.mark.parametrize(
+    "plat",
+    [
+        "any",
+        "win32",
+        "win_amd64",
+        "win_ia64",
+        "manylinux1_i686",
+        "manylinux1_x86_64",
+        "manylinux2010_i686",
+        "manylinux2010_x86_64",
+        "manylinux2014_i686",
+        "manylinux2014_x86_64",
+        "manylinux2014_aarch64",
+        "manylinux2014_armv7l",
+        "manylinux2014_ppc64",
+        "manylinux2014_ppc64le",
+        "manylinux2014_s390x",
+        "manylinux_2_5_i686",
+        "manylinux_2_12_x86_64",
+        "manylinux_2_17_aarch64",
+        "manylinux_2_17_armv7l",
+        "manylinux_2_17_ppc64",
+        "manylinux_2_17_ppc64le",
+        "manylinux_3_0_s390x",
+        "macosx_10_6_intel",
+        "macosx_10_13_x86_64",
+        "macosx_11_0_x86_64",
+        "macosx_10_15_arm64",
+        "macosx_11_10_universal2",
+        # A real tag used by e.g. some numpy wheels
+        (
+            "macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64."
+            "macosx_10_10_intel.macosx_10_10_x86_64"
+        ),
+    ],
+)
+def test_is_valid_pypi_wheel_return_true_for_supported_wheel(plat):
+    filename = f"foo-1.2.3-cp34-none-{plat}.whl"
+    assert not validate_wheel_filename_for_pypi(filename)
+
+
+@pytest.mark.parametrize(
+    "plat",
+    [
+        "linux_x86_64",
+        "linux_x86_64.win32",
+        "macosx_9_2_x86_64",
+        "macosx_12_2_arm64",
+        "macosx_10_15_amd64",
+    ],
+)
+def test_is_valid_pypi_wheel_raise_exception_for_unsupported_wheel(plat):
+    filename = f"foo-1.2.3-cp34-none-{plat}.whl"
+    invalid = validate_wheel_filename_for_pypi(filename)
+    assert invalid
diff --git a/etc/scripts/test_utils_pypi_supported_tags.py.ABOUT b/etc/scripts/test_utils_pypi_supported_tags.py.ABOUT
new file mode 100644
index 0000000..176efac
--- /dev/null
+++ b/etc/scripts/test_utils_pypi_supported_tags.py.ABOUT
@@ -0,0 +1,17 @@
+about_resource: test_utils_pypi_supported_tags.py
+
+type: github
+namespace: pypa
+name: warehouse
+version: 37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d
+subpath: tests/unit/forklift/test_legacy.py
+
+package_url: pkg:github/pypa/warehouse@37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d#tests/unit/forklift/test_legacy.py
+
+download_url: https://github.com/pypa/warehouse/blob/37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d/tests/unit/forklift/test_legacy.py
+copyright: Copyright (c) The warehouse developers
+homepage_url: https://warehouse.readthedocs.io
+license_expression: apache-2.0
+notes: Test for wheel platform checking copied and heavily modified on
+    2020-12-24 from warehouse. This contains the basic functions to check if a
+    wheel file name would be supported for uploading to PyPI.
diff --git a/etc/scripts/utils_dejacode.py b/etc/scripts/utils_dejacode.py
new file mode 100644
index 0000000..8b6e5d2
--- /dev/null
+++ b/etc/scripts/utils_dejacode.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/skeleton for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import io
+import os
+import zipfile
+
+import requests
+import saneyaml
+
+from packaging import version as packaging_version
+
+"""
+Utility to create and retrieve package and ABOUT file data from DejaCode.
+"""
+
+DEJACODE_API_KEY = os.environ.get('DEJACODE_API_KEY', '')
+DEJACODE_API_URL = os.environ.get('DEJACODE_API_URL', '')
+
+DEJACODE_API_URL_PACKAGES = f'{DEJACODE_API_URL}packages/'
+DEJACODE_API_HEADERS = {
+    'Authorization': 'Token {}'.format(DEJACODE_API_KEY),
+    'Accept': 'application/json; indent=4',
+}
+
+
+def can_do_api_calls():
+    if not (DEJACODE_API_KEY and DEJACODE_API_URL):
+        print('DejaCode DEJACODE_API_KEY and DEJACODE_API_URL not configured. Doing nothing')
+        return False
+    else:
+        return True
+
+
+def fetch_dejacode_packages(params):
+    """
+    Return a list of package data mappings by calling the package API using
+    `params`, or an empty list.
+    """
+    if not can_do_api_calls():
+        return []
+
+    response = requests.get(
+        DEJACODE_API_URL_PACKAGES,
+        params=params,
+        headers=DEJACODE_API_HEADERS,
+    )
+
+    return response.json()['results']
+
+
+def get_package_data(distribution):
+    """
+    Return a mapping of package data or None for a Distribution `distribution`.
+    """
+    results = fetch_dejacode_packages(distribution.identifiers())
+
+    len_results = len(results)
+
+    if len_results == 1:
+        return results[0]
+
+    elif len_results > 1:
+        print(f'More than 1 entry exists, review at: {DEJACODE_API_URL_PACKAGES}')
+    else:
+        print('Could not find package:', distribution.download_url)
+
+
+def update_with_dejacode_data(distribution):
+    """
+    Update the Distribution `distribution` with DejaCode package data. Return
+    True if data was updated.
+    """
+    package_data = get_package_data(distribution)
+    if package_data:
+        return distribution.update(package_data, keep_extra=False)
+
+    print(f'No package found for: {distribution}')
+
+
+def update_with_dejacode_about_data(distribution):
+    """
+    Update the Distribution `distribution` with ABOUT data fetched from
+    DejaCode. Return True if data was updated.
+    """
+    package_data = get_package_data(distribution)
+    if package_data:
+        package_api_url = package_data['api_url']
+        about_url = f'{package_api_url}about'
+        response = requests.get(about_url, headers=DEJACODE_API_HEADERS)
+        # note that this is YAML-formatted
+        about_text = response.json()['about_data']
+        about_data = saneyaml.load(about_text)
+
+        return distribution.update(about_data, keep_extra=True)
+
+    print(f'No package found for: {distribution}')
+
+
+def fetch_and_save_about_files(distribution, dest_dir='thirdparty'):
+    """
+    Fetch and save in `dest_dir` the .ABOUT, .LICENSE and .NOTICE files fetched
+    from DejaCode for a Distribution `distribution`. Return True if files were
+    fetched.
+    """
+    package_data = get_package_data(distribution)
+    if package_data:
+        package_api_url = package_data['api_url']
+        about_url = f'{package_api_url}about_files'
+        response = requests.get(about_url, headers=DEJACODE_API_HEADERS)
+        about_zip = response.content
+        with io.BytesIO(about_zip) as zf:
+            with zipfile.ZipFile(zf) as zi:
+                zi.extractall(path=dest_dir)
+        return True
+
+    print(f'No package found for: {distribution}')
+
+
+def find_latest_dejacode_package(distribution):
+    """
+    Return a mapping of package data for the closest version to
+    a Distribution `distribution` or None.
+    Filter out version-specific attributes.
+    """
+    ids = distribution.purl_identifiers(skinny=True)
+    packages = fetch_dejacode_packages(params=ids)
+    if not packages:
+        return
+
+    for package_data in packages:
+        matched = (
+            package_data['download_url'] == distribution.download_url
+            and package_data['version'] == distribution.version
+            and package_data['filename'] == distribution.filename
+        )
+
+        if matched:
+            return package_data
+
+    # there was no exact match, find the latest version
+    # TODO: consider the closest version rather than the latest
+    # or the version that has the best data
+    with_versions = [(packaging_version.parse(p['version']), p) for p in packages]
+    with_versions = sorted(with_versions)
+    latest_version, latest_package_version = with_versions[-1]
+    print(
+        f'Found DejaCode latest version: {latest_version} '
+        f'for dist: {distribution.package_url}',
+    )
+
+    return latest_package_version
+
+
+def create_dejacode_package(distribution):
+    """
+    Create a new DejaCode Package for a Distribution `distribution`.
+    Return the new or existing package data.
+    """
+    if not can_do_api_calls():
+        return
+
+    existing_package_data = get_package_data(distribution)
+    if existing_package_data:
+        return existing_package_data
+
+    print(f'Creating new DejaCode package for: {distribution}')
+
+    new_package_payload = {
+        # Trigger data collection, scan, and purl
+        'collect_data': 1,
+    }
+
+    fields_to_carry_over = [
+        'download_url',
+        'type',
+        'namespace',
+        'name',
+        'version',
+        'qualifiers',
+        'subpath',
+        'license_expression',
+        'copyright',
+        'description',
+        'homepage_url',
+        'primary_language',
+        'notice_text',
+    ]
+
+    for field in fields_to_carry_over:
+        value = getattr(distribution, field, None)
+        if value:
+            new_package_payload[field] = value
+
+    response = requests.post(
+        DEJACODE_API_URL_PACKAGES,
+        data=new_package_payload,
+        headers=DEJACODE_API_HEADERS,
+    )
+    new_package_data = response.json()
+    if response.status_code != 201:
+        raise Exception(f'Error, cannot create package for: {distribution}')
+
+    print(f'New Package created at: {new_package_data["absolute_url"]}')
+    return new_package_data
diff --git a/etc/scripts/utils_pip_compatibility_tags.py b/etc/scripts/utils_pip_compatibility_tags.py
new file mode 100644
index 0000000..4c6529b
--- /dev/null
+++ b/etc/scripts/utils_pip_compatibility_tags.py
@@ -0,0 +1,192 @@
+"""Generate and work with PEP 425 Compatibility Tags.
+
+copied from pip-20.3.1 pip/_internal/utils/compatibility_tags.py
+download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/utils/compatibility_tags.py
+
+Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" + +import re + +from packaging.tags import ( + compatible_tags, + cpython_tags, + generic_tags, + interpreter_name, + interpreter_version, + mac_platforms, +) + +_osx_arch_pat = re.compile(r'(.+)_(\d+)_(\d+)_(.+)') + + +def version_info_to_nodot(version_info): + # type: (Tuple[int, ...]) -> str + # Only use up to the first two numbers. + return ''.join(map(str, version_info[:2])) + + +def _mac_platforms(arch): + # type: (str) -> List[str] + match = _osx_arch_pat.match(arch) + if match: + name, major, minor, actual_arch = match.groups() + mac_version = (int(major), int(minor)) + arches = [ + # Since we have always only checked that the platform starts + # with "macosx", for backwards-compatibility we extract the + # actual prefix provided by the user in case they provided + # something like "macosxcustom_". It may be good to remove + # this as undocumented or deprecate it in the future. + '{}_{}'.format(name, arch[len('macosx_'):]) + for arch in mac_platforms(mac_version, actual_arch) + ] + else: + # arch pattern didn't match (?!) + arches = [arch] + return arches + + +def _custom_manylinux_platforms(arch): + # type: (str) -> List[str] + arches = [arch] + arch_prefix, arch_sep, arch_suffix = arch.partition('_') + if arch_prefix == 'manylinux2014': + # manylinux1/manylinux2010 wheels run on most manylinux2014 systems + # with the exception of wheels depending on ncurses. PEP 599 states + # manylinux1/manylinux2010 wheels should be considered + # manylinux2014 wheels: + # https://www.python.org/dev/peps/pep-0599/#backwards-compatibility-with-manylinux2010-wheels + if arch_suffix in {'i686', 'x86_64'}: + arches.append('manylinux2010' + arch_sep + arch_suffix) + arches.append('manylinux1' + arch_sep + arch_suffix) + elif arch_prefix == 'manylinux2010': + # manylinux1 wheels run on most manylinux2010 systems with the + # exception of wheels depending on ncurses. 
PEP 571 states + # manylinux1 wheels should be considered manylinux2010 wheels: + # https://www.python.org/dev/peps/pep-0571/#backwards-compatibility-with-manylinux1-wheels + arches.append('manylinux1' + arch_sep + arch_suffix) + return arches + + +def _get_custom_platforms(arch): + # type: (str) -> List[str] + arch_prefix, _arch_sep, _arch_suffix = arch.partition('_') + if arch.startswith('macosx'): + arches = _mac_platforms(arch) + elif arch_prefix in ['manylinux2014', 'manylinux2010']: + arches = _custom_manylinux_platforms(arch) + else: + arches = [arch] + return arches + + +def _expand_allowed_platforms(platforms): + # type: (Optional[List[str]]) -> Optional[List[str]] + if not platforms: + return None + + seen = set() + result = [] + + for p in platforms: + if p in seen: + continue + additions = [c for c in _get_custom_platforms(p) if c not in seen] + seen.update(additions) + result.extend(additions) + + return result + + +def _get_python_version(version): + # type: (str) -> PythonVersion + if len(version) > 1: + return int(version[0]), int(version[1:]) + else: + return (int(version[0]),) + + +def _get_custom_interpreter(implementation=None, version=None): + # type: (Optional[str], Optional[str]) -> str + if implementation is None: + implementation = interpreter_name() + if version is None: + version = interpreter_version() + return "{}{}".format(implementation, version) + + +def get_supported( + version=None, # type: Optional[str] + platforms=None, # type: Optional[List[str]] + impl=None, # type: Optional[str] + abis=None # type: Optional[List[str]] +): + # type: (...) -> List[Tag] + """Return a list of supported tags for each version specified in + `versions`. + + :param version: a string version, of the form "33" or "32", + or None. The version will be assumed to support our ABI. + :param platforms: specify a list of platforms you want valid + tags for, or None. If None, use the local system platform. + :param impl: specify the exact implementation you want valid + tags for, or None. If None, use the local interpreter impl. + :param abis: specify a list of abis you want valid + tags for, or None. If None, use the local interpreter abi. 
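+
+    A usage sketch (the requested version and platform are illustrative):
+
+        tags = get_supported(version='39', platforms=['manylinux2014_x86_64'])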
+ """ + supported = [] # type: List[Tag] + + python_version = None # type: Optional[PythonVersion] + if version is not None: + python_version = _get_python_version(version) + + interpreter = _get_custom_interpreter(impl, version) + + platforms = _expand_allowed_platforms(platforms) + + is_cpython = (impl or interpreter_name()) == "cp" + if is_cpython: + supported.extend( + cpython_tags( + python_version=python_version, + abis=abis, + platforms=platforms, + ) + ) + else: + supported.extend( + generic_tags( + interpreter=interpreter, + abis=abis, + platforms=platforms, + ) + ) + supported.extend( + compatible_tags( + python_version=python_version, + interpreter=interpreter, + platforms=platforms, + ) + ) + + return supported diff --git a/etc/scripts/utils_pip_compatibility_tags.py.ABOUT b/etc/scripts/utils_pip_compatibility_tags.py.ABOUT new file mode 100644 index 0000000..7bbb026 --- /dev/null +++ b/etc/scripts/utils_pip_compatibility_tags.py.ABOUT @@ -0,0 +1,14 @@ +about_resource: utils_pip_compatibility_tags.py + +type: github +namespace: pypa +name: pip +version: 20.3.1 +subpath: src/pip/_internal/utils/compatibility_tags.py + +package_url: pkg:github/pypa/pip@20.3.1#src/pip/_internal/utils/compatibility_tags.py + +download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/utils/compatibility_tags.py +copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file) +license_expression: mit +notes: subset copied from pip for tag handling \ No newline at end of file diff --git a/etc/scripts/utils_pypi_supported_tags.py b/etc/scripts/utils_pypi_supported_tags.py new file mode 100644 index 0000000..8dcb70f --- /dev/null +++ b/etc/scripts/utils_pypi_supported_tags.py @@ -0,0 +1,109 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +""" +Wheel platform checking + +Copied and modified on 2020-12-24 from +https://github.com/pypa/warehouse/blob/37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d/warehouse/forklift/legacy.py + +This contains the basic functions to check if a wheel file name is would be +supported for uploading to PyPI. 
+""" + +# These platforms can be handled by a simple static list: +_allowed_platforms = { + "any", + "win32", + "win_amd64", + "win_ia64", + "manylinux1_x86_64", + "manylinux1_i686", + "manylinux2010_x86_64", + "manylinux2010_i686", + "manylinux2014_x86_64", + "manylinux2014_i686", + "manylinux2014_aarch64", + "manylinux2014_armv7l", + "manylinux2014_ppc64", + "manylinux2014_ppc64le", + "manylinux2014_s390x", + "linux_armv6l", + "linux_armv7l", +} +# macosx is a little more complicated: +_macosx_platform_re = re.compile(r"macosx_(?P\d+)_(\d+)_(?P.*)") +_macosx_arches = { + "ppc", + "ppc64", + "i386", + "x86_64", + "arm64", + "intel", + "fat", + "fat32", + "fat64", + "universal", + "universal2", +} +_macosx_major_versions = { + "10", + "11", +} + +# manylinux pep600 is a little more complicated: +_manylinux_platform_re = re.compile(r"manylinux_(\d+)_(\d+)_(?P.*)") +_manylinux_arches = { + "x86_64", + "i686", + "aarch64", + "armv7l", + "ppc64", + "ppc64le", + "s390x", +} + + +def is_supported_platform_tag(platform_tag): + """ + Return True if the ``platform_tag`` is supported on PyPI. + """ + if platform_tag in _allowed_platforms: + return True + m = _macosx_platform_re.match(platform_tag) + if ( + m + and m.group("major") in _macosx_major_versions + and m.group("arch") in _macosx_arches + ): + return True + m = _manylinux_platform_re.match(platform_tag) + if m and m.group("arch") in _manylinux_arches: + return True + return False + + +def validate_platforms_for_pypi(platforms): + """ + Validate if the wheel platforms are supported platform tags on Pypi. Return + a list of unsupported platform tags or an empty list if all tags are + supported. + """ + + # Check that if it's a binary wheel, it's on a supported platform + invalid_tags = [] + for plat in platforms: + if not is_supported_platform_tag(plat): + invalid_tags.append(plat) + return invalid_tags diff --git a/etc/scripts/utils_pypi_supported_tags.py.ABOUT b/etc/scripts/utils_pypi_supported_tags.py.ABOUT new file mode 100644 index 0000000..228a538 --- /dev/null +++ b/etc/scripts/utils_pypi_supported_tags.py.ABOUT @@ -0,0 +1,17 @@ +about_resource: utils_pypi_supported_tags.py + +type: github +namespace: pypa +name: warehouse +version: 37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d +subpath: warehouse/forklift/legacy.py + +package_url: pkg:github/pypa/warehouse@37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d#warehouse/forklift/legacy.py + +download_url: https://github.com/pypa/warehouse/blob/37a83dd342d9e3b3ab4f6bde47ca30e6883e2c4d/warehouse/forklift/legacy.py +copyright: Copyright (c) The warehouse developers +homepage_url: https://warehouse.readthedocs.io +license_expression: apache-2.0 +notes: Wheel platform checking copied and heavily modified on 2020-12-24 from + warehouse. This contains the basic functions to check if a wheel file name is + would be supported for uploading to PyPI. diff --git a/etc/scripts/utils_requirements.py b/etc/scripts/utils_requirements.py new file mode 100644 index 0000000..ddbed61 --- /dev/null +++ b/etc/scripts/utils_requirements.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/skeleton for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+#
+import subprocess
+
+"""
+Utilities to manage requirements files and call pip.
+NOTE: this should use ONLY the standard library and not import anything else.
+"""
+
+
+def load_requirements(requirements_file='requirements.txt', force_pinned=True):
+    """
+    Yield package (name, version) tuples for each requirement in a
+    `requirements_file` file. Every requirement version must be pinned if
+    `force_pinned` is True. Otherwise un-pinned requirements are returned with
+    a None version.
+    """
+    with open(requirements_file) as reqs:
+        req_lines = reqs.read().splitlines(False)
+    return get_required_name_versions(req_lines, force_pinned)
+
+
+def get_required_name_versions(requirement_lines, force_pinned=True):
+    """
+    Yield required (name, version) tuples given a `requirement_lines` iterable
+    of requirement text lines. Every requirement version must be pinned if
+    `force_pinned` is True. Otherwise un-pinned requirements are returned with
+    a None version.
+    """
+    for req_line in requirement_lines:
+        req_line = req_line.strip()
+        if not req_line or req_line.startswith('#'):
+            continue
+        if '==' not in req_line:
+            if force_pinned:
+                raise Exception(f'Requirement version is not pinned: {req_line}')
+            name = req_line.lower().strip()
+            version = None
+        else:
+            name, _, version = req_line.partition('==')
+            name = name.lower().strip()
+            version = version.lower().strip()
+        yield name, version
+
+
+def parse_requires(requires):
+    """
+    Return a list of requirement lines extracted from the `requires` text from
+    a setup.cfg *_requires section such as the "install_requires" section.
+    """
+    requires = [c for c in requires.splitlines(False) if c]
+    if not requires:
+        return []
+
+    requires = [''.join(r.split()) for r in requires if r and r.strip()]
+    return sorted(requires)
+
+
+def lock_requirements(requirements_file='requirements.txt', site_packages_dir=None):
+    """
+    Freeze and lock current installed requirements and save this to the
+    `requirements_file` requirements file.
+    """
+    with open(requirements_file, 'w') as fo:
+        fo.write(get_installed_reqs(site_packages_dir=site_packages_dir))
+
+
+def lock_dev_requirements(
+    dev_requirements_file='requirements-dev.txt',
+    main_requirements_file='requirements.txt',
+    site_packages_dir=None,
+):
+    """
+    Freeze and lock current installed development-only requirements and save
+    this to the `dev_requirements_file` requirements file. Development-only is
+    achieved by subtracting requirements from the `main_requirements_file`
+    requirements file from the current requirements using package names (and
+    ignoring versions).
+    """
+    main_names = {n for n, _v in load_requirements(main_requirements_file)}
+    all_reqs = get_installed_reqs(site_packages_dir=site_packages_dir)
+    all_req_lines = all_reqs.splitlines(False)
+    all_req_nvs = get_required_name_versions(all_req_lines)
+    dev_only_req_nvs = {n: v for n, v in all_req_nvs if n not in main_names}
+
+    new_reqs = '\n'.join(f'{n}=={v}' for n, v in sorted(dev_only_req_nvs.items()))
+    with open(dev_requirements_file, 'w') as fo:
+        fo.write(new_reqs)
+
+
+def get_installed_reqs(site_packages_dir):
+    """
+    Return the installed pip requirements as text found in `site_packages_dir`.
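+
+    A usage sketch (the path is hypothetical):
+
+        text = get_installed_reqs('venv/lib/python3.9/site-packages')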
+ """ + # Also include these packages in the output with --all: wheel, distribute, setuptools, pip + args = ['pip', 'freeze', '--exclude-editable', '--all', '--path', site_packages_dir] + return subprocess.check_output(args, encoding='utf-8') diff --git a/etc/scripts/utils_thirdparty.py b/etc/scripts/utils_thirdparty.py new file mode 100644 index 0000000..444b20d --- /dev/null +++ b/etc/scripts/utils_thirdparty.py @@ -0,0 +1,2982 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/skeleton for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +from collections import defaultdict +import email +import itertools +import operator +import os +import re +import shutil +import subprocess +import tarfile +import tempfile +import time +import urllib + +import attr +import license_expression +import packageurl +import requests +import saneyaml +import utils_pip_compatibility_tags +import utils_pypi_supported_tags + +from commoncode import fileutils +from commoncode.hash import multi_checksums +from commoncode.text import python_safe_name +from packaging import tags as packaging_tags +from packaging import version as packaging_version +from utils_requirements import load_requirements + +""" +Utilities to manage Python thirparty libraries source, binaries and metadata in +local directories and remote repositories. + +- update pip requirement files from installed packages for prod. and dev. +- build and save wheels for all required packages +- also build variants for wheels with native code for all each supported + operating systems (Linux, macOS, Windows) and Python versions (3.x) + combinations using remote Ci jobs +- collect source distributions for all required packages +- keep in sync wheels, distributions, ABOUT and LICENSE files to a PyPI-like + repository (using GitHub) +- create, update and fetch ABOUT, NOTICE and LICENSE metadata for all distributions + + +Approach +-------- + +The processing is organized around these key objects: + +- A PyPiPackage represents a PyPI package with its name and version. It tracks + the downloadable Distribution objects for that version: + + - one Sdist source Distribution object + - a list of Wheel binary Distribution objects + +- A Distribution (either a Wheel or Sdist) is identified by and created from its + filename. It also has the metadata used to populate an .ABOUT file and + document origin and license. A Distribution can be fetched from Repository. + Metadata can be loaded from and dumped to ABOUT files and optionally from + DejaCode package data. + +- An Environment is a combination of a Python version and operating system. + A Wheel Distribution also has Python/OS tags is supports and these can be + supported in a given Environment. + +- Paths or URLs to "filenames" live in a Repository, either a plain + LinksRepository (an HTML page listing URLs or a local directory) or a + PypiRepository (a PyPI simple index where each package name has an HTML page + listing URLs to all distribution types and versions). + Repositories and Distributions are related through filenames. 
+
+
+The Wheel models code is partially derived from the MIT-licensed pip and the
+Distribution/Wheel/Sdist design has been heavily inspired by the packaging-
+dists library https://github.com/uranusjr/packaging-dists by Tzu-ping Chung
+"""
+
+TRACE = False
+
+# Supported environments
+PYTHON_VERSIONS = '36', '37', '38', '39', '310'
+
+ABIS_BY_PYTHON_VERSION = {
+    '36': ['cp36', 'cp36m'],
+    '37': ['cp37', 'cp37m'],
+    '38': ['cp38', 'cp38m'],
+    '39': ['cp39', 'cp39m'],
+    '310': ['cp310', 'cp310m'],
+}
+
+PLATFORMS_BY_OS = {
+    'linux': [
+        'linux_x86_64',
+        'manylinux1_x86_64',
+        'manylinux2014_x86_64',
+        'manylinux2010_x86_64',
+        'manylinux_2_12_x86_64',
+    ],
+    'macos': [
+        'macosx_10_6_intel', 'macosx_10_6_x86_64',
+        'macosx_10_9_intel', 'macosx_10_9_x86_64',
+        'macosx_10_10_intel', 'macosx_10_10_x86_64',
+        'macosx_10_11_intel', 'macosx_10_11_x86_64',
+        'macosx_10_12_intel', 'macosx_10_12_x86_64',
+        'macosx_10_13_intel', 'macosx_10_13_x86_64',
+        'macosx_10_14_intel', 'macosx_10_14_x86_64',
+        'macosx_10_15_intel', 'macosx_10_15_x86_64',
+        'macosx_11_0_x86_64',
+        # 'macosx_11_0_arm64',
+    ],
+    'windows': [
+        'win_amd64',
+    ],
+}
+
+THIRDPARTY_DIR = 'thirdparty'
+CACHE_THIRDPARTY_DIR = '.cache/thirdparty'
+
+REMOTE_LINKS_URL = 'https://thirdparty.aboutcode.org/pypi'
+
+EXTENSIONS_APP = '.pyz',
+EXTENSIONS_SDIST = '.tar.gz', '.tar.bz2', '.zip', '.tar.xz',
+EXTENSIONS_INSTALLABLE = EXTENSIONS_SDIST + ('.whl',)
+EXTENSIONS_ABOUT = '.ABOUT', '.LICENSE', '.NOTICE',
+EXTENSIONS = EXTENSIONS_INSTALLABLE + EXTENSIONS_ABOUT + EXTENSIONS_APP
+
+PYPI_SIMPLE_URL = 'https://pypi.org/simple'
+
+LICENSEDB_API_URL = 'https://scancode-licensedb.aboutcode.org'
+
+LICENSING = license_expression.Licensing()
+
+################################################################################
+#
+# Fetch remote wheels and sources locally
+#
+################################################################################
+
+
+def fetch_wheels(
+    environment=None,
+    requirements_file='requirements.txt',
+    allow_unpinned=False,
+    dest_dir=THIRDPARTY_DIR,
+    remote_links_url=REMOTE_LINKS_URL,
+):
+    """
+    Download all of the wheels of the packages listed in the
+    ``requirements_file`` requirements file into the ``dest_dir`` directory.
+
+    Only get wheels for the ``environment`` Environment constraints. If the
+    provided ``environment`` is None then the current Python interpreter
+    environment is used implicitly.
+
+    Only accept pinned requirements (e.g. with a version) unless
+    ``allow_unpinned`` is True.
+
+    Use exclusively direct downloads from a remote repo at URL
+    ``remote_links_url``. If ``remote_links_url`` is a path, use this as a
+    directory of links instead of a URL.
+
+    Yield tuples of (PypiPackage, error) where error is None on success.
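+
+    A usage sketch (assuming a pinned requirements file and the default
+    remote repo):
+
+        for package, error in fetch_wheels(requirements_file='requirements.txt'):
+            if error:
+                print(error)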
+ """ + missed = [] + + if not allow_unpinned: + force_pinned = True + else: + force_pinned = False + + try: + rrp = list(get_required_remote_packages( + requirements_file=requirements_file, + force_pinned=force_pinned, + remote_links_url=remote_links_url, + )) + except Exception as e: + raise Exception( + dict( + requirements_file=requirements_file, + force_pinned=force_pinned, + remote_links_url=remote_links_url, + ) + ) from e + + fetched_filenames = set() + for name, version, package in rrp: + if not package: + missed.append((name, version,)) + nv = f'{name}=={version}' if version else name + yield None, f'fetch_wheels: Missing package in remote repo: {nv}' + + else: + fetched_filename = package.fetch_wheel( + environment=environment, + fetched_filenames=fetched_filenames, + dest_dir=dest_dir, + ) + + if fetched_filename: + fetched_filenames.add(fetched_filename) + error = None + else: + if fetched_filename in fetched_filenames: + error = None + else: + error = f'Failed to fetch' + yield package, error + + if missed: + rr = get_remote_repo() + print() + print(f'===> fetch_wheels: Missed some packages') + for n, v in missed: + nv = f'{n}=={v}' if v else n + print(f'Missed package {nv} in remote repo, has only:') + for pv in rr.get_versions(n): + print(' ', pv) + raise Exception('Missed some packages in remote repo') + + +def fetch_sources( + requirements_file='requirements.txt', + allow_unpinned=False, + dest_dir=THIRDPARTY_DIR, + remote_links_url=REMOTE_LINKS_URL, +): + """ + Download all of the dependent package sources listed in the + ``requirements_file`` requirements file into ``dest_dir`` destination + directory. + + Use direct downloads to achieve this (not pip download). Use exclusively the + packages found from a remote repo at URL ``remote_links_url``. If + ``remote_links_url`` is a path, use this as a directory of links instead of + a URL. + + Only accept pinned requirements (e.g. with a version) unless + ``allow_unpinned`` is True. + + Yield tuples of (PypiPackage, error message) for each package where error + message will empty on success. 
+ """ + missed = [] + + if not allow_unpinned: + force_pinned = True + else: + force_pinned = False + + rrp = list(get_required_remote_packages( + requirements_file=requirements_file, + force_pinned=force_pinned, + remote_links_url=remote_links_url, + )) + + for name, version, package in rrp: + if not package: + missed.append((name, name,)) + nv = f'{name}=={version}' if version else name + yield None, f'fetch_sources: Missing package in remote repo: {nv}' + + elif not package.sdist: + yield package, f'Missing sdist in links' + + else: + fetched = package.fetch_sdist(dest_dir=dest_dir) + error = f'Failed to fetch' if not fetched else None + yield package, error + if missed: + raise Exception(f'Missing source packages in {remote_links_url}', missed) + +################################################################################ +# +# Core models +# +################################################################################ + + +@attr.attributes +class NameVer: + name = attr.ib( + type=str, + metadata=dict(help='Python package name, lowercase and normalized.'), + ) + + version = attr.ib( + type=str, + metadata=dict(help='Python package version string.'), + ) + + @property + def normalized_name(self): + return NameVer.normalize_name(self.name) + + @staticmethod + def normalize_name(name): + """ + Return a normalized package name per PEP503, and copied from + https://www.python.org/dev/peps/pep-0503/#id4 + """ + return name and re.sub(r"[-_.]+", "-", name).lower() or name + + @staticmethod + def standardize_name(name): + """ + Return a standardized package name, e.g. lowercased and using - not _ + """ + return name and re.sub(r"[-_]+", "-", name).lower() or name + + @property + def name_ver(self): + return f'{self.name}-{self.version}' + + def sortable_name_version(self): + """ + Return a tuple of values to sort by name, then version. + This method is a suitable to use as key for sorting NameVer instances. 
+ """ + return self.normalized_name, packaging_version.parse(self.version) + + @classmethod + def sorted(cls, namevers): + return sorted(namevers, key=cls.sortable_name_version) + + +@attr.attributes +class Distribution(NameVer): + + # field names that can be updated from another dist of mapping + updatable_fields = [ + 'license_expression', + 'copyright', + 'description', + 'homepage_url', + 'primary_language', + 'notice_text', + 'extra_data', + ] + + filename = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='File name.'), + ) + + path_or_url = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Path or download URL.'), + ) + + sha256 = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='SHA256 checksum.'), + ) + + sha1 = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='SHA1 checksum.'), + ) + + md5 = attr.ib( + repr=False, + type=int, + default=0, + metadata=dict(help='MD5 checksum.'), + ) + + type = attr.ib( + repr=False, + type=str, + default='pypi', + metadata=dict(help='Package type'), + ) + + namespace = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Package URL namespace'), + ) + + qualifiers = attr.ib( + repr=False, + type=dict, + default=attr.Factory(dict), + metadata=dict(help='Package URL qualifiers'), + ) + + subpath = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Package URL subpath'), + ) + + size = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Size in bytes.'), + ) + + primary_language = attr.ib( + repr=False, + type=str, + default='Python', + metadata=dict(help='Primary Programming language.'), + ) + + description = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Description.'), + ) + + homepage_url = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Homepage URL'), + ) + + notes = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Notes.'), + ) + + copyright = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Copyright.'), + ) + + license_expression = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='License expression'), + ) + + licenses = attr.ib( + repr=False, + type=list, + default=attr.Factory(list), + metadata=dict(help='List of license mappings.'), + ) + + notice_text = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Notice text'), + ) + + extra_data = attr.ib( + repr=False, + type=dict, + default=attr.Factory(dict), + metadata=dict(help='Extra data'), + ) + + @property + def package_url(self): + """ + Return a Package URL string of self. 
+ """ + return str(packageurl.PackageURL(**self.purl_identifiers())) + + @property + def download_url(self): + if self.path_or_url and self.path_or_url.startswith('https://'): + return self.path_or_url + else: + return self.get_best_download_url() + + @property + def about_filename(self): + return f'{self.filename}.ABOUT' + + def has_about_file(self, dest_dir=THIRDPARTY_DIR): + return os.path.exists(os.path.join(dest_dir, self.about_filename)) + + @property + def about_download_url(self): + return self.build_remote_download_url(self.about_filename) + + @property + def notice_filename(self): + return f'{self.filename}.NOTICE' + + @property + def notice_download_url(self): + return self.build_remote_download_url(self.notice_filename) + + @classmethod + def from_path_or_url(cls, path_or_url): + """ + Return a distribution built from the data found in the filename of a + `path_or_url` string. Raise an exception if this is not a valid + filename. + """ + filename = os.path.basename(path_or_url.strip('/')) + dist = cls.from_filename(filename) + dist.path_or_url = path_or_url + return dist + + @classmethod + def get_dist_class(cls, filename): + if filename.endswith('.whl'): + return Wheel + elif filename.endswith(('.zip', '.tar.gz',)): + return Sdist + raise InvalidDistributionFilename(filename) + + @classmethod + def from_filename(cls, filename): + """ + Return a distribution built from the data found in a `filename` string. + Raise an exception if this is not a valid filename + """ + clazz = cls.get_dist_class(filename) + return clazz.from_filename(filename) + + @classmethod + def from_data(cls, data, keep_extra=False): + """ + Return a distribution built from a `data` mapping. + """ + filename = data['filename'] + dist = cls.from_filename(filename) + dist.update(data, keep_extra=keep_extra) + return dist + + @classmethod + def from_dist(cls, data, dist): + """ + Return a distribution built from a `data` mapping and update it with data + from another dist Distribution. Return None if it cannot be created + """ + # We can only create from a dist of the same package + has_same_key_fields = all(data.get(kf) == getattr(dist, kf, None) + for kf in ('type', 'namespace', 'name') + ) + if not has_same_key_fields: + print(f'Missing key fields: Cannot derive a new dist from data: {data} and dist: {dist}') + return + + has_key_field_values = all(data.get(kf) for kf in ('type', 'name', 'version')) + if not has_key_field_values: + print(f'Missing key field values: Cannot derive a new dist from data: {data} and dist: {dist}') + return + + data = dict(data) + # do not overwrite the data with the other dist + # only supplement + data.update({k: v for k, v in dist.get_updatable_data().items() if not data.get(k)}) + return cls.from_data(data) + + @classmethod + def build_remote_download_url(cls, filename, base_url=REMOTE_LINKS_URL): + """ + Return a direct download URL for a file in our remote repo + """ + return f'{base_url}/{filename}' + + def get_best_download_url(self): + """ + Return the best download URL for this distribution where best means that + PyPI is better and our own remote repo URLs are second. + If none is found, return a synthetic remote URL. 
+ """ + name = self.normalized_name + version = self.version + filename = self.filename + + pypi_package = get_pypi_package(name=name, version=version) + if pypi_package: + pypi_url = pypi_package.get_url_for_filename(filename) + if pypi_url: + return pypi_url + + remote_package = get_remote_package(name=name, version=version) + if remote_package: + remote_url = remote_package.get_url_for_filename(filename) + if remote_url: + return remote_url + else: + # the package may not have been published yet, so we craft a URL + # using our remote base URL + return self.build_remote_download_url(self.filename) + + def purl_identifiers(self, skinny=False): + """ + Return a mapping of non-empty identifier name/values for the purl + fields. If skinny is True, only inlucde type, namespace and name. + """ + identifiers = dict( + type=self.type, + namespace=self.namespace, + name=self.name, + ) + + if not skinny: + identifiers.update( + version=self.version, + subpath=self.subpath, + qualifiers=self.qualifiers, + ) + + return {k: v for k, v in sorted(identifiers.items()) if v} + + def identifiers(self, purl_as_fields=True): + """ + Return a mapping of non-empty identifier name/values. + Return each purl fields separately if purl_as_fields is True. + Otherwise return a package_url string for the purl. + """ + if purl_as_fields: + identifiers = self.purl_identifiers() + else: + identifiers = dict(package_url=self.package_url) + + identifiers.update( + download_url=self.download_url, + filename=self.filename, + md5=self.md5, + sha1=self.sha1, + package_url=self.package_url, + ) + + return {k: v for k, v in sorted(identifiers.items()) if v} + + def has_key_metadata(self): + """ + Return True if this distribution has key metadata required for basic attribution. + """ + if self.license_expression == 'public-domain': + # copyright not needed + return True + return self.license_expression and self.copyright and self.path_or_url + + def to_about(self): + """ + Return a mapping of ABOUT data from this distribution fields. + """ + about_data = dict( + about_resource=self.filename, + checksum_md5=self.md5, + checksum_sha1=self.sha1, + copyright=self.copyright, + description=self.description, + download_url=self.download_url, + homepage_url=self.homepage_url, + license_expression=self.license_expression, + name=self.name, + namespace=self.namespace, + notes=self.notes, + notice_file=self.notice_filename if self.notice_text else '', + package_url=self.package_url, + primary_language=self.primary_language, + qualifiers=self.qualifiers, + size=self.size, + subpath=self.subpath, + type=self.type, + version=self.version, + ) + + about_data.update(self.extra_data) + about_data = {k: v for k, v in sorted(about_data.items()) if v} + return about_data + + def to_dict(self): + """ + Return a mapping data from this distribution. + """ + return {k: v for k, v in attr.asdict(self).items() if v} + + def save_about_and_notice_files(self, dest_dir=THIRDPARTY_DIR): + """ + Save a .ABOUT file to `dest_dir`. Include a .NOTICE file if there is a + notice_text. 
+ """ + + def save_if_modified(location, content): + if os.path.exists(location): + with open(location) as fi: + existing_content = fi.read() + if existing_content == content: + return False + + if TRACE: print(f'Saving ABOUT (and NOTICE) files for: {self}') + with open(location, 'w') as fo: + fo.write(content) + return True + + save_if_modified( + location=os.path.join(dest_dir, self.about_filename), + content=saneyaml.dump(self.to_about()), + ) + + notice_text = self.notice_text and self.notice_text.strip() + if notice_text: + save_if_modified( + location=os.path.join(dest_dir, self.notice_filename), + content=notice_text, + ) + + def load_about_data(self, about_filename_or_data=None, dest_dir=THIRDPARTY_DIR): + """ + Update self with ABOUT data loaded from an `about_filename_or_data` + which is either a .ABOUT file in `dest_dir` or an ABOUT data mapping. + `about_filename_or_data` defaults to this distribution default ABOUT + filename if not provided. Load the notice_text if present from dest_dir. + """ + if not about_filename_or_data: + about_filename_or_data = self.about_filename + + if isinstance(about_filename_or_data, str): + # that's an about_filename + about_path = os.path.join(dest_dir, about_filename_or_data) + if os.path.exists(about_path): + with open(about_path) as fi: + about_data = saneyaml.load(fi.read()) + if not about_data: + return False + else: + return False + else: + about_data = about_filename_or_data + + md5 = about_data.pop('checksum_md5', None) + if md5: + about_data['md5'] = md5 + sha1 = about_data.pop('checksum_sha1', None) + if sha1: + about_data['sha1'] = sha1 + sha256 = about_data.pop('checksum_sha256', None) + if sha256: + about_data['sha256'] = sha256 + + about_data.pop('about_resource', None) + notice_text = about_data.pop('notice_text', None) + notice_file = about_data.pop('notice_file', None) + if notice_text: + about_data['notice_text'] = notice_text + elif notice_file: + notice_loc = os.path.join(dest_dir, notice_file) + if os.path.exists(notice_loc): + with open(notice_loc) as fi: + about_data['notice_text'] = fi.read() + return self.update(about_data, keep_extra=True) + + def load_remote_about_data(self): + """ + Fetch and update self with "remote" data Distribution ABOUT file and + NOTICE file if any. Return True if the data was updated. + """ + try: + about_text = fetch_content_from_path_or_url_through_cache(self.about_download_url) + except RemoteNotFetchedException: + return False + + if not about_text: + return False + + about_data = saneyaml.load(about_text) + notice_file = about_data.pop('notice_file', None) + if notice_file: + try: + notice_text = fetch_content_from_path_or_url_through_cache(self.notice_download_url) + if notice_text: + about_data['notice_text'] = notice_text + except RemoteNotFetchedException: + print(f'Failed to fetch NOTICE file: {self.notice_download_url}') + return self.load_about_data(about_data) + + def get_checksums(self, dest_dir=THIRDPARTY_DIR): + """ + Return a mapping of computed checksums for this dist filename is + `dest_dir`. + """ + dist_loc = os.path.join(dest_dir, self.filename) + if os.path.exists(dist_loc): + return multi_checksums(dist_loc, checksum_names=('md5', 'sha1', 'sha256')) + else: + return {} + + def set_checksums(self, dest_dir=THIRDPARTY_DIR): + """ + Update self with checksums computed for this dist filename is `dest_dir`. 
+ """ + self.update(self.get_checksums(dest_dir), overwrite=True) + + def validate_checksums(self, dest_dir=THIRDPARTY_DIR): + """ + Return True if all checksums that have a value in this dist match + checksums computed for this dist filename is `dest_dir`. + """ + real_checksums = self.get_checksums(dest_dir) + for csk in ('md5', 'sha1', 'sha256'): + csv = getattr(self, csk) + rcv = real_checksums.get(csk) + if csv and rcv and csv != rcv: + return False + return True + + def get_pip_hash(self): + """ + Return a pip hash option string as used in requirements for this dist. + """ + assert self.sha256, f'Missinh SHA256 for dist {self}' + return f'--hash=sha256:{self.sha256}' + + def get_license_keys(self): + try: + keys = LICENSING.license_keys(self.license_expression, unique=True, simple=True) + except license_expression.ExpressionParseError: + return ['unknown'] + return keys + + def fetch_license_files(self, dest_dir=THIRDPARTY_DIR): + """ + Fetch license files is missing in `dest_dir`. + Return True if license files were fetched. + """ + paths_or_urls = get_remote_repo().links + errors = [] + extra_lic_names = [l.get('file') for l in self.extra_data.get('licenses', {})] + extra_lic_names += [self.extra_data.get('license_file')] + extra_lic_names = [ln for ln in extra_lic_names if ln] + lic_names = [ f'{key}.LICENSE' for key in self.get_license_keys()] + for filename in lic_names + extra_lic_names: + floc = os.path.join(dest_dir, filename) + if os.path.exists(floc): + continue + + try: + # try remotely first + lic_url = get_link_for_filename( + filename=filename, paths_or_urls=paths_or_urls) + + fetch_and_save_path_or_url( + filename=filename, + dest_dir=dest_dir, + path_or_url=lic_url, + as_text=True, + ) + if TRACE: print(f'Fetched license from remote: {lic_url}') + + except: + try: + # try licensedb second + lic_url = f'{LICENSEDB_API_URL}/{filename}' + fetch_and_save_path_or_url( + filename=filename, + dest_dir=dest_dir, + path_or_url=lic_url, + as_text=True, + ) + if TRACE: print(f'Fetched license from licensedb: {lic_url}') + + except: + msg = f'No text for license {filename} in expression "{self.license_expression}" from {self}' + print(msg) + errors.append(msg) + + return errors + + def extract_pkginfo(self, dest_dir=THIRDPARTY_DIR): + """ + Return the text of the first PKG-INFO or METADATA file found in the + archive of this Distribution in `dest_dir`. Return None if not found. + """ + fmt = 'zip' if self.filename.endswith('.whl') else None + dist = os.path.join(dest_dir, self.filename) + with tempfile.TemporaryDirectory(prefix='pypi-tmp-extract') as td: + shutil.unpack_archive(filename=dist, extract_dir=td, format=fmt) + # NOTE: we only care about the first one found in the dist + # which may not be 100% right + for pi in fileutils.resource_iter(location=td, with_dirs=False): + if pi.endswith(('PKG-INFO', 'METADATA',)): + with open(pi) as fi: + return fi.read() + + def load_pkginfo_data(self, dest_dir=THIRDPARTY_DIR): + """ + Update self with data loaded from the PKG-INFO file found in the + archive of this Distribution in `dest_dir`. 
+ """ + pkginfo_text = self.extract_pkginfo(dest_dir=dest_dir) + if not pkginfo_text: + print(f'!!!!PKG-INFO not found in {self.filename}') + return + raw_data = email.message_from_string(pkginfo_text) + + classifiers = raw_data.get_all('Classifier') or [] + + declared_license = [raw_data['License']] + [c for c in classifiers if c.startswith('License')] + license_expression = compute_normalized_license_expression(declared_license) + other_classifiers = [c for c in classifiers if not c.startswith('License')] + + holder = raw_data['Author'] + holder_contact = raw_data['Author-email'] + copyright_statement = f'Copyright (c) {holder} <{holder_contact}>' + + pkginfo_data = dict( + name=raw_data['Name'], + declared_license=declared_license, + version=raw_data['Version'], + description=raw_data['Summary'], + homepage_url=raw_data['Home-page'], + copyright=copyright_statement, + license_expression=license_expression, + holder=holder, + holder_contact=holder_contact, + keywords=raw_data['Keywords'], + classifiers=other_classifiers, + ) + + return self.update(pkginfo_data, keep_extra=True) + + def update_from_other_dist(self, dist): + """ + Update self using data from another dist + """ + return self.update(dist.get_updatable_data()) + + def get_updatable_data(self, data=None): + data = data or self.to_dict() + return { + k: v for k, v in data.items() + if v and k in self.updatable_fields + } + + def update(self, data, overwrite=False, keep_extra=True): + """ + Update self with a mapping of `data`. Keep unknown data as extra_data if + `keep_extra` is True. If `overwrite` is True, overwrite self with `data` + Return True if any data was updated, False otherwise. Raise an exception + if there are key data conflicts. + """ + package_url = data.get('package_url') + if package_url: + purl_from_data = packageurl.PackageURL.from_string(package_url) + purl_from_self = packageurl.PackageURL.from_string(self.package_url) + if purl_from_data != purl_from_self: + print( + f'Invalid dist update attempt, no same same purl with dist: ' + f'{self} using data {data}.') + return + + data.pop('about_resource', None) + dl = data.pop('download_url', None) + if dl: + data['path_or_url'] = dl + + updated = False + extra = {} + for k, v in data.items(): + if isinstance(v, str): + v = v.strip() + if not v: + continue + + if hasattr(self, k): + value = getattr(self, k, None) + if not value or (overwrite and value != v): + try: + setattr(self, k, v) + except Exception as e: + raise Exception(f'{self}, {k}, {v}') from e + updated = True + + elif keep_extra: + # note that we always overwrite extra + extra[k] = v + updated = True + + self.extra_data.update(extra) + + return updated + + +class InvalidDistributionFilename(Exception): + pass + + +@attr.attributes +class Sdist(Distribution): + + extension = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='File extension, including leading dot.'), + ) + + @classmethod + def from_filename(cls, filename): + """ + Return a Sdist object built from a filename. 
+        Raise an exception if this is not a valid sdist filename.
+        """
+        name_ver = None
+        extension = None
+
+        for ext in EXTENSIONS_SDIST:
+            if filename.endswith(ext):
+                name_ver, extension, _ = filename.rpartition(ext)
+                break
+
+        if not extension or not name_ver:
+            raise InvalidDistributionFilename(filename)
+
+        name, _, version = name_ver.rpartition('-')
+
+        if not name or not version:
+            raise InvalidDistributionFilename(filename)
+
+        return cls(
+            type='pypi',
+            name=name,
+            version=version,
+            extension=extension,
+            filename=filename,
+        )
+
+    def to_filename(self):
+        """
+        Return an sdist filename reconstructed from its fields (that may not be
+        the same as the original filename.)
+        """
+        # note: self.extension already contains the leading dot
+        return f'{self.name}-{self.version}{self.extension}'
+
+
+@attr.attributes
+class Wheel(Distribution):
+
+    """
+    Represents a wheel file.
+
+    Copied and heavily modified from pip-20.3.1
+    pip/_internal/models/wheel.py
+
+    name: pip compatibility tags
+    version: 20.3.1
+    download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/models/wheel.py
+    copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+    license_expression: mit
+    notes: copied from pip-20.3.1 pip/_internal/models/wheel.py
+
+    Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+    """
+
+    get_wheel_from_filename = re.compile(
+        r"""^(?P<namever>(?P<name>.+?)-(?P<ver>.*?))
+        ((-(?P<build>\d[^-]*?))?-(?P<pyvers>.+?)-(?P<abis>.+?)-(?P<plats>.+?)
+        \.whl)$""",
+        re.VERBOSE
+    ).match
+
+    build = attr.ib(
+        type=str,
+        default='',
+        metadata=dict(help='Python wheel build.'),
+    )
+
+    python_versions = attr.ib(
+        type=list,
+        default=attr.Factory(list),
+        metadata=dict(help='List of wheel Python version tags.'),
+    )
+
+    abis = attr.ib(
+        type=list,
+        default=attr.Factory(list),
+        metadata=dict(help='List of wheel ABI tags.'),
+    )
+
+    platforms = attr.ib(
+        type=list,
+        default=attr.Factory(list),
+        metadata=dict(help='List of wheel platform tags.'),
+    )
+
+    tags = attr.ib(
+        repr=False,
+        type=set,
+        default=attr.Factory(set),
+        metadata=dict(help='Set of all tags for this wheel.'),
+    )
+
+    @classmethod
+    def from_filename(cls, filename):
+        """
+        Return a wheel object built from a filename.
+        Raise an exception if this is not a valid wheel filename.
+        """
+        wheel_info = cls.get_wheel_from_filename(filename)
+        if not wheel_info:
+            raise InvalidDistributionFilename(filename)
+
+        name = wheel_info.group('name').replace('_', '-')
+        # we'll assume "_" means "-" due to wheel naming scheme
+        # (https://github.com/pypa/pip/issues/1150)
+        version = wheel_info.group('ver').replace('_', '-')
+        build = wheel_info.group('build')
+        python_versions = wheel_info.group('pyvers').split('.')
+        abis = wheel_info.group('abis').split('.')
+        platforms = wheel_info.group('plats').split('.')
+
+        # All the tag combinations from this file
+        tags = {
+            packaging_tags.Tag(x, y, z) for x in python_versions
+            for y in abis for z in platforms
+        }
+
+        return cls(
+            filename=filename,
+            type='pypi',
+            name=name,
+            version=version,
+            build=build,
+            python_versions=python_versions,
+            abis=abis,
+            platforms=platforms,
+            tags=tags,
+        )
+
+    def is_supported_by_tags(self, tags):
+        """
+        Return True if this wheel is compatible with one of a list of PEP 425 tags.
+        """
+        return not self.tags.isdisjoint(tags)
+
+    def is_supported_by_environment(self, environment):
+        """
+        Return True if this wheel is compatible with the Environment
+        `environment`.
+        """
+        return self.is_supported_by_tags(environment.tags())
+
+    def to_filename(self):
+        """
+        Return a wheel filename reconstructed from its fields (that may not be
+        the same as the original filename.)
+        """
+        build = f'-{self.build}' if self.build else ''
+        pyvers = '.'.join(self.python_versions)
+        abis = '.'.join(self.abis)
+        plats = '.'.join(self.platforms)
+        return f'{self.name}-{self.version}{build}-{pyvers}-{abis}-{plats}.whl'
+
+    def is_pure(self):
+        """
+        Return True if this wheel is a "pure" wheel e.g. a wheel that runs
+        on all Python 3 versions and all OSes.
+
+        For example::
+
+        >>> Wheel.from_filename('aboutcode_toolkit-5.1.0-py2.py3-none-any.whl').is_pure()
+        True
+        >>> Wheel.from_filename('beautifulsoup4-4.7.1-py3-none-any.whl').is_pure()
+        True
+        >>> Wheel.from_filename('beautifulsoup4-4.7.1-py2-none-any.whl').is_pure()
+        False
+        >>> Wheel.from_filename('bitarray-0.8.1-cp36-cp36m-win_amd64.whl').is_pure()
+        False
+        >>> Wheel.from_filename('extractcode_7z-16.5-py2.py3-none-macosx_10_13_intel.whl').is_pure()
+        False
+        >>> Wheel.from_filename('future-0.16.0-cp36-none-any.whl').is_pure()
+        False
+        >>> Wheel.from_filename('foo-4.7.1-py3-none-macosx_10_13_intel.whl').is_pure()
+        False
+        >>> Wheel.from_filename('future-0.16.0-py3-cp36m-any.whl').is_pure()
+        False
+        """
+        return (
+            'py3' in self.python_versions
+            and 'none' in self.abis
+            and 'any' in self.platforms
+        )
+
+
+def is_pure_wheel(filename):
+    try:
+        return Wheel.from_filename(filename).is_pure()
+    except Exception:
+        return False
+
+
+@attr.attributes
+class PypiPackage(NameVer):
+    """
+    A Python package with its "distributions", e.g. wheels and source
+    distributions, and its ABOUT files and licenses or notices.
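+
+    For example, a package typically combines one Sdist and several Wheels.
+    This is an illustrative sketch only (nothing is fetched)::
+
+        sd = Sdist(name='bitarray', version='0.8.1')
+        w = Wheel.from_filename('bitarray-0.8.1-cp36-cp36m-linux_x86_64.whl')
+        package = PypiPackage.package_from_dists(dists=[w, sd])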
+ """ + sdist = attr.ib( + repr=False, + type=str, + default='', + metadata=dict(help='Sdist source distribution for this package.'), + ) + + wheels = attr.ib( + repr=False, + type=list, + default=attr.Factory(list), + metadata=dict(help='List of Wheel for this package'), + ) + + @property + def specifier(self): + """ + A requirement specifier for this package + """ + if self.version: + return f'{self.name}=={self.version}' + else: + return self.name + + @property + def specifier_with_hashes(self): + """ + Return a requirement specifier for this package with --hash options for + all its distributions + """ + items = [self.specifier] + items += [d.get_pip_hashes() for d in self.get_distributions()] + return ' \\\n '.join(items) + + def get_supported_wheels(self, environment): + """ + Yield all the Wheel of this package supported and compatible with the + Environment `environment`. + """ + envt_tags = environment.tags() + for wheel in self.wheels: + if wheel.is_supported_by_tags(envt_tags): + yield wheel + + @classmethod + def package_from_dists(cls, dists): + """ + Return a new PypiPackage built from an iterable of Wheels and Sdist + objects all for the same package name and version. + + For example: + >>> w1 = Wheel(name='bitarray', version='0.8.1', build='', + ... python_versions=['cp36'], abis=['cp36m'], + ... platforms=['linux_x86_64']) + >>> w2 = Wheel(name='bitarray', version='0.8.1', build='', + ... python_versions=['cp36'], abis=['cp36m'], + ... platforms=['macosx_10_9_x86_64', 'macosx_10_10_x86_64']) + >>> sd = Sdist(name='bitarray', version='0.8.1') + >>> package = PypiPackage.package_from_dists(dists=[w1, w2, sd]) + >>> assert package.name == 'bitarray' + >>> assert package.version == '0.8.1' + >>> assert package.sdist == sd + >>> assert package.wheels == [w1, w2] + """ + dists = list(dists) + if not dists: + return + + reference_dist = dists[0] + normalized_name = reference_dist.normalized_name + version = reference_dist.version + + package = PypiPackage(name=normalized_name, version=version) + + for dist in dists: + if dist.normalized_name != normalized_name or dist.version != version: + if TRACE: + print( + f' Skipping inconsistent dist name and version: {dist} ' + f'Expected instead package name: {normalized_name} and version: "{version}"' + ) + continue + + if isinstance(dist, Sdist): + package.sdist = dist + + elif isinstance(dist, Wheel): + package.wheels.append(dist) + + else: + raise Exception(f'Unknown distribution type: {dist}') + + return package + + @classmethod + def packages_from_one_path_or_url(cls, path_or_url): + """ + Yield PypiPackages built from files found in at directory path or the + URL to an HTML page (that will be fetched). + """ + extracted_paths_or_urls = get_paths_or_urls(path_or_url) + return cls.packages_from_many_paths_or_urls(extracted_paths_or_urls) + + @classmethod + def packages_from_many_paths_or_urls(cls, paths_or_urls): + """ + Yield PypiPackages built from a list of paths or URLs. + """ + dists = cls.get_dists(paths_or_urls) + dists = NameVer.sorted(dists) + + for _projver, dists_of_package in itertools.groupby( + dists, key=NameVer.sortable_name_version, + ): + yield PypiPackage.package_from_dists(dists_of_package) + + @classmethod + def get_versions_from_path_or_url(cls, name, path_or_url): + """ + Return a subset list from a list of PypiPackages version at `path_or_url` + that match PypiPackage `name`. 
+ """ + packages = cls.packages_from_one_path_or_url(path_or_url) + return cls.get_versions(name, packages) + + @classmethod + def get_versions(cls, name, packages): + """ + Return a subset list of package versions from a list of `packages` that + match PypiPackage `name`. + The list is sorted by version from oldest to most recent. + """ + norm_name = NameVer.normalize_name(name) + versions = [p for p in packages if p.normalized_name == norm_name] + return cls.sorted(versions) + + @classmethod + def get_latest_version(cls, name, packages): + """ + Return the latest version of PypiPackage `name` from a list of `packages`. + """ + versions = cls.get_versions(name, packages) + if not versions: + return + return versions[-1] + + @classmethod + def get_outdated_versions(cls, name, packages): + """ + Return all versions except the latest version of PypiPackage `name` from a + list of `packages`. + """ + versions = cls.get_versions(name, packages) + return versions[:-1] + + @classmethod + def get_name_version(cls, name, version, packages): + """ + Return the PypiPackage with `name` and `version` from a list of `packages` + or None if it is not found. + If `version` is None, return the latest version found. + """ + if version is None: + return cls.get_latest_version(name, packages) + + nvs = [p for p in cls.get_versions(name, packages) if p.version == version] + + if not nvs: + return + + if len(nvs) == 1: + return nvs[0] + + raise Exception(f'More than one PypiPackage with {name}=={version}') + + def fetch_wheel( + self, + environment=None, + fetched_filenames=None, + dest_dir=THIRDPARTY_DIR, + ): + """ + Download a binary wheel of this package matching the ``environment`` + Enviromnent constraints into ``dest_dir`` directory. + + Return the wheel filename if it was fetched, None otherwise. + + If the provided ``environment`` is None then the current Python + interpreter environment is used implicitly. Do not refetch wheel if + their name is in a provided ``fetched_filenames`` set. + """ + fetched_wheel_filename = None + if fetched_filenames is not None: + fetched_filenames = fetched_filenames + else: + fetched_filenames = set() + + for wheel in self.get_supported_wheels(environment): + + if wheel.filename not in fetched_filenames: + fetch_and_save_path_or_url( + filename=wheel.filename, + path_or_url=wheel.path_or_url, + dest_dir=dest_dir, + as_text=False, + ) + fetched_filenames.add(wheel.filename) + fetched_wheel_filename = wheel.filename + + # TODO: what if there is more than one? + break + + return fetched_wheel_filename + + def fetch_sdist(self, dest_dir=THIRDPARTY_DIR): + """ + Download the source distribution into `dest_dir` directory. Return the + fetched filename if it was fetched, False otherwise. + """ + if self.sdist: + assert self.sdist.filename + if TRACE: print('Fetching source for package:', self.name, self.version) + fetch_and_save_path_or_url( + filename=self.sdist.filename, + dest_dir=dest_dir, + path_or_url=self.sdist.path_or_url, + as_text=False, + ) + if TRACE: print(' --> file:', self.sdist.filename) + return self.sdist.filename + else: + print(f'Missing sdist for: {self.name}=={self.version}') + return False + + def delete_files(self, dest_dir=THIRDPARTY_DIR): + """ + Delete all PypiPackage files from `dest_dir` including wheels, sdist and + their ABOUT files. Note that we do not delete licenses since they can be + shared by several packages: therefore this would be done elsewhere in a + function that is aware of all used licenses. 
+ """ + for to_delete in self.wheels + [self.sdist]: + if not to_delete: + continue + tdfn = to_delete.filename + for deletable in [tdfn, f'{tdfn}.ABOUT', f'{tdfn}.NOTICE']: + target = os.path.join(dest_dir, deletable) + if os.path.exists(target): + print(f'Deleting outdated {target}') + fileutils.delete(target) + + @classmethod + def get_dists(cls, paths_or_urls): + """ + Return a list of Distribution given a list of + `paths_or_urls` to wheels or source distributions. + + Each Distribution receives two extra attributes: + - the path_or_url it was created from + - its filename + + For example: + >>> paths_or_urls =''' + ... /home/foo/bitarray-0.8.1-cp36-cp36m-linux_x86_64.whl + ... bitarray-0.8.1-cp36-cp36m-macosx_10_9_x86_64.macosx_10_10_x86_64.whl + ... bitarray-0.8.1-cp36-cp36m-win_amd64.whl + ... httsp://example.com/bar/bitarray-0.8.1.tar.gz + ... bitarray-0.8.1.tar.gz.ABOUT bit.LICENSE'''.split() + >>> result = list(PypiPackage.get_dists(paths_or_urls)) + >>> for r in results: + ... r.filename = '' + ... r.path_or_url = '' + >>> expected = [ + ... Wheel(name='bitarray', version='0.8.1', build='', + ... python_versions=['cp36'], abis=['cp36m'], + ... platforms=['linux_x86_64']), + ... Wheel(name='bitarray', version='0.8.1', build='', + ... python_versions=['cp36'], abis=['cp36m'], + ... platforms=['macosx_10_9_x86_64', 'macosx_10_10_x86_64']), + ... Wheel(name='bitarray', version='0.8.1', build='', + ... python_versions=['cp36'], abis=['cp36m'], + ... platforms=['win_amd64']), + ... Sdist(name='bitarray', version='0.8.1') + ... ] + >>> assert expected == result + """ + installable = [f for f in paths_or_urls if f.endswith(EXTENSIONS_INSTALLABLE)] + for path_or_url in installable: + try: + yield Distribution.from_path_or_url(path_or_url) + except InvalidDistributionFilename: + if TRACE: + print(f'Skipping invalid distribution from: {path_or_url}') + continue + + def get_distributions(self): + """ + Yield all distributions available for this PypiPackage + """ + if self.sdist: + yield self.sdist + for wheel in self.wheels: + yield wheel + + def get_url_for_filename(self, filename): + """ + Return the URL for this filename or None. + """ + for dist in self.get_distributions(): + if dist.filename == filename: + return dist.path_or_url + + +@attr.attributes +class Environment: + """ + An Environment describes a target installation environment with its + supported Python version, ABI, platform, implementation and related + attributes. We can use these to pass as `pip download` options and force + fetching only the subset of packages that match these Environment + constraints as opposed to the current running Python interpreter + constraints. + """ + + python_version = attr.ib( + type=str, + default='', + metadata=dict(help='Python version supported by this environment.'), + ) + + operating_system = attr.ib( + type=str, + default='', + metadata=dict(help='operating system supported by this environment.'), + ) + + implementation = attr.ib( + type=str, + default='cp', + metadata=dict(help='Python implementation supported by this environment.'), + ) + + abis = attr.ib( + type=list, + default=attr.Factory(list), + metadata=dict(help='List of ABI tags supported by this environment.'), + ) + + platforms = attr.ib( + type=list, + default=attr.Factory(list), + metadata=dict(help='List of platform tags supported by this environment.'), + ) + + @classmethod + def from_pyver_and_os(cls, python_version, operating_system): + if '.' 
in python_version: + python_version = ''.join(python_version.split('.')) + + return cls( + python_version=python_version, + implementation='cp', + abis=ABIS_BY_PYTHON_VERSION[python_version], + platforms=PLATFORMS_BY_OS[operating_system], + operating_system=operating_system, + ) + + def get_pip_cli_options(self): + """ + Return a list of pip command line options for this environment. + """ + options = [ + '--python-version', self.python_version, + '--implementation', self.implementation, + '--abi', self.abi, + ] + for platform in self.platforms: + options.extend(['--platform', platform]) + return options + + def tags(self): + """ + Return a set of all the PEP425 tags supported by this environment. + """ + return set(utils_pip_compatibility_tags.get_supported( + version=self.python_version or None, + impl=self.implementation or None, + platforms=self.platforms or None, + abis=self.abis or None, + )) + +################################################################################ +# +# PyPI repo and link index for package wheels and sources +# +################################################################################ + + +@attr.attributes +class Repository: + """ + A PyPI or links Repository of Python packages: wheels, sdist, ABOUT, etc. + """ + + packages_by_normalized_name = attr.ib( + type=dict, + default=attr.Factory(lambda: defaultdict(list)), + metadata=dict(help= + 'Mapping of {package name: [package objects]} available in this repo'), + ) + + packages_by_normalized_name_version = attr.ib( + type=dict, + default=attr.Factory(dict), + metadata=dict(help= + 'Mapping of {(name, version): package object} available in this repo'), + ) + + def get_links(self, *args, **kwargs): + raise NotImplementedError() + + def get_versions(self, name): + """ + Return a list of all available PypiPackage version for this package name. + The list may be empty. + """ + raise NotImplementedError() + + def get_package(self, name, version): + """ + Return the PypiPackage with name and version or None. + """ + raise NotImplementedError() + + def get_latest_version(self, name): + """ + Return the latest PypiPackage version for this package name or None. + """ + raise NotImplementedError() + + +@attr.attributes +class LinksRepository(Repository): + """ + Represents a simple links repository which is either a local directory with + Python wheels and sdist or a remote URL to an HTML with links to these. + (e.g. suitable for use with pip --find-links). 
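+
+    For example, a minimal sketch (assuming a local 'thirdparty' directory
+    with wheels and sdists)::
+
+        repo = LinksRepository(path_or_url='thirdparty')
+        package = repo.get_package('bitarray', '0.8.1')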
+ """ + path_or_url = attr.ib( + type=str, + default='', + metadata=dict(help='Package directory path or URL'), + ) + + links = attr.ib( + type=list, + default=attr.Factory(list), + metadata=dict(help='List of links available in this repo'), + ) + + def __attrs_post_init__(self): + if not self.links: + self.links = get_paths_or_urls(links_url=self.path_or_url) + if not self.packages_by_normalized_name: + for p in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=self.links): + normalized_name = p.normalized_name + self.packages_by_normalized_name[normalized_name].append(p) + self.packages_by_normalized_name_version[(normalized_name, p.version)] = p + + def get_links(self, *args, **kwargs): + return self.links or [] + + def get_versions(self, name): + name = name and NameVer.normalize_name(name) + return self.packages_by_normalized_name.get(name, []) + + def get_latest_version(self, name): + return PypiPackage.get_latest_version(name, self.get_versions(name)) + + def get_package(self, name, version): + return PypiPackage.get_name_version(name, version, self.get_versions(name)) + + +@attr.attributes +class PypiRepository(Repository): + """ + Represents the public PyPI simple index. + It is populated lazily based on requested packages names + """ + simple_url = attr.ib( + type=str, + default=PYPI_SIMPLE_URL, + metadata=dict(help='Base PyPI simple URL for this index.'), + ) + + links_by_normalized_name = attr.ib( + type=dict, + default=attr.Factory(lambda: defaultdict(list)), + metadata=dict(help='Mapping of {package name: [links]} available in this repo'), + ) + + def _fetch_links(self, name): + name = name and NameVer.normalize_name(name) + return find_pypi_links(name=name, simple_url=self.simple_url) + + def _populate_links_and_packages(self, name): + name = name and NameVer.normalize_name(name) + if name in self.links_by_normalized_name: + return + + links = self._fetch_links(name) + self.links_by_normalized_name[name] = links + + packages = list(PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links)) + self.packages_by_normalized_name[name] = packages + + for p in packages: + name = name and NameVer.normalize_name(p.name) + self.packages_by_normalized_name_version[(name, p.version)] = p + + def get_links(self, name, *args, **kwargs): + name = name and NameVer.normalize_name(name) + self._populate_links_and_packages(name) + return self.links_by_normalized_name.get(name, []) + + def get_versions(self, name): + name = name and NameVer.normalize_name(name) + self._populate_links_and_packages(name) + return self.packages_by_normalized_name.get(name, []) + + def get_latest_version(self, name): + return PypiPackage.get_latest_version(name, self.get_versions(name)) + + def get_package(self, name, version): + return PypiPackage.get_name_version(name, version, self.get_versions(name)) + +################################################################################ +# Globals for remote repos to be lazily created and cached on first use for the +# life of the session together with some convenience functions. +################################################################################ + + +def get_local_packages(directory=THIRDPARTY_DIR): + """ + Return the list of all PypiPackage objects built from a local directory. Return + an empty list if the package cannot be found. 
+ """ + return list(PypiPackage.packages_from_one_path_or_url(path_or_url=directory)) + + +def get_local_repo(directory=THIRDPARTY_DIR): + return LinksRepository(path_or_url=directory) + + +_REMOTE_REPO = None + + +def get_remote_repo(remote_links_url=REMOTE_LINKS_URL): + global _REMOTE_REPO + if not _REMOTE_REPO: + _REMOTE_REPO = LinksRepository(path_or_url=remote_links_url) + return _REMOTE_REPO + + +def get_remote_package(name, version, remote_links_url=REMOTE_LINKS_URL): + """ + Return a PypiPackage or None. + """ + try: + return get_remote_repo(remote_links_url).get_package(name, version) + except RemoteNotFetchedException as e: + print(f'Failed to fetch remote package info: {e}') + + +_PYPI_REPO = None + + +def get_pypi_repo(pypi_simple_url=PYPI_SIMPLE_URL): + global _PYPI_REPO + if not _PYPI_REPO: + _PYPI_REPO = PypiRepository(simple_url=pypi_simple_url) + return _PYPI_REPO + + +def get_pypi_package(name, version, pypi_simple_url=PYPI_SIMPLE_URL): + """ + Return a PypiPackage or None. + """ + try: + return get_pypi_repo(pypi_simple_url).get_package(name, version) + except RemoteNotFetchedException as e: + print(f'Failed to fetch remote package info: {e}') + +################################################################################ +# +# Basic file and URL-based operations using a persistent file-based Cache +# +################################################################################ + + +@attr.attributes +class Cache: + """ + A simple file-based cache based only on a filename presence. + This is used to avoid impolite fetching from remote locations. + """ + + directory = attr.ib(type=str, default=CACHE_THIRDPARTY_DIR) + + def __attrs_post_init__(self): + os.makedirs(self.directory, exist_ok=True) + + def clear(self): + shutil.rmtree(self.directory) + + def get(self, path_or_url, as_text=True): + """ + Get a file from a `path_or_url` through the cache. + `path_or_url` can be a path or a URL to a file. + """ + filename = os.path.basename(path_or_url.strip('/')) + cached = os.path.join(self.directory, filename) + + if not os.path.exists(cached): + content = get_file_content(path_or_url=path_or_url, as_text=as_text) + wmode = 'w' if as_text else 'wb' + with open(cached, wmode) as fo: + fo.write(content) + return content + else: + return get_local_file_content(path=cached, as_text=as_text) + + def put(self, filename, content): + """ + Put in the cache the `content` of `filename`. + """ + cached = os.path.join(self.directory, filename) + wmode = 'wb' if isinstance(content, bytes) else 'w' + with open(cached, wmode) as fo: + fo.write(content) + + +def get_file_content(path_or_url, as_text=True): + """ + Fetch and return the content at `path_or_url` from either a local path or a + remote URL. Return the content as bytes is `as_text` is False. + """ + if (path_or_url.startswith('file://') + or (path_or_url.startswith('/') and os.path.exists(path_or_url)) + ): + return get_local_file_content(path=path_or_url, as_text=as_text) + + elif path_or_url.startswith('https://'): + if TRACE: print(f'Fetching: {path_or_url}') + _headers, content = get_remote_file_content(url=path_or_url, as_text=as_text) + return content + + else: + raise Exception(f'Unsupported URL scheme: {path_or_url}') + + +def get_local_file_content(path, as_text=True): + """ + Return the content at `url` as text. Return the content as bytes is + `as_text` is False. 
+ """ + if path.startswith('file://'): + path = path[7:] + + mode = 'r' if as_text else 'rb' + with open(path, mode) as fo: + return fo.read() + + +class RemoteNotFetchedException(Exception): + pass + + +def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, _delay=0,): + """ + Fetch and return a tuple of (headers, content) at `url`. Return content as a + text string if `as_text` is True. Otherwise return the content as bytes. + + If `header_only` is True, return only (headers, None). Headers is a mapping + of HTTP headers. + Retries multiple times to fetch if there is a HTTP 429 throttling response + and this with an increasing delay. + """ + time.sleep(_delay) + headers = headers or {} + # using a GET with stream=True ensure we get the the final header from + # several redirects and that we can ignore content there. A HEAD request may + # not get us this last header + with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: + status = response.status_code + if status != requests.codes.ok: # NOQA + if status == 429 and _delay < 20: + # too many requests: start some exponential delay + increased_delay = (_delay * 2) or 1 + + return get_remote_file_content( + url, + as_text=as_text, + headers_only=headers_only, + _delay=increased_delay, + ) + + else: + raise RemoteNotFetchedException(f'Failed HTTP request from {url} with {status}') + + if headers_only: + return response.headers, None + + return response.headers, response.text if as_text else response.content + + +def get_url_content_if_modified(url, md5, _delay=0,): + """ + Return fetched content bytes at `url` or None if the md5 has not changed. + Retries multiple times to fetch if there is a HTTP 429 throttling response + and this with an increasing delay. + """ + time.sleep(_delay) + headers = None + if md5: + etag = f'"{md5}"' + headers = {'If-None-Match': f'{etag}'} + + # using a GET with stream=True ensure we get the the final header from + # several redirects and that we can ignore content there. A HEAD request may + # not get us this last header + with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: + status = response.status_code + if status == requests.codes.too_many_requests and _delay < 20: # NOQA + # too many requests: start waiting with some exponential delay + _delay = (_delay * 2) or 1 + return get_url_content_if_modified(url=url, md5=md5, _delay=_delay) + + elif status == requests.codes.not_modified: # NOQA + # all is well, the md5 is the same + return None + + elif status != requests.codes.ok: # NOQA + raise RemoteNotFetchedException(f'Failed HTTP request from {url} with {status}') + + return response.content + + +def get_remote_headers(url): + """ + Fetch and return a mapping of HTTP headers of `url`. + """ + headers, _content = get_remote_file_content(url, headers_only=True) + return headers + + +def fetch_and_save_filename_from_paths_or_urls( + filename, + paths_or_urls, + dest_dir=THIRDPARTY_DIR, + as_text=True, +): + """ + Return the content from fetching the `filename` file name found in the + `paths_or_urls` list of URLs or paths and save to `dest_dir`. Raise an + Exception on errors. Treats the content as text if `as_text` is True + otherwise as binary. 
+ """ + path_or_url = get_link_for_filename( + filename=filename, + paths_or_urls=paths_or_urls, + ) + + return fetch_and_save_path_or_url( + filename=filename, + dest_dir=dest_dir, + path_or_url=path_or_url, + as_text=as_text, + ) + + +def fetch_content_from_path_or_url_through_cache(path_or_url, as_text=True, cache=Cache()): + """ + Return the content from fetching at path or URL. Raise an Exception on + errors. Treats the content as text if as_text is True otherwise as treat as + binary. Use the provided file cache. This is the main entry for using the + cache. + + Note: the `cache` argument is a global, though it does not really matter + since it does not hold any state which is only kept on disk. + """ + if cache: + return cache.get(path_or_url=path_or_url, as_text=as_text) + else: + return get_file_content(path_or_url=path_or_url, as_text=as_text) + + +def fetch_and_save_path_or_url(filename, dest_dir, path_or_url, as_text=True, through_cache=True): + """ + Return the content from fetching the `filename` file name at URL or path + and save to `dest_dir`. Raise an Exception on errors. Treats the content as + text if as_text is True otherwise as treat as binary. + """ + if through_cache: + content = fetch_content_from_path_or_url_through_cache(path_or_url, as_text) + else: + content = fetch_content_from_path_or_url_through_cache(path_or_url, as_text, cache=None) + + output = os.path.join(dest_dir, filename) + wmode = 'w' if as_text else 'wb' + with open(output, wmode) as fo: + fo.write(content) + return content + +################################################################################ +# +# Sync and fix local thirdparty directory for various issues and gaps +# +################################################################################ + + +def fetch_missing_sources(dest_dir=THIRDPARTY_DIR): + """ + Given a thirdparty dir, fetch missing source distributions from our remote + repo or PyPI. Return a list of (name, version) tuples for source + distribution that were not found + """ + not_found = [] + local_packages = get_local_packages(directory=dest_dir) + remote_repo = get_remote_repo() + pypi_repo = get_pypi_repo() + + for package in local_packages: + if not package.sdist: + print(f'Finding sources for: {package.name}=={package.version}: ', end='') + try: + pypi_package = pypi_repo.get_package( + name=package.name, version=package.version) + + if pypi_package and pypi_package.sdist: + print(f'Fetching sources from Pypi') + pypi_package.fetch_sdist(dest_dir=dest_dir) + continue + else: + remote_package = remote_repo.get_package( + name=package.name, version=package.version) + + if remote_package and remote_package.sdist: + print(f'Fetching sources from Remote') + remote_package.fetch_sdist(dest_dir=dest_dir) + continue + + except RemoteNotFetchedException as e: + print(f'Failed to fetch remote package info: {e}') + + print(f'No sources found') + not_found.append((package.name, package.version,)) + + return not_found + + +def fetch_missing_wheels( + python_versions=PYTHON_VERSIONS, + operating_systems=PLATFORMS_BY_OS, + dest_dir=THIRDPARTY_DIR, +): + """ + Given a thirdparty dir fetch missing wheels for all known combos of Python + versions and OS. Return a list of tuple (Package, Environment) for wheels + that were not found locally or remotely. 
+ """ + local_packages = get_local_packages(directory=dest_dir) + evts = itertools.product(python_versions, operating_systems) + environments = [Environment.from_pyver_and_os(pyv, os) for pyv, os in evts] + packages_and_envts = itertools.product(local_packages, environments) + + not_fetched = [] + fetched_filenames = set() + for package, envt in packages_and_envts: + + filename = package.fetch_wheel( + environment=envt, + fetched_filenames=fetched_filenames, + dest_dir=dest_dir, + ) + + if filename: + fetched_filenames.add(filename) + else: + not_fetched.append((package, envt,)) + + return not_fetched + + +def build_missing_wheels( + packages_and_envts, + build_remotely=False, + with_deps=False, + dest_dir=THIRDPARTY_DIR, +): + """ + Build all wheels in a list of tuple (Package, Environment) and save in + `dest_dir`. Return a list of tuple (Package, Environment), and a list of + built wheel filenames. + """ + + not_built = [] + built_filenames = [] + + packages_and_envts = itertools.groupby( + sorted(packages_and_envts), key=operator.itemgetter(0)) + + for package, pkg_envts in packages_and_envts: + + envts = [envt for _pkg, envt in pkg_envts] + python_versions = sorted(set(e.python_version for e in envts)) + operating_systems = sorted(set(e.operating_system for e in envts)) + built = None + try: + built = build_wheels( + requirements_specifier=package.specifier, + with_deps=with_deps, + build_remotely=build_remotely, + python_versions=python_versions, + operating_systems=operating_systems, + verbose=False, + dest_dir=dest_dir, + ) + print('.') + except Exception as e: + import traceback + print('#############################################################') + print('############# WHEEL BUILD FAILED ######################') + traceback.print_exc() + print() + print('#############################################################') + + if not built: + for envt in pkg_envts: + not_built.append((package, envt)) + else: + for bfn in built: + print(f' --> Built wheel: {bfn}') + built_filenames.append(bfn) + + return not_built, built_filenames + +################################################################################ +# +# Functions to handle remote or local repo used to "find-links" +# +################################################################################ + + +def get_paths_or_urls(links_url): + if links_url.startswith('https:'): + paths_or_urls = find_links_from_release_url(links_url) + else: + paths_or_urls = find_links_from_dir(links_url) + return paths_or_urls + + +def find_links_from_dir(directory=THIRDPARTY_DIR): + """ + Return a list of path to files in `directory` for any file that ends with + any of the extension in the list of `extensions` strings. + """ + base = os.path.abspath(directory) + files = [os.path.join(base, f) for f in os.listdir(base) if f.endswith(EXTENSIONS)] + return files + + +get_links = re.compile('href="([^"]+)"').findall + + +def find_links_from_release_url(links_url=REMOTE_LINKS_URL): + """ + Return a list of download link URLs found in the HTML page at `links_url` + URL that starts with the `prefix` string and ends with any of the extension + in the list of `extensions` strings. Use the `base_url` to prefix the links. 
+ """ + if TRACE: print(f'Finding links for {links_url}') + + plinks_url = urllib.parse.urlparse(links_url) + + base_url = urllib.parse.SplitResult( + plinks_url.scheme, plinks_url.netloc, '', '', '').geturl() + + if TRACE: print(f'Base URL {base_url}') + + _headers, text = get_remote_file_content(links_url) + links = [] + for link in get_links(text): + if not link.endswith(EXTENSIONS): + continue + + plink = urllib.parse.urlsplit(link) + + if plink.scheme: + # full URL kept as-is + url = link + + if plink.path.startswith('/'): + # absolute link + url = f'{base_url}{link}' + + else: + # relative link + url = f'{links_url}/{link}' + + if TRACE: print(f'Adding URL: {url}') + + links.append(url) + + if TRACE: print(f'Found {len(links)} links at {links_url}') + return links + + +def find_pypi_links(name, simple_url=PYPI_SIMPLE_URL): + """ + Return a list of download link URLs found in a PyPI simple index for package name. + with the list of `extensions` strings. Use the `simple_url` PyPI url. + """ + if TRACE: print(f'Finding links for {simple_url}') + + name = name and NameVer.normalize_name(name) + simple_url = simple_url.strip('/') + simple_url = f'{simple_url}/{name}' + + _headers, text = get_remote_file_content(simple_url) + links = get_links(text) + # TODO: keep sha256 + links = [l.partition('#sha256=') for l in links] + links = [url for url, _, _sha256 in links] + links = [l for l in links if l.endswith(EXTENSIONS)] + return links + + +def get_link_for_filename(filename, paths_or_urls): + """ + Return a link for `filename` found in the `links` list of URLs or paths. Raise an + exception if no link is found or if there are more than one link for that + file name. + """ + path_or_url = [l for l in paths_or_urls if l.endswith(f'/{filename}')] + if not path_or_url: + raise Exception(f'Missing link to file: {filename}') + if not len(path_or_url) == 1: + raise Exception(f'Multiple links to file: {filename}: \n' + '\n'.join(path_or_url)) + return path_or_url[0] + +################################################################################ +# +# Requirements processing +# +################################################################################ + + +class MissingRequirementException(Exception): + pass + + +def get_required_packages(required_name_versions): + """ + Return a tuple of (remote packages, PyPI packages) where each is a mapping + of {(name, version): PypiPackage} for packages listed in the + `required_name_versions` list of (name, version) tuples. Raise a + MissingRequirementException with a list of missing (name, version) if a + requirement cannot be satisfied remotely or in PyPI. + """ + remote_repo = get_remote_repo() + + remote_packages = {(name, version): remote_repo.get_package(name, version) + for name, version in required_name_versions} + + pypi_repo = get_pypi_repo() + pypi_packages = {(name, version): pypi_repo.get_package(name, version) + for name, version in required_name_versions} + + # remove any empty package (e.g. 
that do not exist in some place) + remote_packages = {nv: p for nv, p in remote_packages.items() if p} + pypi_packages = {nv: p for nv, p in pypi_packages.items() if p} + + # check that we are not missing any + repos_name_versions = set(remote_packages.keys()) | set(pypi_packages.keys()) + missing_name_versions = required_name_versions.difference(repos_name_versions) + if missing_name_versions: + raise MissingRequirementException(sorted(missing_name_versions)) + + return remote_packages, pypi_packages + + +def get_required_remote_packages( + requirements_file='requirements.txt', + force_pinned=True, + remote_links_url=REMOTE_LINKS_URL, +): + """ + Yield tuple of (name, version, PypiPackage) for packages listed in the + `requirements_file` requirements file and found in the PyPI-like link repo + ``remote_links_url`` if this is a URL. Treat this ``remote_links_url`` as a + local directory path to a wheels directory if this is not a a URL. + """ + required_name_versions = load_requirements( + requirements_file=requirements_file, + force_pinned=force_pinned, + ) + + if remote_links_url.startswith('https://'): + repo = get_remote_repo(remote_links_url=remote_links_url) + else: + # a local path + assert os.path.exists(remote_links_url), f'Path does not exist: {remote_links_url}' + repo = get_local_repo(directory=remote_links_url) + + for name, version in required_name_versions: + if version: + yield name, version, repo.get_package(name, version) + else: + yield name, version, repo.get_latest_version(name) + + +def update_requirements(name, version=None, requirements_file='requirements.txt'): + """ + Upgrade or add `package_name` with `new_version` to the `requirements_file` + requirements file. Write back requirements sorted with name and version + canonicalized. Note: this cannot deal with hashed or unpinned requirements. + Do nothing if the version already exists as pinned. 
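+
+    For example, a sketch assuming requirements.txt currently pins
+    bitarray==0.8.1::
+
+        update_requirements(name='bitarray', version='0.8.4')
+        # requirements.txt is rewritten and now pins bitarray==0.8.4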
+ """ + normalized_name = NameVer.normalize_name(name) + + is_updated = False + updated_name_versions = [] + for existing_name, existing_version in load_requirements(requirements_file, force_pinned=False): + + existing_normalized_name = NameVer.normalize_name(existing_name) + + if normalized_name == existing_normalized_name: + if version != existing_version: + is_updated = True + updated_name_versions.append((existing_normalized_name, existing_version,)) + + if is_updated: + updated_name_versions = sorted(updated_name_versions) + nvs = '\n'.join(f'{name}=={version}' for name, version in updated_name_versions) + + with open(requirements_file, 'w') as fo: + fo.write(nvs) + + +def hash_requirements(dest_dir=THIRDPARTY_DIR, requirements_file='requirements.txt'): + """ + Hash all the requirements found in the `requirements_file` + requirements file based on distributions available in `dest_dir` + """ + local_repo = get_local_repo(directory=dest_dir) + packages_by_normalized_name_version = local_repo.packages_by_normalized_name_version + hashed = [] + for name, version in load_requirements(requirements_file, force_pinned=True): + package = packages_by_normalized_name_version.get((name, version)) + if not package: + raise Exception(f'Missing required package {name}=={version}') + hashed.append(package.specifier_with_hashes) + + with open(requirements_file, 'w') as fo: + fo.write('\n'.join(hashed)) + +################################################################################ +# +# Functions to update or fetch ABOUT and license files +# +################################################################################ + + +def add_fetch_or_update_about_and_license_files(dest_dir=THIRDPARTY_DIR, include_remote=True): + """ + Given a thirdparty dir, add missing ABOUT. LICENSE and NOTICE files using + best efforts: + + - use existing ABOUT files + - try to load existing remote ABOUT files + - derive from existing distribution with same name and latest version that + would have such ABOUT file + - extract ABOUT file data from distributions PKGINFO or METADATA files + - TODO: make API calls to fetch package data from DejaCode + + The process consists in load and iterate on every package distributions, + collect data and then acsk to save. 
+ """ + + local_packages = get_local_packages(directory=dest_dir) + local_repo = get_local_repo(directory=dest_dir) + + remote_repo = get_remote_repo() + + def get_other_dists(_package, _dist): + """ + Return a list of all the dists from package that are not the `dist` object + """ + return [d for d in _package.get_distributions() if d != _dist] + + for local_package in local_packages: + for local_dist in local_package.get_distributions(): + local_dist.load_about_data(dest_dir=dest_dir) + local_dist.set_checksums(dest_dir=dest_dir) + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir) + continue + + # lets try to get from another dist of the same local package + for otherd in get_other_dists(local_package, local_dist): + updated = local_dist.update_from_other_dist(otherd) + if updated and local_dist.has_key_metadata(): + break + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir) + continue + + # try to get a latest version of the same package that is not our version + other_local_packages = [ + p for p in local_repo.get_versions(local_package.name) + if p.version != local_package.version + ] + + latest_local_version = other_local_packages and other_local_packages[-1] + if latest_local_version: + latest_local_dists = list(latest_local_version.get_distributions()) + for latest_local_dist in latest_local_dists: + latest_local_dist.load_about_data(dest_dir=dest_dir) + if not latest_local_dist.has_key_metadata(): + # there is not much value to get other data if we are missing the key ones + continue + else: + local_dist.update_from_other_dist(latest_local_dist) + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + break + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir) + continue + + if include_remote: + # lets try to fetch remotely + local_dist.load_remote_about_data() + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir) + continue + + # try to get a latest version of the same package that is not our version + other_remote_packages = [ + p for p in remote_repo.get_versions(local_package.name) + if p.version != local_package.version + ] + + latest_version = other_remote_packages and other_remote_packages[-1] + if latest_version: + latest_dists = list(latest_version.get_distributions()) + for remote_dist in latest_dists: + remote_dist.load_remote_about_data() + if not remote_dist.has_key_metadata(): + # there is not much value to get other data if we are missing the key ones + continue + else: + local_dist.update_from_other_dist(remote_dist) + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + break + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir) + continue + + # try to 
get data from pkginfo (no license though) + local_dist.load_pkginfo_data(dest_dir=dest_dir) + + # FIXME: save as this is the last resort for now in all cases + # if local_dist.has_key_metadata() or not local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir) + + lic_errs = local_dist.fetch_license_files(dest_dir) + + # TODO: try to get data from dejacode + + if not local_dist.has_key_metadata(): + print(f'Unable to add essential ABOUT data for: {local_dist}') + if lic_errs: + lic_errs = '\n'.join(lic_errs) + print(f'Failed to fetch some licenses:: {lic_errs}') + +################################################################################ +# +# Functions to build new Python wheels including native on multiple OSes +# +################################################################################ + + +def call(args): + """ + Call args in a subprocess and display output on the fly. + Return or raise stdout, stderr, returncode + """ + if TRACE: print('Calling:', ' '.join(args)) + with subprocess.Popen( + args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + encoding='utf-8' + ) as process: + + while True: + line = process.stdout.readline() + if not line and process.poll() is not None: + break + if TRACE: print(line.rstrip(), flush=True) + + stdout, stderr = process.communicate() + returncode = process.returncode + if returncode == 0: + return returncode, stdout, stderr + else: + raise Exception(returncode, stdout, stderr) + + +def add_or_upgrade_built_wheels( + name, + version=None, + python_versions=PYTHON_VERSIONS, + operating_systems=PLATFORMS_BY_OS, + dest_dir=THIRDPARTY_DIR, + build_remotely=False, + with_deps=False, + verbose=False, +): + """ + Add or update package `name` and `version` as a binary wheel saved in + `dest_dir`. Use the latest version if `version` is None. Return the a list + of the collected, fetched or built wheel file names or an empty list. + + Use the provided lists of `python_versions` (e.g. "36", "39") and + `operating_systems` (e.g. linux, windows or macos) to decide which specific + wheel to fetch or build. + + Include wheels for all dependencies if `with_deps` is True. + Build remotely is `build_remotely` is True. 
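+
+    For example (an illustrative sketch; the version and combos are assumptions)::
+
+        filenames = add_or_upgrade_built_wheels(
+            name='bitarray',
+            version='0.8.1',
+            python_versions=['36'],
+            operating_systems=['linux'],
+        )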
+ """ + assert name, 'Name is required' + ver = version and f'=={version}' or '' + print(f'\nAdding wheels for package: {name}{ver}') + + wheel_filenames = [] + # a mapping of {req specifier: {mapping build_wheels kwargs}} + wheels_to_build = {} + for python_version, operating_system in itertools.product(python_versions, operating_systems): + print(f' Adding wheels for package: {name}{ver} on {python_version,} and {operating_system}') + environment = Environment.from_pyver_and_os(python_version, operating_system) + + # Check if requested wheel already exists locally for this version + local_repo = get_local_repo(directory=dest_dir) + local_package = local_repo.get_package(name=name, version=version) + + has_local_wheel = False + if version and local_package: + for wheel in local_package.get_supported_wheels(environment): + has_local_wheel = True + wheel_filenames.append(wheel.filename) + break + if has_local_wheel: + print(f' local wheel exists: {wheel.filename}') + continue + + if not version: + pypi_package = get_pypi_repo().get_latest_version(name) + version = pypi_package.version + + # Check if requested wheel already exists remotely or in Pypi for this version + wheel_filename = fetch_package_wheel( + name=name, version=version, environment=environment, dest_dir=dest_dir) + if wheel_filename: + wheel_filenames.append(wheel_filename) + + # the wheel is not available locally, remotely or in Pypi + # we need to build binary from sources + requirements_specifier = f'{name}=={version}' + to_build = wheels_to_build.get(requirements_specifier) + if to_build: + to_build['python_versions'].append(python_version) + to_build['operating_systems'].append(operating_system) + else: + wheels_to_build[requirements_specifier] = dict( + requirements_specifier=requirements_specifier, + python_versions=[python_version], + operating_systems=[operating_system], + dest_dir=dest_dir, + build_remotely=build_remotely, + with_deps=with_deps, + verbose=verbose, + ) + + for build_wheels_kwargs in wheels_to_build.values(): + bwheel_filenames = build_wheels(**build_wheels_kwargs) + wheel_filenames.extend(bwheel_filenames) + + return sorted(set(wheel_filenames)) + + +def build_wheels( + requirements_specifier, + python_versions=PYTHON_VERSIONS, + operating_systems=PLATFORMS_BY_OS, + dest_dir=THIRDPARTY_DIR, + build_remotely=False, + with_deps=False, + verbose=False, +): + """ + Given a pip `requirements_specifier` string (such as package names or as + name==version), build the corresponding binary wheel(s) for all + `python_versions` and `operating_systems` combinations and save them + back in `dest_dir` and return a list of built wheel file names. + + Include wheels for all dependencies if `with_deps` is True. + + First try to build locally to process pure Python wheels, and fall back to + build remotey on all requested Pythons and operating systems. 
+ """ + all_pure, builds = build_wheels_locally_if_pure_python( + requirements_specifier=requirements_specifier, + with_deps=with_deps, + verbose=verbose, + dest_dir=dest_dir, + ) + for local_build in builds: + print(f'Built wheel: {local_build}') + + if all_pure: + return builds + + if build_remotely: + remote_builds = build_wheels_remotely_on_multiple_platforms( + requirements_specifier=requirements_specifier, + with_deps=with_deps, + python_versions=python_versions, + operating_systems=operating_systems, + verbose=verbose, + dest_dir=dest_dir, + ) + builds.extend(remote_builds) + + return builds + + +def build_wheels_remotely_on_multiple_platforms( + requirements_specifier, + with_deps=False, + python_versions=PYTHON_VERSIONS, + operating_systems=PLATFORMS_BY_OS, + verbose=False, + dest_dir=THIRDPARTY_DIR, +): + """ + Given pip `requirements_specifier` string (such as package names or as + name==version), build the corresponding binary wheel(s) including wheels for + all dependencies for all `python_versions` and `operating_systems` + combinations and save them back in `dest_dir` and return a list of built + wheel file names. + """ + check_romp_is_configured() + pyos_options = get_romp_pyos_options(python_versions, operating_systems) + deps = '' if with_deps else '--no-deps' + verbose = '--verbose' if verbose else '' + + romp_args = ([ + 'romp', + '--interpreter', 'cpython', + '--architecture', 'x86_64', + '--check-period', '5', # in seconds + + ] + pyos_options + [ + + '--artifact-paths', '*.whl', + '--artifact', 'artifacts.tar.gz', + '--command', + # create a virtualenv, upgrade pip +# f'python -m ensurepip --user --upgrade; ' + f'python -m pip {verbose} install --user --upgrade pip setuptools wheel; ' + f'python -m pip {verbose} wheel {deps} {requirements_specifier}', + ]) + + if verbose: + romp_args.append('--verbose') + + print(f'Building wheels for: {requirements_specifier}') + print(f'Using command:', ' '.join(romp_args)) + call(romp_args) + + wheel_filenames = extract_tar('artifacts.tar.gz', dest_dir) + for wfn in wheel_filenames: + print(f' built wheel: {wfn}') + return wheel_filenames + + +def get_romp_pyos_options( + python_versions=PYTHON_VERSIONS, + operating_systems=PLATFORMS_BY_OS, +): + """ + Return a list of CLI options for romp + For example: + >>> expected = ['--version', '3.6', '--version', '3.7', '--version', '3.8', + ... '--version', '3.9', '--platform', 'linux', '--platform', 'macos', + ... 
+    >>> assert get_romp_pyos_options() == expected
+    """
+    python_dot_versions = ['.'.join(pv) for pv in sorted(set(python_versions))]
+    pyos_options = list(itertools.chain.from_iterable(
+        ('--version', ver) for ver in python_dot_versions))
+
+    pyos_options += list(itertools.chain.from_iterable(
+        ('--platform', plat) for plat in sorted(set(operating_systems))))
+
+    return pyos_options
+
+
+def check_romp_is_configured():
+    # these environment variables must be set before running a remote build
+    has_envt = (
+        os.environ.get('ROMP_BUILD_REQUEST_URL') and
+        os.environ.get('ROMP_DEFINITION_ID') and
+        os.environ.get('ROMP_PERSONAL_ACCESS_TOKEN') and
+        os.environ.get('ROMP_USERNAME')
+    )
+
+    if not has_envt:
+        raise Exception(
+            'ROMP_BUILD_REQUEST_URL, ROMP_DEFINITION_ID, '
+            'ROMP_PERSONAL_ACCESS_TOKEN and ROMP_USERNAME '
+            'are required environment variables.')
+
+
+def build_wheels_locally_if_pure_python(
+    requirements_specifier,
+    with_deps=False,
+    verbose=False,
+    dest_dir=THIRDPARTY_DIR,
+):
+    """
+    Given a pip `requirements_specifier` string (such as package names or as
+    name==version), build the corresponding binary wheel(s) locally.
+
+    If all these are "pure" Python wheels that run on all Python 3 versions and
+    operating systems, copy them back in `dest_dir` if they do not exist there.
+
+    Return a tuple of (True if all wheels are "pure", list of built wheel file names).
+    """
+    deps = [] if with_deps else ['--no-deps']
+    verbose = ['--verbose'] if verbose else []
+
+    wheel_dir = tempfile.mkdtemp(prefix='scancode-release-wheels-local-')
+    cli_args = [
+        'pip', 'wheel',
+        '--wheel-dir', wheel_dir,
+    ] + deps + verbose + [
+        requirements_specifier
+    ]
+
+    print(f'Building local wheels for: {requirements_specifier}')
+    print('Using command:', ' '.join(cli_args))
+    call(cli_args)
+
+    built = os.listdir(wheel_dir)
+    if not built:
+        # nothing was built: not all pure, and no wheels
+        return False, []
+
+    all_pure = all(is_pure_wheel(bwfn) for bwfn in built)
+
+    if not all_pure:
+        print('  Some wheels are not pure')
+
+    print('  Copying local wheels')
+    pure_built = []
+    for bwfn in built:
+        owfn = os.path.join(dest_dir, bwfn)
+        if not os.path.exists(owfn):
+            nwfn = os.path.join(wheel_dir, bwfn)
+            fileutils.copyfile(nwfn, owfn)
+        pure_built.append(bwfn)
+        print(f'    Built local wheel: {bwfn}')
+    return all_pure, pure_built
+
+
+# TODO: Use me
+def optimize_wheel(wheel_filename, dest_dir=THIRDPARTY_DIR):
+    """
+    Optimize the wheel named `wheel_filename` in `dest_dir`, such as by
+    renaming its tags for PyPI compatibility and making it smaller if
+    possible. Return the name of the new wheel if renamed or the existing
+    name otherwise.
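+
+    For example, a wheel tagged only with the linux_x86_64 platform tag (which
+    PyPI rejects) may gain extra manylinux tags from auditwheel, making it
+    acceptable for a PyPI upload.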
+    """
+    if is_pure_wheel(wheel_filename):
+        print(f'Pure wheel: {wheel_filename}, nothing to do.')
+        return wheel_filename
+
+    original_wheel_loc = os.path.join(dest_dir, wheel_filename)
+    wheel_dir = tempfile.mkdtemp(prefix='scancode-release-wheels-')
+    awargs = [
+        'auditwheel',
+        'addtag',
+        '--wheel-dir', wheel_dir,
+        original_wheel_loc
+    ]
+    call(awargs)
+
+    audited = os.listdir(wheel_dir)
+    if not audited:
+        # cannot optimize this wheel
+        return wheel_filename
+
+    assert len(audited) == 1
+    new_wheel_name = audited[0]
+
+    new_wheel_loc = os.path.join(wheel_dir, new_wheel_name)
+
+    # remove the original wheel: it is superseded by the audited wheel
+    os.remove(original_wheel_loc)
+
+    if new_wheel_name == wheel_filename:
+        os.rename(new_wheel_loc, original_wheel_loc)
+        return wheel_filename
+
+    new_wheel = Wheel.from_filename(new_wheel_name)
+    non_pypi_plats = utils_pypi_supported_tags.validate_platforms_for_pypi(new_wheel.platforms)
+    new_wheel.platforms = [p for p in new_wheel.platforms if p not in non_pypi_plats]
+    if not new_wheel.platforms:
+        print(f'Cannot make wheel PyPI compatible: {original_wheel_loc}')
+        os.rename(new_wheel_loc, original_wheel_loc)
+        return wheel_filename
+
+    new_wheel_cleaned_filename = new_wheel.to_filename()
+    new_wheel_cleaned_loc = os.path.join(dest_dir, new_wheel_cleaned_filename)
+    os.rename(new_wheel_loc, new_wheel_cleaned_loc)
+    return new_wheel_cleaned_filename
+
+
+def extract_tar(location, dest_dir=THIRDPARTY_DIR):
+    """
+    Extract a tar archive at `location` in the `dest_dir` directory. Return a
+    list of extracted locations (either directories or files).
+    """
+    with open(location, 'rb') as fi:
+        with tarfile.open(fileobj=fi) as tar:
+            members = list(tar.getmembers())
+            tar.extractall(dest_dir, members=members)
+
+    return [os.path.basename(ti.name) for ti in members
+            if ti.type == tarfile.REGTYPE]
+
+
+def fetch_package_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR):
+    """
+    Fetch the binary wheel for package `name` and `version` and save it in
+    `dest_dir`. Use the provided `environment` Environment to determine which
+    specific wheel to fetch.
+
+    Return the fetched wheel file name on success or None if it was not fetched.
+    Try fetching from our own remote repo first, then from PyPI.
+    """
+    wheel_filename = None
+    remote_package = get_remote_package(name=name, version=version)
+    if remote_package:
+        wheel_filename = remote_package.fetch_wheel(
+            environment=environment, dest_dir=dest_dir)
+    if wheel_filename:
+        return wheel_filename
+
+    pypi_package = get_pypi_package(name=name, version=version)
+    if pypi_package:
+        wheel_filename = pypi_package.fetch_wheel(
+            environment=environment, dest_dir=dest_dir)
+    return wheel_filename
+
+
+def check_about(dest_dir=THIRDPARTY_DIR):
+    try:
+        subprocess.check_output(f'about check {dest_dir}'.split())
+    except subprocess.CalledProcessError as cpe:
+        print()
+        print('Invalid ABOUT files:')
+        print(cpe.output.decode('utf-8', errors='replace'))
+
+
+def find_problems(
+    dest_dir=THIRDPARTY_DIR,
+    report_missing_sources=False,
+    report_missing_wheels=False,
+):
+    """
+    Print the problems found in `dest_dir`.
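+    When requested, report packages that are missing a source distribution or
+    wheels. Also check each distribution ABOUT file for missing key metadata,
+    dangling classifiers data, and invalid or missing checksums.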
+    """
+
+    local_packages = get_local_packages(directory=dest_dir)
+
+    for package in local_packages:
+        if report_missing_sources and not package.sdist:
+            print(f'{package.name}=={package.version}: Missing source distribution.')
+        if report_missing_wheels and not package.wheels:
+            print(f'{package.name}=={package.version}: Missing wheels.')
+
+        for dist in package.get_distributions():
+            dist.load_about_data(dest_dir=dest_dir)
+            abpth = os.path.abspath(os.path.join(dest_dir, dist.about_filename))
+            if not dist.has_key_metadata():
+                print(f'  Missing key ABOUT data in file://{abpth}')
+            if 'classifiers' in dist.extra_data:
+                print(f'  Dangling classifiers data in file://{abpth}')
+            if not dist.validate_checksums(dest_dir):
+                print(f'  Invalid checksums in file://{abpth}')
+            if not (dist.sha1 and dist.md5):
+                print(f'  Missing checksums in file://{abpth}')
+
+    check_about(dest_dir=dest_dir)
+
+
+def compute_normalized_license_expression(declared_licenses):
+    if not declared_licenses:
+        return
+    try:
+        from packagedcode import pypi
+        return pypi.compute_normalized_license(declared_licenses)
+    except ImportError:
+        # ScanCode is not installed: clean and join all the licenses
+        lics = [python_safe_name(lic).lower() for lic in declared_licenses]
+        return ' AND '.join(lics)
diff --git a/etc/scripts/utils_thirdparty.py.ABOUT b/etc/scripts/utils_thirdparty.py.ABOUT
new file mode 100644
index 0000000..8480349
--- /dev/null
+++ b/etc/scripts/utils_thirdparty.py.ABOUT
@@ -0,0 +1,15 @@
+about_resource: utils_thirdparty.py
+package_url: pkg:github/pypa/pip@20.3.1#src/pip/_internal/models/wheel.py
+type: github
+namespace: pypa
+name: pip
+version: 20.3.1
+subpath: src/pip/_internal/models/wheel.py
+
+download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/models/wheel.py
+copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file)
+license_expression: mit
+notes: copied from pip-20.3.1 pip/_internal/models/wheel.py
+ The models code has been heavily inspired by the ISC-licensed packaging-dists
+ https://github.com/uranusjr/packaging-dists by Tzu-ping Chung
+ 
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 852f0fc..1e10f32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,10 +33,11 @@ norecursedirs = [
     "Scripts",
     "thirdparty",
     "tmp",
+    "venv",
     "tests/data",
     ".eggs"
 ]
- 
+
 python_files = "*.py"
 
 python_classes = "Test"
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..1cdef67
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,24 @@
+bleach==4.1.0
+colorama==0.4.4
+docutils==0.17.1
+execnet==1.9.0
+iniconfig==1.1.1
+jeepney==0.7.1
+keyring==23.2.1
+packaging==21.0
+pkginfo==1.7.1
+py==1.10.0
+pygments==2.10.0
+pyparsing==2.4.7
+pytest==6.2.5
+pytest-forked==1.3.0
+pytest-xdist==2.4.0
+readme-renderer==30.0
+requests-toolbelt==0.9.1
+restview==2.9.2
+rfc3986==1.5.0
+secretstorage==3.3.1
+toml==0.10.2
+tqdm==4.62.3
+twine==3.4.2
+webencodings==0.5.1
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..896a07c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,38 @@
+attrs==21.2.0
+beautifulsoup4==4.10.0
+binaryornot==0.4.4
+certifi==2021.10.8
+cffi==1.14.6
+chardet==4.0.0
+charset-normalizer==2.0.7
+click==8.0.3
+commoncode==30.0.0
+cryptography==35.0.0
+dockerfile-parse==1.2.0
+extractcode==30.0.0
+extractcode-7z==16.5.210531
+extractcode-libarchive==3.5.1.210531
+idna==3.3
+importlib-metadata==4.8.1
+intbitset==2.4.1
+patch==1.16
+pdfminer.six==20201018 +pip==21.2.4 +pluggy==1.0.0 +plugincode==21.1.21 +pycparser==2.20 +PyYAML==6.0 +requests==2.26.0 +saneyaml==0.5.2 +setuptools==58.1.0 +six==1.16.0 +sortedcontainers==2.4.0 +soupsieve==2.2.1 +text-unidecode==1.3 +typecode==21.6.1 +typecode-libmagic==5.39.210531 +typing==3.6.6 +typing-extensions==3.10.0.2 +urllib3==1.26.7 +wheel==0.37.0 +zipp==3.6.0 diff --git a/src/container_inspector/cli.py b/src/container_inspector/cli.py index 56e2c5a..6a162b5 100755 --- a/src/container_inspector/cli.py +++ b/src/container_inspector/cli.py @@ -95,17 +95,20 @@ def container_inspector(image_path, extract_to=None, csv=False): click.echo(results) -def _container_inspector(image_path, extract_to=None, csv=False): +def _container_inspector(image_path, extract_to=None, csv=False, _layer_path_segments=2): images = get_images_from_dir_or_tarball(image_path, extract_to=extract_to) as_json = not csv if as_json: - images = [i.to_dict() for i in images] + images = [i.to_dict(layer_path_segments=_layer_path_segments) for i in images] return json_module.dumps(images, indent=2) else: from io import StringIO output = StringIO() - flat = list(image.flatten_images_data(images)) + flat = list(image.flatten_images_data( + images=images, + layer_path_segments=_layer_path_segments + )) if not flat: return keys = flat[0].keys() diff --git a/src/container_inspector/distro.py b/src/container_inspector/distro.py index a5e0ee9..1420fe8 100755 --- a/src/container_inspector/distro.py +++ b/src/container_inspector/distro.py @@ -18,8 +18,6 @@ TRACE = False logger = logging.getLogger(__name__) - - if TRACE: import sys logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) diff --git a/src/container_inspector/dockerfile.py b/src/container_inspector/dockerfile.py index 7b6a851..ee61ca6 100755 --- a/src/container_inspector/dockerfile.py +++ b/src/container_inspector/dockerfile.py @@ -180,7 +180,6 @@ def clean_created_by(created_by): """ # True if the command is a no-op and has no effect on the layer root fs (e.g # label, comment, authior, etc) - is_noop = False if isinstance(created_by, (list, tuple)): # this is a structure, pre-parsed command as found in a layer "json" file # we strip the prefix diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index d5e3d47..be966c3 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -29,8 +29,8 @@ logger.setLevel(logging.DEBUG) """ -Objects to handle Docker and OCI images and Layers. - +This module contains objects and utilities to handle Docker and OCI images and +Layers. Supported formats: - docker v1.1 and v1.2 @@ -61,32 +61,23 @@ """ -class ToDictMixin(object): - """ - A mixin to add an to_dict() method to an attr-based class. - """ - - def to_dict(self, exclude_fields=()): - if exclude_fields: - filt = lambda attr, value: attr.name not in exclude_fields - else: - filt = lambda attr, value: True - return attr.asdict(self, filter=filt) - - -def flatten_images_data(images): +def flatten_images_data(images, layer_path_segments=0, _test=False): """ Yield mapping for each layer of each image of an `images` list of Image. This is a flat data structure for CSV and tabular output. 
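+    Each yielded mapping combines the image-level fields (id, tags and
+    locations) with the fields of a single layer.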
+    Keep only ``layer_path_segments`` trailing layer location segments (or keep
+    the locations unmodified if ``layer_path_segments`` is 0).
     """
     for img in images:
+        img_extracted_location = img.extracted_location
         base_data = dict(
-            image_extracted_location=img.extracted_location,
-            image_archive_location=img.archive_location,
+            image_extracted_location='' if _test else img_extracted_location,
+            image_archive_location='' if _test else img.archive_location,
             image_id=img.image_id,
             image_tags=','.join(img.tags),
         )
+
         for layer in img.layers:
             layer_data = dict(base_data)
             layer_data['is_empty_layer'] = layer.is_empty_layer
@@ -96,11 +87,48 @@ def flatten_images_data(images):
             layer_data['created_by'] = layer.created_by
             layer_data['created'] = layer.created
             layer_data['comment'] = layer.comment
-            layer_data['layer_extracted_location'] = layer.extracted_location
-            layer_data['layer_archive_location'] = layer.archive_location
+
+            lay_extracted_location = layer.extracted_location
+            lay_archive_location = layer.archive_location
+
+            if layer_path_segments:
+                lay_extracted_location = get_trimmed_path(
+                    location=lay_extracted_location,
+                    num_segments=layer_path_segments,
+                )
+                lay_archive_location = get_trimmed_path(
+                    location=lay_archive_location,
+                    num_segments=layer_path_segments,
+                )
+
+            layer_data['layer_archive_location'] = lay_archive_location
+            layer_data['layer_extracted_location'] = lay_extracted_location
             yield layer_data
+
+def get_trimmed_path(location, num_segments=2):
+    """
+    Return a trimmed relative path for `location`, keeping only the
+    ``num_segments`` trailing path segments.
+
+    For example::
+    >>> assert get_trimmed_path(None) is None
+    >>> assert get_trimmed_path('a/b/c') == 'b/c'
+    >>> assert get_trimmed_path('/b/c') == 'b/c'
+    >>> assert get_trimmed_path('b/c') == 'b/c'
+    >>> assert get_trimmed_path('b/c/') == 'b/c/'
+    >>> assert get_trimmed_path('/x/a/b/c/', 3) == 'a/b/c/'
+    >>> assert get_trimmed_path('/x/a/b/c', 3) == 'a/b/c'
+    """
+    if location:
+        ends = location.endswith('/')
+        segments = location.strip('/').split('/')[-num_segments:]
+        relative = '/'.join(segments)
+        if ends:
+            relative += '/'
+        return relative
+
+
 @attr.attributes
 class ConfigMixin(object):
     """
@@ -197,7 +225,7 @@ class ArchiveMixin:
     archive_location = attr.attrib(
         default=None,
         metadata=dict(doc=
-            'Absolute directory location of this Archive original archive.'
+            'Absolute location of this Archive original file. '
            'May be empty if this was created from an extracted_location directory.'
        )
    )
@@ -217,7 +245,7 @@ def set_sha256(self):
 @attr.attributes
-class Image(ArchiveMixin, ConfigMixin, ToDictMixin):
+class Image(ArchiveMixin, ConfigMixin):
     """
     A container image with pointers to its layers.
     Image objects can be created from these inputs:
@@ -284,6 +312,36 @@ def __attrs_post_init__(self, *args, **kwargs):
         if not self.image_format:
             self.image_format = self.find_format(self.extracted_location)
+
+    def to_dict(self, layer_path_segments=0, _test=False):
+        """
+        Return a mapping of this image's fields as a dictionary.
+        Keep only ``layer_path_segments`` trailing layer location segments (or
+        keep the locations unmodified if ``layer_path_segments`` is 0).
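+
+        For example, with ``layer_path_segments=2``, a hypothetical layer
+        archive location such as ``/tmp/extract/1f9928556cf4/layer.tar``
+        would be reported as ``1f9928556cf4/layer.tar``.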
+ """ + image = attr.asdict(self) + + if layer_path_segments: + for layer in image.get('layers', []): + layer['extracted_location'] = get_trimmed_path( + location=layer.get('extracted_location'), + num_segments=layer_path_segments, + ) + + layer['archive_location'] = get_trimmed_path( + location=layer.get('archive_location'), + num_segments=layer_path_segments, + ) + + if _test: + image['extracted_location'] = '' + img_archive_location = self.archive_location + image['archive_location'] = ( + img_archive_location + and os.path.basename(img_archive_location) + or '' + ) + return image + @property def top_layer(self): """ @@ -416,7 +474,11 @@ def get_images_from_tarball( If `verify` is True, perform extra checks on the config data and layers checksums. """ - if TRACE: logger.debug(f'get_images_from_tarball: {archive_location} , extracting to: {extracted_location}') + if TRACE: + logger.debug( + f'get_images_from_tarball: {archive_location} , ' + f'extracting to: {extracted_location}' + ) Image.extract( archive_location=archive_location, @@ -442,7 +504,11 @@ def get_images_from_dir( If `verify` is True, perform extra checks on the config data and layers checksums. """ - if TRACE: logger.debug(f'get_images_from_dir: from {extracted_location} and archive_location: {archive_location}') + if TRACE: + logger.debug( + f'get_images_from_dir: from {extracted_location} and ' + f'archive_location: {archive_location}', + ) if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') @@ -507,7 +573,8 @@ def get_docker_images_from_dir( .... ] """ - if TRACE: logger.debug(f'get_docker_images_from_dir: {extracted_location}') + if TRACE: + logger.debug(f'get_docker_images_from_dir: {extracted_location}') if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') @@ -520,11 +587,13 @@ def get_docker_images_from_dir( manifest = load_json(manifest_loc) - if TRACE: logger.debug(f'get_docker_images_from_dir: manifest: {manifest}') + if TRACE: + logger.debug(f'get_docker_images_from_dir: manifest: {manifest}') images = [] for manifest_config in manifest: - if TRACE: logger.debug(f'get_docker_images_from_dir: manifest_config: {manifest_config}') + if TRACE: + logger.debug(f'get_docker_images_from_dir: manifest_config: {manifest_config}') img = Image.from_docker_manifest_config( extracted_location=extracted_location, archive_location=archive_location, @@ -914,7 +983,7 @@ def assign_history_to_layers(history, layers): @attr.attributes -class Resource(ToDictMixin): +class Resource: path = attr.attrib( default=None, metadata=dict(doc='Rootfs-relative path for this Resource.') @@ -942,9 +1011,12 @@ class Resource(ToDictMixin): metadata=dict(doc='True for symlink.') ) + def to_dict(self, **kwargs): + return attr.asdict(self) + @attr.attributes -class Layer(ArchiveMixin, ConfigMixin, ToDictMixin): +class Layer(ArchiveMixin, ConfigMixin): """ A layer object represents a slice of a root filesystem in a container image. """ @@ -1035,7 +1107,7 @@ def build_resource(_top, _name, _is_file): yield build_resource(top, f, _is_file=True) if with_dir: for d in dirs: - yield build_resource(top, d, _is_file=True) + yield build_resource(top, d, _is_file=False) def get_installed_packages(self, packages_getter): """ @@ -1054,3 +1126,6 @@ def get_installed_packages(self, packages_getter): the same structure). 
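+
+        For example, a hypothetical getter could be a callable such as
+        ``lambda rootfs_location: collect_packages(rootfs_location)`` where
+        ``collect_packages`` is an assumed helper and not part of this module.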
""" return packages_getter(self.extracted_location) + + def to_dict(self, **kwargs): + return attr.asdict(self) diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index f54dd71..c36099a 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -111,7 +111,6 @@ def extract_tar_keeping_symlinks(location, target_dir): fileutils.create_dir(target_dir) - with tarfile.open(location) as tarball: # never extract character device, block and fifo files: # we extract dirs, files and links only diff --git a/tests/data/cli/hello-world.tar-inventory-from-dir-expected.json b/tests/data/cli/hello-world.tar-inventory-from-dir-expected.json index ae7fe92..dcebe1d 100644 --- a/tests/data/cli/hello-world.tar-inventory-from-dir-expected.json +++ b/tests/data/cli/hello-world.tar-inventory-from-dir-expected.json @@ -1,7 +1,7 @@ [ { - "extracted_location": null, - "archive_location": null, + "extracted_location": "", + "archive_location": "", "sha256": null, "docker_version": "17.03.1-ce", "os": "linux", @@ -22,7 +22,7 @@ "layers": [ { "extracted_location": null, - "archive_location": null, + "archive_location": "3e0554cb0efadb678332292bb7835495fbb0c71af7e01bd4f7d11e64fe3d54df/layer.tar", "sha256": "45761469c965421a92a69cc50e92c01e0cfa94fe026cdd1233445ea00e96289a", "docker_version": null, "os": null, diff --git a/tests/data/cli/hello-world.tar-inventory-from-tarball-expected.json b/tests/data/cli/hello-world.tar-inventory-from-tarball-expected.json index de63baf..01cf1fa 100644 --- a/tests/data/cli/hello-world.tar-inventory-from-tarball-expected.json +++ b/tests/data/cli/hello-world.tar-inventory-from-tarball-expected.json @@ -1,7 +1,7 @@ [ { - "extracted_location": null, - "archive_location": null, + "extracted_location": "", + "archive_location": "hello-world.tar", "sha256": "4d99bd478599e97b8fc8c647d7c6b1b4c3cf1d445949d5f20b08de9ca427cd2f", "docker_version": "17.03.1-ce", "os": "linux", @@ -21,8 +21,8 @@ "distro": null, "layers": [ { - "extracted_location": null, - "archive_location": null, + "extracted_location": "45761469c965421a92a69cc50e92c01e0cfa94fe026cdd1233445ea00e96289a", + "archive_location": "layer.tar", "sha256": "45761469c965421a92a69cc50e92c01e0cfa94fe026cdd1233445ea00e96289a", "docker_version": null, "os": null, diff --git a/tests/data/cli/she-image_from_scratch-1.0.tar-inventory-from-tarball-expected.json b/tests/data/cli/she-image_from_scratch-1.0.tar-inventory-from-tarball-expected.json index 926b465..a0f5252 100644 --- a/tests/data/cli/she-image_from_scratch-1.0.tar-inventory-from-tarball-expected.json +++ b/tests/data/cli/she-image_from_scratch-1.0.tar-inventory-from-tarball-expected.json @@ -1,7 +1,7 @@ [ { - "extracted_location": null, - "archive_location": null, + "extracted_location": "", + "archive_location": "she-image_from_scratch-1.0.tar", "sha256": "2ff7a11e7e5425f04d757c3dcf38203df22ba08941061781eeda261b0ff54936", "docker_version": "18.09.7", "os": "linux", @@ -23,8 +23,8 @@ "distro": null, "layers": [ { - "extracted_location": null, - "archive_location": null, + "extracted_location": "1f9928556cf4d8e357642820b93a68343ae4bf8eeba76a041b2ea3a4257c5a5a", + "archive_location": "layer.tar", "sha256": "1f9928556cf4d8e357642820b93a68343ae4bf8eeba76a041b2ea3a4257c5a5a", "docker_version": null, "os": null, @@ -41,8 +41,8 @@ "comment": null }, { - "extracted_location": null, - "archive_location": null, + "extracted_location": "57a9ef5e4048962620a6311018be2010c179c9853e878a20080aeaed97f42826", + "archive_location": "layer.tar", 
"sha256": "57a9ef5e4048962620a6311018be2010c179c9853e878a20080aeaed97f42826", "docker_version": null, "os": null, @@ -59,8 +59,8 @@ "comment": null }, { - "extracted_location": null, - "archive_location": null, + "extracted_location": "0676ff439070aa2108b9bc3904f04265f83a639b4e6fe3e45c9549fd7bbe3369", + "archive_location": "layer.tar", "sha256": "0676ff439070aa2108b9bc3904f04265f83a639b4e6fe3e45c9549fd7bbe3369", "docker_version": null, "os": null, @@ -77,8 +77,8 @@ "comment": null }, { - "extracted_location": null, - "archive_location": null, + "extracted_location": "85dc439f0d28eceba0cfc2b883f3e99a37e60961e2686acf8b85c09e67facb01", + "archive_location": "layer.tar", "sha256": "85dc439f0d28eceba0cfc2b883f3e99a37e60961e2686acf8b85c09e67facb01", "docker_version": null, "os": null, diff --git a/tests/data/image/mini-image_from_scratch-2.0.tar-relative-expected.json b/tests/data/image/mini-image_from_scratch-2.0.tar-relative-expected.json new file mode 100644 index 0000000..276617a --- /dev/null +++ b/tests/data/image/mini-image_from_scratch-2.0.tar-relative-expected.json @@ -0,0 +1,145 @@ +[ + { + "extracted_location": "", + "archive_location": "mini-image_from_scratch-2.0.tar", + "sha256": "d12b4fea659265f5b340c41c7e26223eaa11a639455b04faf7785119cb00c0d2", + "docker_version": "18.09.7", + "os": "linux", + "os_version": null, + "architecture": "amd64", + "variant": null, + "created": "2020-06-02T07:19:55.133752465Z", + "author": null, + "comment": null, + "labels": { + "some other label": "" + }, + "image_format": "docker", + "image_id": "6d190579c3d7ccc07188cd204310927f553f2ebe1faf868169365f3eccf9c8ab", + "config_digest": "sha256:6d190579c3d7ccc07188cd204310927f553f2ebe1faf868169365f3eccf9c8ab", + "tags": [ + "she/image_from_scratch:1.0" + ], + "distro": null, + "layers": [ + { + "extracted_location": null, + "archive_location": "390f462bd3d4a0f900a4c431a53dc8830461b9e703ba348a2700dcc46f720c02/layer.tar", + "sha256": "1f9928556cf4d8e357642820b93a68343ae4bf8eeba76a041b2ea3a4257c5a5a", + "docker_version": null, + "os": null, + "os_version": null, + "architecture": null, + "variant": null, + "labels": [], + "layer_id": "1f9928556cf4d8e357642820b93a68343ae4bf8eeba76a041b2ea3a4257c5a5a", + "size": 2048, + "is_empty_layer": false, + "author": "You Myself and I.", + "created": "2020-06-02T07:19:53.24191627Z", + "created_by": "/bin/sh -c #(nop) ADD file:bc85f38b0c798897406a4c35fe08d06599df3831fca6db0cbd279e5d71bf9514 in / ", + "comment": null + }, + { + "extracted_location": null, + "archive_location": "24ff9e455c2811b9f3ba124276b5f2bb0111555057fdcc5b528c471597186fc3/layer.tar", + "sha256": "57a9ef5e4048962620a6311018be2010c179c9853e878a20080aeaed97f42826", + "docker_version": null, + "os": null, + "os_version": null, + "architecture": null, + "variant": null, + "labels": [], + "layer_id": "57a9ef5e4048962620a6311018be2010c179c9853e878a20080aeaed97f42826", + "size": 5632, + "is_empty_layer": false, + "author": "You Myself and I.", + "created": "2020-06-02T07:19:53.514748579Z", + "created_by": "/bin/sh -c #(nop) COPY dir:dce95f07b795ec49ef6ddbcc12f24aaad9af237726904837c77b5ed2967e349e in /additions ", + "comment": null + }, + { + "extracted_location": null, + "archive_location": "7295cea29559ce1e63515273dbd67ac770ef613b9b697473c1f95a0c494cfd80/layer.tar", + "sha256": "0676ff439070aa2108b9bc3904f04265f83a639b4e6fe3e45c9549fd7bbe3369", + "docker_version": null, + "os": null, + "os_version": null, + "architecture": null, + "variant": null, + "labels": [], + "layer_id": 
"0676ff439070aa2108b9bc3904f04265f83a639b4e6fe3e45c9549fd7bbe3369", + "size": 5120, + "is_empty_layer": false, + "author": "You Myself and I.", + "created": "2020-06-02T07:19:54.191141634Z", + "created_by": "/bin/sh -c #(nop) COPY dir:dce95f07b795ec49ef6ddbcc12f24aaad9af237726904837c77b5ed2967e349e in /additions ", + "comment": null + }, + { + "extracted_location": null, + "archive_location": "649ef32010e339b304136b22154e13f984f88f528c8120a17291396b9264ccc0/layer.tar", + "sha256": "85dc439f0d28eceba0cfc2b883f3e99a37e60961e2686acf8b85c09e67facb01", + "docker_version": null, + "os": null, + "os_version": null, + "architecture": null, + "variant": null, + "labels": [], + "layer_id": "85dc439f0d28eceba0cfc2b883f3e99a37e60961e2686acf8b85c09e67facb01", + "size": 2560, + "is_empty_layer": false, + "author": "You Myself and I.", + "created": "2020-06-02T07:19:54.826380355Z", + "created_by": "/bin/sh -c #(nop) ADD file:bc85f38b0c798897406a4c35fe08d06599df3831fca6db0cbd279e5d71bf9514 in /additions/hello ", + "comment": null + } + ], + "history": [ + { + "created": "2020-06-02T07:19:52.945579677Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) MAINTAINER You Myself and I.", + "empty_layer": true + }, + { + "created": "2020-06-02T07:19:53.24191627Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) ADD file:bc85f38b0c798897406a4c35fe08d06599df3831fca6db0cbd279e5d71bf9514 in / " + }, + { + "created": "2020-06-02T07:19:53.514748579Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) COPY dir:dce95f07b795ec49ef6ddbcc12f24aaad9af237726904837c77b5ed2967e349e in /additions " + }, + { + "created": "2020-06-02T07:19:53.83371266Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) CMD [\"/bin/sh\" \"-c\" \"rm /additions/foo\"]", + "empty_layer": true + }, + { + "created": "2020-06-02T07:19:54.191141634Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) COPY dir:dce95f07b795ec49ef6ddbcc12f24aaad9af237726904837c77b5ed2967e349e in /additions " + }, + { + "created": "2020-06-02T07:19:54.506439427Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) CMD [\"/bin/sh\" \"-c\" \"rm -rf /additions/baz\"]", + "empty_layer": true + }, + { + "created": "2020-06-02T07:19:54.826380355Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) ADD file:bc85f38b0c798897406a4c35fe08d06599df3831fca6db0cbd279e5d71bf9514 in /additions/hello " + }, + { + "created": "2020-06-02T07:19:55.133752465Z", + "author": "You Myself and I.", + "created_by": "/bin/sh -c #(nop) LABEL Some other label=", + "empty_layer": true + } + ] + } +] \ No newline at end of file diff --git a/tests/data/image/windows-mini-image.tar.gz.expected.json b/tests/data/image/windows-mini-image.tar.gz.expected.json index df10647..f87e484 100644 --- a/tests/data/image/windows-mini-image.tar.gz.expected.json +++ b/tests/data/image/windows-mini-image.tar.gz.expected.json @@ -1,6 +1,6 @@ { "extracted_location": "", - "archive_location": "", + "archive_location": "windows-mini-image.tar.gz", "sha256": "f3f60bf21091013c476cecbabe4c5579a180fd557c38b296c6d151c4f586b35c", "docker_version": null, "os": "windows", diff --git a/tests/data/repos/hello-world.tar.flatten.expected.json b/tests/data/repos/hello-world.tar.flatten.expected.json index 0038c8f..1e3a202 100644 --- a/tests/data/repos/hello-world.tar.flatten.expected.json +++ b/tests/data/repos/hello-world.tar.flatten.expected.json @@ -11,7 +11,7 @@ "created_by": "/bin/sh -c #(nop) COPY 
file:b65349dad8105cbef74456e9c0c9da5d001e9eb2ade4b3c21e107909aee5b25a in / ", "created": "2017-06-14T19:28:14.683824199Z", "comment": null, - "layer_extracted_location": "", - "layer_archive_location": "layer.tar" + "layer_extracted_location": null, + "layer_archive_location": "3e0554cb0efadb678332292bb7835495fbb0c71af7e01bd4f7d11e64fe3d54df/layer.tar" } ] \ No newline at end of file diff --git a/tests/data/repos/hello-world.tar.registry.expected.json b/tests/data/repos/hello-world.tar.registry.expected.json index 7490d31..dcebe1d 100644 --- a/tests/data/repos/hello-world.tar.registry.expected.json +++ b/tests/data/repos/hello-world.tar.registry.expected.json @@ -21,8 +21,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "3e0554cb0efadb678332292bb7835495fbb0c71af7e01bd4f7d11e64fe3d54df/layer.tar", "sha256": "45761469c965421a92a69cc50e92c01e0cfa94fe026cdd1233445ea00e96289a", "docker_version": null, "os": null, diff --git a/tests/data/repos/images.tar.gz.expected.json b/tests/data/repos/images.tar.gz.expected.json index e92f042..4b0cbdf 100644 --- a/tests/data/repos/images.tar.gz.expected.json +++ b/tests/data/repos/images.tar.gz.expected.json @@ -33,8 +33,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "d388bee71bbf28f77042d89b353bacd14506227a39b6b1098afe457f0a0b608e/layer.tar", "sha256": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b", "docker_version": null, "os": null, @@ -51,8 +51,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "e439273044a3b34ba206c3598cd007dbba34db5af98d40718d585869af68602e/layer.tar", "sha256": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e", "docker_version": null, "os": null, @@ -69,8 +69,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "ca473f7411dd74e2d4781fc552d75d3b63c830ff66a253eec6bf5a6261c031b2/layer.tar", "sha256": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f", "docker_version": null, "os": null, @@ -87,8 +87,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "325919382e7b94a21ef80452c2fb599f5a03364e8559da0eca1359ad6ac394a0/layer.tar", "sha256": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd", "docker_version": null, "os": null, @@ -210,8 +210,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "2ba5d0825bcb0d843f975f7ba52db62531dbc73df083fae3d355c1a22d4a4bdc/layer.tar", "sha256": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a", "docker_version": null, "os": null, @@ -228,8 +228,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "c9d345514b7a7f4e4d14748c6a7a6cb408f117fccce87ef80bf72429b5360c44/layer.tar", "sha256": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb", "docker_version": null, "os": null, @@ -246,8 +246,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": 
"cbfc5b91e94f389bd405928c3be3367feea459d5917bde9d8b2792b96ae789ef/layer.tar", "sha256": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32", "docker_version": null, "os": null, @@ -264,8 +264,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "bf1ad815335aca28dc1b64e4a9cad7f0bd9f7e9ee8162dce166150b4f1169b0e/layer.tar", "sha256": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf", "docker_version": null, "os": null, @@ -282,8 +282,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "5b2fc24305d353053e3a6aa7a613e75240e34cfede725f4bd4c466468cd3c48c/layer.tar", "sha256": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d", "docker_version": null, "os": null, @@ -300,8 +300,8 @@ "comment": null }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "ee512e2eb28e302e80a1e0bfad85ee3b18adc84a80e88ceac4325c13e3e083ec/layer.tar", "sha256": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9", "docker_version": null, "os": null, diff --git a/tests/data/repos/imagesv11.tar.expected.json b/tests/data/repos/imagesv11.tar.expected.json index 99e30b9..28f6f6d 100644 --- a/tests/data/repos/imagesv11.tar.expected.json +++ b/tests/data/repos/imagesv11.tar.expected.json @@ -1,7 +1,7 @@ [ { "extracted_location": "", - "archive_location": "", + "archive_location": "imagesv11.tar", "sha256": "79e479cd111b4276a2ff68f9cbaf64436556aac1d79a7df0b5527070bb774f47", "docker_version": "1.13.0", "os": "linux", @@ -21,8 +21,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "de331f94dc3592a39e495432d39ca14ed0cf04a6861c1e7714ff410e8a0225b4/layer.tar", "sha256": "c2c0564e4d42ab2b8e947861d9324a6ab087818731e818bc8667c7523507c4f9", "docker_version": null, "os": null, @@ -48,7 +48,7 @@ }, { "extracted_location": "", - "archive_location": "", + "archive_location": "imagesv11.tar", "sha256": "79e479cd111b4276a2ff68f9cbaf64436556aac1d79a7df0b5527070bb774f47", "docker_version": "1.13.0", "os": "linux", @@ -70,8 +70,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "a699c18e2119e668e5f1264e97559af65d2111ac0790b6832efeb450fc5193fc/layer.tar", "sha256": "2d3d307589a202188a779ec6d094a39a396d7d48d6fef273b902af4389bcfd62", "docker_version": null, "os": null, @@ -110,7 +110,7 @@ }, { "extracted_location": "", - "archive_location": "", + "archive_location": "imagesv11.tar", "sha256": "79e479cd111b4276a2ff68f9cbaf64436556aac1d79a7df0b5527070bb774f47", "docker_version": "1.13.0", "os": "linux", @@ -132,8 +132,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "f2323825799160c4c1e508c1189dc285c7928677747b5a679020e9b438843127/layer.tar", "sha256": "c2c0564e4d42ab2b8e947861d9324a6ab087818731e818bc8667c7523507c4f9", "docker_version": null, "os": null, @@ -150,8 +150,8 @@ "comment": "initial import" }, { - "extracted_location": "", - "archive_location": "layer.tar", + "extracted_location": null, + "archive_location": "1466996a39f8d2af81b92fad37ef15c04fb2ca64b43945a00f6f5943913f6b96/layer.tar", "sha256": 
"2d3d307589a202188a779ec6d094a39a396d7d48d6fef273b902af4389bcfd62", "docker_version": null, "os": null, diff --git a/tests/data/repos/imagesv11_with_tar_at_root.tar.registry.expected.json b/tests/data/repos/imagesv11_with_tar_at_root.tar.registry.expected.json index 217d366..1b6ae4e 100644 --- a/tests/data/repos/imagesv11_with_tar_at_root.tar.registry.expected.json +++ b/tests/data/repos/imagesv11_with_tar_at_root.tar.registry.expected.json @@ -21,8 +21,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "de331f94dc3592a39e495432d39ca14ed0cf04a6861c1e7714ff410e8a0225b4.tar", + "extracted_location": null, + "archive_location": "imagesv11_with_tar_at_root.tar/de331f94dc3592a39e495432d39ca14ed0cf04a6861c1e7714ff410e8a0225b4.tar", "sha256": "c2c0564e4d42ab2b8e947861d9324a6ab087818731e818bc8667c7523507c4f9", "docker_version": null, "os": null, @@ -70,8 +70,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "a699c18e2119e668e5f1264e97559af65d2111ac0790b6832efeb450fc5193fc.tar", + "extracted_location": null, + "archive_location": "imagesv11_with_tar_at_root.tar/a699c18e2119e668e5f1264e97559af65d2111ac0790b6832efeb450fc5193fc.tar", "sha256": "2d3d307589a202188a779ec6d094a39a396d7d48d6fef273b902af4389bcfd62", "docker_version": null, "os": null, @@ -132,8 +132,8 @@ "distro": null, "layers": [ { - "extracted_location": "", - "archive_location": "f2323825799160c4c1e508c1189dc285c7928677747b5a679020e9b438843127.tar", + "extracted_location": null, + "archive_location": "imagesv11_with_tar_at_root.tar/f2323825799160c4c1e508c1189dc285c7928677747b5a679020e9b438843127.tar", "sha256": "c2c0564e4d42ab2b8e947861d9324a6ab087818731e818bc8667c7523507c4f9", "docker_version": null, "os": null, @@ -150,8 +150,8 @@ "comment": "initial import" }, { - "extracted_location": "", - "archive_location": "1466996a39f8d2af81b92fad37ef15c04fb2ca64b43945a00f6f5943913f6b96.tar", + "extracted_location": null, + "archive_location": "imagesv11_with_tar_at_root.tar/1466996a39f8d2af81b92fad37ef15c04fb2ca64b43945a00f6f5943913f6b96.tar", "sha256": "2d3d307589a202188a779ec6d094a39a396d7d48d6fef273b902af4389bcfd62", "docker_version": null, "os": null, diff --git a/tests/test_cli.py b/tests/test_cli.py index 237f38f..13cea8e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -19,17 +19,24 @@ def clean_images_data(images): """ - Clean an Image.to_dict() for testing + Clean an a list of Image.to_dict() for testing """ - for i in images: - i['extracted_location'] = None - i['archive_location'] = None - for l in i['layers']: - l['extracted_location'] = None - l['archive_location'] = None + for image in images: + clean_image_data(image) return images +def clean_image_data(image): + """ + Clean `image` data from Image.to_dict() for testing + """ + image['extracted_location'] = '' + image['archive_location'] = os.path.basename(image['archive_location'] or '') + + return image + + + class TestContainerInspectorCli(FileBasedTesting): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') @@ -43,14 +50,14 @@ def test_container_inspector_single_layer_from_dir(self): def test_container_inspector_single_layer_from_tarball(self): test_dir = self.get_test_loc('cli/hello-world.tar') expected = self.get_test_loc('cli/hello-world.tar-inventory-from-tarball-expected.json') - out = cli._container_inspector(image_path=test_dir) + out = cli._container_inspector(image_path=test_dir, _layer_path_segments=1) result = clean_images_data(json.loads(out)) check_expected(result, 
expected, regen=False) def test_container_inspector_multiple_layers_from_tarball(self): test_dir = self.get_test_loc('cli/she-image_from_scratch-1.0.tar') expected = self.get_test_loc('cli/she-image_from_scratch-1.0.tar-inventory-from-tarball-expected.json') - out = cli._container_inspector(image_path=test_dir) + out = cli._container_inspector(image_path=test_dir, _layer_path_segments=1) result = clean_images_data(json.loads(out)) check_expected(result, expected, regen=False) diff --git a/tests/test_image.py b/tests/test_image.py index 905da50..00c0783 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -6,7 +6,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from os import path +import os from commoncode.testcase import FileBasedTesting @@ -14,11 +14,10 @@ from container_inspector.image import flatten_images_data from utilities import check_expected -from utilities import clean_image class TestImages(FileBasedTesting): - test_data_dir = path.join(path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), 'data') def test_Image(self): try: @@ -34,7 +33,7 @@ def test_Image_with_dir(self): def test_Image_get_images_from_tarball(self): test_tarball = self.get_test_loc('repos/imagesv11.tar') extract_dir = self.get_temp_dir() - expected = path.join( + expected = os.path.join( self.get_test_loc('repos'), 'imagesv11.tar.expected.json', ) @@ -44,33 +43,32 @@ def test_Image_get_images_from_tarball(self): extracted_location=extract_dir, verify=False, ) - result = [clean_image(i).to_dict() for i in images] + result = [i.to_dict(layer_path_segments=2, _test=True) for i in images] check_expected(result, expected, regen=False) - def test_Image_get_images_from_tarball_windows(self): + def test_windows_container_Image_get_images_from_tarball(self): test_tarball = self.get_test_loc('image/windows-mini-image.tar.gz') extract_dir = self.get_temp_dir() - expected = path.join( - self.get_test_loc('image'), - 'windows-mini-image.tar.gz.expected.json', - ) - image = Image.get_images_from_tarball( archive_location=test_tarball, extracted_location=extract_dir, verify=False, )[0] - layer_extracted_location = self.get_temp_dir() - image.extract_layers(extracted_location=layer_extracted_location) + image.extract_layers(extracted_location=extract_dir) image.get_and_set_distro() - result = clean_image(image).to_dict() + result = image.to_dict(layer_path_segments=1, _test=True) + expected = self.get_test_loc( + 'image/windows-mini-image.tar.gz.expected.json', + must_exist=False, + ) + check_expected(result, expected, regen=False) def test_Image_get_images_from_dir(self): test_tarball = self.get_test_loc('repos/imagesv11.tar') test_dir = self.extract_test_tar(test_tarball) - expected = path.join( + expected = os.path.join( self.get_test_loc('repos'), 'imagesv11.tar.expected.json', ) @@ -78,40 +76,40 @@ def test_Image_get_images_from_dir(self): extracted_location=test_dir, archive_location=test_tarball, ) - result = [clean_image(i).to_dict() for i in images] + result = [i.to_dict(layer_path_segments=2, _test=True) for i in images] check_expected(result, expected, regen=False) def test_Image_get_images_from_dir_from_hello_world(self): test_arch = self.get_test_loc('repos/hello-world.tar') test_dir = self.extract_test_tar(test_arch) - expected = path.join( + expected = os.path.join( self.get_test_loc('repos'), 'hello-world.tar.registry.expected.json', ) images = Image.get_images_from_dir(test_dir) - result = [clean_image(i).to_dict() for i in images] 
+ result = [i.to_dict(layer_path_segments=2, _test=True) for i in images] check_expected(result, expected, regen=False) def test_Image_get_images_from_dir_then_flatten_images_data(self): test_arch = self.get_test_loc('repos/hello-world.tar') test_dir = self.extract_test_tar(test_arch) - expected = path.join( + expected = os.path.join( self.get_test_loc('repos'), 'hello-world.tar.flatten.expected.json', ) - images = [clean_image(i) for i in Image.get_images_from_dir(test_dir)] - result = list(flatten_images_data(images)) + images = list(Image.get_images_from_dir(test_dir)) + result = list(flatten_images_data(images, layer_path_segments=2, _test=True)) check_expected(result, expected, regen=False) def test_Image_get_images_from_dir_with_direct_at_root_layerid_dot_tar_tarball(self): test_arch = self.get_test_loc('repos/imagesv11_with_tar_at_root.tar') test_dir = self.extract_test_tar(test_arch) - expected = path.join( + expected = os.path.join( self.get_test_loc('repos'), 'imagesv11_with_tar_at_root.tar.registry.expected.json', ) images = Image.get_images_from_dir(test_dir, verify=False) - result = [clean_image(i).to_dict() for i in images] + result = [i.to_dict(layer_path_segments=2, _test=True) for i in images] check_expected(result, expected, regen=False) def test_Image_get_images_from_dir_with_verify(self): @@ -122,9 +120,9 @@ def test_Image_get_images_from_dir_with_verify(self): def test_Image_get_images_from_dir_with_anotations(self): test_arch = self.get_test_loc('repos/images.tar.gz') test_dir = self.extract_test_tar(test_arch) - expected = path.join(self.get_test_loc('repos'), 'images.tar.gz.expected.json') + expected = os.path.join(self.get_test_loc('repos'), 'images.tar.gz.expected.json') images = Image.get_images_from_dir(test_dir, verify=False) - result = [clean_image(i).to_dict() for i in images] + result = [i.to_dict(layer_path_segments=2, _test=True) for i in images] check_expected(result, expected, regen=False) def test_Image_get_images_from_dir_with_verify_fails_if_invalid_checksum(self): @@ -145,3 +143,18 @@ def test_Image_find_format_finds_Docker_images_without_repositories(self): test_arch = self.get_test_loc('image/mini-image_from_scratch-2.0.tar') test_dir = self.extract_test_tar(test_arch) assert Image.find_format(test_dir) == 'docker' + + def test_Image_to_dict_can_report_image_trimmed_layer_paths_or_not(self): + test_image = self.get_test_loc('image/mini-image_from_scratch-2.0.tar') + extract_dir = self.get_temp_dir() + images = Image.get_images_from_tarball( + archive_location=test_image, + extracted_location=extract_dir, + verify=False, + ) + expected1 = self.get_test_loc( + 'image/mini-image_from_scratch-2.0.tar-relative-expected.json', + must_exist=False, + ) + result = [i.to_dict(layer_path_segments=2, _test=True) for i in images] + check_expected(result, expected1, regen=False) diff --git a/tests/utilities.py b/tests/utilities.py index 24a20f8..ccafc71 100644 --- a/tests/utilities.py +++ b/tests/utilities.py @@ -7,7 +7,6 @@ # import json -import os def check_expected(result, expected, regen=False): @@ -23,15 +22,3 @@ def check_expected(result, expected, regen=False): expected = json.loads(ex.read()) assert result == expected - - -def clean_image(image): - """ - Clean `image` data for test purpose - """ - image.extracted_location = '' - image.archive_location = '' - for layer in image.layers: - layer.extracted_location = os.path.basename(layer.extracted_location or '') - layer.archive_location = os.path.basename(layer.archive_location or '') - return image diff 
--git a/thirdparty/README.rst b/thirdparty/README.rst new file mode 100644 index 0000000..b31482f --- /dev/null +++ b/thirdparty/README.rst @@ -0,0 +1,2 @@ +Put your Python dependency wheels to be vendored in this directory. +