diff --git a/.github/actions/bootstrap/action.yaml b/.github/actions/bootstrap/action.yaml new file mode 100644 index 00000000..d4ed5970 --- /dev/null +++ b/.github/actions/bootstrap/action.yaml @@ -0,0 +1,80 @@ +name: "Bootstrap" +description: "Bootstrap all tools and dependencies" +inputs: + go-version: + description: "Go version to install" + required: true + default: "1.19.x" + use-go-cache: + description: "Restore go cache" + required: true + default: "true" + cache-key-prefix: + description: "Prefix all cache keys with this value" + required: true + default: "831180ac25" + build-cache-key-prefix: + description: "Prefix build cache key with this value" + required: true + default: "f8b6d31dea" + bootstrap-apt-packages: + description: "Space delimited list of tools to install via apt" + default: "" + +runs: + using: "composite" + steps: + - uses: actions/setup-go@v3 + with: + go-version: ${{ inputs.go-version }} + + - name: Restore tool cache + id: tool-cache + uses: actions/cache@v3 + with: + path: ${{ github.workspace }}/.tmp + key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-tool-${{ hashFiles('Makefile') }} + + # note: we need to keep restoring the go mod cache before bootstrapping tools since `go install` is used in + # some installations of project tools. 
+ - name: Restore go module cache + id: go-mod-cache + if: inputs.use-go-cache == 'true' + uses: actions/cache@v3 + with: + path: | + ~/go/pkg/mod + key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ inputs.cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}- + + - name: (cache-miss) Bootstrap project tools + shell: bash + if: steps.tool-cache.outputs.cache-hit != 'true' + run: make bootstrap-tools + + - name: Restore go build cache + id: go-cache + if: inputs.use-go-cache == 'true' + uses: actions/cache@v3 + with: + path: | + ~/.cache/go-build + key: ${{ inputs.cache-key-prefix }}-${{ inputs.build-cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ inputs.cache-key-prefix }}-${{ inputs.build-cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}- + + - name: (cache-miss) Bootstrap go dependencies + shell: bash + if: steps.go-mod-cache.outputs.cache-hit != 'true' && inputs.use-go-cache == 'true' + run: make bootstrap-go + + - name: Bootstrap CI dependencies + shell: bash + run: make ci-bootstrap + + - name: Install apt packages + if: inputs.bootstrap-apt-packages != '' + shell: bash + run: | + DEBIAN_FRONTEND=noninteractive sudo apt update && sudo -E apt install -y ${{ inputs.bootstrap-apt-packages }} diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh new file mode 100755 index 00000000..50beb016 --- /dev/null +++ b/.github/scripts/build.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +set -uo pipefail + +SNAPSHOT_DIR=$1 + +# Based on https://gist.github.com/eduncan911/68775dba9d3c028181e4 and https://gist.github.com/makeworld-the-better-one/e1bb127979ae4195f43aaa3ad46b1097 +# but improved to use the `go` command so it never goes out of date. 
+ +type setopt >/dev/null 2>&1 + +contains() { + # Source: https://stackoverflow.com/a/8063398/7361270 + [[ $1 =~ (^|[[:space:]])$2($|[[:space:]]) ]] +} + +mkdir -p "${SNAPSHOT_DIR}" + +BUILD_TARGET=./examples +OUTPUT=${SNAPSHOT_DIR}/stereoscope-example +FAILURES="" + +# You can set your own flags on the command line +FLAGS=${FLAGS:-"-ldflags=\"-s -w\""} + +# A list of OSes and architectures to not build for, space-separated +# It can be set from the command line when the script is called. +NOT_ALLOWED_OS=${NOT_ALLOWED_OS:-"js android ios solaris illumos aix dragonfly plan9 freebsd openbsd netbsd"} +NOT_ALLOWED_ARCH=${NOT_ALLOWED_ARCH:-"riscv64 mips mips64 mips64le ppc64 ppc64le s390x wasm"} + + +# Get all targets +while IFS= read -r target; do + GOOS=${target%/*} + GOARCH=${target#*/} + BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}" + + if contains "$NOT_ALLOWED_OS" "$GOOS" ; then + continue + fi + + if contains "$NOT_ALLOWED_ARCH" "$GOARCH" ; then + continue + fi + + # Check for arm and set arm version + if [[ $GOARCH == "arm" ]]; then + # Set what arm versions each platform supports + if [[ $GOOS == "darwin" ]]; then + arms="7" + elif [[ $GOOS == "windows" ]]; then + # This is a guess, it's not clear what Windows supports from the docs + # But I was able to build all these on my machine + arms="5 6 7" + elif [[ $GOOS == *"bsd" ]]; then + arms="6 7" + else + # Linux goes here + arms="5 6 7" + fi + + # Now do the arm build + for GOARM in $arms; do + BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}${GOARM}" + if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi + CMD="GOARM=${GOARM} GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} ${BUILD_TARGET}" + echo "${CMD}" + eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}${GOARM}" + done + else + # Build non-arm here + if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi + CMD="GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} ${BUILD_TARGET}" + 
echo "${CMD}" + eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}" + fi +done <<< "$(go tool dist list)" + +if [[ "${FAILURES}" != "" ]]; then + echo "" + echo "build failed for: ${FAILURES}" + exit 1 +fi \ No newline at end of file diff --git a/.github/scripts/coverage.py b/.github/scripts/coverage.py new file mode 100755 index 00000000..db14135c --- /dev/null +++ b/.github/scripts/coverage.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +import subprocess +import sys +import shlex + + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + +if len(sys.argv) < 3: + print("Usage: coverage.py [threshold] [go-coverage-report]") + sys.exit(1) + + +threshold = float(sys.argv[1]) +report = sys.argv[2] + + +args = shlex.split(f"go tool cover -func {report}") +p = subprocess.run(args, capture_output=True, text=True) + +percent_coverage = float(p.stdout.splitlines()[-1].split()[-1].replace("%", "")) +print(f"{bcolors.BOLD}Coverage: {percent_coverage}%{bcolors.ENDC}") + +if percent_coverage < threshold: + print(f"{bcolors.BOLD}{bcolors.FAIL}Coverage below threshold of {threshold}%{bcolors.ENDC}") + sys.exit(1) diff --git a/.github/scripts/go-mod-tidy-check.sh b/.github/scripts/go-mod-tidy-check.sh new file mode 100755 index 00000000..28f22fcd --- /dev/null +++ b/.github/scripts/go-mod-tidy-check.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -eu + +ORIGINAL_STATE_DIR=$(mktemp -d "TEMP-original-state-XXXXXXXXX") +TIDY_STATE_DIR=$(mktemp -d "TEMP-tidy-state-XXXXXXXXX") + +trap "cp -p ${ORIGINAL_STATE_DIR}/* ./ && git update-index -q --refresh && rm -fR ${ORIGINAL_STATE_DIR} ${TIDY_STATE_DIR}" EXIT + +# capturing original state of files... +cp go.mod go.sum "${ORIGINAL_STATE_DIR}" + +# capturing state of go.mod and go.sum after running go mod tidy... 
+go mod tidy +cp go.mod go.sum "${TIDY_STATE_DIR}" + +set +e + +# detect difference between the git HEAD state and the go mod tidy state +DIFF_MOD=$(diff -u "${ORIGINAL_STATE_DIR}/go.mod" "${TIDY_STATE_DIR}/go.mod") +DIFF_SUM=$(diff -u "${ORIGINAL_STATE_DIR}/go.sum" "${TIDY_STATE_DIR}/go.sum") + +if [[ -n "${DIFF_MOD}" || -n "${DIFF_SUM}" ]]; then + echo "go.mod diff:" + echo "${DIFF_MOD}" + echo "go.sum diff:" + echo "${DIFF_SUM}" + echo "" + printf "FAILED! go.mod and/or go.sum are NOT tidy; please run 'go mod tidy'.\n\n" + exit 1 +fi diff --git a/.github/workflows/benchmark-testing.yaml b/.github/workflows/benchmark-testing.yaml new file mode 100644 index 00000000..4cd87594 --- /dev/null +++ b/.github/workflows/benchmark-testing.yaml @@ -0,0 +1,58 @@ +name: "Benchmark testing" + +on: + workflow_dispatch: + pull_request: + +jobs: + + Benchmark-Test: + name: "Benchmark tests" + runs-on: ubuntu-20.04 + # note: we want benchmarks to run on pull_request events in order to publish results to a sticky comment, and + # we also want to run on push such that merges to main are recorded to the cache. For this reason we don't filter + # the job by event. 
+ steps: + - uses: actions/checkout@v3 + + - name: Bootstrap environment + uses: ./.github/actions/bootstrap + + - name: Restore base benchmark result + uses: actions/cache@v3 + with: + path: test/results/benchmark-main.txt + # use base sha for PR or new commit hash for main push in benchmark result key + key: ${{ runner.os }}-bench-${{ (github.event.pull_request.base.sha != github.event.after) && github.event.pull_request.base.sha || github.event.after }} + + - name: Run benchmark tests + id: benchmark + run: | + REF_NAME=${GITHUB_REF##*/} make benchmark + OUTPUT=$(make show-benchstat) + OUTPUT="${OUTPUT//'%'/'%25'}" # URL encode all '%' characters + OUTPUT="${OUTPUT//$'\n'/'%0A'}" # URL encode all '\n' characters + OUTPUT="${OUTPUT//$'\r'/'%0D'}" # URL encode all '\r' characters + echo "::set-output name=result::$OUTPUT" + + - uses: actions/upload-artifact@v3 + with: + name: benchmark-test-results + path: test/results/**/* + + - name: Update PR benchmark results comment + uses: marocchino/sticky-pull-request-comment@v2 + continue-on-error: true + with: + header: benchmark + message: | + ### Benchmark Test Results + +
+ Benchmark results from the latest changes vs base branch + + ``` + ${{ steps.benchmark.outputs.result }} + ``` + +
diff --git a/.github/workflows/validations.yaml b/.github/workflows/validations.yaml index 4ed98466..f0314a6b 100644 --- a/.github/workflows/validations.yaml +++ b/.github/workflows/validations.yaml @@ -16,79 +16,28 @@ on: - main pull_request: -env: - GO_VERSION: "1.19.x" - jobs: Static-Analysis: name: "Static analysis" runs-on: ubuntu-20.04 steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} - - - uses: actions/checkout@v2 - - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} + - uses: actions/checkout@v3 - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap - - - name: Run static analysis - run: make static-analysis + - name: Run static analysis + run: make static-analysis Unit-Test: name: "Unit tests" runs-on: ubuntu-20.04 steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} - - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} - - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap 
all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap - - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - name: Run unit tests run: make unit @@ -102,11 +51,10 @@ jobs: name: "Integration tests" runs-on: ubuntu-20.04 steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} + - uses: actions/checkout@v3 - - uses: actions/checkout@v2 + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - name: Enable systemd for podman socket activation run: | @@ -128,29 +76,6 @@ jobs: with: limit-access-to-actor: true - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} - - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap - - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap - - name: Build key for test-fixture cache run: make integration-fingerprint @@ -177,77 +102,15 @@ jobs: - name: Run integration tests run: make integration - Benchmark-Test: - name: "Benchmark tests" + Build-Snapshot-Artifacts: + name: "Build snapshot artifacts" runs-on: ubuntu-20.04 - # note: we want benchmarks to run on pull_request events in order to publish results to a sticky comment, and - # we also want to run on push such that merges to main are recorded to the cache. For this reason we don't filter - # the job by event. 
steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} - - - uses: actions/checkout@v2 - - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} - - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap - - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap - - - name: Restore base benchmark result - uses: actions/cache@v2 - with: - path: test/results/benchmark-main.txt - # use base sha for PR or new commit hash for main push in benchmark result key - key: ${{ runner.os }}-bench-${{ (github.event.pull_request.base.sha != github.event.after) && github.event.pull_request.base.sha || github.event.after }} - - - name: Run benchmark tests - id: benchmark - run: | - REF_NAME=${GITHUB_REF##*/} make benchmark - OUTPUT=$(make show-benchstat) - OUTPUT="${OUTPUT//'%'/'%25'}" # URL encode all '%' characters - OUTPUT="${OUTPUT//$'\n'/'%0A'}" # URL encode all '\n' characters - OUTPUT="${OUTPUT//$'\r'/'%0D'}" # URL encode all '\r' characters - echo "::set-output name=result::$OUTPUT" - - - uses: actions/upload-artifact@v2 - with: - name: benchmark-test-results - path: test/results/**/* - - - name: Update PR benchmark results comment - uses: marocchino/sticky-pull-request-comment@v2 - continue-on-error: true - with: - header: benchmark - message: | - ### Benchmark Test Results + - uses: actions/checkout@v3 -
- Benchmark results from the latest changes vs base branch + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - ``` - ${{ steps.benchmark.outputs.result }} - ``` + - name: Build snapshot artifacts + run: make snapshot -
diff --git a/.gitignore b/.gitignore index 930ca30b..26630caf 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ coverage.txt **/test-fixtures/cache/ **/*.fingerprint +snapshot/ # Binaries for programs and plugins *.exe diff --git a/.golangci.yaml b/.golangci.yaml index 1cf5f179..a92c5c43 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -26,7 +26,6 @@ linters: - ineffassign - misspell - nakedret - - nolintlint - revive - staticcheck - stylecheck @@ -37,19 +36,23 @@ linters: - whitespace # do not enable... +# - deadcode # The owner seems to have abandoned the linter. Replaced by "unused". # - gochecknoglobals # - gochecknoinits # this is too aggressive # - godot # - godox # - goerr113 -# - golint # deprecated -# - gomnd # this is too aggressive -# - interfacer # this is a good idea, but is no longer supported and is prone to false positives -# - lll # without a way to specify per-line exception cases, this is not usable -# - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations +# - golint # deprecated +# - gomnd # this is too aggressive +# - interfacer # this is a good idea, but is no longer supported and is prone to false positives +# - lll # without a way to specify per-line exception cases, this is not usable +# - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations # - nestif -# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code -# - scopelint # deprecated +# - nolintlint # as of go1.19 this conflicts with the behavior of gofmt, which is a deal-breaker (lint-fix will still fail when running lint) +# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code +# - rowserrcheck # not in a repo with sql, so this is not useful +# - scopelint # 
deprecated +# - structcheck # The owner seems to have abandoned the linter. Replaced by "unused". # - testpackage -# - wsl # this doens't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90) - +# - varcheck # The owner seems to have abandoned the linter. Replaced by "unused". +# - wsl # this doens't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90) diff --git a/DEVELOPING.md b/DEVELOPING.md new file mode 100644 index 00000000..43577bb2 --- /dev/null +++ b/DEVELOPING.md @@ -0,0 +1,53 @@ +# Developing + +## Getting started + +In order to test and develop in this repo you will need the following dependencies installed: +- Golang +- docker +- make +- podman (for benchmark and integration tests only) + +After cloning the following step can help you get setup: +1. run `make bootstrap` to download go mod dependencies, create the `/.tmp` dir, and download helper utilities. +2. run `make help` to view the selection of developer commands in the Makefile + +The main make tasks for common static analysis and testing are `lint`, `lint-fix`, `unit`, and `integration`. + +See `make help` for all the current make tasks. + +## Background + +Stereoscope is a library for reading and manipulating container images. It is capable of parsing multiple image +sources, providing a single abstraction for interacting with them. Ultimately this provides a squashfs-like +interface for interacting with image layers as well as a content API for accessing files contained within +the image. + +**Overview of objects:** +- `image.Image`: Once parsed with `image.Read()` this object represents a container image. Consists of a sequence of `image.Layer` objects, a `image.FileCatalog` for accessing files, and `filetree.SearchContext` for searching for files from the squashed representation of the image filesystem. Additionally exposes GGCR `v1.Image` objects for accessing the raw image metadata. 
+- `image.Layer`: represents a single layer of the image. Consists of a `filetree.FileTree` that represents the raw layer contents, and a `filetree.SearchContext` for searching for files relative to the raw (single layer) filetree as well as the squashed representation of the layer relative to all layers below this one. Additionally exposes GGCR `v1.Layer` objects for accessing the raw layer metadata. +- `filetree.FileTree`: a tree representing a filesystem. All nodes represent real paths (paths with no link resolution anywhere in the path) and are absolute paths (start with / and contain no relative path elements [e.g. ../ or ./]). This represents the filesystem structure and each node has a reference to the file metadata for that path. +- `file.Reference`: a unique file in the filesystem, identified by an absolute, real path as well as an integer ID (`file.ID`s). These are used to reference concrete nodes in the `filetree.FileTree` and `image.FileCatalog` objects. +- `file.Index`: stores all known `file.Reference` and `file.Metadata`. Entries are indexed with a variety of ways to provide fast access to references and metadata without needing to crawl the tree. This is especially useful for speeding up globbing. +- `image.FileCatalog`: an image-aware extension of `file.Index` that additionally relates `image.Layers` to `file.IDs` and provides a content API for any files contained within the image (regardless of which layer or squashed representation it exists in). 
+ +### Searching for files + +Searching for files is exposed to users in three ways: +- search by file path +- search by file glob +- search by file content MIME type + +Searching itself is performed two different ways: +- search the `image.FileCatalog` on the image by a heuristic +- search the `filetree.FileTree` directly + +The "best way" to search is automatically determined in the `filetree.searchContext` object, exposed on `image.Image` and `image.Layer` objects as a `filetree.Searcher` for general use. + +### File trees + +The `filetree.FileTree` object represents a filesystem and consists of `filenode.Node` objects. The tree itself leverages `tree.Tree` as a generic datastructure. What `filetree.FileTree` adds is the concept of file types, the semantics of each type, the ability to resolve links based on a given strategy, merging of trees with the same semantics of a union filesystem (e.g. whiteout files), and the ability to search for files via direct paths or globs. + +The `fs.FS` abstraction has been implemented on `filetree.FileTree` to allow for easy integration with the standard library as well as to interop with the `doublestar` library to facilitate globing. Using the `fs.FS` abstraction for filetree operations is faster than OS interactions with the filesystem directly but relatively slower than the indexes provided by `image.FileCatalog` and `file.Index`. + +`filetre.FileTree` objects can be created with a corresponding `file.Index` object by leveraging the `filetree.Builder` object, which aids in the indexing of files. 
diff --git a/Makefile b/Makefile index 7bdbc175..b5f3d3d6 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,15 @@ -TEMPDIR = ./.tmp -RESULTSDIR = test/results -COVER_REPORT = $(RESULTSDIR)/unit-coverage-details.txt -COVER_TOTAL = $(RESULTSDIR)/unit-coverage-summary.txt -LINTCMD = $(TEMPDIR)/golangci-lint run --tests=false --config .golangci.yaml +TEMP_DIR = ./.tmp + +# Command templates ################################# +LINT_CMD = $(TEMP_DIR)/golangci-lint run --tests=false --config .golangci.yaml + +# Tool versions ################################# +GOLANGCILINT_VERSION := v1.51.0 +GOSIMPORTS_VERSION := v0.3.5 +BOUNCER_VERSION := v0.4.0 +CHRONICLE_VERSION := v0.5.1 + +# Formatting variables ################################# BOLD := $(shell tput -T linux bold) PURPLE := $(shell tput -T linux setaf 5) GREEN := $(shell tput -T linux setaf 2) @@ -11,57 +18,72 @@ RED := $(shell tput -T linux setaf 1) RESET := $(shell tput -T linux sgr0) TITLE := $(BOLD)$(PURPLE) SUCCESS := $(BOLD)$(GREEN) -# the quality gate lower threshold for unit test total % coverage (by function statements) -COVERAGE_THRESHOLD := 48 -ifeq "$(strip $(VERSION))" "" - override VERSION = $(shell git describe --always --tags --dirty) -endif +# Test variables ################################# +COVERAGE_THRESHOLD := 55 # the quality gate lower threshold for unit test total % coverage (by function statements) + +## Build variables ################################# +SNAPSHOT_DIR := ./snapshot +VERSION := $(shell git describe --dirty --always --tags) -ifndef TEMPDIR - $(error TEMPDIR is not set) +ifndef VERSION + $(error VERSION is not set) endif -ifndef REF_NAME - REF_NAME = $(VERSION) +ifndef TEMP_DIR + $(error TEMP_DIR is not set) endif define title @printf '$(TITLE)$(1)$(RESET)\n' endef +define safe_rm_rf + bash -c 'test -z "$(1)" && false || rm -rf $(1)' +endef + +define safe_rm_rf_children + bash -c 'test -z "$(1)" && false || rm -rf $(1)/*' +endef + .PHONY: all -all: static-analysis test ## Run all 
checks (linting, all tests, and dependencies license checks) +all: static-analysis test ## Run all linux-based checks (linting, license check, unit, integration, and linux compare tests) @printf '$(SUCCESS)All checks pass!$(RESET)\n' +.PHONY: static-analysis +static-analysis: check-go-mod-tidy lint check-licenses ## Run all static analysis checks + .PHONY: test -test: unit integration benchmark ## Run all levels of test +test: unit integration benchmark ## Run all tests (currently unit and integrations) -.PHONY: help -help: - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}' + +## Bootstrapping targets ################################# .PHONY: ci-bootstrap ci-bootstrap: bootstrap - sudo apt install -y bc curl -sLO https://github.com/sylabs/singularity/releases/download/v3.10.0/singularity-ce_3.10.0-focal_amd64.deb && sudo apt-get install -y -f ./singularity-ce_3.10.0-focal_amd64.deb -$(RESULTSDIR): - mkdir -p $(RESULTSDIR) - -.PHONY: boostrap -bootstrap: $(RESULTSDIR) ## Download and install all project dependencies (+ prep tooling in the ./tmp dir) - $(call title,Downloading dependencies) - @pwd - # prep temp dirs - mkdir -p $(TEMPDIR) - mkdir -p $(RESULTSDIR) - # install go dependencies +.PHONY: bootstrap +bootstrap: $(TEMP_DIR) bootstrap-go bootstrap-tools ## Download and install all tooling dependencies (+ prep tooling in the ./tmp dir) + $(call title,Bootstrapping dependencies) + +.PHONY: bootstrap-tools +bootstrap-tools: $(TEMP_DIR) + GO111MODULE=off GOBIN=$(realpath $(TEMP_DIR)) go get -u golang.org/x/perf/cmd/benchstat + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMP_DIR)/ $(GOLANGCILINT_VERSION) + curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMP_DIR)/ $(BOUNCER_VERSION) + curl -sSfL https://raw.githubusercontent.com/anchore/chronicle/main/install.sh 
| sh -s -- -b $(TEMP_DIR)/ $(CHRONICLE_VERSION) + # the only difference between goimports and gosimports is that gosimports removes extra whitespace between import blocks (see https://github.com/golang/go/issues/20818) + GOBIN="$(realpath $(TEMP_DIR))" go install github.com/rinchsan/gosimports/cmd/gosimports@$(GOSIMPORTS_VERSION) + +.PHONY: bootstrap-go +bootstrap-go: go mod download - # install utilities - [ -f "$(TEMPDIR)/benchstat" ] || GO111MODULE=off GOBIN=$(shell realpath $(TEMPDIR)) go get -u golang.org/x/perf/cmd/benchstat - [ -f "$(TEMPDIR)/golangci" ] || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMPDIR)/ v1.50.1 - [ -f "$(TEMPDIR)/bouncer" ] || curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMPDIR)/ v0.4.0 + +$(TEMP_DIR): + mkdir -p $(TEMP_DIR) + +## Static analysis targets ################################# .PHONY: static-analysis static-analysis: check-licenses lint @@ -71,40 +93,55 @@ lint: ## Run gofmt + golangci lint checks $(call title,Running linters) @printf "files with gofmt issues: [$(shell gofmt -l -s .)]\n" @test -z "$(shell gofmt -l -s .)" - $(LINTCMD) + $(LINT_CMD) .PHONY: lint-fix lint-fix: ## Auto-format all source code + run golangci lint fixers $(call title,Running lint fixers) gofmt -w -s . - $(LINTCMD) --fix + $(LINT_CMD) --fix go mod tidy .PHONY: check-licenses check-licenses: $(call title,Validating licenses for go dependencies) - $(TEMPDIR)/bouncer check + $(TEMP_DIR)/bouncer check + +check-go-mod-tidy: + @ .github/scripts/go-mod-tidy-check.sh && echo "go.mod and go.sum are tidy!" + +## Testing targets ################################# .PHONY: unit -unit: $(RESULTSDIR) ## Run unit tests (with coverage) +unit: $(TEMP_DIR) ## Run unit tests (with coverage) $(call title,Running unit tests) - go test --race -coverprofile $(COVER_REPORT) $(shell go list ./... 
| grep -v anchore/stereoscope/test/integration) - @go tool cover -func $(COVER_REPORT) | grep total | awk '{print substr($$3, 1, length($$3)-1)}' > $(COVER_TOTAL) - @echo "Coverage: $$(cat $(COVER_TOTAL))" - @if [ $$(echo "$$(cat $(COVER_TOTAL)) >= $(COVERAGE_THRESHOLD)" | bc -l) -ne 1 ]; then echo "$(RED)$(BOLD)Failed coverage quality gate (> $(COVERAGE_THRESHOLD)%)$(RESET)" && false; fi + go test -coverprofile $(TEMP_DIR)/unit-coverage-details.txt $(shell go list ./... | grep -v anchore/stereoscope/test) + @.github/scripts/coverage.py $(COVERAGE_THRESHOLD) $(TEMP_DIR)/unit-coverage-details.txt + + +.PHONY: integration +integration: integration-tools ## Run integration tests + $(call title,Running integration tests) + go test -v ./test/integration + +## Benchmark test targets ################################# + .PHONY: benchmark -benchmark: $(RESULTSDIR) ## Run benchmark tests and compare against the baseline (if available) +benchmark: $(TEMP_DIR) ## Run benchmark tests and compare against the baseline (if available) $(call title,Running benchmark tests) - go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... | tee $(RESULTSDIR)/benchmark-$(REF_NAME).txt - (test -s $(RESULTSDIR)/benchmark-main.txt && \ - $(TEMPDIR)/benchstat $(RESULTSDIR)/benchmark-main.txt $(RESULTSDIR)/benchmark-$(REF_NAME).txt || \ - $(TEMPDIR)/benchstat $(RESULTSDIR)/benchmark-$(REF_NAME).txt) \ - | tee $(RESULTSDIR)/benchstat.txt + go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... 
| tee $(TEMP_DIR)/benchmark-$(VERSION).txt + (test -s $(TEMP_DIR)/benchmark-main.txt && \ + $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(VERSION).txt || \ + $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(VERSION).txt) \ + | tee $(TEMP_DIR)/benchstat.txt + .PHONY: show-benchstat show-benchstat: - @cat $(RESULTSDIR)/benchstat.txt + @cat $(TEMP_DIR)/benchstat.txt + +## Test-fixture-related targets ################################# # note: this is used by CI to determine if the integration test fixture cache (docker image tars) should be busted .PHONY: integration-fingerprint @@ -127,11 +164,30 @@ integration-tools-load: integration-tools-save: @cd test/integration/tools && make save-cache -.PHONY: integration -integration: integration-tools ## Run integration tests - $(call title,Running integration tests) - go test -v ./test/integration +## Build-related targets ################################# + +.PHONY: snapshot +snapshot: clean-snapshot ## Build the binary + $(call title,Build compatability test) + @.github/scripts/build.sh $(SNAPSHOT_DIR) + +## Cleanup targets ################################# + +.PHONY: clean +clean: clear-test-cache clean-snapshot ## Delete all generated artifacts + $(call safe_rm_rf_children,$(TEMP_DIR)) + +.PHONY: clean-snapshot +clean-snapshot: ## Delete all snapshot builds + $(call safe_rm_rf,$(SNAPSHOT_DIR)) .PHONY: clear-test-cache clear-test-cache: ## Delete all test cache (built docker image tars) find . -type f -wholename "**/test-fixtures/cache/*.tar" -delete + + +## Halp! 
################################# + +.PHONY: help +help: ## Display this help + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}' diff --git a/examples/basic.go b/examples/basic.go index 23d3883a..687245b9 100644 --- a/examples/basic.go +++ b/examples/basic.go @@ -89,7 +89,7 @@ func main() { ////////////////////////////////////////////////////////////////// // Fetch file contents from the (squashed) image filePath := file.Path("/etc/group") - contentReader, err := image.FileContentsFromSquash(filePath) + contentReader, err := image.OpenPathFromSquash(filePath) if err != nil { panic(err) } diff --git a/go.mod b/go.mod index eeae65c5..9cec3854 100644 --- a/go.mod +++ b/go.mod @@ -7,12 +7,14 @@ require ( github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8 github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04 + github.com/becheran/wildmatch-go v1.0.0 github.com/bmatcuk/doublestar/v4 v4.0.2 github.com/containerd/containerd v1.6.12 github.com/docker/cli v20.10.12+incompatible github.com/docker/docker v20.10.12+incompatible github.com/gabriel-vasile/mimetype v1.4.0 github.com/go-test/deep v1.0.8 + github.com/google/go-cmp v0.5.8 github.com/google/go-containerregistry v0.7.0 github.com/hashicorp/go-multierror v1.1.1 github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381 @@ -23,7 +25,7 @@ require ( github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e github.com/sergi/go-diff v1.2.0 github.com/spf13/afero v1.6.0 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.8.1 github.com/sylabs/sif/v2 v2.8.1 github.com/sylabs/squashfs v0.6.1 github.com/wagoodman/go-partybus v0.0.0-20200526224238-eb215533f07d @@ -66,14 +68,14 @@ require ( github.com/pierrec/lz4/v4 v4.1.15 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect 
github.com/sirupsen/logrus v1.8.1 // indirect - github.com/stretchr/objx v0.2.0 // indirect + github.com/stretchr/objx v0.5.0 // indirect github.com/therootcompany/xz v1.0.1 // indirect github.com/ulikunitz/xz v0.5.10 // indirect github.com/vbatts/tar-split v0.11.2 // indirect golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect - golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect + golang.org/x/sys v0.1.0 // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/text v0.3.7 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 18aa55bd..33eeee5a 100644 --- a/go.sum +++ b/go.sum @@ -130,6 +130,8 @@ github.com/aws/smithy-go v1.6.0 h1:T6puApfBcYiTIsaI+SYWqanjMt5pc3aoyyDrI+0YH54= github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04 h1:p2I85zYI9z5/c/3Q0LiO3RtNXcmXHTtJfml/hV16zNg= github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04/go.mod h1:Z+bXnIbhKJYSvxNwsNnwde7pDKxuqlEZCbUBoTwAqf0= +github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA= +github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4= github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -428,8 +430,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.7.0 h1:u0onUUOcyoCDHEiJoyR1R1gx5er1+r06V5DBhUU5ndk= github.com/google/go-containerregistry v0.7.0/go.mod h1:2zaoelrL0d08gGbpdP3LqyUuBmhWbpD6IOe2s9nLS2k= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -750,16 +753,21 @@ github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/sylabs/sif/v2 v2.8.1 h1:whr4Vz12RXfLnYyVGHoD/rD/hbF2g9OW7BJHa+WIqW8= github.com/sylabs/sif/v2 v2.8.1/go.mod h1:LQOdYXC9a8i7BleTKRw9lohi0rTbXkJOeS9u0ebvgyM= @@ -1054,8 +1062,8 @@ golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY= golang.org/x/term 
v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1137,7 +1145,6 @@ golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= diff --git a/internal/string_set.go b/internal/string_set.go new file mode 100644 index 00000000..42f00165 --- /dev/null +++ b/internal/string_set.go @@ -0,0 +1,76 @@ +package internal + +import ( + "sort" +) + +type StringSet map[string]struct{} + +func NewStringSet(is ...string) StringSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(StringSet) + s.Add(is...) 
+ return s +} + +func (s StringSet) Size() int { + return len(s) +} + +func (s StringSet) Merge(other StringSet) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s StringSet) Add(ids ...string) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s StringSet) Remove(ids ...string) { + for _, i := range ids { + delete(s, i) + } +} + +func (s StringSet) Contains(i string) bool { + _, ok := s[i] + return ok +} + +func (s StringSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s StringSet) List() []string { + ret := make([]string, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s StringSet) Sorted() []string { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s StringSet) ContainsAny(ids ...string) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/internal/string_set_test.go b/internal/string_set_test.go new file mode 100644 index 00000000..e04727ae --- /dev/null +++ b/internal/string_set_test.go @@ -0,0 +1,226 @@ +package internal + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestStringSet_Size(t *testing.T) { + type testCase struct { + name string + s StringSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewStringSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewStringSet("items", "in", "set"), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestStringSet_Add(t *testing.T) { + type args struct { + ids []string + } + type testCase struct { + name string + s StringSet + args args + } + tests := []testCase{ + { + name: "add multiple", + s: NewStringSet(), 
+ args: args{ids: []string{"a", "b", "c"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestStringSet_Remove(t *testing.T) { + type args struct { + ids []string + } + type testCase struct { + name string + s StringSet + args args + expected []string + } + tests := []testCase{ + { + name: "remove multiple", + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"a", "b"}}, + expected: []string{"c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestStringSet_Contains(t *testing.T) { + type args struct { + i string + } + type testCase struct { + name string + s StringSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewStringSet("a", "b", "c"), + args: args{i: "a"}, + want: true, + }, + { + name: "not contains", + s: NewStringSet("a", "b", "c"), + args: args{i: "x"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestStringSet_Clear(t *testing.T) { + type testCase struct { + name string + s StringSet + } + tests := []testCase{ + { + name: "go case", + s: NewStringSet("a", "b", "c"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestStringSet_List(t *testing.T) { + type testCase struct { + name string + s StringSet + want []string + } + tests := []testCase{ + { + 
name: "go case", + s: NewStringSet("a", "b", "c"), + want: []string{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func TestStringSet_Sorted(t *testing.T) { + type testCase struct { + name string + s StringSet + want []string + } + tests := []testCase{ + { + name: "go case", + s: NewStringSet("a", "b", "c"), + want: []string{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestStringSet_ContainsAny(t *testing.T) { + type args struct { + ids []string + } + type testCase struct { + name string + s StringSet + args args + want bool + } + tests := []testCase{ + { + name: "contains one", + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"a", "x"}}, + want: true, + }, + { + name: "contains all", + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"a", "b"}}, + want: true, + }, + { + name: "contains none", + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"x", "y"}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/internal/stringset.go b/internal/stringset.go deleted file mode 100644 index 327312b0..00000000 --- a/internal/stringset.go +++ /dev/null @@ -1,38 +0,0 @@ -package internal - -import "sort" - -type Set map[string]struct{} - -func NewStringSet(start ...string) Set { - ret := make(Set) - for _, s := range start { - ret.Add(s) - } - return ret -} - -func (s Set) Add(i string) { - s[i] = struct{}{} -} - -func (s Set) Remove(i string) { - delete(s, i) -} - -func (s Set) Contains(i string) bool { - _, ok := s[i] - return ok -} - -// ToSlice returns a sorted slice of strings that are contained within the set. 
-func (s Set) ToSlice() []string { - ret := make([]string, len(s)) - idx := 0 - for v := range s { - ret[idx] = v - idx++ - } - sort.Strings(ret) - return ret -} diff --git a/pkg/file/get_xid.go b/pkg/file/get_xid.go new file mode 100644 index 00000000..29a7b02a --- /dev/null +++ b/pkg/file/get_xid.go @@ -0,0 +1,20 @@ +//go:build !windows + +package file + +import ( + "os" + "syscall" +) + +// getXid is the UID GID system info for unix +func getXid(info os.FileInfo) (uid, gid int) { + uid = -1 + gid = -1 + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + uid = int(stat.Uid) + gid = int(stat.Gid) + } + + return uid, gid +} diff --git a/pkg/file/get_xid_win.go b/pkg/file/get_xid_win.go new file mode 100644 index 00000000..abe28de8 --- /dev/null +++ b/pkg/file/get_xid_win.go @@ -0,0 +1,12 @@ +//go:build windows + +package file + +import ( + "os" +) + +// getXid is a placeholder for windows file information +func getXid(info os.FileInfo) (uid, gid int) { + return -1, -1 +} diff --git a/pkg/file/id.go b/pkg/file/id.go new file mode 100644 index 00000000..75f05dc5 --- /dev/null +++ b/pkg/file/id.go @@ -0,0 +1,20 @@ +package file + +var nextID = 0 // note: this is governed by the reference constructor + +// ID is used for file tree manipulation to uniquely identify tree nodes. +type ID uint64 + +type IDs []ID + +func (ids IDs) Len() int { + return len(ids) +} + +func (ids IDs) Less(i, j int) bool { + return ids[i] < ids[j] +} + +func (ids IDs) Swap(i, j int) { + ids[i], ids[j] = ids[j], ids[i] +} diff --git a/pkg/file/id_set.go b/pkg/file/id_set.go new file mode 100644 index 00000000..eebe00fa --- /dev/null +++ b/pkg/file/id_set.go @@ -0,0 +1,75 @@ +//nolint:dupl +package file + +import "sort" + +type IDSet map[ID]struct{} + +func NewIDSet(is ...ID) IDSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(IDSet) + s.Add(is...) 
+ return s +} + +func (s IDSet) Size() int { + return len(s) +} + +func (s IDSet) Merge(other IDSet) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s IDSet) Add(ids ...ID) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s IDSet) Remove(ids ...ID) { + for _, i := range ids { + delete(s, i) + } +} + +func (s IDSet) Contains(i ID) bool { + _, ok := s[i] + return ok +} + +func (s IDSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s IDSet) List() []ID { + ret := make([]ID, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s IDSet) Sorted() []ID { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s IDSet) ContainsAny(ids ...ID) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/file/id_set_test.go b/pkg/file/id_set_test.go new file mode 100644 index 00000000..b0d146db --- /dev/null +++ b/pkg/file/id_set_test.go @@ -0,0 +1,226 @@ +package file + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestIDSet_Size(t *testing.T) { + type testCase struct { + name string + s IDSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewIDSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewIDSet(1, 2, 3), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Add(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + } + tests := []testCase{ + { + name: "add multiple", + s: NewIDSet(), + args: args{ids: []ID{1, 2, 3}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + 
tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Remove(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + expected []ID + } + tests := []testCase{ + { + name: "remove multiple", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{1, 2}}, + expected: []ID{3}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Contains(t *testing.T) { + type args struct { + i ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewIDSet(1, 2, 3), + args: args{i: 1}, + want: true, + }, + { + name: "not contains", + s: NewIDSet(1, 2, 3), + args: args{i: 97}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Clear(t *testing.T) { + type testCase struct { + name string + s IDSet + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet(1, 2, 3), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestIDSet_List(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet(1, 2, 3), + want: []ID{1, 2, 3}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func 
TestIDSet_Sorted(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet(1, 2, 3), + want: []ID{1, 2, 3}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestIDSet_ContainsAny(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains one", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{1, 97}}, + want: true, + }, + { + name: "contains all", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{1, 2}}, + want: true, + }, + { + name: "contains none", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{97, 98}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 1cbd0d92..990e7a4f 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -7,6 +7,8 @@ import ( "path" "path/filepath" + "github.com/anchore/stereoscope/internal/log" + "github.com/sylabs/squashfs" ) @@ -14,36 +16,29 @@ import ( type Metadata struct { // Path is the absolute path representation to the file Path string - // TarHeaderName is the exact entry name as found within a tar header - TarHeaderName string - // TarSequence is the nth header in the tar file this entry was found - TarSequence int64 - // Linkname is populated only for hardlinks / symlinks, can be an absolute or relative - Linkname string + // LinkDestination is populated only for hardlinks / symlinks, can be an absolute or relative + LinkDestination string // Size of the file in bytes - Size int64 - UserID int - GroupID int - // TypeFlag is the tar.TypeFlag entry for the file - TypeFlag byte + Size int64 + UserID int + GroupID 
int + Type Type IsDir bool Mode os.FileMode MIMEType string } -func NewMetadata(header tar.Header, sequence int64, content io.Reader) Metadata { +func NewMetadata(header tar.Header, content io.Reader) Metadata { return Metadata{ - Path: path.Clean(DirSeparator + header.Name), - TarHeaderName: header.Name, - TarSequence: sequence, - TypeFlag: header.Typeflag, - Linkname: header.Linkname, - Size: header.FileInfo().Size(), - Mode: header.FileInfo().Mode(), - UserID: header.Uid, - GroupID: header.Gid, - IsDir: header.FileInfo().IsDir(), - MIMEType: MIMEType(content), + Path: path.Clean(DirSeparator + header.Name), + Type: TypeFromTarType(header.Typeflag), + LinkDestination: header.Linkname, + Size: header.FileInfo().Size(), + Mode: header.FileInfo().Mode(), + UserID: header.Uid, + GroupID: header.Gid, + IsDir: header.FileInfo().IsDir(), + MIMEType: MIMEType(content), } } @@ -54,12 +49,37 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error return Metadata{}, err } + var ty Type + switch { + case fi.IsDir(): + ty = TypeDirectory + case f.IsRegular(): + ty = TypeRegular + case f.IsSymlink(): + ty = TypeSymLink + default: + switch fi.Mode() & os.ModeType { + case os.ModeNamedPipe: + ty = TypeFIFO + case os.ModeSocket: + ty = TypeSocket + case os.ModeDevice: + ty = TypeBlockDevice + case os.ModeCharDevice: + ty = TypeCharacterDevice + case os.ModeIrregular: + ty = TypeIrregular + } + // note: cannot determine hardlink from squashfs.File (but case us not possible) + } + md := Metadata{ - Path: filepath.Clean(filepath.Join("/", path)), - Linkname: f.SymlinkPath(), - Size: fi.Size(), - IsDir: f.IsDir(), - Mode: fi.Mode(), + Path: filepath.Clean(filepath.Join("/", path)), + LinkDestination: f.SymlinkPath(), + Size: fi.Size(), + IsDir: f.IsDir(), + Mode: fi.Mode(), + Type: ty, } if f.IsRegular() { @@ -68,3 +88,38 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error return md, nil } + +func NewMetadataFromPath(path 
string, info os.FileInfo) Metadata { + var mimeType string + uid, gid := getXid(info) + + ty := TypeFromMode(info.Mode()) + + if ty == TypeRegular { + f, err := os.Open(path) + if err != nil { + // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns + f = nil + } else { + defer func() { + if err := f.Close(); err != nil { + log.Warnf("unable to close file while obtaining metadata: %s", path) + } + }() + } + + mimeType = MIMEType(f) + } + + return Metadata{ + Path: path, + Mode: info.Mode(), + Type: ty, + // unsupported across platforms + UserID: uid, + GroupID: gid, + Size: info.Size(), + MIMEType: mimeType, + IsDir: info.IsDir(), + } +} diff --git a/pkg/file/metadata_test.go b/pkg/file/metadata_test.go index b89ed431..aa60bc95 100644 --- a/pkg/file/metadata_test.go +++ b/pkg/file/metadata_test.go @@ -4,6 +4,8 @@ package file import ( + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "io" "os" "strings" @@ -16,13 +18,13 @@ func TestFileMetadataFromTar(t *testing.T) { tarReader := getTarFixture(t, "fixture-1") expected := []Metadata{ - {Path: "/path", TarSequence: 0, TarHeaderName: "path/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch", TarSequence: 1, TarHeaderName: "path/branch/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one", TarSequence: 2, TarHeaderName: "path/branch/one/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one/file-1.txt", TarSequence: 3, TarHeaderName: "path/branch/one/file-1.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/branch/two", 
TarSequence: 4, TarHeaderName: "path/branch/two/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/two/file-2.txt", TarSequence: 5, TarHeaderName: "path/branch/two/file-2.txt", TypeFlag: 48, Linkname: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/file-3.txt", TarSequence: 6, TarHeaderName: "path/file-3.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one/file-1.txt", Type: TypeRegular, LinkDestination: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/branch/two", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/two/file-2.txt", Type: TypeRegular, LinkDestination: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/file-3.txt", Type: TypeRegular, LinkDestination: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, } var actual []Metadata @@ -31,7 +33,7 @@ func TestFileMetadataFromTar(t *testing.T) { if strings.HasSuffix(entry.Header.Name, ".txt") { contents = strings.NewReader("#!/usr/bin/env bash\necho 'awesome script'") } - actual = append(actual, NewMetadata(entry.Header, 
entry.Sequence, contents)) + actual = append(actual, NewMetadata(entry.Header, contents)) return nil } @@ -43,3 +45,43 @@ func TestFileMetadataFromTar(t *testing.T) { t.Errorf("diff: %s", d) } } + +func TestFileMetadataFromPath(t *testing.T) { + + tests := []struct { + path string + expectedType Type + expectedMIMEType string + }{ + { + path: "test-fixtures/symlinks-simple/readme", + expectedType: TypeRegular, + expectedMIMEType: "text/plain", + }, + { + path: "test-fixtures/symlinks-simple/link_to_new_readme", + expectedType: TypeSymLink, + expectedMIMEType: "", + }, + { + path: "test-fixtures/symlinks-simple/link_to_link_to_new_readme", + expectedType: TypeSymLink, + expectedMIMEType: "", + }, + { + path: "test-fixtures/symlinks-simple", + expectedType: TypeDirectory, + expectedMIMEType: "", + }, + } + for _, test := range tests { + t.Run(test.path, func(t *testing.T) { + info, err := os.Lstat(test.path) + require.NoError(t, err) + + actual := NewMetadataFromPath(test.path, info) + assert.Equal(t, test.expectedMIMEType, actual.MIMEType, "unexpected MIME type for %s", test.path) + assert.Equal(t, test.expectedType, actual.Type, "unexpected type for %s", test.path) + }) + } +} diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index fe7280fc..a46f342a 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -1,20 +1,77 @@ +//nolint:dupl package file +import ( + "sort" +) + type PathSet map[Path]struct{} -func NewPathSet() PathSet { - return make(PathSet) +func NewPathSet(is ...Path) PathSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(PathSet) + s.Add(is...) 
+ return s +} + +func (s PathSet) Size() int { + return len(s) +} + +func (s PathSet) Merge(other PathSet) { + for _, i := range other.List() { + s.Add(i) + } } -func (s PathSet) Add(i Path) { - s[i] = struct{}{} +func (s PathSet) Add(ids ...Path) { + for _, i := range ids { + s[i] = struct{}{} + } } -func (s PathSet) Remove(i Path) { - delete(s, i) +func (s PathSet) Remove(ids ...Path) { + for _, i := range ids { + delete(s, i) + } } func (s PathSet) Contains(i Path) bool { _, ok := s[i] return ok } + +func (s PathSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s PathSet) List() []Path { + ret := make([]Path, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s PathSet) Sorted() []Path { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s PathSet) ContainsAny(ids ...Path) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/file/path_set_test.go b/pkg/file/path_set_test.go new file mode 100644 index 00000000..5d296649 --- /dev/null +++ b/pkg/file/path_set_test.go @@ -0,0 +1,226 @@ +package file + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestPathSet_Size(t *testing.T) { + type testCase struct { + name string + s PathSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewPathSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewPathSet("items", "in", "set"), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPathSet_Add(t *testing.T) { + type args struct { + ids []Path + } + type testCase struct { + name string + s PathSet + args args + } + tests := []testCase{ + { + name: "add 
multiple", + s: NewPathSet(), + args: args{ids: []Path{"a", "b", "c"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestPathSet_Remove(t *testing.T) { + type args struct { + ids []Path + } + type testCase struct { + name string + s PathSet + args args + expected []Path + } + tests := []testCase{ + { + name: "remove multiple", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"a", "b"}}, + expected: []Path{"c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestPathSet_Contains(t *testing.T) { + type args struct { + i Path + } + type testCase struct { + name string + s PathSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewPathSet("a", "b", "c"), + args: args{i: "a"}, + want: true, + }, + { + name: "not contains", + s: NewPathSet("a", "b", "c"), + args: args{i: "x"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPathSet_Clear(t *testing.T) { + type testCase struct { + name string + s PathSet + } + tests := []testCase{ + { + name: "go case", + s: NewPathSet("a", "b", "c"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestPathSet_List(t *testing.T) { + type testCase struct { + name string + s PathSet + want []Path + } + tests := []testCase{ + { + name: "go 
case", + s: NewPathSet("a", "b", "c"), + want: []Path{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func TestPathSet_Sorted(t *testing.T) { + type testCase struct { + name string + s PathSet + want []Path + } + tests := []testCase{ + { + name: "go case", + s: NewPathSet("a", "b", "c"), + want: []Path{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestPathSet_ContainsAny(t *testing.T) { + type args struct { + ids []Path + } + type testCase struct { + name string + s PathSet + args args + want bool + } + tests := []testCase{ + { + name: "contains one", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"a", "x"}}, + want: true, + }, + { + name: "contains all", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"a", "b"}}, + want: true, + }, + { + name: "contains none", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"x", "y"}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/pkg/file/reference.go b/pkg/file/reference.go index 47ed8ae9..1109227d 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -1,13 +1,6 @@ package file -import ( - "fmt" -) - -var nextID = 0 - -// ID is used for file tree manipulation to uniquely identify tree nodes. -type ID uint64 +import "fmt" // Reference represents a unique file. This is useful when path is not good enough (i.e. 
you have the same file path for two files in two different container image layers, and you need to be able to distinguish them apart) type Reference struct { diff --git a/pkg/file/resolution.go b/pkg/file/resolution.go new file mode 100644 index 00000000..5f10c719 --- /dev/null +++ b/pkg/file/resolution.go @@ -0,0 +1,158 @@ +package file + +import ( + "sort" + + "github.com/scylladb/go-set/strset" +) + +// Resolution represents the fetching of a possibly non-existent file via a request path. +type Resolution struct { + RequestPath Path + *Reference + // LinkResolutions represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. + // note: today this only shows resolutions via the basename of the request path, but in the future it may show all resolutions. + LinkResolutions []Resolution +} + +type Resolutions []Resolution + +// NewResolution create a new Resolution for the given request path, showing the resolved reference (or +// nil if it does not exist), and the link resolution of the basename of the request path transitively. 
+func NewResolution(path Path, ref *Reference, leafs []Resolution) *Resolution { + return &Resolution{ + RequestPath: path, + Reference: ref, + LinkResolutions: leafs, + } +} + +func (f Resolutions) Len() int { + return len(f) +} + +func (f Resolutions) Less(i, j int) bool { + ith := f[i] + jth := f[j] + + ithIsReal := ith.Reference != nil && ith.Reference.RealPath == ith.RequestPath + jthIsReal := jth.Reference != nil && jth.Reference.RealPath == jth.RequestPath + + switch { + case ithIsReal && !jthIsReal: + return true + case !ithIsReal && jthIsReal: + return false + } + + return ith.RequestPath < jth.RequestPath +} + +func (f Resolutions) Swap(i, j int) { + f[i], f[j] = f[j], f[i] +} + +func (f *Resolution) HasReference() bool { + if f == nil { + return false + } + return f.Reference != nil +} + +func (f *Resolution) AllPaths() []Path { + set := strset.New() + set.Add(string(f.RequestPath)) + if f.Reference != nil { + set.Add(string(f.Reference.RealPath)) + } + for _, p := range f.LinkResolutions { + set.Add(string(p.RequestPath)) + if p.Reference != nil { + set.Add(string(p.Reference.RealPath)) + } + } + + paths := set.List() + sort.Strings(paths) + + var results []Path + for _, p := range paths { + results = append(results, Path(p)) + } + return results +} + +func (f *Resolution) AllRequestPaths() []Path { + set := strset.New() + set.Add(string(f.RequestPath)) + for _, p := range f.LinkResolutions { + set.Add(string(p.RequestPath)) + } + + paths := set.List() + sort.Strings(paths) + + var results []Path + for _, p := range paths { + results = append(results, Path(p)) + } + return results +} + +// RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
func (f *Resolution) RequestResolutionPath() []Path {
	var paths []Path
	var firstPath Path
	// tracks whether the final hop in the link chain failed to resolve (dead link)
	var lastLinkResolutionIsDead bool

	// start the traversal with the originally requested path, when one was given
	if string(f.RequestPath) != "" {
		firstPath = f.RequestPath
		paths = append(paths, f.RequestPath)
	}
	for i, p := range f.LinkResolutions {
		if i == 0 && p.RequestPath == f.RequestPath {
			// ignore link resolution that starts with the same user requested path
			continue
		}
		if firstPath == "" {
			// no request path was recorded above; the first link hop becomes the traversal origin
			firstPath = p.RequestPath
		}

		paths = append(paths, p.RequestPath)

		if i == len(f.LinkResolutions)-1 {
			// we've reached the final link resolution
			if p.Reference == nil {
				lastLinkResolutionIsDead = true
			}
		}
	}
	// append the real path of the final resolved reference, unless it is already the traversal
	// origin (no link indirection happened) or the link chain ended in a dead link
	if f.HasReference() && firstPath != f.Reference.RealPath && !lastLinkResolutionIsDead {
		// we've reached the final reference that was resolved
		// we should only do this if there was a link resolution
		paths = append(paths, f.Reference.RealPath)
	}
	return paths
}

// References represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution.
+func (f *Resolution) References() []Reference { + var refs []Reference + var lastLinkResolutionIsDead bool + + for i, p := range f.LinkResolutions { + if p.Reference != nil { + refs = append(refs, *p.Reference) + } + if i == len(f.LinkResolutions)-1 { + // we've reached the final link resolution + if p.Reference == nil { + lastLinkResolutionIsDead = true + } + } + } + if f.Reference != nil && !lastLinkResolutionIsDead { + refs = append(refs, *f.Reference) + } + return refs +} diff --git a/pkg/file/resolution_test.go b/pkg/file/resolution_test.go new file mode 100644 index 00000000..3a444950 --- /dev/null +++ b/pkg/file/resolution_test.go @@ -0,0 +1,391 @@ +package file + +import ( + "github.com/stretchr/testify/assert" + "sort" + "testing" +) + +func TestResolution_Less(t *testing.T) { + + realA := Resolution{ + RequestPath: "/parent/a", + Reference: &Reference{ + RealPath: "/parent/a", + }, + } + + realB := Resolution{ + RequestPath: "/parent/b", + Reference: &Reference{ + RealPath: "/parent/b", + }, + } + + linkToA := Resolution{ + RequestPath: "/parent-link/a", + Reference: &Reference{ + RealPath: "/a", + }, + } + + linkToB := Resolution{ + RequestPath: "/parent-link/b", + Reference: &Reference{ + RealPath: "/b", + }, + } + + tests := []struct { + name string + subject []Resolution + want []Resolution + }{ + { + name: "references to real files are preferred first", + subject: []Resolution{ + linkToA, + realA, + }, + want: []Resolution{ + realA, + linkToA, + }, + }, + { + name: "real files are treated equally by request name", + subject: []Resolution{ + realB, + realA, + }, + want: []Resolution{ + realA, + realB, + }, + }, + { + name: "link files are treated equally by request name", + subject: []Resolution{ + linkToB, + linkToA, + }, + want: []Resolution{ + linkToA, + linkToB, + }, + }, + { + name: "regression", + subject: []Resolution{ + { + + RequestPath: "/parent-link/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + { + + 
RequestPath: "/parent/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + want: []Resolution{ + { + RequestPath: "/parent/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + { + + RequestPath: "/parent-link/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sort.Sort(Resolutions(tt.subject)) + assert.Equal(t, tt.want, tt.subject) + }) + } +} + +func TestResolution_RequestResolutionPath(t *testing.T) { + tests := []struct { + name string + subject Resolution + want []Path + }{ + { + name: "empty", + subject: Resolution{ + LinkResolutions: nil, + }, + want: nil, + }, + { + name: "single ref", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + LinkResolutions: nil, + }, + want: []Path{ + "/home/wagoodman/file.txt", + }, + }, + { + // /home -> /another/place + name: "ref with 1 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/home", + Reference: &Reference{RealPath: "/home"}, + }, + }, + }, + want: []Path{ + "/home", + "/another/place", + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with 2 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + }, + }, + want: []Path{ + "/home/wagoodman/file.txt", // request + "/place/wagoodman/file.txt", // real intermediate path + "/1/file.txt", // real intermediate path + "/2/real-file.txt", // final resolved path on the reference + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with dead link", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + // nope! it's dead! 
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + }, + want: []Path{ + "/home/wagoodman/file.txt", // request + "/place/wagoodman/file.txt", // real intermediate path + "/1/file.txt", // real intermediate path + "/2/real-file.txt", // final resolved path on the reference (that does not exist) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.subject.RequestResolutionPath(), "RequestResolutionPath()") + }) + } +} + +func TestResolution_References(t *testing.T) { + type fields struct { + ReferenceResolution Resolution + LeafLinkResolution []Resolution + } + tests := []struct { + name string + subject Resolution + want []Reference + }{ + { + name: "empty", + subject: Resolution{ + LinkResolutions: nil, + }, + want: nil, + }, + { + name: "single ref", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + LinkResolutions: nil, + }, + want: []Reference{ + { + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + }, + }, + { + // /home -> /another/place + name: "ref with 1 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/home", + Reference: &Reference{RealPath: "/home"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/home"}, + {RealPath: "/another/place"}, + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with 2 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/place/wagoodman/file.txt"}, + {RealPath: "/1/file.txt"}, + {RealPath: "/2/real-file.txt"}, + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with dead link", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + // nope! it's dead! 
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/place/wagoodman/file.txt"}, + {RealPath: "/1/file.txt"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.subject.References(), "References()") + + }) + } +} diff --git a/pkg/file/tarutil.go b/pkg/file/tarutil.go index b08416b2..4a01c09a 100644 --- a/pkg/file/tarutil.go +++ b/pkg/file/tarutil.go @@ -108,7 +108,7 @@ func MetadataFromTar(reader io.ReadCloser, tarPath string) (Metadata, error) { if entry.Header.Size > 0 { content = reader } - m := NewMetadata(entry.Header, entry.Sequence, content) + m := NewMetadata(entry.Header, content) metadata = &m return ErrTarStopIteration } diff --git a/pkg/file/tarutil_test.go b/pkg/file/tarutil_test.go index cd67f23e..2103f242 100644 --- a/pkg/file/tarutil_test.go +++ b/pkg/file/tarutil_test.go @@ -62,40 +62,36 @@ func TestMetadataFromTar(t *testing.T) { name: "path/branch/two/file-2.txt", fixture: "fixture-1", expected: Metadata{ - Path: "/path/branch/two/file-2.txt", - TarHeaderName: "path/branch/two/file-2.txt", - TarSequence: 5, - Linkname: "", - Size: 12, - UserID: 1337, - GroupID: 5432, - TypeFlag: 0x30, - IsDir: false, - Mode: 0x1ed, - MIMEType: "application/octet-stream", + Path: "/path/branch/two/file-2.txt", + LinkDestination: "", + Size: 12, + UserID: 1337, + GroupID: 5432, + Type: TypeRegular, + IsDir: false, + Mode: 0x1ed, + MIMEType: "application/octet-stream", }, }, { name: "path/branch/two/", fixture: "fixture-1", expected: Metadata{ - Path: "/path/branch/two", - TarHeaderName: "path/branch/two/", - TarSequence: 4, - Linkname: "", - Size: 0, - UserID: 1337, - GroupID: 5432, - TypeFlag: 0x35, - IsDir: true, - Mode: 0x800001ed, - MIMEType: "", + Path: "/path/branch/two", + LinkDestination: "", + Size: 0, + UserID: 1337, + GroupID: 5432, + Type: TypeDirectory, + IsDir: true, + Mode: 0x800001ed, + MIMEType: "", }, }, } for _, test := range 
tests { t.Run(test.name, func(t *testing.T) { - f := getTarFixture(t, "fixture-1") + f := getTarFixture(t, test.fixture) metadata, err := MetadataFromTar(f, test.name) assert.NoError(t, err) assert.Equal(t, test.expected, metadata) diff --git a/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme b/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme new file mode 120000 index 00000000..e348d807 --- /dev/null +++ b/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme @@ -0,0 +1 @@ +link_to_new_readme \ No newline at end of file diff --git a/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme b/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme new file mode 120000 index 00000000..ea786ff2 --- /dev/null +++ b/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme @@ -0,0 +1 @@ +readme \ No newline at end of file diff --git a/pkg/file/test-fixtures/symlinks-simple/readme b/pkg/file/test-fixtures/symlinks-simple/readme new file mode 100644 index 00000000..df85b76a --- /dev/null +++ b/pkg/file/test-fixtures/symlinks-simple/readme @@ -0,0 +1,2 @@ +this directory exists for unit tests on irregular files. You can't see other files here because they are removed after each test. +This readme is a better version of Russell's teapot. 
diff --git a/pkg/file/type.go b/pkg/file/type.go index c2f9db47..67562ae9 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ -1,25 +1,109 @@ package file -import "archive/tar" +import ( + "archive/tar" + "os" +) const ( - TypeReg Type = tar.TypeReg - TypeDir Type = tar.TypeDir - TypeSymlink Type = tar.TypeSymlink - TypeHardLink Type = tar.TypeLink - TypeCharacterDevice Type = tar.TypeChar - TypeBlockDevice Type = tar.TypeBlock - TypeFifo Type = tar.TypeFifo + TypeRegular Type = iota + TypeHardLink + TypeSymLink + TypeCharacterDevice + TypeBlockDevice + TypeDirectory + TypeFIFO + TypeSocket + TypeIrregular ) -var AllTypes = []Type{ - TypeReg, - TypeDir, - TypeSymlink, - TypeHardLink, - TypeCharacterDevice, - TypeBlockDevice, - TypeFifo, +// why use a rune type? we're looking for something that is memory compact but is easily human interpretable. + +type Type int + +func AllTypes() []Type { + return []Type{ + TypeRegular, + TypeHardLink, + TypeSymLink, + TypeCharacterDevice, + TypeBlockDevice, + TypeDirectory, + TypeFIFO, + TypeSocket, + TypeIrregular, + } +} + +func TypeFromTarType(ty byte) Type { + switch ty { + case tar.TypeReg, tar.TypeRegA: // nolint: staticcheck + return TypeRegular + case tar.TypeLink: + return TypeHardLink + case tar.TypeSymlink: + return TypeSymLink + case tar.TypeChar: + return TypeCharacterDevice + case tar.TypeBlock: + return TypeBlockDevice + case tar.TypeDir: + return TypeDirectory + case tar.TypeFifo: + return TypeFIFO + default: + return TypeIrregular + } } -type Type rune +func TypeFromMode(mode os.FileMode) Type { + switch { + case isSet(mode, os.ModeSymlink): + return TypeSymLink + case isSet(mode, os.ModeIrregular): + return TypeIrregular + case isSet(mode, os.ModeCharDevice): + return TypeCharacterDevice + case isSet(mode, os.ModeDevice): + return TypeBlockDevice + case isSet(mode, os.ModeNamedPipe): + return TypeFIFO + case isSet(mode, os.ModeSocket): + return TypeSocket + case mode.IsDir(): + return TypeDirectory + case 
mode.IsRegular(): + return TypeRegular + default: + return TypeIrregular + } +} + +func isSet(mode, field os.FileMode) bool { + return mode&field != 0 +} + +func (t Type) String() string { + switch t { + case TypeRegular: + return "RegularFile" + case TypeHardLink: + return "HardLink" + case TypeSymLink: + return "SymbolicLink" + case TypeCharacterDevice: + return "CharacterDevice" + case TypeBlockDevice: + return "BlockDevice" + case TypeDirectory: + return "Directory" + case TypeFIFO: + return "FIFONode" + case TypeSocket: + return "Socket" + case TypeIrregular: + return "IrregularFile" + default: + return "Unknown" + } +} diff --git a/pkg/filetree/builder.go b/pkg/filetree/builder.go new file mode 100644 index 00000000..1f017f8e --- /dev/null +++ b/pkg/filetree/builder.go @@ -0,0 +1,56 @@ +package filetree + +import ( + "fmt" + + "github.com/anchore/stereoscope/pkg/file" +) + +// Builder is a helper for building a filetree and accompanying index in a coordinated fashion. +type Builder struct { + tree Writer + index IndexWriter +} + +func NewBuilder(tree Writer, index IndexWriter) *Builder { + return &Builder{ + tree: tree, + index: index, + } +} + +func (b *Builder) Add(metadata file.Metadata) (*file.Reference, error) { + var ( + ref *file.Reference + err error + ) + switch metadata.Type { + case file.TypeSymLink: + ref, err = b.tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) + if err != nil { + return nil, err + } + case file.TypeHardLink: + ref, err = b.tree.AddHardLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) + if err != nil { + return nil, err + } + case file.TypeDirectory: + ref, err = b.tree.AddDir(file.Path(metadata.Path)) + if err != nil { + return nil, err + } + default: + ref, err = b.tree.AddFile(file.Path(metadata.Path)) + if err != nil { + return nil, err + } + } + if ref == nil { + return nil, fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, 
metadata.LinkDestination) + } + + b.index.Add(*ref, metadata) + + return ref, nil +} diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go index f246d8a9..71d3d7f4 100644 --- a/pkg/filetree/depth_first_path_walker.go +++ b/pkg/filetree/depth_first_path_walker.go @@ -58,12 +58,15 @@ func NewDepthFirstPathWalker(tree *FileTree, visitor FileNodeVisitor, conditions func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNode, error) { w.pathStack.Push(from) - var currentPath file.Path - var currentNode *filenode.FileNode - var err error + var ( + currentPath file.Path + currentNode *nodeAccess + err error + ) for w.pathStack.Size() > 0 { currentPath = w.pathStack.Pop() + // TODO: should we make these link resolutions configurable so you can observe the links on walk as well? (take link resolution options as a parameter) currentNode, err = w.tree.node(currentPath, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, @@ -72,32 +75,32 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo if err != nil { return "", nil, err } - if currentNode == nil { + if !currentNode.HasFileNode() { return "", nil, fmt.Errorf("nil Node at path=%q", currentPath) } // prevent infinite loop if strings.Count(string(currentPath.Normalize()), file.DirSeparator) >= maxDirDepth { - return currentPath, currentNode, ErrMaxTraversalDepth + return currentPath, currentNode.FileNode, ErrMaxTraversalDepth } - if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode) { - return currentPath, currentNode, nil + if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode.FileNode) { + return currentPath, currentNode.FileNode, nil } currentPath = currentPath.Normalize() // visit if w.visitor != nil && !w.visitedPaths.Contains(currentPath) { - if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && 
w.conditions.ShouldVisit(currentPath, *currentNode) { - err := w.visitor(currentPath, *currentNode) + if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && w.conditions.ShouldVisit(currentPath, *currentNode.FileNode) { + err := w.visitor(currentPath, *currentNode.FileNode) if err != nil { - return currentPath, currentNode, err + return currentPath, currentNode.FileNode, err } w.visitedPaths.Add(currentPath) } } - if w.conditions.ShouldContinueBranch != nil && !w.conditions.ShouldContinueBranch(currentPath, *currentNode) { + if w.conditions.ShouldContinueBranch != nil && !w.conditions.ShouldContinueBranch(currentPath, *currentNode.FileNode) { continue } @@ -112,7 +115,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo } } - return currentPath, currentNode, nil + return currentPath, currentNode.FileNode, nil } func (w *DepthFirstPathWalker) WalkAll() error { diff --git a/pkg/filetree/depth_first_path_walker_test.go b/pkg/filetree/depth_first_path_walker_test.go index ee65a695..74678709 100644 --- a/pkg/filetree/depth_first_path_walker_test.go +++ b/pkg/filetree/depth_first_path_walker_test.go @@ -10,7 +10,7 @@ import ( ) func dfsTestTree(t *testing.T) (*FileTree, map[string]*file.Reference) { - tr := NewFileTree() + tr := New() possiblePaths := make(map[string]*file.Reference) @@ -233,7 +233,7 @@ func TestDFS_WalkAll_ConditionalBranchPruning(t *testing.T) { } func TestDFS_WalkAll_MaxDirDepthTerminatesTraversal(t *testing.T) { - tr := NewFileTree() + tr := New() possiblePaths := make(map[string]*file.Reference) diff --git a/pkg/filetree/filenode/filenode.go b/pkg/filetree/filenode/filenode.go index aa9b0fdf..14f0fe7a 100644 --- a/pkg/filetree/filenode/filenode.go +++ b/pkg/filetree/filenode/filenode.go @@ -2,6 +2,7 @@ package filenode import ( "path" + "path/filepath" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/tree/node" @@ -17,7 +18,7 @@ type FileNode struct { func NewDir(p 
file.Path, ref *file.Reference) *FileNode { return &FileNode{ RealPath: p, - FileType: file.TypeDir, + FileType: file.TypeDirectory, Reference: ref, } } @@ -25,7 +26,7 @@ func NewDir(p file.Path, ref *file.Reference) *FileNode { func NewFile(p file.Path, ref *file.Reference) *FileNode { return &FileNode{ RealPath: p, - FileType: file.TypeReg, + FileType: file.TypeRegular, Reference: ref, } } @@ -33,7 +34,7 @@ func NewFile(p file.Path, ref *file.Reference) *FileNode { func NewSymLink(p, linkPath file.Path, ref *file.Reference) *FileNode { return &FileNode{ RealPath: p, - FileType: file.TypeSymlink, + FileType: file.TypeSymLink, LinkPath: linkPath, Reference: ref, } @@ -64,9 +65,27 @@ func (n *FileNode) Copy() node.Node { } func (n *FileNode) IsLink() bool { - return n.FileType == file.TypeHardLink || n.FileType == file.TypeSymlink + return n.FileType == file.TypeHardLink || n.FileType == file.TypeSymLink } func IDByPath(p file.Path) node.ID { return node.ID(p) } + +func (n *FileNode) RenderLinkDestination() file.Path { + if !n.IsLink() { + return "" + } + + if n.LinkPath.IsAbsolutePath() { + // use links with absolute paths blindly + return n.LinkPath + } + + // resolve relative link paths + var parentDir string + parentDir, _ = filepath.Split(string(n.RealPath)) // TODO: alex: should this be path.Split, not filepath.Split? 
+ + // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt" + return file.Path(path.Clean(path.Join(parentDir, string(n.LinkPath)))) +} diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 5ca0f413..9352e8d9 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -4,10 +4,13 @@ import ( "errors" "fmt" "path" - "path/filepath" + "sort" "strings" - "github.com/anchore/stereoscope/internal" + "github.com/scylladb/go-set/strset" + + "github.com/scylladb/go-set/iset" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree/filenode" "github.com/anchore/stereoscope/pkg/tree" @@ -24,7 +27,13 @@ type FileTree struct { } // NewFileTree creates a new FileTree instance. +// Deprecated: use New() instead. func NewFileTree() *FileTree { + return New() +} + +// New creates a new FileTree instance. +func New() *FileTree { t := tree.NewTree() // Initialize FileTree with a root "/" Node @@ -36,8 +45,8 @@ func NewFileTree() *FileTree { } // Copy returns a Copy of the current FileTree. -func (t *FileTree) Copy() (*FileTree, error) { - ct := NewFileTree() +func (t *FileTree) Copy() (ReadWriter, error) { + ct := New() ct.tree = t.tree.Copy() return ct, nil } @@ -45,18 +54,18 @@ func (t *FileTree) Copy() (*FileTree, error) { // AllFiles returns all files within the FileTree (defaults to regular files only, but you can provide one or more allow types). 
func (t *FileTree) AllFiles(types ...file.Type) []file.Reference { if len(types) == 0 { - types = []file.Type{file.TypeReg} + types = []file.Type{file.TypeRegular} } - typeSet := internal.NewStringSet() + typeSet := iset.New() for _, t := range types { - typeSet.Add(string(t)) + typeSet.Add(int(t)) } var files []file.Reference for _, n := range t.tree.Nodes() { f := n.(*filenode.FileNode) - if typeSet.Contains(string(f.FileType)) && f.Reference != nil { + if typeSet.Has(int(f.FileType)) && f.Reference != nil { files = append(files, *f.Reference) } } @@ -75,7 +84,7 @@ func (t *FileTree) AllRealPaths() []file.Path { } func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { - n, err := t.node(dir, linkResolutionStrategy{ + fna, err := t.node(dir, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) @@ -83,16 +92,16 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { return nil, err } - if n == nil { + if !fna.HasFileNode() { return nil, nil } - if n.FileType != file.TypeDir { + if fna.FileNode.FileType != file.TypeDirectory { return nil, nil } var listing []file.Path - children := t.tree.Children(n) + children := t.tree.Children(fna.FileNode) for _, child := range children { if child == nil { continue @@ -106,13 +115,25 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { return nil, err } - listing = append(listing, file.Path(path.Join(string(dir), fn.RealPath.Basename()))) + listing = append(listing, file.Path(path.Join(string(dir), fn.FileNode.RealPath.Basename()))) } return listing, nil } // File fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. -func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.Reference, error) { +func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.Resolution, error) { + currentNode, err := t.file(path, options...) 
+ if err != nil { + return false, nil, err + } + if currentNode.HasFileNode() { + return true, currentNode.FileResolution(), err + } + return false, nil, err +} + +// file fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. +func (t *FileTree) file(path file.Path, options ...LinkResolutionOption) (*nodeAccess, error) { userStrategy := newLinkResolutionStrategy(options...) // For: /some/path/here // Where: /some/path -> /other/place @@ -131,13 +152,12 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, // // Therefore we can safely lookup the path first without worrying about symlink resolution yet... if there is a // hit, return it! If not, fallback to symlink resolution. - currentNode, err := t.node(path, linkResolutionStrategy{}) if err != nil { - return false, nil, err + return nil, err } - if currentNode != nil && (!currentNode.IsLink() || currentNode.IsLink() && !userStrategy.FollowBasenameLinks) { - return true, currentNode.Reference, nil + if currentNode.HasFileNode() && (!currentNode.FileNode.IsLink() || currentNode.FileNode.IsLink() && !userStrategy.FollowBasenameLinks) { + return currentNode, nil } // symlink resolution!... 
within the context of container images (which is outside of the responsibility of this object) @@ -148,59 +168,97 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, FollowBasenameLinks: userStrategy.FollowBasenameLinks, DoNotFollowDeadBasenameLinks: userStrategy.DoNotFollowDeadBasenameLinks, }) - if currentNode != nil { - return true, currentNode.Reference, err + if currentNode.HasFileNode() { + return currentNode, err } - return false, nil, err + return nil, err +} + +func newResolutions(nodePath []nodeAccess) []file.Resolution { + var refPath []file.Resolution + for i, n := range nodePath { + if i == len(nodePath)-1 && n.FileNode != nil { + // this is already on the parent Access object (unless it is a dead link) + break + } + access := file.Resolution{ + RequestPath: n.RequestPath, + } + if n.FileNode != nil { + access.Reference = n.FileNode.Reference + } + + refPath = append(refPath, access) + } + return refPath } -func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*filenode.FileNode, error) { +func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAccess, error) { normalizedPath := p.Normalize() nodeID := filenode.IDByPath(normalizedPath) if !strategy.FollowLinks() { n := t.tree.Node(nodeID) if n == nil { - return nil, nil + return &nodeAccess{ + RequestPath: normalizedPath, + FileNode: nil, + }, nil } - return n.(*filenode.FileNode), nil + return &nodeAccess{ + RequestPath: normalizedPath, + FileNode: n.(*filenode.FileNode), + }, nil } - var currentNode *filenode.FileNode + var currentNode *nodeAccess var err error if strategy.FollowAncestorLinks { currentNode, err = t.resolveAncestorLinks(normalizedPath, nil) if err != nil { + if currentNode != nil { + currentNode.RequestPath = normalizedPath + } return currentNode, err } } else { n := t.tree.Node(nodeID) if n != nil { - currentNode = n.(*filenode.FileNode) + currentNode = &nodeAccess{ + RequestPath: normalizedPath, + FileNode: 
n.(*filenode.FileNode), + } } } // link resolution has come up with nothing, return what we have so far - if currentNode == nil { + if !currentNode.HasFileNode() { + if currentNode != nil { + currentNode.RequestPath = normalizedPath + } return currentNode, nil } if strategy.FollowBasenameLinks { currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil) } + if currentNode != nil { + currentNode.RequestPath = normalizedPath + } + return currentNode, err } // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. -func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal.Set) (*filenode.FileNode, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.PathSet) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution - currentNode, err := t.node(path, linkResolutionStrategy{}) + currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) if err != nil { return nil, err } - if currentNode != nil { - return currentNode, nil + if currentNodeAccess.HasFileNode() { + return currentNodeAccess, nil } var pathParts = strings.Split(string(path), file.DirSeparator) @@ -219,101 +277,100 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. currentPathStr = string(currentPath) // fetch the Node with NO link resolution strategy - currentNode, err = t.node(currentPath, linkResolutionStrategy{}) + currentNodeAccess, err = t.node(currentPath, linkResolutionStrategy{}) if err != nil { // should never occur return nil, err } - if currentNode == nil { + if !currentNodeAccess.HasFileNode() { // we've reached a point where the given path that has never been observed. This can happen for one reason: // 1. 
the current path is really invalid and we should return NIL indicating that it cannot be resolved. // 2. the current path is a link? no, this isn't possible since we are iterating through constituent paths // in order, so we are guaranteed to hit parent links in which we should adjust the search path accordingly. - return nil, nil + return currentNodeAccess, nil } // keep track of what we've resolved to so far... - currentPath = currentNode.RealPath + currentPath = currentNodeAccess.FileNode.RealPath // this is positively a path, however, there is no information about this Node. This may be OK since we // allow for adding children before parents (and even don't require the parent to ever be added --which is // potentially valid given the underlying messy data [tar headers]). In this case we keep building the path // (which we've already done at this point) and continue. - if currentNode.Reference == nil { + if currentNodeAccess.FileNode.Reference == nil { continue } // by this point we definitely have a file reference, if this is a link (and not the basename) resolve any // links until the next Node is resolved (or not). isLastPart := idx == len(pathParts)-1 - if !isLastPart && currentNode.IsLink() { - currentNode, err = t.resolveNodeLinks(currentNode, true, attemptedPaths) + if !isLastPart && currentNodeAccess.FileNode.IsLink() { + currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptedPaths) if err != nil { // only expected to happen on cycles - return currentNode, err + return currentNodeAccess, err } - if currentNode != nil { - currentPath = currentNode.RealPath + if currentNodeAccess.HasFileNode() { + currentPath = currentNodeAccess.FileNode.RealPath } currentPathStr = string(currentPath) } } // by this point we have processed all constituent paths; there were no un-added paths and the path is guaranteed // to have followed link resolution. 
- return currentNode, nil + return currentNodeAccess, nil } -// followNode takes the given FileNode and resolves all links at the base of the real path for the node (this implies +// resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). -func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*filenode.FileNode, error) { +// nolint: funlen +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths file.PathSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing nodes that do not exist if attemptedPaths == nil { - attemptedPaths = internal.NewStringSet() + attemptedPaths = file.NewPathSet() } // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks - var lastNode *filenode.FileNode + var lastNode *nodeAccess + var nodePath []nodeAccess + var nextPath file.Path - currentNode := n + currentNodeAccess := n // keep resolving links until a regular file or directory is found - alreadySeen := internal.NewStringSet() + alreadySeen := strset.New() var err error for { + nodePath = append(nodePath, *currentNodeAccess) + // if there is no next path, return this reference (dead link) - if currentNode == nil { + if !currentNodeAccess.HasFileNode() { + // the last path we tried to resolve is a dead link, persist the original path as the failed request + if len(nodePath) > 0 { + nodePath[len(nodePath)-1].RequestPath = nextPath + } break } - if alreadySeen.Contains(string(currentNode.RealPath)) { + if alreadySeen.Has(string(currentNodeAccess.FileNode.RealPath)) { return nil, ErrLinkCycleDetected } - if !currentNode.IsLink() { + if !currentNodeAccess.FileNode.IsLink() { // no resolution 
and there is no next link (pseudo dead link)... return what you found // any content fetches will fail, but that's ok break } // prepare for the next iteration - alreadySeen.Add(string(currentNode.RealPath)) + alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath)) - var nextPath file.Path - if currentNode.LinkPath.IsAbsolutePath() { - // use links with absolute paths blindly - nextPath = currentNode.LinkPath - } else { - // resolve relative link paths - var parentDir string - parentDir, _ = filepath.Split(string(currentNode.RealPath)) - // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt" - nextPath = file.Path(path.Clean(path.Join(parentDir, string(currentNode.LinkPath)))) - } + nextPath = currentNodeAccess.FileNode.RenderLinkDestination() // no more links to follow if string(nextPath) == "" { @@ -321,32 +378,42 @@ func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLink } // preserve the current Node for the next loop (in case we shouldn't follow a potentially dead link) - lastNode = currentNode + lastNode = currentNodeAccess // break any cycles with non-existent paths (before attempting to look the path up again) - if attemptedPaths.Contains(string(nextPath)) { + if attemptedPaths.Contains(nextPath) { return nil, ErrLinkCycleDetected } // get the next Node (based on the next path) - attemptedPaths.Add(string(nextPath)) - currentNode, err = t.resolveAncestorLinks(nextPath, attemptedPaths) + attemptedPaths.Add(nextPath) + currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths) if err != nil { + if currentNodeAccess != nil { + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) 
+ } + // only expected to occur upon cycle detection - return currentNode, err + return currentNodeAccess, err } } - if currentNode == nil && !followDeadBasenameLinks { + if !currentNodeAccess.HasFileNode() && !followDeadBasenameLinks { + if lastNode != nil { + lastNode.LeafLinkResolution = append(lastNode.LeafLinkResolution, nodePath...) + } return lastNode, nil } - return currentNode, nil + if currentNodeAccess != nil { + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) + } + return currentNodeAccess, nil } // FilesByGlob fetches zero to many file.References for the given glob pattern (considers symlinks). -func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]GlobResult, error) { - results := make([]GlobResult, 0) +func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]file.Resolution, error) { + var results []file.Resolution if len(query) == 0 { return nil, fmt.Errorf("no glob pattern given") @@ -379,7 +446,7 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ if !path.IsAbs(match) { matchPath = file.Path(path.Join("/", match)) } - fn, err := t.node(matchPath, linkResolutionStrategy{ + fna, err := t.node(matchPath, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, DoNotFollowDeadBasenameLinks: doNotFollowDeadBasenameLinks, @@ -388,20 +455,20 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ return nil, err } // the Node must exist and should not be a directory - if fn != nil && fn.FileType != file.TypeDir { - result := GlobResult{ - MatchPath: matchPath, - RealPath: fn.RealPath, - // we should not be given a link Node UNLESS it is dead - IsDeadLink: fn.IsLink(), + if fna.HasFileNode() && fna.FileNode.FileType != file.TypeDirectory { + result := file.NewResolution( + matchPath, + fna.FileNode.Reference, + newResolutions(fna.LeafLinkResolution), + ) + if result != nil { + 
results = append(results, *result) } - if fn.Reference != nil { - result.Reference = *fn.Reference - } - results = append(results, result) } } + sort.Sort(file.Resolutions(results)) + return results, nil } @@ -410,20 +477,20 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ // hardlink resolution is performed on the given path --which implies that the given path MUST be a real path (have no // links in constituent paths) func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeReg { + if fna.FileNode.FileType != file.TypeRegular { return nil, fmt.Errorf("path=%q already exists but is NOT a regular file", realPath) } // this is a regular file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + if fna.FileNode.Reference == nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -438,20 +505,20 @@ func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) { // link path captured and returned. 
Note: NO symlink or hardlink resolution is performed on the given path --which // implies that the given path MUST be a real path (have no links in constituent paths) func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeSymlink { + if fna.FileNode.FileType != file.TypeSymLink { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } // this is a symlink file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + if fna.FileNode.Reference == nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -466,20 +533,20 @@ func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Ref // path captured and returned. 
Note: NO symlink or hardlink resolution is performed on the given path --which // implies that the given path MUST be a real path (have no links in constituent paths) func (t *FileTree) AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeHardLink { + if fna.FileNode.FileType != file.TypeHardLink { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } // this is a symlink file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + if fna.FileNode.Reference == nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -497,20 +564,20 @@ func (t *FileTree) AddHardLink(realPath file.Path, linkPath file.Path) (*file.Re // Note: NO symlink or hardlink resolution is performed on the given path --which implies that the given path MUST // be a real path (have no links in constituent paths) func (t *FileTree) AddDir(realPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeDir { + if fna.FileNode.FileType != file.TypeDirectory { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } - // this is a symlink file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + // this is a directory, provide a new or existing file.Reference + if fna.FileNode.Reference == 
nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -532,22 +599,22 @@ func (t *FileTree) addParentPaths(realPath file.Path) error { return fmt.Errorf("unable to determine parent path while adding path=%q: %w", realPath, err) } - fn, err := t.node(parentPath, linkResolutionStrategy{}) + fna, err := t.node(parentPath, linkResolutionStrategy{}) if err != nil { return err } - if fn == nil { + if !fna.HasFileNode() { // add parents of the Node until an existent parent is found it's important to do this in reverse order // to ensure we are checking the fewest amount of parents possible. var pathsToAdd []file.Path parentPaths := realPath.ConstituentPaths() for idx := len(parentPaths) - 1; idx >= 0; idx-- { - fn, err := t.node(parentPaths[idx], linkResolutionStrategy{}) + resolvedFna, err := t.node(parentPaths[idx], linkResolutionStrategy{}) if err != nil { return err } - if fn != nil { + if resolvedFna.HasFileNode() { break } pathsToAdd = append(pathsToAdd, parentPaths[idx]) @@ -584,11 +651,11 @@ func (t *FileTree) setFileNode(fn *filenode.FileNode) error { if err != nil { return err } - if parentNode == nil { + if !parentNode.HasFileNode() { return fmt.Errorf("unable to find parent path=%q while adding path=%q", parentPath, fn.RealPath) } - return t.tree.AddChild(parentNode, fn) + return t.tree.AddChild(parentNode.FileNode, fn) } // RemovePath deletes the file.Reference from the FileTree by the given path. 
If the basename of the given path @@ -599,18 +666,18 @@ func (t *FileTree) RemovePath(path file.Path) error { return ErrRemovingRoot } - fn, err := t.node(path, linkResolutionStrategy{ + fna, err := t.node(path, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: false, }) if err != nil { return err } - if fn == nil { + if !fna.HasFileNode() { return nil } - _, err = t.tree.RemoveNode(fn) + _, err = t.tree.RemoveNode(fna.FileNode) if err != nil { return err } @@ -621,18 +688,18 @@ func (t *FileTree) RemovePath(path file.Path) error { // basename is a symlink, then the symlink is followed before resolving children. If the path does not exist, this is a // nop. func (t *FileTree) RemoveChildPaths(path file.Path) error { - fn, err := t.node(path, linkResolutionStrategy{ + fna, err := t.node(path, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return err } - if fn == nil { + if !fna.HasFileNode() { // can't remove child paths for Node that doesn't exist! return nil } - for _, child := range t.tree.Children(fn) { + for _, child := range t.tree.Children(fna.FileNode) { _, err := t.tree.RemoveNode(child) if err != nil { return err @@ -641,31 +708,31 @@ func (t *FileTree) RemoveChildPaths(path file.Path) error { return nil } -// Reader returns a tree.Reader useful for Tree traversal. -func (t *FileTree) Reader() tree.Reader { +// TreeReader returns a tree.Reader useful for Tree traversal. 
+func (t *FileTree) TreeReader() tree.Reader { return t.tree } // PathDiff shows the path differences between two trees (useful for testing) func (t *FileTree) PathDiff(other *FileTree) (extra, missing []file.Path) { - ourPaths := internal.NewStringSet() + ourPaths := strset.New() for _, fn := range t.tree.Nodes() { ourPaths.Add(string(fn.ID())) } - theirPaths := internal.NewStringSet() + theirPaths := strset.New() for _, fn := range other.tree.Nodes() { theirPaths.Add(string(fn.ID())) } for _, fn := range other.tree.Nodes() { - if !ourPaths.Contains(string(fn.ID())) { + if !ourPaths.Has(string(fn.ID())) { extra = append(extra, file.Path(fn.ID())) } } for _, fn := range t.tree.Nodes() { - if !theirPaths.Contains(string(fn.ID())) { + if !theirPaths.Has(string(fn.ID())) { missing = append(missing, file.Path(fn.ID())) } } @@ -698,12 +765,12 @@ func (t *FileTree) Walk(fn func(path file.Path, f filenode.FileNode) error, cond return NewDepthFirstPathWalker(t, fn, conditions).WalkAll() } -// merge takes the given Tree and combines it with the current Tree, preferring files in the other Tree if there +// Merge takes the given Tree and combines it with the current Tree, preferring files in the other Tree if there // are path conflicts. This is the basis function for squashing (where the current Tree is the bottom Tree and the // given Tree is the top Tree). 
// //nolint:gocognit,funlen -func (t *FileTree) merge(upper *FileTree) error { +func (t *FileTree) Merge(upper Reader) error { conditions := tree.WalkConditions{ ShouldContinueBranch: func(n node.Node) bool { p := file.Path(n.ID()) @@ -721,22 +788,22 @@ func (t *FileTree) merge(upper *FileTree) error { } upperNode := n.(*filenode.FileNode) // opaque directories must be processed first - if upper.hasOpaqueDirectory(upperNode.RealPath) { + if hasOpaqueDirectory(upper, upperNode.RealPath) { err := t.RemoveChildPaths(upperNode.RealPath) if err != nil { - return fmt.Errorf("filetree merge failed to remove child paths (upperPath=%s): %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed to remove child paths (upperPath=%s): %w", upperNode.RealPath, err) } } if upperNode.RealPath.IsWhiteout() { lowerPath, err := upperNode.RealPath.UnWhiteoutPath() if err != nil { - return fmt.Errorf("filetree merge failed to find original upperPath for whiteout (upperPath=%s): %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed to find original upperPath for whiteout (upperPath=%s): %w", upperNode.RealPath, err) } err = t.RemovePath(lowerPath) if err != nil { - return fmt.Errorf("filetree merge failed to remove upperPath (upperPath=%s): %w", lowerPath, err) + return fmt.Errorf("filetree Merge failed to remove upperPath (upperPath=%s): %w", lowerPath, err) } return nil @@ -747,9 +814,9 @@ func (t *FileTree) merge(upper *FileTree) error { FollowBasenameLinks: false, }) if err != nil { - return fmt.Errorf("filetree merge failed when looking for path=%q : %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed when looking for path=%q : %w", upperNode.RealPath, err) } - if lowerNode == nil { + if !lowerNode.HasFileNode() { // there is no existing Node... 
add parents and prepare to set if err := t.addParentPaths(upperNode.RealPath); err != nil { return fmt.Errorf("could not add parent paths to lower: %w", err) @@ -759,21 +826,21 @@ func (t *FileTree) merge(upper *FileTree) error { nodeCopy := *upperNode // keep original file references if the upper tree does not have them (only for the same file types) - if lowerNode != nil && lowerNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileType { - nodeCopy.Reference = lowerNode.Reference + if lowerNode.HasFileNode() && lowerNode.FileNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileNode.FileType { + nodeCopy.Reference = lowerNode.FileNode.Reference } - if lowerNode != nil && upperNode.FileType != file.TypeDir && lowerNode.FileType == file.TypeDir { + if lowerNode.HasFileNode() && upperNode.FileType != file.TypeDirectory && lowerNode.FileNode.FileType == file.TypeDirectory { // NOTE: both upperNode and lowerNode paths are the same, and does not have an effect // on removal of child paths err := t.RemoveChildPaths(upperNode.RealPath) if err != nil { - return fmt.Errorf("filetree merge failed to remove children for non-directory upper node (%s): %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed to remove children for non-directory upper node (%s): %w", upperNode.RealPath, err) } } // graft a copy of the upper Node with potential lower information into the lower tree if err := t.setFileNode(&nodeCopy); err != nil { - return fmt.Errorf("filetree merge failed to set file Node (Node=%+v): %w", nodeCopy, err) + return fmt.Errorf("filetree Merge failed to set file Node (Node=%+v): %w", nodeCopy, err) } return nil @@ -782,10 +849,10 @@ func (t *FileTree) merge(upper *FileTree) error { // we are using the tree walker instead of the path walker to only look at an resolve merging of real files // with no consideration to virtual paths (paths that are valid in the filetree because 
constituent paths // contain symlinks). - return tree.NewDepthFirstWalkerWithConditions(upper.Reader(), visitor, conditions).WalkAll() + return tree.NewDepthFirstWalkerWithConditions(upper.TreeReader(), visitor, conditions).WalkAll() } -func (t *FileTree) hasOpaqueDirectory(directoryPath file.Path) bool { +func hasOpaqueDirectory(t Reader, directoryPath file.Path) bool { opaqueWhiteoutChild := file.Path(path.Join(string(directoryPath), file.OpaqueWhiteout)) return t.HasPath(opaqueWhiteoutChild) } diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 0d3e0c27..e2d592cc 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -2,18 +2,20 @@ package filetree import ( "errors" - "fmt" - "github.com/stretchr/testify/require" + "github.com/scylladb/go-set/strset" "testing" - "github.com/anchore/stereoscope/internal" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/require" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree/filenode" "github.com/stretchr/testify/assert" ) func TestFileTree_AddPath(t *testing.T) { - tr := NewFileTree() + tr := New() path := file.Path("/home") fileNode, err := tr.AddFile(path) if err != nil { @@ -21,13 +23,13 @@ func TestFileTree_AddPath(t *testing.T) { } _, f, _ := tr.File(path) - if f != fileNode { + if f.Reference != fileNode { t.Fatal("expected pointer to the newly created fileNode") } } func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { - tr := NewFileTree() + tr := New() path := file.Path("/home/wagoodman/awesome/file.txt") fileNode, err := tr.AddFile(path) if err != nil { @@ -35,7 +37,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { } _, f, _ := tr.File(path) - if f != fileNode { + if f.Reference != fileNode { t.Fatal("expected pointer to the newly created fileNode") } @@ -46,7 +48,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { if err != nil { 
t.Fatalf("could not get parent Node: %+v", err) } - children := tr.tree.Children(n) + children := tr.tree.Children(n.FileNode) if len(children) != 1 { t.Fatal("unexpected child count", len(children)) @@ -58,7 +60,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { } func TestFileTree_RemovePath(t *testing.T) { - tr := NewFileTree() + tr := New() path := file.Path("/home/wagoodman/awesome/file.txt") _, err := tr.AddFile(path) if err != nil { @@ -85,8 +87,40 @@ func TestFileTree_RemovePath(t *testing.T) { } } +func TestFileTree_FilesByGlob_AncestorSymlink(t *testing.T) { + var err error + tr := New() + + _, err = tr.AddSymLink("/parent-link", "/parent") + require.NoError(t, err) + + _, err = tr.AddDir("/parent") + require.NoError(t, err) + + expectedRef, err := tr.AddFile("/parent/file.txt") + require.NoError(t, err) + + expected := []file.Resolution{ + { + RequestPath: "/parent-link/file.txt", + Reference: expectedRef, + LinkResolutions: nil, + }, + } + + requestGlob := "**/parent-link/file.txt" + linkOptions := []LinkResolutionOption{FollowBasenameLinks} + ref, err := tr.FilesByGlob(requestGlob, linkOptions...) 
+ require.NoError(t, err) + + opt := cmp.AllowUnexported(file.Reference{}) + if d := cmp.Diff(expected, ref, opt); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) + } +} + func TestFileTree_FilesByGlob(t *testing.T) { - tr := NewFileTree() + tr := New() paths := []string{ "/home/wagoodman/awesome/file.txt", @@ -301,22 +335,22 @@ func TestFileTree_FilesByGlob(t *testing.T) { return } - actualSet := internal.NewStringSet() - expectedSet := internal.NewStringSet() + actualSet := strset.New() + expectedSet := strset.New() for _, r := range actual { - actualSet.Add(string(r.MatchPath)) + actualSet.Add(string(r.RequestPath)) } for _, e := range test.expected { expectedSet.Add(e) - if !actualSet.Contains(e) { + if !actualSet.Has(e) { t.Errorf("missing search hit: %s", e) } } for _, r := range actual { - if !expectedSet.Contains(string(r.MatchPath)) { + if !expectedSet.Has(string(r.RequestPath)) { t.Errorf("extra search hit: %+v", r) } } @@ -327,14 +361,14 @@ func TestFileTree_FilesByGlob(t *testing.T) { } func TestFileTree_Merge(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/file-1.txt") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/wagoodman/awesome/file-2.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman/awesome/file-1.txt", "/home/wagoodman/awesome/file-2.txt"} { @@ -345,32 +379,34 @@ func TestFileTree_Merge(t *testing.T) { } func TestFileTree_Merge_Overwrite(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/file.txt") - tr2 := NewFileTree() + tr2 := New() newRef, _ := tr2.AddFile("/home/wagoodman/awesome/file.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } _, f, _ := 
tr1.File("/home/wagoodman/awesome/file.txt") if f.ID() != newRef.ID() { - t.Fatalf("did not overwrite paths on merge") + t.Fatalf("did not overwrite paths on Merge") } } func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) { - tr1 := NewFileTree() - tr1.AddFile("/home/wagoodman/awesome/file.txt") + tr1 := New() + _, err := tr1.AddFile("/home/wagoodman/awesome/file.txt") + require.NoError(t, err) - tr2 := NewFileTree() - tr2.AddFile("/home/wagoodman/.wh..wh..opq") + tr2 := New() + _, err = tr2.AddFile("/home/wagoodman/.wh..wh..opq") + require.NoError(t, err) - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman", "/home"} { @@ -388,14 +424,14 @@ func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) { } func TestFileTree_Merge_OpaqueWhiteout_NoLowerDirectory(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/luhring/.wh..wh..opq") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/luhring", "/home"} { @@ -406,14 +442,14 @@ func TestFileTree_Merge_OpaqueWhiteout_NoLowerDirectory(t *testing.T) { } func TestFileTree_Merge_Whiteout(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/file.txt") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/wagoodman/awesome/.wh.file.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman/awesome", "/home/wagoodman", "/home"} { @@ -431,14 +467,14 @@ func TestFileTree_Merge_Whiteout(t *testing.T) { } func TestFileTree_Merge_DirOverride(t *testing.T) { - 
tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/place") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/wagoodman/awesome/place/thing.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman/awesome/place", "/home/wagoodman/awesome/place/thing.txt"} { @@ -455,24 +491,24 @@ func TestFileTree_Merge_DirOverride(t *testing.T) { t.Fatalf("somehow override path does not exist?") } - if n.FileType != file.TypeDir { + if n.FileNode.FileType != file.TypeDirectory { t.Errorf("did not override to dir") } } func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) { - lowerTree := NewFileTree() + lowerTree := New() // add a file in the lower tree, which implicitly adds "/home/wagoodman/awesome/place" as a directory type lowerTree.AddFile("/home/wagoodman/awesome/place/thing.txt") - upperTree := NewFileTree() + upperTree := New() // add "/home/wagoodman/awesome/place" as a file type in the upper treee upperTree.AddFile("/home/wagoodman/awesome/place") // merge the upper tree into the lower tree - if err := lowerTree.merge(upperTree); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := lowerTree.Merge(upperTree); err != nil { + t.Fatalf("error on Merge : %+v", err) } // the directory should still exist @@ -494,90 +530,302 @@ func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) { t.Fatalf("somehow override path does not exist?") } - if fileNode.FileType != file.TypeReg { + if fileNode.FileNode.FileType != file.TypeRegular { t.Errorf("did not override to dir") } } +func TestFileTree_File_MultiSymlink(t *testing.T) { + var err error + tr := New() + + _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/link-to-1", "/1") + require.NoError(t, err) + + _, err = tr.AddDir("/1") + require.NoError(t, err) 
+ + _, err = tr.AddFile("/2/real-file.txt") + require.NoError(t, err) + + _, err = tr.AddSymLink("/1/file.txt", "/2/real-file.txt") + require.NoError(t, err) + + _, err = tr.AddSymLink("/1/link-to-place", "/place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/place/wagoodman/file.txt", "/link-to-1/file.txt") + require.NoError(t, err) + + // this is the current state of the filetree + // . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + // request: /home/wagoodman/file.txt + // reference: /2/real-file.txt + // ancestor resolution: + // - /home -> /link-to-1/link-to-place + // - /link-to-1 -> /1 + // - /1/link-to-place -> /place + // leaf resolution: + // - /place/wagoodman/file.txt -> /link-to-1/file.txt + // - /link-to-1 -> /1 + // - /1/file.txt -> /2/real-file.txt + // path: + // - home -> link-to-1/link-to-place -> place + // - place/wagoodman + // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt + + expected := &file.Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []file.Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + }, + }, + } + + requestPath := "/home/wagoodman/file.txt" + linkOptions := []LinkResolutionOption{FollowBasenameLinks} + _, ref, err := tr.File(file.Path(requestPath), linkOptions...) 
+ require.NoError(t, err) + + // compare the remaining expectations, ignoring any reference IDs + ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{}) + if d := cmp.Diff(expected, ref, ignoreIDs); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) + } + +} + +func TestFileTree_File_MultiSymlink_deadlink(t *testing.T) { + var err error + tr := New() + + _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/link-to-1", "/1") + require.NoError(t, err) + + _, err = tr.AddDir("/1") + require.NoError(t, err) + + // causes the dead link + //_, err = tr.AddFile("/2/real-file.txt") + //require.NoError(t, err) + + _, err = tr.AddSymLink("/1/file.txt", "/2/real-file.txt") + require.NoError(t, err) + + _, err = tr.AddSymLink("/1/link-to-place", "/place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/place/wagoodman/file.txt", "/link-to-1/file.txt") + require.NoError(t, err) + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + // request: /home/wagoodman/file.txt + // reference: /2/real-file.txt + // ancestor resolution: + // - /home -> /link-to-1/link-to-place + // - /link-to-1 -> /1 + // - /1/link-to-place -> /place + // leaf resolution: + // - /place/wagoodman/file.txt -> /link-to-1/file.txt + // - /link-to-1 -> /1 + // - /1/file.txt -> /2/real-file.txt + // path: + // - home -> link-to-1/link-to-place -> place + // - place/wagoodman + // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt + + expected := &file.Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []file.Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + } + + requestPath := "/home/wagoodman/file.txt" + + { + linkOptions := []LinkResolutionOption{FollowBasenameLinks} + _, ref, err := tr.File(file.Path(requestPath), linkOptions...) + require.Nil(t, ref) + require.NoError(t, err) + } + + { + linkOptions := []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + _, ref, err := tr.File(file.Path(requestPath), linkOptions...) 
+ require.NoError(t, err) + + // compare the remaining expectations, ignoring any reference IDs + ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{}) + if d := cmp.Diff(expected, ref, ignoreIDs); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) + } + } + +} + func TestFileTree_File_Symlink(t *testing.T) { tests := []struct { - name string - buildLinkSource file.Path // ln -s DEST - buildLinkDest file.Path // ln -s SOURCE - buildRealPath file.Path // a real file that should exist (or not if "") - linkOptions []LinkResolutionOption - requestPath file.Path // the path to check against - expectedExists bool // if the request path should exist or not - expectedResolvedPath file.Path // the expected path for a request result - expectedErr bool // if an error is expected from the request - expectedRealRef bool // if the resolved reference should match the built reference from "buildRealPath" + name string + buildLinkSource file.Path // ln -s DEST + buildLinkDest file.Path // ln -s SOURCE + buildRealPath file.Path // a real file that should exist (or not if "") + linkOptions []LinkResolutionOption + requestPath file.Path // the path to check against + expectedExists bool // if the request path should exist or not + expectedErr bool // if an error is expected from the request + expectedRealRef bool // if the resolved reference should match the built reference from "buildRealPath" + expected *file.Resolution }{ - /////////////// + /////////////////// { - name: "request base is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - requestPath: "/home", - expectedExists: true, - expectedResolvedPath: "/another/place", + name: "request base is ABSOLUTE symlink", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + 
requestPath: "/home", // /another/place is the "real" reference that we followed, so we should expect the IDs to match upon lookup expectedRealRef: true, + expectedExists: true, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/another/place"}, + LinkResolutions: []file.Resolution{ + { + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + }, + }, }, { - name: "request base is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place", - linkOptions: []LinkResolutionOption{}, - requestPath: "/home", - expectedExists: true, - expectedResolvedPath: "/home", - // /home is just a symlink, not the real file (which is at /another/place) + name: "request base is ABSOLUTE symlink, request no link resolution", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place", + linkOptions: []LinkResolutionOption{}, + requestPath: "/home", + // /home is just a symlink, not the real file (which is at /another/place)... 
and we've provided no symlink resolution expectedRealRef: false, + expectedExists: true, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: nil, + }, }, - /////////////// + ///////////////////// { - name: "request parent is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // a nop for this case (note the expected path and ref) - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, + name: "request parent is ABSOLUTE symlink", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // a nop for this case (note the expected path and ref) + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. 
+ // this means that all resolution is on the ancestors (thus not a link resolution on the leaf) + LinkResolutions: nil, + }, }, { - name: "request parent is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, // a nop for this case (note the expected path and ref) - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, + name: "request parent is ABSOLUTE symlink, request no link resolution", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{}, // a nop for this case (note the expected path and ref) + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + // why are we seeing a result that requires link resolution but we've requested no link resolution? + // because there is always ancestor link resolution by default, and this example is only via + // ancestors, thus the leaf is still resolved (since it doesn't have a link). + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. 
+ // this means that all resolution is on the ancestors (thus not a link resolution on the leaf) + LinkResolutions: nil, + }, }, - /////////////// + ///////////////// { - name: "request base is RELATIVE symlink", - buildLinkSource: "/home", - buildLinkDest: "../../another/place", - buildRealPath: "/another/place", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - requestPath: "/home", - expectedExists: true, - expectedResolvedPath: "/another/place", - expectedRealRef: true, + name: "request base is RELATIVE symlink", + buildLinkSource: "/home", + buildLinkDest: "../../another/place", + buildRealPath: "/another/place", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + requestPath: "/home", + expectedExists: true, + expectedRealRef: true, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/another/place"}, + LinkResolutions: []file.Resolution{ + { + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + }, + }, }, { - name: "request base is RELATIVE symlink", + name: "request base is RELATIVE symlink, no link resolution requested", buildLinkSource: "/home", buildLinkDest: "../../another/place/wagoodman", buildRealPath: "/another/place/wagoodman", @@ -585,42 +833,62 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home", expectedExists: true, // note that since the request matches the link source and we are NOT following, we get the link ref back - expectedResolvedPath: "/home", - expectedRealRef: false, + expectedRealRef: false, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: nil, + }, }, - /////////////// + ///////////////// { - name: "request parent is RELATIVE symlink", - buildLinkSource: "/home", - buildLinkDest: "../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link - 
requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, - { - name: "request parent is RELATIVE symlink", - buildLinkSource: "/home", - buildLinkDest: "../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, + name: "request parent is RELATIVE symlink", + buildLinkSource: "/home", + buildLinkDest: "../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. + // (the symlink is for an ancestor... so we don't show link resolutions) + LinkResolutions: nil, + }, + }, + { + name: "request parent is RELATIVE symlink, no link resolution requested", + buildLinkSource: "/home", + buildLinkDest: "../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. + // (the symlink is for an ancestor... 
so we don't show link resolutions) + LinkResolutions: nil, + }, }, /////////////// { - name: "request base is DEAD symlink", + name: "request base is DEAD symlink, request no link resolution", buildLinkSource: "/home", buildLinkDest: "/mwahaha/i/go/to/nowhere", linkOptions: []LinkResolutionOption{}, requestPath: "/home", // since we did not follow, the paths should exist to the symlink file - expectedResolvedPath: "/home", - expectedExists: true, + expectedExists: true, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: nil, + }, }, { name: "request base is DEAD symlink", @@ -629,8 +897,8 @@ func TestFileTree_File_Symlink(t *testing.T) { linkOptions: []LinkResolutionOption{FollowBasenameLinks}, requestPath: "/home", // we are following the path, which goes to nowhere.... the first failed path is resolved and returned - expectedResolvedPath: "/mwahaha", - expectedExists: false, + expectedExists: false, + expected: nil, }, { name: "request base is DEAD symlink (which we don't follow)", @@ -639,38 +907,59 @@ func TestFileTree_File_Symlink(t *testing.T) { linkOptions: []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}, requestPath: "/home", // we are following the path, which goes to nowhere.... 
the first failed path is resolved and returned - expectedResolvedPath: "/home", - expectedExists: true, + expectedExists: true, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: []file.Resolution{ + { + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + // this entry represents the dead symlink, note there is no file reference to fetch from the catalog + { + RequestPath: "/mwahaha/i/go/to/nowhere", + }, + }, + }, }, - /////////////// + ///////////////// // trying to resolve to above root { - name: "request parent is RELATIVE symlink to ABOVE root", - buildLinkSource: "/home", - buildLinkDest: "../../../../../../../../../../../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, + name: "request parent is RELATIVE symlink to ABOVE root", + buildLinkSource: "/home", + buildLinkDest: "../../../../../../../../../../../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + LinkResolutions: nil, + }, }, { - name: "request parent is RELATIVE symlink to ABOVE root", - buildLinkSource: "/home", - buildLinkDest: "../../../../../../../../../../../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - 
expectedRealRef: true, + name: "request parent is RELATIVE symlink to ABOVE root", + buildLinkSource: "/home", + buildLinkDest: "../../../../../../../../../../../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + LinkResolutions: nil, + }, }, } for _, test := range tests { - t.Run(fmt.Sprintf("%s (follow=%+v)", test.name, test.linkOptions), func(t *testing.T) { - tr := NewFileTree() + t.Run(test.name, func(t *testing.T) { + tr := New() _, err := tr.AddSymLink(test.buildLinkSource, test.buildLinkDest) if err != nil { t.Fatalf("unexpected an error on add link: %+v", err) @@ -700,29 +989,24 @@ func TestFileTree_File_Symlink(t *testing.T) { t.Fatalf("expected path to exist, but does NOT") } - // validate ref... - if realRef != nil && ref != nil { - // validate path... 
- if ref.RealPath != test.expectedResolvedPath { - t.Fatalf("unexpected path difference: %+v != %v", ref.RealPath, test.expectedResolvedPath) - } + // validate the resolved reference against the real reference added to the tree + if !test.expectedRealRef && ref.HasReference() && realRef != nil && ref.ID() == realRef.ID() { + t.Errorf("refs should not be the same: resolve(%+v) == real(%+v)", ref, realRef) + } else if test.expectedRealRef && ref.ID() != realRef.ID() { + t.Errorf("refs should be the same: resolve(%+v) != real(%+v)", ref, realRef) + } - if ref.ID() == realRef.ID() && !test.expectedRealRef { - t.Errorf("refs should not be the same: resolve(%+v) == reaal(%+v)", ref, realRef) - } else if ref.ID() != realRef.ID() && test.expectedRealRef { - t.Errorf("refs should be the same: resolve(%+v) != real(%+v)", ref, realRef) - } - } else { - if test.expectedRealRef { - t.Fatalf("expected to test a real reference, but could not") - } + // compare the remaining expectations, ignoring any reference IDs + ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{}) + if d := cmp.Diff(test.expected, ref, ignoreIDs); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) } }) } } func TestFileTree_File_MultipleIndirections(t *testing.T) { - tr := NewFileTree() + tr := New() // first indirection _, err := tr.AddSymLink("/home", "/another/place") if err != nil { @@ -763,7 +1047,7 @@ func TestFileTree_File_MultipleIndirections(t *testing.T) { } func TestFileTree_File_CycleDetection(t *testing.T) { - tr := NewFileTree() + tr := New() // first indirection _, err := tr.AddSymLink("/home", "/another/place") if err != nil { @@ -789,7 +1073,7 @@ func TestFileTree_File_CycleDetection(t *testing.T) { } func TestFileTree_File_DeadCycleDetection(t *testing.T) { - tr := NewFileTree() + tr := New() _, err := tr.AddSymLink("/somewhere/acorn", "noobaa-core/../acorn/bin/acorn") require.NoError(t, err) @@ -806,7 +1090,7 @@ func TestFileTree_File_DeadCycleDetection(t *testing.T) 
{ } func TestFileTree_AllFiles(t *testing.T) { - tr := NewFileTree() + tr := New() paths := []string{ "/home/a-file.txt", @@ -816,30 +1100,26 @@ func TestFileTree_AllFiles(t *testing.T) { for _, p := range paths { _, err := tr.AddFile(file.Path(p)) - if err != nil { - t.Fatalf("failed to add path ('%s'): %+v", p, err) - } + require.NoError(t, err) } var err error + var f *file.Reference // dir - _, err = tr.AddDir("/home") - if err != nil { - t.Fatalf("could not setup dir: %+v", err) - } + f, err = tr.AddDir("/home") + require.NotNil(t, f) + require.NoError(t, err) // relative symlink - _, err = tr.AddSymLink("/home/symlink", "../../../sym-linked-dest") - if err != nil { - t.Fatalf("could not setup link: %+v", err) - } + f, err = tr.AddSymLink("/home/symlink", "../../../sym-linked-dest") + require.NotNil(t, f) + require.NoError(t, err) // hardlink - _, err = tr.AddHardLink("/home/hardlink", "/hard-linked-dest") - if err != nil { - t.Fatalf("could not setup link: %+v", err) - } + f, err = tr.AddHardLink("/home/hardlink", "/hard-linked-dest") + require.NotNil(t, f) + require.NoError(t, err) tests := []struct { name string @@ -853,7 +1133,7 @@ func TestFileTree_AllFiles(t *testing.T) { }, { name: "reg", - types: []file.Type{file.TypeReg}, + types: []file.Type{file.TypeRegular}, expected: []string{"/home/a-file.txt", "/sym-linked-dest/a-.gif", "/hard-linked-dest/b-.gif"}, }, { @@ -863,17 +1143,17 @@ func TestFileTree_AllFiles(t *testing.T) { }, { name: "symlink", - types: []file.Type{file.TypeSymlink}, + types: []file.Type{file.TypeSymLink}, expected: []string{"/home/symlink"}, }, { name: "multiple", - types: []file.Type{file.TypeReg, file.TypeSymlink}, + types: []file.Type{file.TypeRegular, file.TypeSymLink}, expected: []string{"/home/a-file.txt", "/sym-linked-dest/a-.gif", "/hard-linked-dest/b-.gif", "/home/symlink"}, }, { name: "dir", - types: []file.Type{file.TypeDir}, + types: []file.Type{file.TypeDirectory}, // note: only explicitly added directories exist in the 
catalog expected: []string{"/home"}, }, diff --git a/pkg/filetree/glob.go b/pkg/filetree/glob.go index 0a650c6f..a99ad154 100644 --- a/pkg/filetree/glob.go +++ b/pkg/filetree/glob.go @@ -19,13 +19,6 @@ var _ fs.FS = (*osAdapter)(nil) var _ fs.FileInfo = (*fileinfoAdapter)(nil) var _ fs.DirEntry = (*fileinfoAdapter)(nil) -type GlobResult struct { - MatchPath file.Path - RealPath file.Path - IsDeadLink bool - Reference file.Reference -} - // fileAdapter is an object meant to implement the doublestar.File for getting Lstat results for an entire directory. type fileAdapter struct { os *osAdapter @@ -52,14 +45,16 @@ func isInPathResolutionLoop(path string, ft *FileTree) (bool, error) { allPathSet := file.NewPathSet() allPaths := file.Path(path).AllPaths() for _, p := range allPaths { - fn, err := ft.node(p, linkResolutionStrategy{ + fna, err := ft.node(p, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return false, err } - allPathSet.Add(file.Path(fn.ID())) + if fna.HasFileNode() { + allPathSet.Add(file.Path(fna.FileNode.ID())) + } } // we want to allow for getting children out of the first iteration of a infinite path, but NOT allowing // beyond the second iteration down an infinite path. 
@@ -93,23 +88,23 @@ func (f *fileAdapter) ReadDir(n int) ([]fs.DirEntry, error) { return nil, os.ErrInvalid } var ret = make([]fs.DirEntry, 0) - fn, err := f.filetree.node(file.Path(f.name), linkResolutionStrategy{ + fna, err := f.filetree.node(file.Path(f.name), linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return ret, err } - if fn == nil { + if !fna.HasFileNode() { return ret, nil } - isInPathResolutionLoop, err := isInPathResolutionLoop(f.name, f.filetree) - if err != nil || isInPathResolutionLoop { + isInLoop, err := isInPathResolutionLoop(f.name, f.filetree) + if err != nil || isInLoop { return ret, err } - for idx, child := range f.filetree.tree.Children(fn) { + for idx, child := range f.filetree.tree.Children(fna.FileNode) { if idx == n && n != -1 { break } @@ -132,23 +127,23 @@ type osAdapter struct { func (a *osAdapter) ReadDir(name string) ([]fs.DirEntry, error) { var ret = make([]fs.DirEntry, 0) - fn, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ + fna, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return ret, err } - if fn == nil { + if !fna.HasFileNode() { return ret, nil } - isInPathResolutionLoop, err := isInPathResolutionLoop(name, a.filetree) - if err != nil || isInPathResolutionLoop { + isInLoop, err := isInPathResolutionLoop(name, a.filetree) + if err != nil || isInLoop { return ret, err } - for _, child := range a.filetree.tree.Children(fn) { + for _, child := range a.filetree.tree.Children(fna.FileNode) { requestPath := path.Join(name, filepath.Base(string(child.ID()))) r, err := a.Lstat(requestPath) if err == nil { @@ -164,7 +159,7 @@ func (a *osAdapter) ReadDir(name string) ([]fs.DirEntry, error) { // Lstat returns a FileInfo describing the named file. If the file is a symbolic link, the returned // FileInfo describes the symbolic link. Lstat makes no attempt to follow the link. 
func (a *osAdapter) Lstat(name string) (fs.FileInfo, error) { - fn, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ + fna, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ FollowAncestorLinks: true, // Lstat by definition requires that basename symlinks are not followed FollowBasenameLinks: false, @@ -173,13 +168,13 @@ func (a *osAdapter) Lstat(name string) (fs.FileInfo, error) { if err != nil { return &fileinfoAdapter{}, err } - if fn == nil { + if !fna.HasFileNode() { return &fileinfoAdapter{}, os.ErrNotExist } return &fileinfoAdapter{ VirtualPath: file.Path(name), - Node: *fn, + Node: *fna.FileNode, }, nil } @@ -194,7 +189,7 @@ func (a *osAdapter) Open(name string) (fs.File, error) { // Stat returns a FileInfo describing the named file. func (a *osAdapter) Stat(name string) (fs.FileInfo, error) { - fn, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ + fna, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, DoNotFollowDeadBasenameLinks: a.doNotFollowDeadBasenameLinks, @@ -202,12 +197,12 @@ func (a *osAdapter) Stat(name string) (fs.FileInfo, error) { if err != nil { return &fileinfoAdapter{}, err } - if fn == nil { + if !fna.HasFileNode() { return &fileinfoAdapter{}, os.ErrNotExist } return &fileinfoAdapter{ VirtualPath: file.Path(name), - Node: *fn, + Node: *fna.FileNode, }, nil } @@ -248,7 +243,7 @@ func (a *fileinfoAdapter) Mode() os.FileMode { // the underlying implementation for symlinks and hardlinks share the same semantics in the tree implementation // (meaning resolution is required) where as in a real file system this is taken care of by the driver // by making the file point to the same inode as another --making the indirection transparent to applications. 
const (
	// searchByGlob is the default, unparsed/processed glob value searched directly against the filetree.
	searchByGlob searchBasis = iota

	// searchByFullPath indicates that the given glob value is not a glob, thus a (simpler) path lookup against the filetree should be performed as the search.
	searchByFullPath

	// searchByExtension indicates cases like "**/*.py" where the only specific glob element indicates the file or directory extension.
	searchByExtension

	// searchByBasename indicates cases like "**/bin/python" where the only specific glob element indicates the file or directory basename (e.g. "python").
	searchByBasename

	// searchByBasenameGlob indicates cases like "**/bin/python*" where the search space is limited to the full set of all basenames that match the given glob.
	searchByBasenameGlob

	// searchBySubDirectory indicates cases like "**/var/lib/dpkg/status.d/*" where we're interested in selecting all files within a directory (but not the directory itself).
	searchBySubDirectory
)

// searchBasis enumerates the lookup strategies that a glob can be decomposed into.
type searchBasis int

// String returns a human-readable name for the search basis (for logging/debugging).
func (s searchBasis) String() string {
	switch s {
	case searchByGlob:
		return "glob"
	case searchByFullPath:
		return "full-path"
	case searchByExtension:
		return "extension"
	case searchByBasename:
		return "basename"
	case searchByBasenameGlob:
		return "basename-glob"
	case searchBySubDirectory:
		return "subdirectory"
	}
	return "unknown search basis"
}

// searchRequest is a single, concrete lookup derived from a user glob: the basis
// (strategy), the value to search for, and an optional full-glob "requirement"
// that any candidate result must still match against.
type searchRequest struct {
	searchBasis
	value       string
	requirement string
}

// String renders the request as "<basis>: <value>" with the optional requirement appended.
func (s searchRequest) String() string {
	value := s.searchBasis.String() + ": " + s.value
	if s.requirement != "" {
		value += " (requirement: " + s.requirement + ")"
	}
	return value
}

// parseGlob cleans the given glob and decomposes it into one or more search
// requests, preferring cheaper lookup strategies (full path, extension,
// basename) over a raw glob search whenever the pattern allows it.
func parseGlob(glob string) []searchRequest {
	glob = cleanGlob(glob)

	if !strings.ContainsAny(glob, "*?[]{}") {
		// no glob metacharacters at all: a plain path lookup suffices
		return []searchRequest{
			{
				searchBasis: searchByFullPath,
				value:       glob,
			},
		}
	}

	beforeBasename, basename := splitAtBasename(glob)

	if basename == "*" {
		_, nestedBasename := splitAtBasename(beforeBasename)
		if !strings.ContainsAny(nestedBasename, "*?[]{}") {
			// special case: glob is a parent glob (e.g. "**/foo/bar/*"), so select
			// the contents of the named directory rather than the directory itself
			return []searchRequest{
				{
					searchBasis: searchBySubDirectory,
					value:       nestedBasename,
					requirement: beforeBasename,
				},
			}
		}
	}

	requests := parseGlobBasename(basename)
	for i := range requests {
		applyRequirement(&requests[i], beforeBasename, glob)
	}

	return requests
}

// splitAtBasename splits the glob at the last path separator, returning the
// portion before the basename and the basename itself. A glob with no
// separator is all basename; a glob ending in "/" yields two empty strings.
func splitAtBasename(glob string) (string, string) {
	// TODO: need to correctly avoid indexes within [] and {} groups
	basenameSplitAt := strings.LastIndex(glob, "/")

	var basename string
	var beforeBasename string
	switch {
	case basenameSplitAt == -1:
		// note: this has no glob path prefix, thus no requirement...
		// this can only be a basename, basename glob, or extension
		basename = glob
	case basenameSplitAt < len(glob)-1:
		// a separator exists with content after it: split into prefix + basename
		basename = glob[basenameSplitAt+1:]
		beforeBasename = glob[:basenameSplitAt]
	}
	// (a trailing separator leaves both results empty)

	return beforeBasename, basename
}

// applyRequirement attaches the full glob as a post-match requirement to the
// request when the path prefix constrains where matches may live. A bare "**"
// prefix imposes no path constraint, so no requirement is needed — except for
// extension searches, which always re-verify against the full glob.
func applyRequirement(request *searchRequest, beforeBasename, glob string) {
	var requirement string

	if beforeBasename != "" {
		requirement = glob
		// note: the original implementation also compared beforeBasename against
		// request.requirement, but that field is always empty at this point
		// (no basename parser sets it), so that arm was dead code.
		if beforeBasename == "**" && request.searchBasis != searchByExtension {
			requirement = ""
		}
	}

	request.requirement = requirement

	if request.searchBasis == searchByGlob {
		// glob-basis requests carry the full (cleaned) glob as their value
		request.value = glob
		if glob == request.requirement {
			// the value and requirement are identical; the requirement is redundant
			request.requirement = ""
		}
	}
}

// parseGlobBasename classifies the basename portion of a glob into one or more
// search requests: a plain extension, a plain basename, a basename glob, an
// alternation expansion, or (as a fallback) a raw glob search.
func parseGlobBasename(basenameInput string) []searchRequest {
	if strings.ContainsAny(basenameInput, "[]{}") {
		return parseBasenameAltAndClassGlobSections(basenameInput)
	}

	extensionFields := strings.Split(basenameInput, "*.")
	if len(extensionFields) == 2 && extensionFields[0] == "" {
		possibleExtension := extensionFields[1]
		if !strings.ContainsAny(possibleExtension, "*?") {
			// special case, this is a plain extension (e.g. "*.txt")
			return []searchRequest{
				{
					searchBasis: searchByExtension,
					value:       "." + possibleExtension,
				},
			}
		}
	}

	if !strings.ContainsAny(basenameInput, "*?") {
		// special case, this is a plain basename (no glob metacharacters remain)
		return []searchRequest{
			{
				searchBasis: searchByBasename,
				value:       basenameInput,
			},
		}
	}

	if strings.ReplaceAll(strings.ReplaceAll(basenameInput, "?", ""), "*", "") == "" {
		// special case, this is a glob that is only asterisks/wildcards... do not process!
		return []searchRequest{
			{
				searchBasis: searchByGlob,
				// note: we let the parent caller attach the full glob value
			},
		}
	}

	return []searchRequest{
		{
			searchBasis: searchByBasenameGlob,
			value:       basenameInput,
		},
	}
}

// parseBasenameAltAndClassGlobSections handles basenames containing alternation
// ({a,b}) or character-class ([ab]) syntax. Simple, top-level alternation lists
// are expanded into one request per alternative; anything else falls back to a
// basename-glob or raw-glob search.
func parseBasenameAltAndClassGlobSections(basenameInput string) []searchRequest {
	// TODO: process escape sequences

	altStartCount := strings.Count(basenameInput, "{")
	altEndCount := strings.Count(basenameInput, "}")
	classStartCount := strings.Count(basenameInput, "[")
	classEndCount := strings.Count(basenameInput, "]")

	if altStartCount != altEndCount || classStartCount != classEndCount {
		// imbalanced braces, this is not a valid glob relative to just the basename
		return []searchRequest{
			{
				searchBasis: searchByGlob,
				// note: we let the parent caller attach the full glob value
			},
		}
	}

	if classStartCount > 0 {
		// parsing character classes is not supported at this time
		return []searchRequest{
			{
				searchBasis: searchByBasenameGlob,
				value:       basenameInput,
			},
		}
	}

	// if the glob is the simplest list form, then allow for breaking into sub-searches
	if altStartCount == 1 {
		indexStartIsPrefix := strings.Index(basenameInput, "{") == 0
		indexEndIsSuffix := strings.Index(basenameInput, "}") == len(basenameInput)-1
		if indexStartIsPrefix && indexEndIsSuffix {
			// this is a simple list, split it up
			// e.g. {a,b,c} -> a, b, c
			altSections := strings.Split(basenameInput[1:len(basenameInput)-1], ",")
			if len(altSections) > 1 {
				var requests []searchRequest
				for _, altSection := range altSections {
					// each alternative is a plain basename unless it still carries wildcards
					basis := searchByBasename
					if strings.ContainsAny(altSection, "*?") {
						basis = searchByBasenameGlob
					}

					requests = append(requests, searchRequest{
						searchBasis: basis,
						value:       altSection,
					})
				}
				return requests
			}
		}
	}

	// there is some sort of alt usage, but it is not a simple list... just treat it as a glob
	return []searchRequest{
		{
			searchBasis: searchByBasenameGlob,
			value:       basenameInput,
		},
	}
}

// cleanGlob normalizes a raw glob: trims whitespace, collapses redundant
// slashes and asterisks, removes a trailing slash (while preserving a lone
// "/"), and simplifies recursive-glob sequences.
func cleanGlob(glob string) string {
	glob = strings.TrimSpace(glob)
	glob = removeRedundantCountGlob(glob, '/', 1)
	glob = removeRedundantCountGlob(glob, '*', 2)
	if len(glob) > 1 {
		// input case: /
		// then preserve the slash
		glob = strings.TrimRight(glob, "/")
	}
	// e.g. replace "/bar**/" with "/bar*/"
	glob = simplifyMultipleGlobAsterisks(glob)
	glob = simplifyGlobRecursion(glob)
	return glob
}

// simplifyMultipleGlobAsterisks replaces any recursive globs (**) that are not
// clearly indicating recursive tree searches (i.e. not bounded by path
// separators) with a single *.
func simplifyMultipleGlobAsterisks(glob string) string {
	var sb strings.Builder
	var asteriskBuff strings.Builder
	var withinRecursiveStreak bool

	for idx, c := range glob {
		isAsterisk := c == '*'
		isSlash := c == '/'

		// special case, this is the first character in the glob and it is an asterisk...
		// treat this like a recursive streak
		if idx == 0 && isAsterisk {
			withinRecursiveStreak = true
			asteriskBuff.WriteRune(c)
			continue
		}

		if isAsterisk {
			// buffer asterisks until we know whether the run is recursive
			asteriskBuff.WriteRune(c)
			continue
		}

		if isSlash {
			if withinRecursiveStreak {
				// this is a confirmed recursive streak
				// keep all asterisks!
				sb.WriteString(asteriskBuff.String())
				asteriskBuff.Reset()
			}

			if asteriskBuff.Len() > 0 {
				// this is NOT a recursive streak, but there are asterisks
				// keep only one asterisk
				sb.WriteRune('*')
				asteriskBuff.Reset()
			}

			// this is potentially a new streak...
			withinRecursiveStreak = true
		} else {
			// ... and this is NOT a recursive streak
			if asteriskBuff.Len() > 0 {
				// ... keep only one asterisk, since it's not recursive
				sb.WriteRune('*')
			}
			asteriskBuff.Reset()
			withinRecursiveStreak = false
		}

		sb.WriteRune(c)
	}

	// flush any trailing asterisk run
	if asteriskBuff.Len() > 0 {
		if withinRecursiveStreak {
			sb.WriteString(asteriskBuff.String())
		} else {
			sb.WriteRune('*')
		}
	}

	return sb.String()
}

// globRecursionRightPattern matches one or more consecutive "**" (optionally
// slash-terminated) segments so they can be collapsed into a single "**/".
var globRecursionRightPattern = regexp.MustCompile(`(\*\*/?)+`)

// simplifyGlobRecursion collapses repeated recursive segments (e.g. "/**/**/"
// -> "**/") and normalizes leading/trailing separators around them.
func simplifyGlobRecursion(glob string) string {
	// this function assumes that all redundant asterisks have been removed (e.g. /****/ -> /**/)
	// and that all seemingly recursive globs have been replaced with a single asterisk (e.g. /bar**/ -> /bar*/)
	glob = globRecursionRightPattern.ReplaceAllString(glob, "**/")
	glob = strings.ReplaceAll(glob, "//", "/")
	if strings.HasPrefix(glob, "/**/") {
		glob = strings.TrimPrefix(glob, "/")
	}
	if len(glob) > 1 {
		// input case: /**
		// then preserve the slash
		glob = strings.TrimRight(glob, "/")
	}
	return glob
}

// removeRedundantCountGlob collapses runs of the given rune longer than count
// down to exactly count occurrences (e.g. "///" -> "/", "****" -> "**").
func removeRedundantCountGlob(glob string, val rune, count int) string {
	var sb strings.Builder

	var streak int
	for _, c := range glob {
		if c == val {
			streak++
			if streak > count {
				continue
			}
		} else {
			streak = 0
		}

		sb.WriteRune(c)
	}
	return sb.String()
}
}, + { + name: "extension", + glob: "*.txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + }, + }, + }, + { + name: "extension anywhere", + glob: "**/*.txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + requirement: "**/*.txt", + }, + }, + }, + { + name: "basename glob search with requirement", + glob: "bas*nam?.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "bas*nam?.txt", + }, + }, + }, + { + name: "extension with path requirement", + glob: "foo/bar/**/*.txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + requirement: "foo/bar/**/*.txt", + }, + }, + }, + { + name: "basename but without a path prefix", + glob: "basename.txt", + want: []searchRequest{ + { + searchBasis: searchByFullPath, + value: "basename.txt", + }, + }, + }, + { + name: "basename anywhere", + glob: "**/basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "basename.txt", + }, + }, + }, + { + name: "basename with requirement", + glob: "foo/b*/basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "basename.txt", + requirement: "foo/b*/basename.txt", + }, + }, + }, + { + name: "basename glob", + glob: "basename.*", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basename.*", + }, + }, + }, + { + name: "basename glob with requirement", + glob: "**/foo/bar/basename.*", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basename.*", + requirement: "**/foo/bar/basename.*", + }, + }, + }, + { + name: "basename wildcard glob with requirement", + glob: "**/foo/bar/basenam?.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basenam?.txt", + requirement: "**/foo/bar/basenam?.txt", + }, + }, + }, + { + name: "glob classes within a basename", + glob: "**/foo/bar/basena[me][me].txt", + want: []searchRequest{ + { + 
searchBasis: searchByBasenameGlob, + value: "basena[me][me].txt", + requirement: "**/foo/bar/basena[me][me].txt", + }, + }, + }, + { + name: "glob classes within a the path", + glob: "**/foo/[bB]ar/basena[me][me].txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basena[me][me].txt", + requirement: "**/foo/[bB]ar/basena[me][me].txt", + }, + }, + }, + { + name: "alt clobbers basename extraction", + glob: "**/foo/bar/{nested/basena[me][me].txt,another.txt}", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/bar/{nested/basena[me][me].txt,another.txt}", + }, + }, + }, + { + name: "class clobbers basename extraction", + glob: "**/foo/bar/[me][m/e].txt,another.txt", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/bar/[me][m/e].txt,another.txt", + }, + }, + }, + { + name: "match alternative matches in the basename", + glob: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "Packages", + requirement: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + }, + { + searchBasis: searchByBasename, + value: "Packages.db", + requirement: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + }, + { + searchBasis: searchByBasename, + value: "rpmdb.sqlite", + requirement: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + }, + }, + }, + { + name: "match fallback to glob search on non-simple alternatives", + glob: "**/var/lib/rpm/{Packa{ges}{GES},Packages.db,rpmdb.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "{Packa{ges}{GES},Packages.db,rpmdb.sqlite}", + requirement: "**/var/lib/rpm/{Packa{ges}{GES},Packages.db,rpmdb.sqlite}", + }, + }, + }, + { + name: "dynamic extraction of basename and basename glob for alternatives", + glob: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "Pack???s", + 
requirement: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + }, + { + searchBasis: searchByBasename, + value: "Packages.db", + requirement: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + }, + { + searchBasis: searchByBasenameGlob, + value: "rpm*.sqlite", + requirement: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + }, + }, + }, + { + name: "fallback to full glob search", + glob: "**/foo/bar/**?/**", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/bar/*?/**", + }, + }, + }, + { + name: "use parent basename for directory contents", + glob: "**/foo/bar/*", + want: []searchRequest{ + { + searchBasis: searchBySubDirectory, + value: "bar", + requirement: "**/foo/bar", + }, + }, + }, + // special cases + { + name: "empty string", + glob: "", + want: []searchRequest{ + { + searchBasis: searchByFullPath, + }, + }, + }, + { + name: "only a slash", + glob: "/", + want: []searchRequest{ + { + searchBasis: searchByFullPath, + value: "/", + }, + }, + }, + { + name: "cleanup to single slash", + glob: "///", + want: []searchRequest{ + { + searchBasis: searchByFullPath, + value: "/", + }, + }, + }, + { + name: "ends with slash", + glob: "/foo/b*r/", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "b*r", + requirement: "/foo/b*r", // note that the slash is removed since this should be a clean path + }, + }, + }, + { + name: "ends with *", + glob: "**/foo/b*r/*", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/b*r/*", + }, + }, + }, + { + name: "ends with ***", + glob: "**/foo/b*r/**", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/b*r/**", + }, + }, + }, + { + name: "spaces around everything", + glob: " /foo/b*r/ .txt ", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: " .txt", // note the space + requirement: "/foo/b*r/ .txt", // note the space in the middle, but otherwise clean on the front and back + }, + }, + }, + { + name: 
"fallback to full glob search", + glob: "**/foo/bar/***.*****.******", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*.*.*", // note that the basename glob is cleaned up + requirement: "**/foo/bar/*.*.*", // note that the glob is cleaned up + }, + }, + }, + { + name: "odd glob input still honors basename searches", + glob: "**/foo/**.***.****bar/***thin*.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*thin*.txt", // note that the basename glob is cleaned up + requirement: "**/foo/*.*.*bar/*thin*.txt", // note that the glob is cleaned up + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, parseGlob(tt.glob), "parseGlob(%v)", tt.glob) + }) + } +} + +func Test_parseGlobBasename(t *testing.T) { + tests := []struct { + name string + input string + want []searchRequest + }{ + { + name: "empty string", + input: "", + want: []searchRequest{ + { + searchBasis: searchByBasename, + }, + }, + }, + { + name: "everything-ish", + input: "*?", + want: []searchRequest{ + { + searchBasis: searchByGlob, + }, + }, + }, + { + name: "everything recursive", + input: "**", + want: []searchRequest{ + { + searchBasis: searchByGlob, + }, + }, + }, + { + name: "simple basename", + input: "basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "basename.txt", + }, + }, + }, + { + name: "basename with prefix glob", + input: "*basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*basename.txt", + }, + }, + }, + { + name: "basename with pattern", + input: "bas*nam?.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "bas*nam?.txt", + }, + }, + }, + { + name: "extension", + input: "*.txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + }, + }, + }, + { + name: "possible extension that should be searched by glob", + input: "*.*.txt", + want: 
[]searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*.*.txt", + }, + }, + }, + { + name: "tricky basename", + input: ".txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: ".txt", + }, + }, + }, + { + name: "basename glob with extension", + input: "*thin*.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*thin*.txt", + }, + }, + }, + { + name: "basename alternates", + input: "{Packages,Packages.db,rpmdb.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "Packages", + }, + { + searchBasis: searchByBasename, + value: "Packages.db", + }, + { + searchBasis: searchByBasename, + value: "rpmdb.sqlite", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, parseGlobBasename(tt.input), "parseGlobBasename(%v)", tt.input) + }) + } +} + +func Test_cleanGlob(t *testing.T) { + tests := []struct { + name string + glob string + want string + }{ + { + name: "empty string", + glob: "", + want: "", + }, + { + name: "remove spaces from glob edges", + glob: " **/foo/ **/ bar.txt ", + want: "**/foo/ */ bar.txt", + }, + { + name: "simplify slashes", + glob: "///foo/////**///**////", + want: "/foo/**", + }, + { + name: "simplify larger recursive glob", + glob: "**/foo/**/*/***/*bar**/***.*****.******", + want: "**/foo/**/*/**/*bar*/*.*.*", + }, + { + name: "simplify glob prefix", + glob: "***/foo.txt", + want: "**/foo.txt", + }, + { + name: "simplify glob within multiple path", + glob: "bar**/ba**r*/***/**/bar***/**/foo.txt", + want: "bar*/ba*r*/**/bar*/**/foo.txt", + }, + { + name: "simplify prefix and suffix glob", + glob: "***/foo/**/****", + want: "**/foo/**", + }, + { + name: "simplify multiple recursive requests", + glob: "/**/**/foo/**/**", + want: "**/foo/**", + }, + { + name: "simplify slashes and asterisks", + glob: "/***/****///foo/**//****////", + want: "**/foo/**", + }, + } + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, cleanGlob(tt.glob), "cleanGlob(%v)", tt.glob) + }) + } +} + +func Test_removeRedundantCountGlob(t *testing.T) { + type args struct { + glob string + val rune + count int + } + tests := []struct { + name string + args args + want string + }{ + { + name: "empty string", + args: args{ + glob: "", + val: '*', + count: 1, + }, + want: "", + }, + { + name: "simplify on edges and body", + args: args{ + glob: "**/foo/***/****", + val: '*', + count: 2, + }, + want: "**/foo/**/**", + }, + { + name: "simplify slashes", + args: args{ + glob: "///something/**///here?/*/will//work///", + val: '/', + count: 1, + }, + want: "/something/**/here?/*/will/work/", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, removeRedundantCountGlob(tt.args.glob, tt.args.val, tt.args.count), "removeRedundantCountGlob(%v, %v, %v)", tt.args.glob, tt.args.val, tt.args.count) + }) + } +} + +func Test_simplifyMultipleGlobAsterisks(t *testing.T) { + tests := []struct { + name string + glob string + want string + }{ + { + name: "simplify glob suffix", + glob: "foo/.***", + want: "foo/.*", + }, + { + name: "simplify glob within path", + glob: "**/bar**/foo.txt", + want: "**/bar*/foo.txt", + }, + { + name: "simplify glob within multiple path", + glob: "bar**/ba**r*/**/**/bar**/**/foo.txt", + want: "bar*/ba*r*/**/**/bar*/**/foo.txt", + }, + { + name: "simplify glob within path prefix", + glob: "bar**/foo.txt", + want: "bar*/foo.txt", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, simplifyMultipleGlobAsterisks(tt.glob), "simplifyMultipleGlobAsterisks(%v)", tt.glob) + }) + } +} + +func Test_simplifyGlobRecursion(t *testing.T) { + tests := []struct { + name string + glob string + want string + }{ + { + name: "single instance with slash prefix", + glob: "/**", + want: "**", + }, + { + name: "single instance with slash suffix", + glob: 
"**/", + want: "**", + }, + { + name: "no slash prefix", + glob: "**/**/fo*o/**/**", + want: "**/fo*o/**", + }, + { + name: "within body", + glob: "/fo*o/**/**/bar", + want: "/fo*o/**/bar", + }, + { + name: "with slash prefix", + glob: "/**/**/foo/**/**", + want: "**/foo/**", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, simplifyGlobRecursion(tt.glob), "simplifyGlobRecursion(%v)", tt.glob) + }) + } +} diff --git a/pkg/filetree/glob_test.go b/pkg/filetree/glob_test.go index 6d63b1de..e5671561 100644 --- a/pkg/filetree/glob_test.go +++ b/pkg/filetree/glob_test.go @@ -10,7 +10,7 @@ import ( ) func TestFileInfoAdapter(t *testing.T) { - tr := NewFileTree() + tr := New() tr.AddFile("/home/thing.txt") tr.AddDir("/home/wagoodman") tr.AddSymLink("/home/thing", "./thing.txt") @@ -23,21 +23,21 @@ func TestFileInfoAdapter(t *testing.T) { VirtualPath: "/home/thing.txt", Node: filenode.FileNode{ RealPath: "/home/thing.txt", - FileType: file.TypeReg, + FileType: file.TypeRegular, }, }, "/home/wagoodman": { VirtualPath: "/home/wagoodman", Node: filenode.FileNode{ RealPath: "/home/wagoodman", - FileType: file.TypeDir, + FileType: file.TypeDirectory, }, }, "/home/thing": { VirtualPath: "/home/thing", Node: filenode.FileNode{ RealPath: "/home/thing", - FileType: file.TypeSymlink, + FileType: file.TypeSymLink, LinkPath: "./thing.txt", }, }, @@ -118,7 +118,7 @@ func TestFileInfoAdapter(t *testing.T) { } func TestOsAdapter_PreventInfiniteLoop(t *testing.T) { - tr := NewFileTree() + tr := New() tr.AddFile("/usr/bin/busybox") tr.AddSymLink("/usr/bin/X11", ".") @@ -167,7 +167,7 @@ func TestOsAdapter_PreventInfiniteLoop(t *testing.T) { } func TestFileInfoAdapter_PreventInfiniteLoop(t *testing.T) { - tr := NewFileTree() + tr := New() tr.AddFile("/usr/bin/busybox") tr.AddSymLink("/usr/bin/X11", ".") @@ -240,20 +240,20 @@ func TestOSAdapter_ReadDir(t *testing.T) { expected: []fileinfoAdapter{ { VirtualPath: "/home/thing.txt", - Node: 
filenode.FileNode{RealPath: "/home/thing.txt", FileType: 48}, + Node: filenode.FileNode{RealPath: "/home/thing.txt", FileType: file.TypeRegular}, }, { VirtualPath: "/home/wagoodman", - Node: filenode.FileNode{RealPath: "/home/wagoodman", FileType: 53}, + Node: filenode.FileNode{RealPath: "/home/wagoodman", FileType: file.TypeDirectory}, }, { VirtualPath: "/home/thing", - Node: filenode.FileNode{RealPath: "/home/thing", FileType: 50, LinkPath: "./thing.txt"}, + Node: filenode.FileNode{RealPath: "/home/thing", FileType: file.TypeSymLink, LinkPath: "./thing.txt"}, }, { VirtualPath: "/home/place", - Node: filenode.FileNode{RealPath: "/home/place", FileType: 49, LinkPath: "/somewhere-else"}, + Node: filenode.FileNode{RealPath: "/home/place", FileType: file.TypeHardLink, LinkPath: "/somewhere-else"}, }, }, shouldErr: false, @@ -312,7 +312,7 @@ func TestOSAdapter_Lstat(t *testing.T) { VirtualPath: "/home", Node: filenode.FileNode{ RealPath: "/home", - FileType: file.TypeDir, + FileType: file.TypeDirectory, }, }, }, @@ -324,7 +324,7 @@ func TestOSAdapter_Lstat(t *testing.T) { VirtualPath: "/home/thing", Node: filenode.FileNode{ RealPath: "/home/thing", - FileType: file.TypeSymlink, + FileType: file.TypeSymLink, LinkPath: "./thing.txt", }, }, @@ -400,7 +400,7 @@ func TestOSAdapter_Stat(t *testing.T) { VirtualPath: "/home", Node: filenode.FileNode{ RealPath: "/home", - FileType: file.TypeDir, + FileType: file.TypeDirectory, }, }, }, @@ -413,7 +413,7 @@ func TestOSAdapter_Stat(t *testing.T) { VirtualPath: "/home/thing", Node: filenode.FileNode{ RealPath: "/home/thing.txt", - FileType: file.TypeReg, + FileType: file.TypeRegular, }, }, }, @@ -470,7 +470,7 @@ func TestOSAdapter_Stat(t *testing.T) { } func newHelperTree() *FileTree { - tr := NewFileTree() + tr := New() tr.AddFile("/home/thing.txt") tr.AddDir("/home/wagoodman") tr.AddSymLink("/home/thing", "./thing.txt") diff --git a/pkg/filetree/index.go b/pkg/filetree/index.go new file mode 100644 index 00000000..a7433d48 --- 
/dev/null +++ b/pkg/filetree/index.go @@ -0,0 +1,297 @@ +package filetree + +import ( + "fmt" + "os" + "path" + "sort" + "strings" + "sync" + + "github.com/anchore/stereoscope/internal/log" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/becheran/wildmatch-go" + "github.com/scylladb/go-set/strset" +) + +type Index interface { + IndexReader + IndexWriter +} + +type IndexReader interface { + Exists(f file.Reference) bool + Get(f file.Reference) (IndexEntry, error) + GetByMIMEType(mTypes ...string) ([]IndexEntry, error) + GetByFileType(fTypes ...file.Type) ([]IndexEntry, error) + GetByExtension(extensions ...string) ([]IndexEntry, error) + GetByBasename(basenames ...string) ([]IndexEntry, error) + GetByBasenameGlob(globs ...string) ([]IndexEntry, error) + Basenames() []string +} + +type IndexWriter interface { + Add(f file.Reference, m file.Metadata) +} + +// Index represents all file metadata and source tracing for all files contained within the image layer +// blobs (i.e. everything except for the image index/manifest/metadata files). +type index struct { + *sync.RWMutex + index map[file.ID]IndexEntry + byFileType map[file.Type]file.IDSet + byMIMEType map[string]file.IDSet + byExtension map[string]file.IDSet + byBasename map[string]file.IDSet + basenames *strset.Set +} + +// NewIndex returns an empty Index. +func NewIndex() Index { + return &index{ + RWMutex: &sync.RWMutex{}, + index: make(map[file.ID]IndexEntry), + byFileType: make(map[file.Type]file.IDSet), + byMIMEType: make(map[string]file.IDSet), + byExtension: make(map[string]file.IDSet), + byBasename: make(map[string]file.IDSet), + basenames: strset.New(), + } +} + +// IndexEntry represents all stored metadata for a single file reference. +type IndexEntry struct { + file.Reference + file.Metadata +} + +// Add creates a new IndexEntry for the given file reference and metadata, cataloged by the ID of the +// file reference (overwriting any existing entries without warning). 
+func (c *index) Add(f file.Reference, m file.Metadata) { + c.Lock() + defer c.Unlock() + + id := f.ID() + + if _, ok := c.index[id]; ok { + log.WithFields("id", id, "path", f.RealPath).Debug("overwriting existing file index entry") + } + + if m.MIMEType != "" { + if _, ok := c.byMIMEType[m.MIMEType]; !ok { + c.byMIMEType[m.MIMEType] = file.NewIDSet() + } + // an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have + // the contents and the MIME type could not be determined then the default value is application/octet-stream. + c.byMIMEType[m.MIMEType].Add(id) + } + + basename := path.Base(string(f.RealPath)) + + if _, ok := c.byBasename[basename]; !ok { + c.byBasename[basename] = file.NewIDSet() + } + + c.byBasename[basename].Add(id) + c.basenames.Add(basename) + + for _, ext := range fileExtensions(string(f.RealPath)) { + if _, ok := c.byExtension[ext]; !ok { + c.byExtension[ext] = file.NewIDSet() + } + c.byExtension[ext].Add(id) + } + + if _, ok := c.byFileType[m.Type]; !ok { + c.byFileType[m.Type] = file.NewIDSet() + } + c.byFileType[m.Type].Add(id) + + c.index[id] = IndexEntry{ + Reference: f, + Metadata: m, + } +} + +// Exists indicates if the given file reference exists in the index. +func (c *index) Exists(f file.Reference) bool { + c.RLock() + defer c.RUnlock() + _, ok := c.index[f.ID()] + return ok +} + +// Get fetches a IndexEntry for the given file reference, or returns an error if the file reference has not +// been added to the index. 
+func (c *index) Get(f file.Reference) (IndexEntry, error) { + c.RLock() + defer c.RUnlock() + value, ok := c.index[f.ID()] + if !ok { + return IndexEntry{}, os.ErrNotExist + } + return value, nil +} + +func (c *index) Basenames() []string { + c.RLock() + defer c.RUnlock() + + bns := c.basenames.List() + sort.Strings(bns) + + return bns +} + +func (c *index) GetByFileType(fTypes ...file.Type) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + var entries []IndexEntry + + for _, fType := range fTypes { + fileIDs, ok := c.byFileType[fType] + if !ok { + continue + } + + for _, id := range fileIDs.Sorted() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + } + + return entries, nil +} + +func (c *index) GetByMIMEType(mTypes ...string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + var entries []IndexEntry + + for _, mType := range mTypes { + fileIDs, ok := c.byMIMEType[mType] + if !ok { + continue + } + + for _, id := range fileIDs.Sorted() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + } + + return entries, nil +} + +func (c *index) GetByExtension(extensions ...string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + var entries []IndexEntry + + for _, extension := range extensions { + fileIDs, ok := c.byExtension[extension] + if !ok { + continue + } + + for _, id := range fileIDs.Sorted() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + } + + return entries, nil +} + +func (c *index) GetByBasename(basenames ...string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + var entries []IndexEntry + + for _, basename := range basenames { + if strings.Contains(basename, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + fileIDs, ok := c.byBasename[basename] + if !ok { + continue + } + + for _, id := range 
fileIDs.Sorted() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + } + + return entries, nil +} + +func (c *index) GetByBasenameGlob(globs ...string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + var entries []IndexEntry + for _, glob := range globs { + if strings.Contains(glob, "**") { + return nil, fmt.Errorf("basename glob patterns with '**' are not supported") + } + if strings.Contains(glob, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + patternObj := wildmatch.NewWildMatch(glob) + for _, b := range c.Basenames() { + if patternObj.IsMatch(b) { + bns, err := c.GetByBasename(b) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, err) + } + entries = append(entries, bns...) + } + } + } + + return entries, nil +} + +func fileExtensions(p string) []string { + var exts []string + p = strings.TrimSpace(p) + + // ignore oddities + if strings.HasSuffix(p, ".") { + return exts + } + + // ignore directories + if strings.HasSuffix(p, "/") { + return exts + } + + // ignore . which indicate a hidden file + p = strings.TrimLeft(path.Base(p), ".") + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '.' 
{ + exts = append(exts, p[i:]) + } + } + return exts +} diff --git a/pkg/filetree/index_test.go b/pkg/filetree/index_test.go new file mode 100644 index 00000000..d8cd6b91 --- /dev/null +++ b/pkg/filetree/index_test.go @@ -0,0 +1,776 @@ +//go:build !windows +// +build !windows + +package filetree + +import ( + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" + + "github.com/anchore/stereoscope/pkg/file" +) + +func commonIndexFixture(t *testing.T) Index { + t.Helper() + + tree := New() + idx := NewIndex() + + addDir := func(path file.Path) { + ref, err := tree.AddDir(path) + require.NoError(t, err, "failed to add DIR reference to index") + require.NotNil(t, ref, "failed to add DIR reference to index (nil ref") + idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeDirectory, IsDir: true}) + } + + addFile := func(path file.Path) { + ref, err := tree.AddFile(path) + require.NoError(t, err, "failed to add FILE reference to index") + require.NotNil(t, ref, "failed to add FILE reference to index (nil ref") + idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeRegular, MIMEType: "text/plain"}) + } + + addLink := func(from, to file.Path) { + ref, err := tree.AddSymLink(from, to) + require.NoError(t, err, "failed to add LINK reference to index") + require.NotNil(t, ref, "failed to add LINK reference to index (nil ref") + idx.Add(*ref, file.Metadata{Path: string(from), LinkDestination: string(to), Type: file.TypeSymLink}) + } + + // mkdir -p path/branch.d/one + // mkdir -p path/branch.d/two + // mkdir -p path/common + + // note: we need to add all paths explicitly to the index + addDir("/path") + addDir("/path/branch.d") + addDir("/path/branch.d/one") + addDir("/path/branch.d/two") + addDir("/path/common") + + // echo "first file" > path/branch.d/one/file-1.txt + // echo "forth file" > path/branch.d/one/file-4.d + // echo "multi ext file" > 
path/branch.d/one/file-4.tar.gz + // echo "hidden file" > path/branch.d/one/.file-4.tar.gz + + addFile("/path/branch.d/one/file-1.txt") + addFile("/path/branch.d/one/file-4.d") + addFile("/path/branch.d/one/file-4.tar.gz") + addFile("/path/branch.d/one/.file-4.tar.gz") + + // ln -s path/branch.d path/common/branch.d + // ln -s path/branch.d path/common/branch + // ln -s path/branch.d/one/file-4.d path/common/file-4 + // ln -s path/branch.d/one/file-1.txt path/common/file-1.d + + addLink("/path/common/branch.d", "path/branch.d") + addLink("/path/common/branch", "path/branch.d") + addLink("/path/common/file-4", "path/branch.d/one/file-4.d") + addLink("/path/common/file-1.d", "path/branch.d/one/file-1.txt") + + // echo "second file" > path/branch.d/two/file-2.txt + // echo "third file" > path/file-3.txt + + addFile("/path/branch.d/two/file-2.txt") + addFile("/path/file-3.txt") + + return idx +} + +func Test_fileExtensions(t *testing.T) { + tests := []struct { + name string + path string + want []string + }{ + { + name: "empty", + path: "", + }, + { + name: "directory", + path: "/somewhere/to/nowhere/", + }, + { + name: "directory with ext", + path: "/somewhere/to/nowhere.d/", + }, + { + name: "single extension", + path: "/somewhere/to/my.tar", + want: []string{".tar"}, + }, + { + name: "multiple extensions", + path: "/somewhere/to/my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . prefix", + path: "/somewhere/to/.my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore more . prefixes", + path: "/somewhere/to/...my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . 
suffixes", + path: "/somewhere/to/my.tar.gz...", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, fileExtensions(tt.path)) + }) + } +} + +func TestFileCatalog_GetByFileType(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input []file.Type + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get real file", + input: []file.Type{file.TypeRegular}, + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get directories", + input: []file.Type{file.TypeDirectory}, + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path"}, + Metadata: file.Metadata{ + Path: "/path", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + + Reference: 
file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/two"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common"}, + Metadata: file.Metadata{ + Path: "/path/common", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + }, + }, + { + name: "get links", + input: []file.Type{file.TypeHardLink, file.TypeSymLink}, + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch"}, + Metadata: file.Metadata{ + Path: "/path/common/branch", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/file-4"}, + Metadata: file.Metadata{ + Path: "/path/common/file-4", + LinkDestination: "path/branch.d/one/file-4.d", + Type: file.TypeSymLink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get non-existent types", + input: []file.Type{file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFIFO, file.TypeSocket, file.TypeIrregular}, + want: []IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByFileType(tt.input...) 
+ tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByExtension(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get simple extension", + input: ".txt", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get mixed type extension", + input: ".d", + want: []IndexEntry{ + { + + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + + { + + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: 
"path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get long extension", + input: ".tar.gz", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get short extension", + input: ".gz", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existent extension", + input: ".blerg-123", + want: []IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByExtension(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasename(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.txt", + want: []IndexEntry{ + { + Reference: 
file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing name", + input: "file-11.txt", + want: []IndexEntry{}, + }, + { + name: "get directory name", + input: "branch.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get symlink name", + input: "file-1.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file-1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByBasename(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasenameGlob(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.*", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + 
Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing name", + input: "blerg-*.txt", + want: []IndexEntry{}, + }, + { + name: "get directory name", + input: "bran*.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get symlink name", + input: "file?1.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file?1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByBasenameGlob(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByMimeType(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + 
}{ + { + name: "get existing file mimetype", + input: "text/plain", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing mimetype", + input: "text/bogus", + want: []IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByMIMEType(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetBasenames(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := 
[]struct { + name string + want []string + }{ + { + name: "go case", + want: []string{ + ".file-4.tar.gz", + "branch", + "branch.d", + "common", + "file-1.d", + "file-1.txt", + "file-2.txt", + "file-3.txt", + "file-4", + "file-4.d", + "file-4.tar.gz", + "one", + "path", + "two", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := fileIndex.Basenames() + assert.ElementsMatchf(t, tt.want, actual, "diff: %s", cmp.Diff(tt.want, actual)) + }) + } +} diff --git a/pkg/filetree/interfaces.go b/pkg/filetree/interfaces.go new file mode 100644 index 00000000..9d813fc4 --- /dev/null +++ b/pkg/filetree/interfaces.go @@ -0,0 +1,45 @@ +package filetree + +import ( + "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree/filenode" + "github.com/anchore/stereoscope/pkg/tree" +) + +type ReadWriter interface { + Reader + Writer +} + +type Reader interface { + AllFiles(types ...file.Type) []file.Reference + TreeReader() tree.Reader + PathReader + Walker + Copier +} + +type PathReader interface { + File(path file.Path, options ...LinkResolutionOption) (bool, *file.Resolution, error) + FilesByGlob(query string, options ...LinkResolutionOption) ([]file.Resolution, error) + AllRealPaths() []file.Path + ListPaths(dir file.Path) ([]file.Path, error) + HasPath(path file.Path, options ...LinkResolutionOption) bool +} + +type Copier interface { + Copy() (ReadWriter, error) +} + +type Walker interface { + Walk(fn func(path file.Path, f filenode.FileNode) error, conditions *WalkConditions) error +} + +type Writer interface { + AddFile(realPath file.Path) (*file.Reference, error) + AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) + AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) + AddDir(realPath file.Path) (*file.Reference, error) + RemovePath(path file.Path) error + Merge(upper Reader) error +} diff --git a/pkg/filetree/node_access.go b/pkg/filetree/node_access.go 
new file mode 100644 index 00000000..dda333bd --- /dev/null +++ b/pkg/filetree/node_access.go @@ -0,0 +1,50 @@
package filetree

import (
	"github.com/anchore/stereoscope/pkg/file"
	"github.com/anchore/stereoscope/pkg/filetree/filenode"
)

// nodeAccess represents a request into the tree for a specific path and the resulting node, which may have a different path.
type nodeAccess struct {
	RequestPath        file.Path
	FileNode           *filenode.FileNode // note: it is important that nodeAccess does not behave like FileNode (then it can be added to the tree directly)
	LeafLinkResolution []nodeAccess       // chain of link resolutions followed to reach FileNode
}

// HasFileNode indicates whether this access resolved to a concrete node. Safe to call on a nil receiver.
func (na *nodeAccess) HasFileNode() bool {
	if na == nil {
		return false
	}
	return na.FileNode != nil
}

// FileResolution converts this access (and its recorded link resolutions) into a file.Resolution,
// or returns nil when no node was resolved.
func (na *nodeAccess) FileResolution() *file.Resolution {
	if !na.HasFileNode() {
		return nil
	}
	return file.NewResolution(
		na.RequestPath,
		na.FileNode.Reference,
		newResolutions(na.LeafLinkResolution),
	)
}

// References collects all non-nil file references involved in this access: the resolved node's
// reference first, followed by references from each leaf link-resolution step.
func (na *nodeAccess) References() []file.Reference {
	if !na.HasFileNode() {
		return nil
	}
	var refs []file.Reference

	if na.FileNode.Reference != nil {
		refs = append(refs, *na.FileNode.Reference)
	}

	for _, l := range na.LeafLinkResolution {
		if l.HasFileNode() && l.FileNode.Reference != nil {
			refs = append(refs, *l.FileNode.Reference)
		}
	}

	return refs
}
diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go new file mode 100644 index 00000000..e5152ab7 --- /dev/null +++ b/pkg/filetree/search.go @@ -0,0 +1,477 @@
package filetree

import (
	"fmt"
	"path"
	"sort"

	"github.com/anchore/stereoscope/internal/log"
	"github.com/anchore/stereoscope/pkg/filetree/filenode"
	"github.com/anchore/stereoscope/pkg/tree/node"

	"github.com/anchore/stereoscope/pkg/file"
	"github.com/bmatcuk/doublestar/v4"
)

// Searcher is a facade for searching a file tree with optional indexing support.
+type Searcher interface { + SearchByPath(path string, options ...LinkResolutionOption) (*file.Resolution, error) + SearchByGlob(patterns string, options ...LinkResolutionOption) ([]file.Resolution, error) + SearchByMIMEType(mimeTypes ...string) ([]file.Resolution, error) +} + +type searchContext struct { + tree *FileTree // this is the tree which all index search results are filtered against + index IndexReader // this index is relative to one or more trees, not just necessarily one + + // the following enables correct link resolution when searching via the index + linkBackwardRefs map[node.ID]node.IDSet // {link-destination-node-id: str([link-node-id, ...])} +} + +func NewSearchContext(tree Reader, index IndexReader) Searcher { + c := &searchContext{ + tree: tree.(*FileTree), + index: index, + linkBackwardRefs: make(map[node.ID]node.IDSet), + } + + if err := c.buildLinkResolutionIndex(); err != nil { + log.WithFields("error", err).Warn("unable to build link resolution index for filetree search context") + } + + return c +} + +func (sc *searchContext) buildLinkResolutionIndex() error { + entries, err := sc.index.GetByFileType(file.TypeSymLink, file.TypeHardLink) + if err != nil { + return err + } + + // filter the results relative to the tree + nodes, err := sc.fileNodesInTree(entries) + if err != nil { + return err + } + + // note: the remaining references are all links that exist in the tree + + for _, fn := range nodes { + destinationFna, err := sc.tree.file(fn.RenderLinkDestination()) + if err != nil { + return fmt.Errorf("unable to get node for path=%q: %w", fn.RealPath, err) + } + + if !destinationFna.HasFileNode() { + // we were unable to resolve the link destination, this could be due to the fact that the destination simply + continue + } + + linkID := fn.ID() + destinationID := destinationFna.FileNode.ID() + + // add backward reference... 
+ if _, ok := sc.linkBackwardRefs[destinationID]; !ok { + sc.linkBackwardRefs[destinationID] = node.NewIDSet() + } + sc.linkBackwardRefs[destinationID].Add(linkID) + } + + return nil +} + +func (sc searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.Resolution, error) { + // TODO: one day this could leverage indexes outside of the tree, but today this is not implemented + log.WithFields("path", path).Trace("searching filetree by path") + + options = append(options, FollowBasenameLinks) + _, ref, err := sc.tree.File(file.Path(path), options...) + return ref, err +} + +func (sc searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.Resolution, error) { + log.WithFields("types", mimeTypes).Trace("searching filetree by MIME types") + + var fileEntries []IndexEntry + + for _, mType := range mimeTypes { + entries, err := sc.index.GetByMIMEType(mType) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) + } + fileEntries = append(fileEntries, entries...) + } + + refs, err := sc.referencesInTree(fileEntries) + if err != nil { + return nil, err + } + + sort.Sort(file.Resolutions(refs)) + + return refs, nil +} + +// add case for status.d/* like things that hook up directly into filetree.ListPaths() + +func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.Resolution, error) { + log.WithFields("glob", pattern).Trace("searching filetree by glob") + + if sc.index == nil { + options = append(options, FollowBasenameLinks) + refs, err := sc.tree.FilesByGlob(pattern, options...) + if err != nil { + return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) + } + sort.Sort(file.Resolutions(refs)) + return refs, nil + } + + var allRefs []file.Resolution + for _, request := range parseGlob(pattern) { + refs, err := sc.searchByRequest(request, options...) 
+ if err != nil { + return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) + } + allRefs = append(allRefs, refs...) + } + + sort.Sort(file.Resolutions(allRefs)) + + return allRefs, nil +} + +func (sc searchContext) searchByRequest(request searchRequest, options ...LinkResolutionOption) ([]file.Resolution, error) { + switch request.searchBasis { + case searchByFullPath: + options = append(options, FollowBasenameLinks) + ref, err := sc.SearchByPath(request.value, options...) + if err != nil { + return nil, err + } + if ref == nil { + return nil, nil + } + return []file.Resolution{*ref}, nil + case searchByBasename: + indexes, err := sc.index.GetByBasename(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by basename=%q: %w", request.value, err) + } + refs, err := sc.referencesWithRequirement(request.requirement, indexes) + if err != nil { + return nil, err + } + return refs, nil + case searchByBasenameGlob: + indexes, err := sc.index.GetByBasenameGlob(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by basename-glob=%q: %w", request.value, err) + } + refs, err := sc.referencesWithRequirement(request.requirement, indexes) + if err != nil { + return nil, err + } + return refs, nil + case searchByExtension: + indexes, err := sc.index.GetByExtension(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by extension=%q: %w", request.value, err) + } + refs, err := sc.referencesWithRequirement(request.requirement, indexes) + if err != nil { + return nil, err + } + return refs, nil + case searchBySubDirectory: + return sc.searchByParentBasename(request) + + case searchByGlob: + log.WithFields("glob", request.value).Trace("glob provided is an expensive search, consider using a more specific indexed search") + + options = append(options, FollowBasenameLinks) + return sc.tree.FilesByGlob(request.value, options...) 
+ } + + return nil, fmt.Errorf("invalid search request: %+v", request.searchBasis) +} + +func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.Resolution, error) { + indexes, err := sc.index.GetByBasename(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by extension=%q: %w", request.value, err) + } + refs, err := sc.referencesWithRequirement(request.requirement, indexes) + if err != nil { + return nil, err + } + + var results []file.Resolution + for _, ref := range refs { + paths, err := sc.tree.ListPaths(ref.RequestPath) + if err != nil { + // this may not be a directory, that's alright, just continue... + continue + } + for _, p := range paths { + _, nestedRef, err := sc.tree.File(p, FollowBasenameLinks) + if err != nil { + return nil, fmt.Errorf("unable to fetch file reference from parent path %q for path=%q: %w", ref.RequestPath, p, err) + } + if !nestedRef.HasReference() { + continue + } + // note: the requirement was written for the parent, so we need to consider the new + // child path by adding /* to match all children + matches, err := matchesRequirement(*nestedRef, request.requirement+"/*") + if err != nil { + return nil, err + } + if matches { + results = append(results, *nestedRef) + } + } + } + + return results, nil +} + +func (sc searchContext) referencesWithRequirement(requirement string, entries []IndexEntry) ([]file.Resolution, error) { + refs, err := sc.referencesInTree(entries) + if err != nil { + return nil, err + } + + if requirement == "" { + return refs, nil + } + + var results []file.Resolution + for _, ref := range refs { + matches, err := matchesRequirement(ref, requirement) + if err != nil { + return nil, err + } + if matches { + results = append(results, ref) + } + } + + return results, nil +} + +func matchesRequirement(ref file.Resolution, requirement string) (bool, error) { + allRefPaths := ref.AllRequestPaths() + for _, p := range allRefPaths { + matched, err := 
			doublestar.Match(requirement, string(p))
		if err != nil {
			return false, fmt.Errorf("unable to match glob pattern=%q to path=%q: %w", requirement, p, err)
		}
		if matched {
			return true, nil
		}
	}
	return false, nil
}

// cacheRequest is the memoization key for pathsToNode results (per real path).
type cacheRequest struct {
	RealPath file.Path
}

// cacheResult is the memoized outcome of a pathsToNode call.
type cacheResult struct {
	Paths file.PathSet
	Error error
}

// allPathsToNode returns every access path that leads to the given node
// (its real path plus any symlink-derived paths), sorted deterministically.
// Returns nil for a nil node.
func (sc searchContext) allPathsToNode(fn *filenode.FileNode) ([]file.Path, error) {
	if fn == nil {
		return nil, nil
	}

	// tracks real paths on the current resolution stack to break symlink cycles
	observedPaths := file.NewPathSet()

	// memoizes per-node results for the duration of this call
	cache := map[cacheRequest]cacheResult{}

	paths, err := sc.pathsToNode(fn, observedPaths, cache)
	if err != nil {
		return nil, err
	}

	pathsList := paths.List()
	sort.Sort(file.Paths(pathsList))

	// TODO: filter to only paths that exist in the tree

	return pathsList, nil
}

// pathsToNode is a memoizing wrapper around _pathsToNode, keyed by the node's
// real path.
func (sc searchContext) pathsToNode(fn *filenode.FileNode, observedPaths file.PathSet, cache map[cacheRequest]cacheResult) (file.PathSet, error) {
	req := cacheRequest{
		RealPath: fn.RealPath,
	}

	if result, ok := cache[req]; ok {
		return result.Paths, result.Error
	}

	paths, err := sc._pathsToNode(fn, observedPaths, cache)

	// cache both the successful and failed outcome
	cache[req] = cacheResult{
		Paths: paths,
		Error: err,
	}

	return paths, err
}

// _pathsToNode computes all access paths to fn by (1) following reverse link
// references pointing at the node and (2) climbing each discovered path toward
// the root, expanding any ancestor links found along the way. observedPaths
// guards against symlink cycles.
// nolint: funlen
func (sc searchContext) _pathsToNode(fn *filenode.FileNode, observedPaths file.PathSet, cache map[cacheRequest]cacheResult) (file.PathSet, error) {
	if fn == nil {
		return nil, nil
	}

	paths := file.NewPathSet()
	paths.Add(fn.RealPath)

	if observedPaths != nil {
		if observedPaths.Contains(fn.RealPath) {
			// we've already observed this path, so we can stop here
			return nil, nil
		}
		observedPaths.Add(fn.RealPath)
	}

	nodeID := fn.ID()

	// addPath joins an optional child suffix onto each given path and records it
	addPath := func(suffix string, ps ...file.Path) {
		for _, p := range ps {
			if suffix != "" {
				p = file.Path(path.Join(string(p), suffix))
			}
			paths.Add(p)
		}
	}

	// add all paths to the node that are linked to it
	for _, linkSrcID := range
		sc.linkBackwardRefs[nodeID].List() {
		pfn := sc.tree.tree.Node(linkSrcID)
		if pfn == nil {
			log.WithFields("id", nodeID, "parent", linkSrcID).Warn("found non-existent parent link")
			continue
		}
		// recurse through the link source to pick up its access paths too
		linkSrcPaths, err := sc.pathsToNode(pfn.(*filenode.FileNode), observedPaths, cache)
		if err != nil {
			return nil, err
		}

		addPath("", linkSrcPaths.List()...)
	}

	// crawl up the tree to find all paths to our parent and repeat
	for _, p := range paths.List() {
		nextNestedSuffix := p.Basename()
		allParentPaths := p.ConstituentPaths()
		// walk from the deepest ancestor upward (reverse-sorted)
		sort.Sort(sort.Reverse(file.Paths(allParentPaths)))

		for _, pp := range allParentPaths {
			if pp == "/" {
				break
			}

			// accumulate the relative suffix from the current ancestor down to p
			nestedSuffix := nextNestedSuffix
			nextNestedSuffix = path.Join(pp.Basename(), nestedSuffix)

			pna, err := sc.tree.node(pp, linkResolutionStrategy{
				FollowAncestorLinks: true,
				FollowBasenameLinks: false,
			})
			if err != nil {
				return nil, fmt.Errorf("unable to get parent node for path=%q: %w", pp, err)
			}

			if !pna.HasFileNode() {
				continue
			}

			parentLinkPaths, err := sc.pathsToNode(pna.FileNode, observedPaths, cache)
			if err != nil {
				return nil, err
			}
			addPath(nestedSuffix, parentLinkPaths.List()...)
		}
	}
	// done resolving through this node; allow it to be revisited on other branches
	observedPaths.Remove(fn.RealPath)

	return paths, nil
}

// fileNodesInTree filters the given index entries down to file nodes that
// exist in the tree with exactly the same file reference ID (no basename link
// resolution is applied).
func (sc searchContext) fileNodesInTree(fileEntries []IndexEntry) ([]*filenode.FileNode, error) {
	var nodes []*filenode.FileNode
allFileEntries:
	for _, entry := range fileEntries {
		// note: it is important that we don't enable any basename link resolution
		na, err := sc.tree.file(entry.Reference.RealPath)
		if err != nil {
			return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.Reference.RealPath, err)
		}

		if !na.HasFileNode() {
			continue
		}

		// only check the resolved node matches the index entries reference, not via link resolutions...
		if na.FileNode.Reference != nil && na.FileNode.Reference.ID() == entry.Reference.ID() {
			nodes = append(nodes, na.FileNode)
			continue allFileEntries
		}

		// we did not find a matching file ID in the tree, so drop this entry
	}
	return nodes, nil
}

// referencesInTree does two things relative to the index entries given:
// 1) it expands the index entries to include all possible access paths to the file node (by considering all possible link resolutions)
// 2) it filters the index entries to only include those that exist in the tree
func (sc searchContext) referencesInTree(fileEntries []IndexEntry) ([]file.Resolution, error) {
	var refs []file.Resolution

	for _, entry := range fileEntries {
		na, err := sc.tree.file(entry.Reference.RealPath, FollowBasenameLinks)
		if err != nil {
			return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.Reference.RealPath, err)
		}

		// this filters out any index entries that do not exist in the tree
		if !na.HasFileNode() {
			continue
		}

		// expand the index results with more possible access paths from the link resolution cache
		var expandedRefs []file.Resolution
		allPathsToNode, err := sc.allPathsToNode(na.FileNode)
		if err != nil {
			return nil, fmt.Errorf("unable to get all paths to node for path=%q: %w", entry.Reference.RealPath, err)
		}
		for _, p := range allPathsToNode {
			_, ref, err := sc.tree.File(p, FollowBasenameLinks)
			if err != nil {
				return nil, fmt.Errorf("unable to get ref for path=%q: %w", p, err)
			}
			if !ref.HasReference() {
				continue
			}
			expandedRefs = append(expandedRefs, *ref)
		}

		// keep only expanded resolutions that actually reference the original index entry
		for _, ref := range expandedRefs {
			for _, accessRef := range ref.References() {
				if accessRef.ID() == entry.Reference.ID() {
					// we know this entry exists in the tree, keep track of the reference for this file
					refs = append(refs, ref)
				}
			}
		}
	}
	return refs, nil
}
diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go
new file mode 100644
index
00000000..6033646c --- /dev/null +++ b/pkg/filetree/search_test.go @@ -0,0 +1,1015 @@ +package filetree + +import ( + "fmt" + "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree/filenode" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" +) + +func Test_searchContext_SearchByPath(t *testing.T) { + type fields struct { + tree *FileTree + index Index + } + type args struct { + path string + options []LinkResolutionOption + } + + tree := New() + ref, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, ref) + + idx := NewIndex() + idx.Add(*ref, file.Metadata{MIMEType: "plain/text"}) + + defaultFields := fields{ + tree: tree, + index: idx, + } + + tests := []struct { + name string + fields fields + args args + want *file.Resolution + wantErr require.ErrorAssertionFunc + }{ + { + name: "path exists", + fields: defaultFields, + args: args{ + path: "/path/to/file.txt", + }, + want: &file.Resolution{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + { + name: "path does not exists", + fields: defaultFields, + args: args{ + path: "/NOT/path/to/file.txt", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + i := searchContext{ + tree: tt.fields.tree, + index: tt.fields.index, + } + got, err := i.SearchByPath(tt.args.path, tt.args.options...) 
+ tt.wantErr(t, err, fmt.Sprintf("SearchByPath(%v, %v)", tt.args.path, tt.args.options)) + if err != nil { + return + } + + opts := []cmp.Option{ + cmpopts.IgnoreFields(file.Reference{}, "id"), + } + + if d := cmp.Diff(tt.want, got, opts...); d != "" { + t.Errorf("SearchByPath() mismatch (-want +got):\n%s", d) + } + }) + } +} + +func Test_searchContext_SearchByGlob(t *testing.T) { + type fields struct { + tree *FileTree + index Index + } + type args struct { + glob string + options []LinkResolutionOption + } + + tree := New() + doubleLinkToPathRef, err := tree.AddSymLink("/double-link-to-path", "/link-to-path") + require.NoError(t, err) + require.NotNil(t, doubleLinkToPathRef) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, linkToFileRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + toRef, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, toRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*doubleLinkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*toRef, file.Metadata{Type: file.TypeDirectory}) + + defaultFields := fields{ + tree: tree, + index: idx, + } + + tests := []struct { + name string + fields fields + args args + want []file.Resolution + wantErr require.ErrorAssertionFunc + }{ + { + name: "path exists", + fields: defaultFields, + args: args{ + glob: "/**/t?/fil?.txt", + }, + // note: result "/link-to-file" resolves to the file but does not show up since the request path + // does not match the requirement glob + want: []file.Resolution{ + { + + 
RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + { + + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + { + + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "ancestor access path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/link-to-path/to/file.txt", + }, + want: []file.Resolution{ + { + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "access all children", + fields: defaultFields, + args: args{ + glob: "**/path/to/*", + }, + want: []file.Resolution{ + { + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "access all children as path", + fields: defaultFields, + args: args{ + glob: "/path/to/*", + }, + want: []file.Resolution{ + { + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "access via symlink for all children", + fields: defaultFields, + args: args{ + glob: "**/link-to-path/to/*", + }, + want: []file.Resolution{ + { + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "multi ancestor access path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. 
However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/double-link-to-path/to/file.txt", + }, + want: []file.Resolution{ + { + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "leaf access path exists", + fields: defaultFields, + args: args{ + glob: "**/link-to-file", + }, + want: []file.Resolution{ + { + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + LinkResolutions: []file.Resolution{ + { + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: "/link-to-file", + }, + }, + }, + }, + }, + }, + { + name: "ancestor access path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/link-to-path/to/file.txt", + }, + want: []file.Resolution{ + { + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "by extension", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. 
+ glob: "**/*.txt", + }, + want: []file.Resolution{ + { + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{RealPath: "/path/to/file.txt"}, + }, + { + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + // note: this is NOT expected since the input glob does not match against the request path + //{ + // Resolution: file.Resolution{ + // RequestPath: "/link-to-file", + // Reference: &file.Reference{ + // RealPath: "/path/to/file.txt", + // }, + // }, + // LinkResolutions: []file.Resolution{ + // { + // RequestPath: "/link-to-file", + // Reference: &file.Reference{RealPath: "/link-to-file"}, + // }, + // }, + //}, + { + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "path does not exists", + fields: defaultFields, + args: args{ + glob: "/NOT/**/file", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + sc := NewSearchContext(tt.fields.tree, tt.fields.index) + got, err := sc.SearchByGlob(tt.args.glob, tt.args.options...) + tt.wantErr(t, err, fmt.Sprintf("SearchByGlob(%v, %v)", tt.args.glob, tt.args.options)) + if err != nil { + return + } + + opts := []cmp.Option{ + cmpopts.IgnoreFields(file.Reference{}, "id"), + } + + if d := cmp.Diff(tt.want, got, opts...); d != "" { + t.Errorf("SearchByGlob() mismatch (-want +got):\n%s", d) + } + + expected, err := tt.fields.tree.FilesByGlob(tt.args.glob, tt.args.options...) 
+ require.NoError(t, err) + + if d := cmp.Diff(expected, got, opts...); d != "" { + t.Errorf("Difference relative to tree results mismatch (-want +got):\n%s", d) + } + }) + } +} + +func Test_searchContext_SearchByMIMEType(t *testing.T) { + type fields struct { + tree *FileTree + index Index + } + type args struct { + mimeTypes string + } + + tree := New() + ref, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, ref) + + idx := NewIndex() + idx.Add(*ref, file.Metadata{MIMEType: "plain/text"}) + + defaultFields := fields{ + tree: tree, + index: idx, + } + + tests := []struct { + name string + fields fields + args args + want []file.Resolution + wantErr require.ErrorAssertionFunc + }{ + { + name: "types exists", + fields: defaultFields, + args: args{ + mimeTypes: "plain/text", + }, + want: []file.Resolution{ + { + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "types do not exists", + fields: defaultFields, + args: args{ + mimeTypes: "octetstream", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + i := searchContext{ + tree: tt.fields.tree, + index: tt.fields.index, + } + got, err := i.SearchByMIMEType(tt.args.mimeTypes) + tt.wantErr(t, err, fmt.Sprintf("SearchByMIMEType(%v)", tt.args.mimeTypes)) + if err != nil { + return + } + + opts := []cmp.Option{ + cmpopts.IgnoreFields(file.Reference{}, "id"), + } + + if d := cmp.Diff(tt.want, got, opts...); d != "" { + t.Errorf("SearchByMIMEType() mismatch (-want +got):\n%s", d) + } + }) + } +} + +func Test_searchContext_allPathsToNode(t *testing.T) { + type input struct { + query *filenode.FileNode + sc *searchContext + } + + tests := []struct { + name string + input input + want []file.Path + wantErr require.ErrorAssertionFunc + }{ + { + name: "simple dir", + want: []file.Path{ + "/path/to", + }, + input: 
func() input { + tree := New() + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + + na, err := tree.node("/path/to", linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equal(t, file.Path("/path/to"), na.FileNode.RealPath) + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "dead symlink", + want: []file.Path{ + "/path/to/file.txt", + }, + input: func() input { + tree := New() + + deafLinkRef, err := tree.AddSymLink("/link-to-file", "/path/to/dead/file.txt") + require.NoError(t, err) + require.NotNil(t, deafLinkRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*deafLinkRef, file.Metadata{Type: file.TypeSymLink}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "symlink triangle cycle", + want: []file.Path{ + "/1", + "/2", + "/3", + }, + input: func() input { + tree := New() + + link1, err := tree.AddSymLink("/1", "/2") + require.NoError(t, err) + require.NotNil(t, link1) + + link2, err := tree.AddSymLink("/2", "/3") + require.NoError(t, err) + require.NotNil(t, 
link2) + + link3, err := tree.AddSymLink("/3", "/1") + require.NoError(t, err) + require.NotNil(t, link3) + + idx := NewIndex() + idx.Add(*link1, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*link2, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*link3, file.Metadata{Type: file.TypeSymLink}) + + na, err := tree.node(link1.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, link1.ID(), na.FileNode.Reference.ID(), "query node should be the same as the first link") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + // note: this isn't a real link cycle, but it does look like one while resolving from a leaf to the root + name: "reverse symlink cycle", + want: []file.Path{ + "/bin/ttyd", + "/usr/bin/X11/ttyd", + "/usr/bin/ttyd", + }, + input: func() input { + tree := New() + + usrRef, err := tree.AddDir("/usr") + require.NoError(t, err) + require.NotNil(t, usrRef) + + usrBinRef, err := tree.AddDir("/usr/bin") + require.NoError(t, err) + require.NotNil(t, usrBinRef) + + ttydRef, err := tree.AddFile("/usr/bin/ttyd") + require.NoError(t, err) + require.NotNil(t, ttydRef) + + binLinkRef, err := tree.AddSymLink("/bin", "usr/bin") + require.NoError(t, err) + require.NotNil(t, binLinkRef) + + x11LinkRef, err := tree.AddSymLink("/usr/bin/X11", ".") + require.NoError(t, err) + require.NotNil(t, x11LinkRef) + + idx := NewIndex() + idx.Add(*usrRef, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*usrBinRef, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*binLinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*x11LinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*ttydRef, file.Metadata{Type: file.TypeRegular}) + + na, err := tree.node(ttydRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + 
FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, ttydRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as usr/bin/ttyd binary") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "single leaf symlink", + want: []file.Path{ + "/link-to-file", + "/path/to/file.txt", + }, + input: func() input { + tree := New() + + linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, linkToFileRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymLink}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep leaf symlink", + want: []file.Path{ + "/double-link-to-file", + "/link-to-file", + "/path/to/file.txt", + }, + input: func() input { + tree := New() + + doubleLinkToFileRef, err := tree.AddSymLink("/double-link-to-file", "/link-to-file") + require.NoError(t, err) + require.NotNil(t, doubleLinkToFileRef) + + linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, linkToFileRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + 
idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*doubleLinkToFileRef, file.Metadata{Type: file.TypeSymLink}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "single ancestor symlink", + want: []file.Path{ + "/link-to-to/file.txt", + "/path/to/file.txt", + }, + input: func() input { + tree := New() + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep, single sibling ancestor symlink", + want: []file.Path{ + 
"/link-to-path/to/file.txt", + "/link-to-to/file.txt", + "/path/to/file.txt", + }, + input: func() input { + tree := New() + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep, multiple sibling ancestor symlink", + want: []file.Path{ + "/another-link-to-path/to/file.txt", + "/another-link-to-to/file.txt", + "/link-to-path/to/file.txt", + "/link-to-to/file.txt", + "/path/to/file.txt", + }, + input: func() input { + tree := New() + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + anotherLinkToPathRef, err := tree.AddSymLink("/another-link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, anotherLinkToPathRef) 
+ + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + anotherLinkToToRef, err := tree.AddSymLink("/another-link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, anotherLinkToToRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*anotherLinkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*anotherLinkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep, multiple nested ancestor symlink", + want: []file.Path{ + "/link-to-path/link-to-another/file.txt", + "/link-to-path/to/another/file.txt", + "/link-to-path/to/link-to-file", + "/link-to-to/another/file.txt", + "/link-to-to/link-to-file", + "/path/link-to-another/file.txt", + "/path/to/another/file.txt", + "/path/to/link-to-file", + }, + input: func() input { + tree := New() + + linkToAnotherViaLinkRef, err := tree.AddSymLink("/path/link-to-another", "/link-to-to/another") + require.NoError(t, err) + require.NotNil(t, linkToAnotherViaLinkRef) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + 
linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + pathToLinkToFileRef, err := tree.AddSymLink("/path/to/link-to-file", "/path/to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, pathToLinkToFileRef) + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + dirAnother, err := tree.AddDir("/path/to/another") + require.NoError(t, err) + require.NotNil(t, dirAnother) + + fileRef, err := tree.AddFile("/path/to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*dirAnother, file.Metadata{Type: file.TypeDirectory}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "relative, 2 deep, multiple nested ancestor symlink", + want: []file.Path{ + "/link-to-path/link-to-another/file.txt", + "/link-to-path/to/another/file.txt", + "/link-to-path/to/link-to-file", + "/link-to-to/another/file.txt", + "/link-to-to/link-to-file", + "/path/link-to-another/file.txt", + "/path/to/another/file.txt", + "/path/to/link-to-file", + }, + input: func() input { + tree := New() 
+ + linkToAnotherViaLinkRef, err := tree.AddSymLink("/path/link-to-another", "../link-to-to/another") + require.NoError(t, err) + require.NotNil(t, linkToAnotherViaLinkRef) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "./path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "./path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + pathToLinkToFileRef, err := tree.AddSymLink("/path/to/link-to-file", "../to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, pathToLinkToFileRef) + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + dirAnother, err := tree.AddDir("/path/to/another") + require.NoError(t, err) + require.NotNil(t, dirAnother) + + fileRef, err := tree.AddFile("/path/to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*dirAnother, file.Metadata{Type: file.TypeDirectory}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + 
tt.wantErr = require.NoError + } + + got, err := tt.input.sc.allPathsToNode(tt.input.query) + tt.wantErr(t, err, fmt.Sprintf("allPathsToNode(%v)", tt.input.query)) + if err != nil { + return + } + + assert.ElementsMatchf(t, tt.want, got, cmp.Diff(tt.want, got), "expected and actual paths should match") + }) + } +} diff --git a/pkg/filetree/union_filetree.go b/pkg/filetree/union_filetree.go index 5e30c0a9..c1606a42 100644 --- a/pkg/filetree/union_filetree.go +++ b/pkg/filetree/union_filetree.go @@ -3,28 +3,28 @@ package filetree import "fmt" type UnionFileTree struct { - trees []*FileTree + trees []ReadWriter } func NewUnionFileTree() *UnionFileTree { return &UnionFileTree{ - trees: make([]*FileTree, 0), + trees: make([]ReadWriter, 0), } } -func (u *UnionFileTree) PushTree(t *FileTree) { +func (u *UnionFileTree) PushTree(t ReadWriter) { u.trees = append(u.trees, t) } -func (u *UnionFileTree) Squash() (*FileTree, error) { +func (u *UnionFileTree) Squash() (ReadWriter, error) { switch len(u.trees) { case 0: - return NewFileTree(), nil + return New(), nil case 1: return u.trees[0].Copy() } - var squashedTree *FileTree + var squashedTree ReadWriter var err error for layerIdx, refTree := range u.trees { if layerIdx == 0 { @@ -35,7 +35,7 @@ func (u *UnionFileTree) Squash() (*FileTree, error) { continue } - if err = squashedTree.merge(refTree); err != nil { + if err = squashedTree.Merge(refTree); err != nil { return nil, fmt.Errorf("unable to squash layer=%d : %w", layerIdx, err) } } diff --git a/pkg/filetree/union_filetree_test.go b/pkg/filetree/union_filetree_test.go index caeacc14..adbd6103 100644 --- a/pkg/filetree/union_filetree_test.go +++ b/pkg/filetree/union_filetree_test.go @@ -8,7 +8,7 @@ import ( func TestUnionFileTree_Squash(t *testing.T) { ut := NewUnionFileTree() - base := NewFileTree() + base := New() base.AddFile("/home/wagoodman/some/stuff-1.txt") originalNode, _ := base.AddFile("/home/wagoodman/some/stuff-2-overlap.txt") @@ -16,7 +16,7 @@ func 
TestUnionFileTree_Squash(t *testing.T) { originalMore, _ := base.AddFile("/home/wagoodman/more") originalMoreDir, _ := base.AddDir("/home/wagoodman/moredir") - top := NewFileTree() + top := New() top.AddFile("/etc/redhat-release") // note: override /home/wagoodman/more (a file) as a directory top.AddFile("/home/wagoodman/more/things.txt") @@ -65,7 +65,7 @@ func TestUnionFileTree_Squash(t *testing.T) { } _, f, _ = base.File("/home/wagoodman/more") - if f == nil { + if f == nil || f.Reference == nil { t.Fatal("base was never created") } @@ -74,17 +74,17 @@ func TestUnionFileTree_Squash(t *testing.T) { } _, f, _ = top.File("/home/wagoodman/more") - if f != nil { + if f.Reference != nil { t.Fatal("top file should have been implicitly nil but wasn't") } _, f, _ = squashed.File("/home/wagoodman/more") - if f != nil { + if f.Reference != nil { t.Fatal("file override to a dir has original properties") } _, f, _ = squashed.File("/home/wagoodman/moredir") - if f == nil { + if f == nil || f.Reference == nil { t.Fatal("dir override to a dir is missing original properties") } if originalMoreDir.ID() != f.ID() { @@ -95,13 +95,13 @@ func TestUnionFileTree_Squash(t *testing.T) { func TestUnionFileTree_Squash_whiteout(t *testing.T) { ut := NewUnionFileTree() - base := NewFileTree() + base := New() base.AddFile("/some/stuff-1.txt") base.AddFile("/some/stuff-2.txt") base.AddFile("/other/things-1.txt") - top := NewFileTree() + top := New() top.AddFile("/some/" + file.OpaqueWhiteout) top.AddFile("/other/" + file.WhiteoutPrefix + "things-1.txt") diff --git a/pkg/image/content_helpers.go b/pkg/image/content_helpers.go index 50709085..b38f811a 100644 --- a/pkg/image/content_helpers.go +++ b/pkg/image/content_helpers.go @@ -8,43 +8,20 @@ import ( "github.com/anchore/stereoscope/pkg/filetree" ) -// fetchFileContentsByPath is a common helper function for resolving the file contents for a path from the file +// fetchReaderByPath is a common helper function for resolving the file contents for 
a path from the file // catalog relative to the given tree. -func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, path file.Path) (io.ReadCloser, error) { - exists, fileReference, err := ft.File(path, filetree.FollowBasenameLinks) +func fetchReaderByPath(ft filetree.Reader, fileCatalog FileCatalogReader, path file.Path) (io.ReadCloser, error) { + exists, refVia, err := ft.File(path, filetree.FollowBasenameLinks) if err != nil { return nil, err } - if !exists && fileReference == nil { + if !exists && refVia == nil || refVia.Reference == nil { return nil, fmt.Errorf("could not find file path in Tree: %s", path) } - reader, err := fileCatalog.FileContents(*fileReference) + reader, err := fileCatalog.Open(*refVia.Reference) if err != nil { return nil, err } return reader, nil } - -// fetchFileContentsByPath is a common helper function for resolving file references for a MIME type from the file -// catalog relative to the given tree. -func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.Reference, error) { - fileEntries, err := fileCatalog.GetByMIMEType(mType) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by MIME type: %w", err) - } - - var refs []file.Reference - for _, entry := range fileEntries { - _, ref, err := ft.File(entry.File.RealPath, filetree.FollowBasenameLinks) - if err != nil { - return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.File.RealPath, err) - } - - // we know this entry exists in the tree, keep track of the reference for this file - if ref != nil && ref.ID() == entry.File.ID() { - refs = append(refs, *ref) - } - } - return refs, nil -} diff --git a/pkg/image/docker/tarball_provider.go b/pkg/image/docker/tarball_provider.go index 6eb5ce73..663f809e 100644 --- a/pkg/image/docker/tarball_provider.go +++ b/pkg/image/docker/tarball_provider.go @@ -82,5 +82,5 @@ func (p *TarballImageProvider) Provide(_ context.Context, userMetadata ...image. 
return nil, err } - return image.NewImage(img, contentTempDir, metadata...), nil + return image.New(img, contentTempDir, metadata...), nil } diff --git a/pkg/image/file_catalog.go b/pkg/image/file_catalog.go index 3f62ed97..e8ec637a 100644 --- a/pkg/image/file_catalog.go +++ b/pkg/image/file_catalog.go @@ -6,104 +6,69 @@ import ( "sync" "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree" ) -var ErrFileNotFound = fmt.Errorf("could not find file") +type FileCatalogReader interface { + Layer(file.Reference) *Layer + Open(file.Reference) (io.ReadCloser, error) + filetree.IndexReader +} // FileCatalog represents all file metadata and source tracing for all files contained within the image layer // blobs (i.e. everything except for the image index/manifest/metadata files). type FileCatalog struct { - sync.RWMutex - catalog map[file.ID]FileCatalogEntry - byMIMEType map[string][]file.ID -} - -// FileCatalogEntry represents all stored metadata for a single file reference. -type FileCatalogEntry struct { - File file.Reference - Metadata file.Metadata - Layer *Layer - Contents file.Opener + *sync.RWMutex + filetree.Index + layerByID map[file.ID]*Layer + openerByID map[file.ID]file.Opener } // NewFileCatalog returns an empty FileCatalog. -func NewFileCatalog() FileCatalog { - return FileCatalog{ - catalog: make(map[file.ID]FileCatalogEntry), - byMIMEType: make(map[string][]file.ID), +func NewFileCatalog() *FileCatalog { + return &FileCatalog{ + RWMutex: &sync.RWMutex{}, + Index: filetree.NewIndex(), + layerByID: make(map[file.ID]*Layer), + openerByID: make(map[file.ID]file.Opener), } } // Add creates a new FileCatalogEntry for the given file reference and metadata, cataloged by the ID of the // file reference (overwriting any existing entries without warning). 
func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener file.Opener) { + c.Index.Add(f, m) // note: the index is already thread-safe + c.addImageReferences(f.ID(), l, opener) +} + +func (c *FileCatalog) addImageReferences(id file.ID, l *Layer, opener file.Opener) { c.Lock() defer c.Unlock() - if m.MIMEType != "" { - // an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have - // the contents and the MIME type could not be determined then the default value is application/octet-stream. - c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], f.ID()) - } - c.catalog[f.ID()] = FileCatalogEntry{ - File: f, - Metadata: m, - Layer: l, - Contents: opener, - } + c.layerByID[id] = l + c.openerByID[id] = opener } -// Exists indicates if the given file reference exists in the catalog. -func (c *FileCatalog) Exists(f file.Reference) bool { +func (c *FileCatalog) Layer(f file.Reference) *Layer { c.RLock() defer c.RUnlock() - _, ok := c.catalog[f.ID()] - return ok -} -// Get fetches a FileCatalogEntry for the given file reference, or returns an error if the file reference has not -// been added to the catalog. -func (c *FileCatalog) Get(f file.Reference) (FileCatalogEntry, error) { - c.RLock() - defer c.RUnlock() - value, ok := c.catalog[f.ID()] - if !ok { - return FileCatalogEntry{}, ErrFileNotFound - } - return value, nil + return c.layerByID[f.ID()] } -func (c *FileCatalog) GetByMIMEType(mType string) ([]FileCatalogEntry, error) { +// Open returns a io.ReadCloser for the given file reference. The underlying io.ReadCloser will not attempt to +// allocate resources until the first read is performed. 
+func (c *FileCatalog) Open(f file.Reference) (io.ReadCloser, error) { c.RLock() defer c.RUnlock() - fileIDs, ok := c.byMIMEType[mType] - if !ok { - return nil, nil - } - var entries []FileCatalogEntry - for _, id := range fileIDs { - entry, ok := c.catalog[id] - if !ok { - return nil, fmt.Errorf("could not find file: %+v", id) - } - entries = append(entries, entry) - } - return entries, nil -} - -// FetchContents reads the file contents for the given file reference from the underlying image/layer blob. An error -// is returned if there is no file at the given path and layer or the read operation cannot continue. -func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) { - c.RLock() - defer c.RUnlock() - catalogEntry, ok := c.catalog[f.ID()] + opener, ok := c.openerByID[f.ID()] if !ok { return nil, fmt.Errorf("could not find file: %+v", f.RealPath) } - if catalogEntry.Contents == nil { + if opener == nil { return nil, fmt.Errorf("no contents available for file: %+v", f.RealPath) } - return catalogEntry.Contents(), nil + return opener(), nil } diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index b7211a7b..a6100156 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -6,11 +6,17 @@ package image import ( "crypto/sha256" "fmt" + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "io" "os" "os/exec" "path" "path/filepath" + "strings" "testing" "github.com/go-test/deep" @@ -29,93 +35,18 @@ var ( tarCachePath = path.Join(fixturesPath, "tar-cache") ) -func getTarFixture(t *testing.T, name string) (*os.File, func()) { - generatorScriptName := name + ".sh" - generatorScriptPath := path.Join(fixturesGeneratorsPath, generatorScriptName) - if !fileExists(t, generatorScriptPath) { - t.Fatalf("no tar generator script for fixture '%s'", 
generatorScriptPath) - } - - version := fixtureVersion(t, generatorScriptPath) - tarName := name + ":" + version + ".tar" - tarFixturePath := path.Join(tarCachePath, tarName) - - if !fileExists(t, tarFixturePath) { - t.Logf("Creating tar fixture: %s", tarFixturePath) - - fullPath, err := filepath.Abs(tarFixturePath) - if err != nil { - t.Fatal(err) - } - - cmd := exec.Command("./"+generatorScriptName, fullPath) - cmd.Env = os.Environ() - cmd.Dir = fixturesGeneratorsPath - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Stdin = os.Stdin - - err = cmd.Run() - if err != nil { - panic(err) - } - } - - fh, err := os.Open(tarFixturePath) - if err != nil { - t.Fatalf("could not open tar fixture '%s'", tarFixturePath) - } - - return fh, func() { - fh.Close() - } -} - -func fixtureVersion(t *testing.T, path string) string { - t.Helper() - f, err := os.Open(path) - if err != nil { - t.Fatal(err) - } - defer func() { - err := f.Close() - if err != nil { - t.Fatal(err) - } - }() - - h := sha256.New() - if _, err := io.Copy(h, f); err != nil { - t.Fatal(err) - } - - return fmt.Sprintf("%x", h.Sum(nil)) -} - -func fileExists(t *testing.T, filename string) bool { - t.Helper() - info, err := os.Stat(filename) - if os.IsNotExist(err) { - return false - } else if err != nil { - t.Fatal(err) - } - return !info.IsDir() -} - func TestFileCatalog_Add(t *testing.T) { ref := file.NewFileReference("/somepath") metadata := file.Metadata{ - Path: "a", - TarHeaderName: "b", - Linkname: "c", - Size: 1, - UserID: 2, - GroupID: 3, - TypeFlag: 4, - IsDir: true, - Mode: 5, + Path: "a", + LinkDestination: "c", + Size: 1, + UserID: 2, + GroupID: 3, + Type: 4, + IsDir: true, + Mode: 5, } layer := &Layer{ @@ -134,10 +65,9 @@ func TestFileCatalog_Add(t *testing.T) { catalog := NewFileCatalog() catalog.Add(*ref, metadata, layer, nil) - expected := FileCatalogEntry{ - File: *ref, - Metadata: metadata, - Layer: layer, + expected := filetree.IndexEntry{ + Reference: *ref, + Metadata: metadata, } 
actual, err := catalog.Get(*ref) @@ -148,6 +78,8 @@ func TestFileCatalog_Add(t *testing.T) { for d := range deep.Equal(expected, actual) { t.Errorf("diff: %+v", d) } + + assert.Equal(t, layer, catalog.Layer(*ref)) } type testLayerContent struct { @@ -177,9 +109,8 @@ func (t *testLayerContent) MediaType() (types.MediaType, error) { panic("not implemented") } -func TestFileCatalog_FileContents(t *testing.T) { - fixtureFile, cleanup := getTarFixture(t, "fixture-1") - defer cleanup() +func TestFileCatalog_Open(t *testing.T) { + fixtureFile := getTarFixture(t, "fixture-1") // a real path & contents from the fixture p := "path/branch/one/file-1.txt" @@ -187,27 +118,21 @@ func TestFileCatalog_FileContents(t *testing.T) { expected := "first file\n" metadata := file.Metadata{ - Path: p, - TarHeaderName: p, + Path: p, } tr, err := file.NewTarIndex(fixtureFile.Name(), nil) - if err != nil { - t.Fatalf("unable to get indexed reader") - } + require.NoError(t, err) + layer := &Layer{ layer: &testLayerContent{}, indexedContent: tr, } entries, err := tr.EntriesByName(p) - if err != nil { - t.Fatalf("unable to get entryies: %+v", err) - } + require.NoError(t, err) - if len(entries) != 1 { - t.Fatalf("bad entries len: %d", len(entries)) - } + require.Len(t, entries, 1) opener := func() io.ReadCloser { return io.NopCloser(entries[0].Reader) @@ -216,17 +141,687 @@ func TestFileCatalog_FileContents(t *testing.T) { catalog := NewFileCatalog() catalog.Add(*ref, metadata, layer, opener) - reader, err := catalog.FileContents(*ref) + reader, err := catalog.Open(*ref) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + for _, d := range deep.Equal([]byte(expected), actual) { + t.Errorf("diff: %+v", d) + } +} + +func Test_fileExtensions(t *testing.T) { + tests := []struct { + name string + path string + want []string + }{ + { + name: "empty", + path: "", + }, + { + name: "directory", + path: "/somewhere/to/nowhere/", + }, + { + name: "directory with 
ext", + path: "/somewhere/to/nowhere.d/", + }, + { + name: "single extension", + path: "/somewhere/to/my.tar", + want: []string{".tar"}, + }, + { + name: "multiple extensions", + path: "/somewhere/to/my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . prefix", + path: "/somewhere/to/.my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore more . prefixes", + path: "/somewhere/to/...my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . suffixes", + path: "/somewhere/to/my.tar.gz...", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, fileExtensions(tt.path)) + }) + } +} + +func TestFileCatalog_GetByExtension(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.New() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []filetree.IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get simple extension", + input: ".txt", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get mixed type extension", + input: ".d", + want: 
[]filetree.IndexEntry{ + { + + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + + { + + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get long extension", + input: ".tar.gz", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get short extension", + input: ".gz", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existent extension", + input: ".blerg-123", + want: []filetree.IndexEntry{}, + }, + } + for _, tt := range tests 
{ + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByExtension(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasename(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.New() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []filetree.IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.txt", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing name", + input: "file-11.txt", + want: []filetree.IndexEntry{}, + }, + { + name: "get directory name", + input: "branch.d", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get symlink name", + input: "file-1.d", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: 
"/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file-1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByBasename(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasenameGlob(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.New() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []filetree.IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.*", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing name", + input: "blerg-*.txt", + want: []filetree.IndexEntry{}, + }, + { + name: "get directory name", + input: "bran*.d", + want: 
[]filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDirectory, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get symlink name", + input: "file?1.d", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymLink, + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file?1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByBasenameGlob(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByMimeType(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.New() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []filetree.IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file mimetype", + input: "text/plain", + want: []filetree.IndexEntry{ + { + Reference: file.Reference{RealPath: 
"/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeRegular, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing mimetype", + input: "text/bogus", + want: []filetree.IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByMIMEType(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetBasenames(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.New() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side 
effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + want []string + }{ + { + name: "go case", + want: []string{ + ".file-4.tar.gz", + "branch", + "branch.d", + "common", + "file-1.d", + "file-1.txt", + "file-2.txt", + "file-3.txt", + "file-4", + "file-4.d", + "file-4.tar.gz", + "one", + "path", + "two", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := fileCatalog.Basenames() + assert.ElementsMatchf(t, tt.want, actual, "diff: %s", cmp.Diff(tt.want, actual)) + }) + } +} + +func getTarFixture(t *testing.T, name string) *os.File { + generatorScriptName := name + ".sh" + generatorScriptPath := path.Join(fixturesGeneratorsPath, generatorScriptName) + if !fileExists(t, generatorScriptPath) { + t.Fatalf("no tar generator script for fixture '%s'", generatorScriptPath) + } + + version := fixtureVersion(t, generatorScriptPath) + tarName := name + ":" + version + ".tar" + tarFixturePath := path.Join(tarCachePath, tarName) + + if !fileExists(t, tarFixturePath) { + t.Logf("Creating tar fixture: %s", tarFixturePath) + + fullPath, err := filepath.Abs(tarFixturePath) + if err != nil { + t.Fatal(err) + } + + cmd := exec.Command("./"+generatorScriptName, fullPath) + cmd.Env = os.Environ() + cmd.Dir = fixturesGeneratorsPath + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + err = cmd.Run() + if err != nil { + panic(err) + } + } + + fh, err := os.Open(tarFixturePath) if err != nil { - t.Fatalf("could not get contents by ref: %+v", err) + t.Fatalf("could not open tar fixture '%s'", tarFixturePath) } - actual, err := io.ReadAll(reader) + t.Cleanup(func() { + require.NoError(t, fh.Close()) + }) + + return fh +} + +func fixtureVersion(t *testing.T, path string) string { + t.Helper() + f, err := os.Open(path) if err != nil { - t.Fatalf("could not read 
content reader: %+v", err) + t.Fatal(err) } + defer func() { + err := f.Close() + if err != nil { + t.Fatal(err) + } + }() - for _, d := range deep.Equal([]byte(expected), actual) { - t.Errorf("diff: %+v", d) + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + t.Fatal(err) + } + + return fmt.Sprintf("%x", h.Sum(nil)) +} + +func fileExists(t *testing.T, filename string) bool { + t.Helper() + info, err := os.Stat(filename) + if os.IsNotExist(err) { + return false + } else if err != nil { + t.Fatal(err) + } + return !info.IsDir() +} + +func fileExtensions(p string) []string { + var exts []string + p = strings.TrimSpace(p) + + // ignore oddities + if strings.HasSuffix(p, ".") { + return exts + } + + // ignore directories + if strings.HasSuffix(p, "/") { + return exts + } + + // ignore . which indicate a hidden file + p = strings.TrimLeft(path.Base(p), ".") + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '.' { + exts = append(exts, p[i:]) + } } + return exts } diff --git a/pkg/image/image.go b/pkg/image/image.go index 0748c2c4..9798197b 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -30,7 +30,9 @@ type Image struct { // Layers contains the rich layer objects in build order Layers []*Layer // FileCatalog contains all file metadata for all files in all layers - FileCatalog FileCatalog + FileCatalog FileCatalogReader + + SquashedSearchContext filetree.Searcher overrideMetadata []AdditionalMetadata } @@ -127,12 +129,17 @@ func WithOS(o string) AdditionalMetadata { } } -// NewImage provides a new, unread image object. +// NewImage provides a new (unread) image object. +// Deprecated: use New() instead func NewImage(image v1.Image, contentCacheDir string, additionalMetadata ...AdditionalMetadata) *Image { + return New(image, contentCacheDir, additionalMetadata...) +} + +// New provides a new (unread) image object. 
+func New(image v1.Image, contentCacheDir string, additionalMetadata ...AdditionalMetadata) *Image { imgObj := &Image{ image: image, contentCacheDir: contentCacheDir, - FileCatalog: NewFileCatalog(), overrideMetadata: additionalMetadata, } return imgObj @@ -199,9 +206,11 @@ func (i *Image) Read() error { // let consumers know of a monitorable event (image save + copy stages) readProg := i.trackReadProgress(i.Metadata) + fileCatalog := NewFileCatalog() + for idx, v1Layer := range v1Layers { layer := NewLayer(v1Layer) - err := layer.Read(&i.FileCatalog, i.Metadata, idx, i.contentCacheDir) + err := layer.Read(fileCatalog, i.Metadata, idx, i.contentCacheDir) if err != nil { return err } @@ -214,24 +223,30 @@ func (i *Image) Read() error { i.Layers = layers // in order to resolve symlinks all squashed trees must be available - return i.squash(readProg) + err = i.squash(readProg) + + i.FileCatalog = fileCatalog + i.SquashedSearchContext = filetree.NewSearchContext(i.SquashedTree(), i.FileCatalog) + + return err } // squash generates a squash tree for each layer in the image. For instance, layer 2 squash = // squash(layer 0, layer 1, layer 2), layer 3 squash = squash(layer 0, layer 1, layer 2, layer 3), and so on. 
func (i *Image) squash(prog *progress.Manual) error { - var lastSquashTree *filetree.FileTree + var lastSquashTree filetree.ReadWriter for idx, layer := range i.Layers { if idx == 0 { - lastSquashTree = layer.Tree + lastSquashTree = layer.Tree.(filetree.ReadWriter) layer.SquashedTree = layer.Tree + layer.SquashedSearchContext = filetree.NewSearchContext(layer.SquashedTree, layer.fileCatalog.Index) continue } var unionTree = filetree.NewUnionFileTree() unionTree.PushTree(lastSquashTree) - unionTree.PushTree(layer.Tree) + unionTree.PushTree(layer.Tree.(filetree.ReadWriter)) squashedTree, err := unionTree.Squash() if err != nil { @@ -239,6 +254,7 @@ func (i *Image) squash(prog *progress.Manual) error { } layer.SquashedTree = squashedTree + layer.SquashedSearchContext = filetree.NewSearchContext(layer.SquashedTree, layer.fileCatalog.Index) lastSquashTree = squashedTree prog.N++ @@ -250,46 +266,63 @@ func (i *Image) squash(prog *progress.Manual) error { } // SquashedTree returns the pre-computed image squash file tree. -func (i *Image) SquashedTree() *filetree.FileTree { +func (i *Image) SquashedTree() filetree.Reader { layerCount := len(i.Layers) if layerCount == 0 { - return filetree.NewFileTree() + return filetree.New() } topLayer := i.Layers[layerCount-1] return topLayer.SquashedTree } +// OpenPathFromSquash fetches file contents for a single path, relative to the image squash tree. +// If the path does not exist an error is returned. +func (i *Image) OpenPathFromSquash(path file.Path) (io.ReadCloser, error) { + return fetchReaderByPath(i.SquashedTree(), i.FileCatalog, path) +} + // FileContentsFromSquash fetches file contents for a single path, relative to the image squash tree. // If the path does not exist an error is returned. +// Deprecated: use OpenPathFromSquash() instead. 
func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(i.SquashedTree(), &i.FileCatalog, path) + return fetchReaderByPath(i.SquashedTree(), i.FileCatalog, path) } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types. +// Deprecated: please use SquashedSearchContext().SearchByMIMEType() instead. func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { var refs []file.Reference - for _, ty := range mimeTypes { - refsForType, err := fetchFilesByMIMEType(i.SquashedTree(), &i.FileCatalog, ty) - if err != nil { - return nil, err + refVias, err := i.SquashedSearchContext.SearchByMIMEType(mimeTypes...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if refVia.HasReference() { + refs = append(refs, *refVia.Reference) } - refs = append(refs, refsForType...) } return refs, nil } -// FileContentsByRef fetches file contents for a single file reference, irregardless of the source layer. +// OpenReference fetches file contents for a single file reference, regardless of the source layer. +// If the path does not exist an error is returned. +func (i *Image) OpenReference(ref file.Reference) (io.ReadCloser, error) { + return i.FileCatalog.Open(ref) +} + +// FileContentsByRef fetches file contents for a single file reference, regardless of the source layer. // If the path does not exist an error is returned. +// Deprecated: please use OpenReference() instead. func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { - return i.FileCatalog.FileContents(ref) + return i.FileCatalog.Open(ref) } // ResolveLinkByLayerSquash resolves a symlink or hardlink for the given file reference relative to the result from // the layer squash of the given layer index argument. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. 
-func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.Reference, error) { +func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.Resolution, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[layer].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err @@ -297,7 +330,7 @@ func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options // ResolveLinkByImageSquash resolves a symlink or hardlink for the given file reference relative to the result from the image squash. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.Reference, error) { +func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.Resolution, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[len(i.Layers)-1].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err diff --git a/pkg/image/image_test.go b/pkg/image/image_test.go index cd38df1b..3a44a626 100644 --- a/pkg/image/image_test.go +++ b/pkg/image/image_test.go @@ -3,10 +3,11 @@ package image import ( "crypto/sha256" "fmt" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "os" "testing" - "github.com/go-test/deep" "github.com/google/go-containerregistry/pkg/name" ) @@ -97,13 +98,17 @@ func TestImageAdditionalMetadata(t *testing.T) { os.Remove(tempFile.Name()) }) - img := NewImage(nil, tempFile.Name(), test.options...) + img := New(nil, tempFile.Name(), test.options...) 
err = img.applyOverrideMetadata() if err != nil { t.Fatalf("could not create image: %+v", err) } - for _, d := range deep.Equal(img, &test.image) { + if d := cmp.Diff(img, &test.image, + cmpopts.IgnoreFields(Image{}, "FileCatalog"), + cmpopts.IgnoreUnexported(Image{}), + cmp.AllowUnexported(name.Tag{}, name.Repository{}, name.Registry{}), + ); d != "" { t.Errorf("diff: %+v", d) } }) diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 96fe14c3..f0b1267d 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -1,7 +1,6 @@ package image import ( - "archive/tar" "bytes" "errors" "fmt" @@ -33,12 +32,14 @@ type Layer struct { // Metadata contains select layer attributes Metadata LayerMetadata // Tree is a filetree that represents the structure of the layer tar contents ("diff tree") - Tree *filetree.FileTree + Tree filetree.Reader // SquashedTree is a filetree that represents the combination of this layers diff tree and all diff trees // in lower layers relative to this one. - SquashedTree *filetree.FileTree + SquashedTree filetree.Reader // fileCatalog contains all file metadata for all files in all layers (not just this layer) - fileCatalog *FileCatalog + fileCatalog *FileCatalog + SquashedSearchContext filetree.Searcher + SearchContext filetree.Searcher } // NewLayer provides a new, unread layer object. @@ -80,7 +81,8 @@ func (l *Layer) uncompressedTarCache(uncompressedLayersCacheDir string) (string, // file tree, and the layer squash tree. 
func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncompressedLayersCacheDir string) error { var err error - l.Tree = filetree.NewFileTree() + tree := filetree.New() + l.Tree = tree l.fileCatalog = catalog l.Metadata, err = newLayerMetadata(imgMetadata, l.layer, idx) if err != nil { @@ -108,7 +110,10 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return err } - l.indexedContent, err = file.NewTarIndex(tarFilePath, l.indexer(monitor)) + l.indexedContent, err = file.NewTarIndex( + tarFilePath, + layerTarIndexer(tree, l.fileCatalog, &l.Metadata.Size, l, monitor), + ) if err != nil { return fmt.Errorf("failed to read layer=%q tar : %w", l.Metadata.Digest, err) } @@ -122,9 +127,9 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp // Walk the more efficient walk if we're blessed with an io.ReaderAt. if ra, ok := r.(io.ReaderAt); ok { - err = file.WalkSquashFS(ra, l.squashfsVisitor(monitor)) + err = file.WalkSquashFS(ra, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor)) } else { - err = file.WalkSquashFSFromReader(r, l.squashfsVisitor(monitor)) + err = file.WalkSquashFSFromReader(r, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor)) } if err != nil { return fmt.Errorf("failed to walk layer=%q: %w", l.Metadata.Digest, err) @@ -134,50 +139,74 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return fmt.Errorf("unknown layer media type: %+v", l.Metadata.MediaType) } + l.SearchContext = filetree.NewSearchContext(l.Tree, l.fileCatalog.Index) + monitor.SetCompleted() return nil } -// FetchContents reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". +// OpenPath reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". 
+// An error is returned if there is no file at the given path and layer or the read operation cannot continue. +func (l *Layer) OpenPath(path file.Path) (io.ReadCloser, error) { + return fetchReaderByPath(l.Tree, l.fileCatalog, path) +} + +// OpenPathFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. +// An error is returned if there is no file at the given path and layer or the read operation cannot continue. +func (l *Layer) OpenPathFromSquash(path file.Path) (io.ReadCloser, error) { + return fetchReaderByPath(l.SquashedTree, l.fileCatalog, path) +} + +// FileContents reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". // An error is returned if there is no file at the given path and layer or the read operation cannot continue. +// Deprecated: use OpenPath() instead. func (l *Layer) FileContents(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(l.Tree, l.fileCatalog, path) + return fetchReaderByPath(l.Tree, l.fileCatalog, path) } // FileContentsFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. // An error is returned if there is no file at the given path and layer or the read operation cannot continue. +// Deprecated: use OpenPathFromSquash() instead. func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(l.SquashedTree, l.fileCatalog, path) + return fetchReaderByPath(l.SquashedTree, l.fileCatalog, path) } // FilesByMIMEType returns file references for files that match at least one of the given MIME types relative to each layer tree. +// Deprecated: use SearchContext().SearchByMIMEType() instead. 
func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.Reference, error) { var refs []file.Reference - for _, ty := range mimeTypes { - refsForType, err := fetchFilesByMIMEType(l.Tree, l.fileCatalog, ty) - if err != nil { - return nil, err + refVias, err := l.SearchContext.SearchByMIMEType(mimeTypes...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if refVia.HasReference() { + refs = append(refs, *refVia.Reference) } - refs = append(refs, refsForType...) } return refs, nil } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types relative to the squashed file tree representation. +// Deprecated: use SquashedSearchContext().SearchByMIMEType() instead. func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { var refs []file.Reference - for _, ty := range mimeTypes { - refsForType, err := fetchFilesByMIMEType(l.SquashedTree, l.fileCatalog, ty) - if err != nil { - return nil, err + refVias, err := l.SquashedSearchContext.SearchByMIMEType(mimeTypes...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if refVia.HasReference() { + refs = append(refs, *refVia.Reference) } - refs = append(refs, refsForType...) 
} return refs, nil } -func (l *Layer) indexer(monitor *progress.Manual) file.TarIndexVisitor { +func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor { + builder := filetree.NewBuilder(ft, fileCatalog.Index) + return func(index file.TarIndexEntry) error { var err error var entry = index.ToTarFileEntry() @@ -188,7 +217,7 @@ func (l *Layer) indexer(monitor *progress.Manual) file.TarIndexVisitor { log.Warnf("unable to close file while indexing layer: %+v", err) } }() - metadata := file.NewMetadata(entry.Header, entry.Sequence, contents) + metadata := file.NewMetadata(entry.Header, contents) // note: the tar header name is independent of surrounding structure, for example, there may be a tar header entry // for /some/path/to/file.txt without any entries to constituent paths (/some, /some/path, /some/path/to ). @@ -200,42 +229,26 @@ func (l *Layer) indexer(monitor *progress.Manual) file.TarIndexVisitor { // // In summary: the set of all FileTrees can have NON-leaf nodes that don't exist in the FileCatalog, but // the FileCatalog should NEVER have entries that don't appear in one (or more) FileTree(s). 
- var fileReference *file.Reference - switch metadata.TypeFlag { - case tar.TypeSymlink: - fileReference, err = l.Tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) - if err != nil { - return err - } - case tar.TypeLink: - fileReference, err = l.Tree.AddHardLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) - if err != nil { - return err - } - case tar.TypeDir: - fileReference, err = l.Tree.AddDir(file.Path(metadata.Path)) - if err != nil { - return err - } - default: - fileReference, err = l.Tree.AddFile(file.Path(metadata.Path)) - if err != nil { - return err - } - } - if fileReference == nil { - return fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.Linkname) + ref, err := builder.Add(metadata) + if err != nil { + return err } - l.Metadata.Size += metadata.Size - l.fileCatalog.Add(*fileReference, metadata, l, index.Open) + if size != nil { + *(size) += metadata.Size + } + fileCatalog.addImageReferences(ref.ID(), layerRef, index.Open) - monitor.N++ + if monitor != nil { + monitor.N++ + } return nil } } -func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { +func squashfsVisitor(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.SquashFSVisitor { + builder := filetree.NewBuilder(ft, fileCatalog.Index) + return func(fsys fs.FS, path string, d fs.DirEntry) error { ff, err := fsys.Open(path) if err != nil { @@ -245,7 +258,7 @@ func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { f, ok := ff.(*squashfs.File) if !ok { - return errors.New("unexpected file type") + return errors.New("unexpected file type from squashfs") } metadata, err := file.NewMetadataFromSquashFSFile(path, f) @@ -253,32 +266,15 @@ func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { return err } - var fileReference *file.Reference - - switch { - case f.IsSymlink(): - fileReference, err = 
l.Tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) - if err != nil { - return err - } - case f.IsDir(): - fileReference, err = l.Tree.AddDir(file.Path(metadata.Path)) - if err != nil { - return err - } - default: - fileReference, err = l.Tree.AddFile(file.Path(metadata.Path)) - if err != nil { - return err - } + fileReference, err := builder.Add(metadata) + if err != nil { + return err } - if fileReference == nil { - return fmt.Errorf("could not add path=%q link=%q during squashfs iteration", metadata.Path, metadata.Linkname) + if size != nil { + *(size) += metadata.Size } - - l.Metadata.Size += metadata.Size - l.fileCatalog.Add(*fileReference, metadata, l, func() io.ReadCloser { + fileCatalog.addImageReferences(fileReference.ID(), layerRef, func() io.ReadCloser { r, err := fsys.Open(path) if err != nil { // The file.Opener interface doesn't give us a way to return an error, and callers diff --git a/pkg/image/oci/directory_provider.go b/pkg/image/oci/directory_provider.go index 643b6cb1..08220cb5 100644 --- a/pkg/image/oci/directory_provider.go +++ b/pkg/image/oci/directory_provider.go @@ -69,5 +69,5 @@ func (p *DirectoryImageProvider) Provide(_ context.Context, userMetadata ...imag return nil, err } - return image.NewImage(img, contentTempDir, metadata...), nil + return image.New(img, contentTempDir, metadata...), nil } diff --git a/pkg/image/oci/registry_provider.go b/pkg/image/oci/registry_provider.go index 7768a407..52128a62 100644 --- a/pkg/image/oci/registry_provider.go +++ b/pkg/image/oci/registry_provider.go @@ -80,7 +80,7 @@ func (p *RegistryImageProvider) Provide(ctx context.Context, userMetadata ...ima // apply user-supplied metadata last to override any default behavior metadata = append(metadata, userMetadata...) 
- return image.NewImage(img, imageTempDir, metadata...), nil + return image.New(img, imageTempDir, metadata...), nil } func prepareReferenceOptions(registryOptions image.RegistryOptions) []name.Option { diff --git a/pkg/image/sif/provider.go b/pkg/image/sif/provider.go index 6f6a6738..e30f2033 100644 --- a/pkg/image/sif/provider.go +++ b/pkg/image/sif/provider.go @@ -51,5 +51,5 @@ func (p *SingularityImageProvider) Provide(ctx context.Context, userMetadata ... } metadata = append(metadata, userMetadata...) - return image.NewImage(ui, contentCacheDir, metadata...), nil + return image.New(ui, contentCacheDir, metadata...), nil } diff --git a/pkg/image/test-fixtures/generators/fixture-2.sh b/pkg/image/test-fixtures/generators/fixture-2.sh new file mode 100755 index 00000000..0c7b2f19 --- /dev/null +++ b/pkg/image/test-fixtures/generators/fixture-2.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -ue + +realpath() { + [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}" +} + +FIXTURE_TAR_PATH=$1 +FIXTURE_NAME=$(basename $FIXTURE_TAR_PATH) +FIXTURE_DIR=$(realpath $(dirname $FIXTURE_TAR_PATH)) + +# note: since tar --sort is not an option on mac, and we want these generation scripts to be generally portable, we've +# elected to use docker to generate the tar +docker run --rm -i \ + -u $(id -u):$(id -g) \ + -v ${FIXTURE_DIR}:/scratch \ + -w /scratch \ + ubuntu:latest \ + /bin/bash -xs < path/branch.d/one/file-1.txt + echo "forth file" > path/branch.d/one/file-4.d + echo "multi ext file" > path/branch.d/one/file-4.tar.gz + echo "hidden file" > path/branch.d/one/.file-4.tar.gz + + ln -s path/branch.d path/common/branch.d + ln -s path/branch.d path/common/branch + ln -s path/branch.d/one/file-4.d path/common/file-4 + ln -s path/branch.d/one/file-1.txt path/common/file-1.d + + echo "second file" > path/branch.d/two/file-2.txt + + echo "third file" > path/file-3.txt + + # permissions + chmod -R 755 path + chmod -R 700 path/branch/one/ + chmod 664 path/file-3.txt + + # tar + owner + # 
note: sort by name is important for test file header entry ordering + tar --sort=name --owner=1337 --group=5432 -cvf "/scratch/${FIXTURE_NAME}" path/ + +popd +EOF diff --git a/pkg/tree/depth_first_walker.go b/pkg/tree/depth_first_walker.go index 6870de1d..0efd677d 100644 --- a/pkg/tree/depth_first_walker.go +++ b/pkg/tree/depth_first_walker.go @@ -27,7 +27,7 @@ type DepthFirstWalker struct { visitor NodeVisitor tree Reader stack node.Stack - visited node.Set + visited node.IDSet conditions WalkConditions } diff --git a/pkg/tree/node/id.go b/pkg/tree/node/id.go index b84d2c09..b023c733 100644 --- a/pkg/tree/node/id.go +++ b/pkg/tree/node/id.go @@ -1,22 +1,76 @@ package node +import "sort" + type ID string -type Set map[ID]struct{} +type IDSet map[ID]struct{} + +func NewIDSet(is ...ID) IDSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(IDSet) + s.Add(is...) + return s +} + +func (s IDSet) Size() int { + return len(s) +} -func NewIDSet() Set { - return make(Set) +func (s IDSet) Merge(other IDSet) { + for _, i := range other.List() { + s.Add(i) + } } -func (s Set) Add(i ID) { - s[i] = struct{}{} +func (s IDSet) Add(ids ...ID) { + for _, i := range ids { + s[i] = struct{}{} + } } -func (s Set) Remove(i ID) { - delete(s, i) +func (s IDSet) Remove(ids ...ID) { + for _, i := range ids { + delete(s, i) + } } -func (s Set) Contains(i ID) bool { +func (s IDSet) Contains(i ID) bool { _, ok := s[i] return ok } + +func (s IDSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s IDSet) List() []ID { + ret := make([]ID, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s IDSet) Sorted() []ID { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s IDSet) ContainsAny(ids ...ID) bool { + for _, i := range ids { 
+ _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/tree/node/id_test.go b/pkg/tree/node/id_test.go new file mode 100644 index 00000000..222981bc --- /dev/null +++ b/pkg/tree/node/id_test.go @@ -0,0 +1,226 @@ +package node + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestIDSet_Size(t *testing.T) { + type testCase struct { + name string + s IDSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewIDSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewIDSet("items", "in", "set"), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Add(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + } + tests := []testCase{ + { + name: "add multiple", + s: NewIDSet(), + args: args{ids: []ID{"a", "b", "c"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Remove(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + expected []ID + } + tests := []testCase{ + { + name: "remove multiple", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"a", "b"}}, + expected: []ID{"c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) 
+ for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Contains(t *testing.T) { + type args struct { + i ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewIDSet("a", "b", "c"), + args: args{i: "a"}, + want: true, + }, + { + name: "not contains", + s: NewIDSet("a", "b", "c"), + args: args{i: "x"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Clear(t *testing.T) { + type testCase struct { + name string + s IDSet + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet("a", "b", "c"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestIDSet_List(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet("a", "b", "c"), + want: []ID{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func TestIDSet_Sorted(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet("a", "b", "c"), + want: []ID{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestIDSet_ContainsAny(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := 
[]testCase{ + { + name: "contains one", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"a", "x"}}, + want: true, + }, + { + name: "contains all", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"a", "b"}}, + want: true, + }, + { + name: "contains none", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"x", "y"}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/pkg/tree/tree.go b/pkg/tree/tree.go index 1b5079fd..0dc04557 100644 --- a/pkg/tree/tree.go +++ b/pkg/tree/tree.go @@ -8,9 +8,9 @@ import ( // Tree represents a simple Tree data structure. type Tree struct { - nodes map[node.ID]node.Node - children map[node.ID]map[node.ID]node.Node - parent map[node.ID]node.Node + nodes map[node.ID]node.Node // {node-id: node} + children map[node.ID]map[node.ID]node.Node // {parent-id: {child-id: child-node} + parent map[node.ID]node.Node // {child-id: parent-node} } // NewTree returns an instance of a Tree. 
diff --git a/test/integration/fixture_image_opaque_directory_test.go b/test/integration/fixture_image_opaque_directory_test.go index cccc4207..2e58fe2b 100644 --- a/test/integration/fixture_image_opaque_directory_test.go +++ b/test/integration/fixture_image_opaque_directory_test.go @@ -17,7 +17,7 @@ func TestImage_SquashedTree_OpaqueDirectoryExistsInFileCatalog(t *testing.T) { t.Fatalf("unable to get file=%q : %+v", path, err) } - _, err = image.FileCatalog.Get(*ref) + _, err = image.FileCatalog.Get(*ref.Reference) if err != nil { t.Fatal(err) } diff --git a/test/integration/fixture_image_simple_test.go b/test/integration/fixture_image_simple_test.go index 24a44c9f..2a378015 100644 --- a/test/integration/fixture_image_simple_test.go +++ b/test/integration/fixture_image_simple_test.go @@ -177,11 +177,11 @@ func BenchmarkSimpleImage_FetchSquashedContents(b *testing.B) { b.Run(c.source, func(b *testing.B) { for i := 0; i < b.N; i++ { for _, ref := range paths { - f, err := img.FileCatalog.Get(ref) + f, err := img.FileCatalog.Open(ref) if err != nil { b.Fatalf("unable to read: %+v", err) } - _, err = io.ReadAll(f.Contents()) + _, err = io.ReadAll(f) } } }) @@ -191,6 +191,7 @@ func BenchmarkSimpleImage_FetchSquashedContents(b *testing.B) { func assertImageSimpleMetadata(t *testing.T, i *image.Image, expectedValues testCase) { t.Helper() t.Log("Asserting metadata...") + if i.Metadata.MediaType != expectedValues.imageMediaType { t.Errorf("unexpected image media type: %+v", i.Metadata.MediaType) } @@ -226,21 +227,23 @@ func assertImageSimpleMetadata(t *testing.T, i *image.Image, expectedValues test } func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { + t.Helper() t.Log("Asserting squashed trees...") - one := filetree.NewFileTree() + + one := filetree.New() one.AddFile("/somefile-1.txt") - two := filetree.NewFileTree() + two := filetree.New() two.AddFile("/somefile-1.txt") two.AddFile("/somefile-2.txt") - three := filetree.NewFileTree() + three := 
filetree.New() three.AddFile("/somefile-1.txt") three.AddFile("/somefile-2.txt") three.AddFile("/really/.wh..wh..opq") three.AddFile("/really/nested/file-3.txt") - expectedTrees := map[uint]*filetree.FileTree{ + expectedTrees := map[uint]filetree.Reader{ 0: one, 1: two, 2: three, @@ -254,7 +257,7 @@ func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { compareLayerSquashTrees(t, expectedTrees, i, ignorePaths) - squashed := filetree.NewFileTree() + squashed := filetree.New() squashed.AddFile("/somefile-1.txt") squashed.AddFile("/somefile-2.txt") squashed.AddFile("/really/nested/file-3.txt") @@ -263,18 +266,20 @@ func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { } func assertImageSimpleTrees(t *testing.T, i *image.Image) { + t.Helper() t.Log("Asserting trees...") - one := filetree.NewFileTree() + + one := filetree.New() one.AddFile("/somefile-1.txt") - two := filetree.NewFileTree() + two := filetree.New() two.AddFile("/somefile-2.txt") - three := filetree.NewFileTree() + three := filetree.New() three.AddFile("/really/.wh..wh..opq") three.AddFile("/really/nested/file-3.txt") - expectedTrees := map[uint]*filetree.FileTree{ + expectedTrees := map[uint]filetree.Reader{ 0: one, 1: two, 2: three, @@ -290,6 +295,7 @@ func assertImageSimpleTrees(t *testing.T, i *image.Image) { } func assertImageSimpleContents(t *testing.T, i *image.Image) { + t.Helper() t.Log("Asserting contents...") expectedContents := map[string]string{ @@ -300,7 +306,7 @@ func assertImageSimpleContents(t *testing.T, i *image.Image) { actualContents := make(map[string]io.Reader) for path := range expectedContents { - reader, err := i.FileContentsFromSquash(file.Path(path)) + reader, err := i.OpenPathFromSquash(file.Path(path)) if err != nil { t.Fatal("unable to fetch multiple contents", err) } diff --git a/test/integration/fixture_image_symlinks_test.go b/test/integration/fixture_image_symlinks_test.go index 1c2280da..684af08f 100644 --- 
a/test/integration/fixture_image_symlinks_test.go +++ b/test/integration/fixture_image_symlinks_test.go @@ -5,6 +5,7 @@ package integration import ( "fmt" + "github.com/stretchr/testify/require" "io" "testing" @@ -92,58 +93,45 @@ func assertMatch(t *testing.T, i *image.Image, cfg linkFetchConfig, expectedReso if actualResolve.ID() != expectedResolve.ID() { var exLayer = -1 var acLayer = -1 - var exType byte - var acType byte + var exType file.Type + var acType file.Type eM, err := i.FileCatalog.Get(*expectedResolve) if err == nil { - exLayer = int(eM.Layer.Metadata.Index) - exType = eM.Metadata.TypeFlag + exLayer = int(i.FileCatalog.Layer(*expectedResolve).Metadata.Index) + exType = eM.Metadata.Type } aM, err := i.FileCatalog.Get(*actualResolve) if err == nil { - acLayer = int(aM.Layer.Metadata.Index) - acType = aM.Metadata.TypeFlag + acLayer = int(i.FileCatalog.Layer(*actualResolve).Metadata.Index) + acType = aM.Metadata.Type } - t.Fatalf("mismatched link resolution link=%+v: '%+v (layer=%d type=%+v)'!='%+v (layer=%d type=%+v linkName=%s)'", cfg.linkPath, expectedResolve, exLayer, exType, actualResolve, acLayer, acType, aM.Metadata.Linkname) + t.Fatalf("mismatched link resolution link=%+v: <%+v (layer=%d type=%+v)> != <%+v (layer=%d type=%+v linkName=%s)>", cfg.linkPath, expectedResolve, exLayer, exType, actualResolve, acLayer, acType, aM.Metadata.LinkDestination) } } func fetchRefs(t *testing.T, i *image.Image, cfg linkFetchConfig) (*file.Reference, *file.Reference) { _, link, err := i.Layers[cfg.linkLayer].Tree.File(file.Path(cfg.linkPath), cfg.linkOptions...) - if err != nil { - t.Fatalf("unable to get link: %+v", err) - } - if link == nil { - t.Fatalf("missing expected link: %s", cfg.linkPath) - } + require.NoError(t, err) + require.NotNil(t, link) _, expectedResolve, err := i.Layers[cfg.resolveLayer].Tree.File(file.Path(cfg.expectedPath), cfg.linkOptions...) 
- if err != nil { - t.Fatalf("unable to get resolved link: %+v", err) - } - if expectedResolve == nil { - t.Fatalf("missing expected path: %s", expectedResolve) - } + require.NoError(t, err) + require.NotNil(t, expectedResolve) - actualResolve, err := i.ResolveLinkByLayerSquash(*link, cfg.perspectiveLayer, cfg.linkOptions...) - if err != nil { - t.Fatalf("failed to resolve link=%+v: %+v", link, err) - } - return expectedResolve, actualResolve + actualResolve, err := i.ResolveLinkByLayerSquash(*link.Reference, cfg.perspectiveLayer, cfg.linkOptions...) + require.NoError(t, err) + return expectedResolve.Reference, actualResolve.Reference } func fetchContents(t *testing.T, i *image.Image, cfg linkFetchConfig) string { - contents, err := i.Layers[cfg.perspectiveLayer].FileContentsFromSquash(file.Path(cfg.linkPath)) - if err != nil { - t.Fatalf("could not fetch contents of %+v: %+v", cfg.linkPath, err) - } + contents, err := i.Layers[cfg.perspectiveLayer].OpenPathFromSquash(file.Path(cfg.linkPath)) + require.NoError(t, err) + b, err := io.ReadAll(contents) - if err != nil { - t.Fatalf("unable to fetch contents for %+v : %+v", cfg, err) - } + require.NoError(t, err) return string(b) } diff --git a/test/integration/mime_type_detection_test.go b/test/integration/mime_type_detection_test.go index f905ad34..eb43edd7 100644 --- a/test/integration/mime_type_detection_test.go +++ b/test/integration/mime_type_detection_test.go @@ -22,7 +22,7 @@ func TestContentMIMETypeDetection(t *testing.T) { } for mimeType, paths := range pathsByMIMEType { - refs, err := img.FilesByMIMETypeFromSquash(mimeType) + refs, err := img.SquashedSearchContext.SearchByMIMEType(mimeType) assert.NoError(t, err) assert.NotZero(t, len(refs), "found no refs for type=%q", mimeType) for _, ref := range refs { diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index 6ca7b53a..4f9fa268 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -9,13 +9,13 @@ 
import ( "github.com/anchore/stereoscope/pkg/image" ) -func compareLayerSquashTrees(t *testing.T, expected map[uint]*filetree.FileTree, i *image.Image, ignorePaths []file.Path) { +func compareLayerSquashTrees(t *testing.T, expected map[uint]filetree.Reader, i *image.Image, ignorePaths []file.Path) { t.Helper() if len(expected) != len(i.Layers) { t.Fatalf("mismatched layers (%d!=%d)", len(expected), len(i.Layers)) } - var actual = make([]*filetree.FileTree, 0) + var actual = make([]filetree.Reader, 0) for _, l := range i.Layers { actual = append(actual, l.SquashedTree) } @@ -23,13 +23,13 @@ func compareLayerSquashTrees(t *testing.T, expected map[uint]*filetree.FileTree, compareTrees(t, expected, actual, ignorePaths) } -func compareLayerTrees(t *testing.T, expected map[uint]*filetree.FileTree, i *image.Image, ignorePaths []file.Path) { +func compareLayerTrees(t *testing.T, expected map[uint]filetree.Reader, i *image.Image, ignorePaths []file.Path) { t.Helper() if len(expected) != len(i.Layers) { t.Fatalf("mismatched layers (%d!=%d)", len(expected), len(i.Layers)) } - var actual = make([]*filetree.FileTree, 0) + var actual = make([]filetree.Reader, 0) for _, l := range i.Layers { actual = append(actual, l.Tree) } @@ -37,13 +37,13 @@ func compareLayerTrees(t *testing.T, expected map[uint]*filetree.FileTree, i *im compareTrees(t, expected, actual, ignorePaths) } -func compareTrees(t *testing.T, expected map[uint]*filetree.FileTree, actual []*filetree.FileTree, ignorePaths []file.Path) { +func compareTrees(t *testing.T, expected map[uint]filetree.Reader, actual []filetree.Reader, ignorePaths []file.Path) { t.Helper() - for idx, expected := range expected { - actual := actual[idx] - if !expected.Equal(actual) { - extra, missing := expected.PathDiff(actual) + for idx, e := range expected { + a := actual[idx] + if !e.(*filetree.FileTree).Equal(a.(*filetree.FileTree)) { + extra, missing := e.(*filetree.FileTree).PathDiff(a.(*filetree.FileTree)) nonIgnoredPaths := 0 for _, p 
:= range extra { @@ -82,11 +82,11 @@ func compareTrees(t *testing.T, expected map[uint]*filetree.FileTree, actual []* } } -func compareSquashTree(t *testing.T, expected *filetree.FileTree, i *image.Image) { +func compareSquashTree(t *testing.T, expected filetree.Reader, i *image.Image) { t.Helper() actual := i.SquashedTree() - if !expected.Equal(actual) { + if !expected.(*filetree.FileTree).Equal(actual.(*filetree.FileTree)) { t.Log("Walking expected squashed tree:") err := expected.Walk(func(p file.Path, _ filenode.FileNode) error { t.Log(" ", p) @@ -105,7 +105,7 @@ func compareSquashTree(t *testing.T, expected *filetree.FileTree, i *image.Image t.Fatalf("failed to walk tree: %+v", err) } - extra, missing := expected.PathDiff(actual) + extra, missing := expected.(*filetree.FileTree).PathDiff(actual.(*filetree.FileTree)) t.Errorf("path differences: extra=%+v missing=%+v", extra, missing) t.Errorf("mismatched squashed trees") }