Commit 72deb65: Merge branch 'main' into rest-api-temp

pavithraes committed Mar 14, 2024
2 parents 66c574c + d345670
Showing 42 changed files with 1,094 additions and 201 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/tests.yaml
@@ -62,7 +62,7 @@ jobs:
      - name: "Unit tests ✅"
        run: |
-          pytest -m "not extended_prefix" tests
+          pytest -m "not extended_prefix and not user_journey" tests
# https://github.com/actions/runner-images/issues/1052
- name: "Windows extended prefix unit tests ✅"
@@ -125,6 +125,10 @@ jobs:
          export PYTHONPATH=$PYTHONPATH:$PWD
          pytest ../tests/test_api.py ../tests/test_metrics.py
+      - name: "Run user journey tests ✅"
+        run: |
+          pytest -m "user_journey"
      - name: "Get Docker logs 🔍"
        if: ${{ failure() }}
        run: |
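For context, pytest warns on unknown marks unless they are registered; a minimal sketch of how a `user_journey` marker might be declared and used (the registration hook is standard pytest, but its location and the test body here are assumptions, not conda-store's actual layout):

```python
# conftest.py -- minimal sketch; conda-store's real test layout may differ
import pytest


def pytest_configure(config):
    # Register the marker so `pytest -m "user_journey"` and
    # `-m "not extended_prefix and not user_journey"` select cleanly
    # without "unknown mark" warnings.
    config.addinivalue_line(
        "markers", "user_journey: end-to-end user journey tests"
    )


# In a test module (hypothetical example):
@pytest.mark.user_journey
def test_create_and_delete_environment():
    # A real user journey test would drive a running conda-store server
    # through its REST API; this placeholder only shows marker usage.
    ...
```
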
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -13,14 +13,14 @@ repos:
    rev: 23.9.1
    hooks:
      - id: black
-        exclude: "examples|tests|docs"
+        exclude: "examples|tests/assets"

  - repo: https://github.com/charliermarsh/ruff-pre-commit
    # Ruff version.
    rev: "v0.0.289"
    hooks:
      - id: ruff
-        exclude: "examples|tests|docs"
+        exclude: "examples|tests/assets"
        args: ["--fix"]

- repo: https://github.com/pycqa/isort
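The `exclude` value is a Python regular expression that pre-commit matches against each file's relative path with `re.search`, so narrowing `examples|tests|docs` to `examples|tests/assets` brings the test suite (and docs) back under black and ruff while still skipping test fixture files. A quick sanity check of the new pattern (paths are illustrative):

```python
import re

# The new exclude pattern from .pre-commit-config.yaml
pattern = re.compile(r"examples|tests/assets")

paths = [
    "tests/test_api.py",      # now checked (previously excluded by "tests")
    "tests/assets/env.yaml",  # still excluded
    "examples/demo.py",       # still excluded
    "docs/index.md",          # now checked (previously excluded by "docs")
]
for path in paths:
    state = "excluded" if pattern.search(path) else "checked"
    print(f"{path}: {state}")
```
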
29 changes: 29 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,35 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
The project changed to `CalVer` in September 2023.

+## [2024.3.1] - 2024-03-12
+
+([full changelog](https://github.com/conda-incubator/conda-store/compare/2024.1.1...2024.3.1))
+
+## Added
+
+* Add upstream contribution policy by @pavithraes in https://github.com/conda-incubator/conda-store/pull/722
+* Pass `CONDA_OVERRIDE_CUDA` to `with_cuda` of conda-lock by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/721
+* Add backwards compatibility policy by @dcmcand in https://github.com/conda-incubator/conda-store/pull/687
+* add how to test section to PR template by @dcmcand in https://github.com/conda-incubator/conda-store/pull/743
+* Add extended-length prefix support by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/713
+* Generate `constructor` artifacts by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/714
+* Add support for the `editor` role by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/738
+* Add a test for parallel builds, fix race conditions due to the shared conda cache by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/745
+* Add user journey test by @dcmcand in https://github.com/conda-incubator/conda-store/pull/760
+* Add status `CANCELED` by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/747
+* [DOC] Document setting environment variable by @pavithraes in https://github.com/conda-incubator/conda-store/pull/765
+
+## Fixed
+
+* Log address and port, show exception trace from `uvicorn.run` by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/708
+* Check if worker is initialized by @nkaretnikov in https://github.com/conda-incubator/conda-store/pull/705
+
+## Contributors to this release
+
+([GitHub contributors page for this release](https://github.com/conda-incubator/conda-store/graphs/contributors?from=2024-01-30&to=2024-03-12&type=c))
+
+[@nkaretnikov](https://github.com/search?q=repo%3Aconda-incubator%2Fconda-store+involves%3Ankaretnikov+updated%3A2024-01-30..2024-03-12&type=Issues) | [@dcmcand](https://github.com/search?q=repo%3Aconda-incubator%2Fconda-store+involves%3Adcmcand+updated%3A2024-01-30..2024-03-12&type=Issues) | [@pavithraes](https://github.com/search?q=repo%3Aconda-incubator%2Fconda-store+involves%3Apavithraes+updated%3A2024-01-30..2024-03-12&type=Issues) | [@dependabot](https://github.com/search?q=repo%3Aconda-incubator%2Fconda-store+involves%3Adependabot+updated%3A2024-01-30..2024-03-12&type=Issues) | [@trallard](https://github.com/search?q=repo%3Aconda-incubator%2Fconda-store+involves%3Atrallard+updated%3A2024-01-30..2024-03-12&type=Issues)
## [2024.1.1] - 2024-01-30

([full changelog](https://github.com/conda-incubator/conda-store/compare/2023.10.1...ec606641f6d0bb7bde39b2e9f11cf515077feee8))
2 changes: 1 addition & 1 deletion conda-store-server/conda_store_server/__init__.py
@@ -2,7 +2,7 @@
import typing
from pathlib import Path

-__version__ = "2024.1.1"
+__version__ = "2024.3.1"


CONDA_STORE_DIR = Path.home() / ".conda-store"
166 changes: 158 additions & 8 deletions conda-store-server/conda_store_server/action/download_packages.py
@@ -1,10 +1,25 @@
+import os
import pathlib
import shutil
+import tempfile
import typing

+# This import is needed to avoid the following error on conda imports:
+# AttributeError: 'Logger' object has no attribute 'trace'
+import conda.gateways.logging  # noqa
import conda_package_handling.api
import conda_package_streaming.url
import filelock
+from conda.base.constants import PACKAGE_CACHE_MAGIC_FILE
+from conda.common.path import expand, strip_pkg_extension
+from conda.core.package_cache_data import (
+    PackageCacheRecord,
+    PackageRecord,
+    getsize,
+    read_index_json,
+    write_as_json_to_file,
+)
+from conda.gateways.disk.update import touch
from conda_store_server import action, conda_utils


@@ -28,14 +43,149 @@ def action_fetch_and_extract_conda_packages(
        file_path = pkgs_dir / filename
        count_message = f"{packages_searched} of {total_packages}"
        with filelock.FileLock(str(lock_filename)):
+            # This magic file, which is currently set to "urls.txt", is used
+            # to check cache permissions in conda, see _check_writable in
+            # PackageCacheData.
+            #
+            # Sometimes this file is not yet created while this action is
+            # running. Without this magic file, PackageCacheData cache
+            # query functions, like query_all, will return nothing.
+            #
+            # If the magic file is not present, this error might be thrown
+            # during the lockfile install action:
+            #
+            # File "/opt/conda/lib/python3.10/site-packages/conda/misc.py", line 110, in explicit
+            # raise AssertionError("No package cache records found")
+            #
+            # The code below is from create_package_cache_directory in
+            # conda, which creates the package cache, but we only need the
+            # magic file part here:
+            cache_magic_file = pkgs_dir / PACKAGE_CACHE_MAGIC_FILE
+            if not cache_magic_file.exists():
+                sudo_safe = expand(pkgs_dir).startswith(expand("~"))
+                touch(cache_magic_file, mkdir=True, sudo_safe=sudo_safe)
+
            if file_path.exists():
                context.log.info(f"SKIPPING {filename} | FILE EXISTS\n")
            else:
-                context.log.info(f"DOWNLOAD {filename} | {count_message}\n")
-                (
-                    filename,
-                    conda_package_stream,
-                ) = conda_package_streaming.url.conda_reader_for_url(url)
-                with file_path.open("wb") as f:
-                    shutil.copyfileobj(conda_package_stream, f)
-                conda_package_handling.api.extract(str(file_path))
+                with tempfile.TemporaryDirectory() as tmp_dir:
+                    tmp_dir = pathlib.Path(tmp_dir)
+                    file_path = tmp_dir / filename
+                    file_path_str = str(file_path)
+                    extracted_dir = pathlib.Path(
+                        strip_pkg_extension(file_path_str)[0]
+                    )
+                    extracted_dir_str = str(extracted_dir)
+                    context.log.info(f"DOWNLOAD {filename} | {count_message}\n")
+                    (
+                        filename,
+                        conda_package_stream,
+                    ) = conda_package_streaming.url.conda_reader_for_url(url)
+                    with file_path.open("wb") as f:
+                        shutil.copyfileobj(conda_package_stream, f)
+                    conda_package_handling.api.extract(
+                        file_path_str, dest_dir=extracted_dir
+                    )
+
+                    # This code is needed to avoid failures when building in
+                    # parallel while using the shared cache.
+                    #
+                    # Package tarballs contain the info/index.json file,
+                    # which is used by conda to create the
+                    # info/repodata_record.json file. The latter is used to
+                    # interact with the cache. _make_single_record from
+                    # PackageCacheData would create the repodata json file
+                    # if it's not present, which would happen in conda-store
+                    # during the lockfile install action. The repodata file
+                    # is not created if it already exists.
+                    #
+                    # The code that does that in conda is similar to the code
+                    # below. However, there is an important difference. The
+                    # code in conda would fail to read the url and return None
+                    # here:
+                    #
+                    # url = self._urls_data.get_url(package_filename)
+                    #
+                    # And that would result in the channel field of the json
+                    # file being set to "<unknown>". This is a problem because
+                    # the channel is used when querying cache entries, via
+                    # match_individual from MatchSpec, which would always result
+                    # in a mismatch because the proper channel value is
+                    # different.
+                    #
+                    # That would make conda think that the package is not
+                    # available in the cache, so it would try to download it
+                    # outside of this action, where no locking is implemented.
+                    #
+                    # As of now, conda's cache is not atomic, so the same
+                    # dependencies requested by different builds would overwrite
+                    # each other causing random failures during the build
+                    # process.
+                    #
+                    # To avoid this problem, the code below does what the code
+                    # in conda does but also sets the url properly, which would
+                    # make the channel match properly during the query process
+                    # later. So no dependencies would be downloaded outside of
+                    # this action and cache corruption is prevented.
+                    #
+                    # To illustrate, here's a diff of an old conda entry, which
+                    # didn't work, versus the new one created by this action:
+                    #
+                    # --- /tmp/old.txt 2024-02-05 01:08:16.879751010 +0100
+                    # +++ /tmp/new.txt 2024-02-05 01:08:02.919319887 +0100
+                    # @@ -2,7 +2,7 @@
+                    # "arch": "x86_64",
+                    # "build": "conda_forge",
+                    # "build_number": 0,
+                    # - "channel": "<unknown>",
+                    # + "channel": "https://conda.anaconda.org/conda-forge/linux-64",
+                    # "constrains": [],
+                    # "depends": [],
+                    # "features": "",
+                    # @@ -15,5 +15,6 @@
+                    # "subdir": "linux-64",
+                    # "timestamp": 1578324546067,
+                    # "track_features": "",
+                    # + "url": "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2",
+                    # "version": "0.1"
+                    # }
+                    #
+                    # Also see the comment above about the cache magic file.
+                    # Without the magic file, cache queries would fail even if
+                    # repodata_record.json files have proper channels specified.
+
+                    # This file is used to parse cache records via PackageCacheRecord in conda
+                    repodata_file = extracted_dir / "info" / "repodata_record.json"
+
+                    raw_json_record = read_index_json(extracted_dir)
+                    fn = os.path.basename(file_path_str)
+                    md5 = package["hash"]["md5"]
+                    size = getsize(file_path_str)
+
+                    package_cache_record = PackageCacheRecord.from_objects(
+                        raw_json_record,
+                        url=url,
+                        fn=fn,
+                        md5=md5,
+                        size=size,
+                        package_tarball_full_path=file_path_str,
+                        extracted_package_dir=extracted_dir_str,
+                    )
+
+                    repodata_record = PackageRecord.from_objects(
+                        package_cache_record
+                    )
+                    write_as_json_to_file(repodata_file, repodata_record)
+
+                    # This is to ensure _make_single_record in conda never
+                    # sees the extracted package directory without our
+                    # repodata_record file being there. Otherwise, conda
+                    # would attempt to create the repodata file, with the
+                    # channel field set to "<unknown>", which would make the
+                    # above code pointless. Using symlinks here would be
+                    # better since those are atomic on Linux, but I don't
+                    # want to create any permanent directories on the
+                    # filesystem.
+                    shutil.rmtree(pkgs_dir / extracted_dir.name, ignore_errors=True)
+                    shutil.move(extracted_dir, pkgs_dir / extracted_dir.name)
+                    shutil.move(file_path, pkgs_dir / file_path.name)
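
The change above boils down to one pattern: download and extract into a private temporary directory, then publish the finished artifacts into the shared cache under a file lock, so parallel builds never observe a half-written package. A stripped-down sketch of that pattern, with illustrative names rather than conda-store's actual API:

```python
import pathlib
import shutil
import tempfile

import filelock


def fetch_into_shared_cache(download, filename: str, cache_dir: pathlib.Path) -> pathlib.Path:
    """Write `filename` via `download(dest)` and publish it into `cache_dir`
    so that concurrent workers never see a partially written file."""
    cache_dir.mkdir(parents=True, exist_ok=True)
    target = cache_dir / filename
    with filelock.FileLock(str(cache_dir / f"{filename}.lock")):
        if target.exists():
            return target  # another worker already published this package
        with tempfile.TemporaryDirectory() as tmp:
            staging = pathlib.Path(tmp) / filename
            download(staging)  # the full download happens in private storage
            # Mirrors the rmtree + move in the diff above: clear any stale
            # copy, then move the fully written file into the shared location.
            shutil.move(str(staging), str(target))
    return target
```
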
(New file: Alembic migration, revision 03c839888c82, adding the CANCELED build status)
@@ -0,0 +1,44 @@
"""add canceled status
Revision ID: 03c839888c82
Revises: 57cd11b949d5
Create Date: 2024-01-29 03:56:36.889909
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "03c839888c82"
down_revision = "57cd11b949d5"
branch_labels = None
depends_on = None


# Migrating from/to VARCHAR having the same length might look strange, but it
# serves a purpose. This will be a no-op in SQLite because it represents Python
# enums as VARCHAR, but it will convert the enum in PostgreSQL to VARCHAR. The
# old type is set to VARCHAR here because you can cast an enum to VARCHAR, which
# is needed for the migration to work. In the end, both DBs will use VARCHAR to
# represent the Python enum, which makes it easier to support both DBs at the
# same time.
def upgrade():
with op.batch_alter_table(
"build",
schema=None,
) as batch_op:
batch_op.alter_column(
"status",
existing_type=sa.VARCHAR(length=9),
type_=sa.VARCHAR(length=9),
existing_nullable=False,
)
if not str(op.get_bind().engine.url).startswith("sqlite"):
op.execute("DROP TYPE IF EXISTS buildstatus")


def downgrade():
# There are foreign key constraints linking build ids to other tables. So
# just mark the builds as failed, which was the status previously used for
# canceled builds
op.execute("UPDATE build SET status = 'FAILED' WHERE status = 'CANCELED'")
27 changes: 22 additions & 5 deletions conda-store-server/conda_store_server/build.py
@@ -48,6 +48,15 @@ def set_build_failed(
    db.commit()


+def set_build_canceled(
+    db: Session, build: orm.Build, status_info: typing.Optional[str] = None
+):
+    build.status = schema.BuildStatus.CANCELED
+    build.status_info = status_info
+    build.ended_on = datetime.datetime.utcnow()
+    db.commit()
+
+
def set_build_completed(db: Session, conda_store, build: orm.Build):
    build.status = schema.BuildStatus.COMPLETED
    build.ended_on = datetime.datetime.utcnow()
Expand All @@ -65,17 +74,22 @@ def set_build_completed(db: Session, conda_store, build: orm.Build):


def build_cleanup(
db: Session, conda_store, build_ids: typing.List[str] = None, reason: str = None
db: Session,
conda_store,
build_ids: typing.List[str] = None,
reason: str = None,
is_canceled: bool = False,
):
"""Walk through all builds in BUILDING state and check that they are actively running
Build can get stuck in the building state due to worker
spontaineously dying due to memory errors, killing container, etc.
"""
status = "CANCELED" if is_canceled else "FAILED"
reason = (
reason
or """
Build marked as FAILED on cleanup due to being stuck in BUILDING state
or f"""
Build marked as {status} on cleanup due to being stuck in BUILDING state
and not present on workers. This happens for several reasons: build is
canceled, a worker crash from out of memory errors, worker was killed,
or error in conda-store
@@ -113,15 +127,18 @@ def build_cleanup(
            )
        ):
            conda_store.log.warning(
-                f"marking build {build.id} as FAILED since stuck in BUILDING state and not present on workers"
+                f"marking build {build.id} as {status} since stuck in BUILDING state and not present on workers"
            )
            append_to_logs(
                db,
                conda_store,
                build,
                reason,
            )
-            set_build_failed(db, build)
+            if is_canceled:
+                set_build_canceled(db, build)
+            else:
+                set_build_failed(db, build)


def build_conda_environment(db: Session, conda_store, build):
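With the new `is_canceled` flag, a cancellation path can reuse the same cleanup walk instead of duplicating it. A hypothetical caller (the function name and session handling here are assumptions, not conda-store's actual API):

```python
def cancel_build(db, conda_store, build_id):
    # Hypothetical sketch: reuse build_cleanup so a canceled build is
    # marked CANCELED rather than FAILED.
    build_cleanup(
        db,
        conda_store,
        build_ids=[build_id],
        reason=f"Build {build_id} marked as CANCELED by user request",
        is_canceled=True,
    )
```
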
1 change: 1 addition & 0 deletions conda-store-server/conda_store_server/schema.py
@@ -156,6 +156,7 @@ class BuildStatus(enum.Enum):
    BUILDING = "BUILDING"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"
+    CANCELED = "CANCELED"


class BuildArtifact(BaseModel):
(Changes to the build page HTML template)
@@ -61,7 +61,7 @@ <h3 class="card-title">Conda Packages
</div>
{% endif %}

-{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED'] %}
+{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED', 'CANCELED'] %}
<div class="card my-2">
<div class="card-body">
<h3 class="card-title">Conda Environment Artifacts</h3>
@@ -89,7 +89,7 @@ <h3 class="card-title">Conda Environment Artifacts</h3>
</div>
{% endif %}

-{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED'] %}
+{% if build.status.value in ['BUILDING', 'COMPLETED', 'FAILED', 'CANCELED'] %}
<div class="card my-2">
<div class="card-body">
<a href="{{ url_for('api_get_build_logs', build_id=build.id) }}" class="btn btn-primary btn-block">Full Logs</a>
(Only a subset of the 42 changed files is shown above.)
