Skip to content

Commit

Permalink
Consolidate optional dependencies; test nuke; parallelize tests
Browse files Browse the repository at this point in the history
* Move previous dev, test, and datasette optional dependencies into
  the required dependencies to simplify application installation.
* Test make nuke; parallelize --live-dbs tests
* Move prettier into conda-only dependencies
* Update conda-lock.yml and rendered conda environment files.
* Remove action test file hashlog
* Remove merge markers.
* Remove transitive astroid dependency that's now correctly included in solve.
* Use the real, not-yet-stable library version of dagster-postgres (0.21.6)
  rather than the accidentally packaged 1.5.6 version found in conda.
  We'll need to keep an eye out for when dagster-postgres graduates
  to stable versioning and update it then. This is a bit of a mess
  because of some broken automation in the conda packaging for dagster,
  which has now been fixed.
* Update "make pudl" to remove the old PUDL DB and reinitialize with
  alembic, rather than writing to the DB that already exists.
* Fix some groupby.agg() deprecation warnings.
* Fix dagster-postgres version (again).
* Update username in path to settings file
* Avoid bugs in ferc_to_sqlite --clobber; don't use cache_dir for pip install.
* Make FERC extraction output removal more specific.
* Bump numpy and numba minimum versions.
* Bump black version in pre-commit
* Bump ruff pre-commit hook version
  • Loading branch information
zaneselvans committed Nov 14, 2023
1 parent 077f6c0 commit e1baa0a
Show file tree
Hide file tree
Showing 15 changed files with 2,814 additions and 1,997 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-deploy-pudl.yml
Expand Up @@ -117,8 +117,8 @@ jobs:
--container-env DAGSTER_PG_PASSWORD="$DAGSTER_PG_PASSWORD" \
--container-env DAGSTER_PG_HOST="104.154.182.24" \
--container-env DAGSTER_PG_DB="dagster-storage" \
--container-env PUDL_SETTINGS_YML="/home/catalyst/src/pudl/package_data/settings/etl_full.yml" \
--container-env FLY_ACCESS_TOKEN=${{ secrets.FLY_ACCESS_TOKEN }} \
--container-env PUDL_SETTINGS_YML="/home/mambauser/src/pudl/package_data/settings/etl_full.yml" \
# Start the VM
- name: Start the deploy-pudl-vm
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/tox-pytest.yml
Expand Up @@ -34,7 +34,6 @@ jobs:
environment-file: environments/conda-lock.yml
environment-name: pudl-dev
cache-environment: true
create-args: --category main dev docs test datasette

- name: Log environment details
run: |
Expand Down Expand Up @@ -74,7 +73,6 @@ jobs:
environment-file: environments/conda-lock.yml
environment-name: pudl-dev
cache-environment: true
create-args: --category main dev docs test datasette

- name: Log environment details
run: |
Expand Down Expand Up @@ -125,7 +123,6 @@ jobs:
environment-file: environments/conda-lock.yml
environment-name: pudl-dev
cache-environment: true
create-args: --category main dev docs test datasette

- name: Log environment details
run: |
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yaml
Expand Up @@ -10,7 +10,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
python: mambaforge-4.10
python: mambaforge-22.9

# Define the python environment using conda / mamba
conda:
Expand Down
41 changes: 18 additions & 23 deletions Makefile
Expand Up @@ -5,7 +5,6 @@ coverage_report := coverage report --sort=cover
pytest_args := --durations 20 ${pytest_covargs} ${gcs_cache_path}
etl_fast_yml := src/pudl/package_data/settings/etl_fast.yml
etl_full_yml := src/pudl/package_data/settings/etl_full.yml
pip_install_pudl := pip install --no-deps --editable ./

# We use mamba locally, but micromamba in CI, so choose the right binary:
ifdef GITHUB_ACTION
Expand Down Expand Up @@ -47,25 +46,17 @@ conda-lock.yml: pyproject.toml
cd environments && conda-lock render \
--kind env \
--dev-dependencies \
--extras docs \
--extras datasette \
conda-lock.yml
prettier --write environments/*.yml

# Create the pudl-dev conda environment based on the universal lockfile
.PHONY: pudl-dev
pudl-dev: conda-lock.yml
conda-lock install \
--name pudl-dev \
--${mamba} \
--dev \
--extras docs \
--extras datasette \
environments/conda-lock.yml
conda-lock install --name pudl-dev --${mamba} --dev environments/conda-lock.yml

.PHONY: install-pudl
install-pudl: pudl-dev
${mamba} run --name pudl-dev pip install --no-deps --editable .
${mamba} run --name pudl-dev pip install --no-cache-dir --no-deps --editable .

########################################################################################
# Build documentation for local use or testing
Expand All @@ -92,19 +83,24 @@ docs-build: docs-clean
########################################################################################

# Extract all FERC DBF and XBRL data to SQLite.
ferc1.sqlite ferc1_xbrl.sqlite:
.PHONY: ferc
ferc:
rm -f ${PUDL_OUTPUT}/ferc*.sqlite
rm -f ${PUDL_OUTPUT}/ferc*_xbrl_datapackage.json
rm -f ${PUDL_OUTPUT}/ferc*_xbrl_taxonomy_metadata.json
coverage run ${covargs} -- \
src/pudl/ferc_to_sqlite/cli.py \
--clobber \
${gcs_cache_path} \
${etl_full_yml}

# Run the full PUDL ETL
pudl.sqlite:
coverage run ${covargs} -- \
src/pudl/cli/etl.py \
${gcs_cache_path} \
${etl_full_yml}
# Remove the existing PUDL DB if it exists.
# Create a new empty DB using alembic.
# Run the full PUDL ETL.
.PHONY: pudl
pudl:
rm -f ${PUDL_OUTPUT}/pudl.sqlite
alembic upgrade head
coverage run ${covargs} -- src/pudl/cli/etl.py ${gcs_cache_path} ${etl_full_yml}

########################################################################################
# pytest
Expand Down Expand Up @@ -140,11 +136,10 @@ pytest-validate:
# The integration and data validation tests are run one after the other, with each
# pytest invocation parallelized via -n auto, rather than backgrounded and joined with wait.
.PHONY: nuke
nuke: coverage-erase docs-build pytest-unit ferc1.sqlite ferc1_xbrl.sqlite pudl.sqlite
nuke: coverage-erase docs-build pytest-unit ferc pudl
pudl_check_fks
pytest ${pytest_args} --live-dbs --etl-settings ${etl_full_yml} test/integration & \
pytest ${pytest_args} --live-dbs test/validate & \
wait
pytest ${pytest_args} -n auto --live-dbs --etl-settings ${etl_full_yml} test/integration
pytest ${pytest_args} -n auto --live-dbs test/validate
${coverage_report}

# Check that designated Jupyter notebooks can be run against the current DB
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Expand Up @@ -46,7 +46,7 @@ COPY docker/dagster.yaml ${DAGSTER_HOME}/dagster.yaml

# Create a conda environment based on the specification in the repo
COPY environments/conda-lock.yml environments/conda-lock.yml
RUN micromamba create --prefix ${CONDA_PREFIX} --yes --category main dev docs test datasette --file environments/conda-lock.yml && \
RUN micromamba create --prefix ${CONDA_PREFIX} --yes --file environments/conda-lock.yml && \
micromamba clean -afy
# Copy the cloned pudl repository into the user's home directory
COPY --chown=${MAMBA_USER}:${MAMBA_USER} . ${CONTAINER_HOME}
Expand Down
16 changes: 9 additions & 7 deletions docker/gcp_pudl_etl.sh
Expand Up @@ -24,21 +24,23 @@ function run_pudl_etl() {
alembic upgrade head && \
pudl_setup && \
ferc_to_sqlite \
--loglevel=DEBUG \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--workers=8 \
--loglevel DEBUG \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
--workers 8 \
$PUDL_SETTINGS_YML && \
pudl_etl \
--loglevel DEBUG \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
$PUDL_SETTINGS_YML && \
pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
-n auto \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
--etl-settings $PUDL_SETTINGS_YML \
--live-dbs test/integration test/unit && \
pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
-n auto \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
--etl-settings $PUDL_SETTINGS_YML \
--live-dbs test/validate
}

Expand Down

0 comments on commit e1baa0a

Please sign in to comment.