astronomy-commons · delucchi-cmu · Sep 28, 2023 · Aug 15, 2023 · Aug 21, 2023 · Aug 23, 2023
diff --git a/.copier-answers.yml b/.copier-answers.yml
@@ -1,10 +1,12 @@
 # Changes here will be overwritten by Copier
-_commit: v1.3.4
+_commit: v1.4.2
 _src_path: gh:lincc-frameworks/python-project-template
 author_email: lincc-frameworks-team@lists.lsst.org
 author_name: LINCC Frameworks
 create_example_module: false
 custom_install: true
+include_benchmarks: false
+include_docs: true
 include_notebooks: true
 mypy_type_checking: basic
 package_name: hipscat_import

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -26,7 +26,7 @@ If it fixes an open issue, please link to the issue here. If this PR closes an i
 
 
 ## Code Quality
-- [ ] I have read the [Contribution Guide](https://lincc-ppt.readthedocs.io/en/latest/source/contributing.html)
+- [ ] I have read the Contribution Guide
 - [ ] My code follows the code style of this project
 - [ ] My code builds (or compiles) cleanly without any errors or warnings
 - [ ] My code contains relevant comments and necessary documentation
@@ -50,8 +50,8 @@ If it fixes an open issue, please link to the issue here. If this PR closes an i
 - [ ] Any updated docstrings use the [NumPy docstring format](https://numpydoc.readthedocs.io/en/latest/format.html)
 
 ### Build/CI Change Checklist
-- [ ] If required or optional dependencies have changed (including version numbers), I have updated the [README](https://github.com/lincc-frameworks/python-project-template/blob/main/README.md) to reflect this
-- [ ] If this is a new CI setup, I have added the associated badge to the [README](https://github.com/lincc-frameworks/python-project-template/blob/main/README.md)
+- [ ] If required or optional dependencies have changed (including version numbers), I have updated the README to reflect this
+- [ ] If this is a new CI setup, I have added the associated badge to the README
 
 <!-- ### Version Change Checklist [For Future Use] -->
 

diff --git a/.github/workflows/build-documentation.yml b/.github/workflows/build-documentation.yml
@@ -24,9 +24,8 @@ jobs:
       run: |
         sudo apt-get update
         python -m pip install --upgrade pip
+        if [ -f docs/requirements.txt ]; then pip install -r docs/requirements.txt; fi
         pip install .
-        pip install .[dev]
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Install notebook requirements
       run: |
         sudo apt-get install pandoc

diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -33,4 +33,3 @@ jobs:
 
       run: |
         pylint -rn -sn --recursive=y ./src --rcfile=./src/.pylintrc
-
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -36,4 +36,4 @@ jobs:
       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
       with:
         user: __token__
-        password: ${{ secrets.PYPI_API_TOKEN }}
+        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml
@@ -1,5 +1,7 @@
 # This workflow will run daily at 06:45.
 # It will install Python dependencies and run tests with a variety of Python versions.
+# See documentation for help debugging smoke test issues:
+#    https://lincc-ppt.readthedocs.io/en/latest/practices/ci_testing.html#version-culprit
 
 name: Unit test smoke test
 
@@ -30,6 +32,9 @@ jobs:
         pip install .
         pip install .[dev]
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: List dependencies
+      run: |
+        pip list
     - name: Run unit tests with pytest
       run: |
         python -m pytest tests

diff --git a/.gitignore b/.gitignore
@@ -138,3 +138,6 @@ dask-worker-space/
 
 # tmp directory
 tmp/
+
+# Mac OS
+.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
     # This hook should always pass. It will print a message if the local version 
     # is out of date.
   - repo: https://github.com/lincc-frameworks/pre-commit-hooks
-    rev: v0.1
+    rev: v0.1.1
     hooks:
       - id: check-lincc-frameworks-template-version
         name: Check template version

diff --git a/.prepare_project.sh b/.prepare_project.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+echo "Initializing local git repository"
+{
+    gitversion=( $(git version | sed 's/^.* //;s/\./ /g') )
+    if let "${gitversion[0]}<2"; then
+	# manipulate directly
+	git init . && echo 'ref: refs/heads/main' >.git/HEAD
+    elif let "${gitversion[0]}==2 & ${gitversion[1]}<34"; then
+	# rename master to main
+	git init . && { git branch -m master main 2>/dev/null || true; };
+    else
+	# set the initial branch name to main
+	git init --initial-branch=main >/dev/null
+    fi
+} > /dev/null
+
+echo "Installing package and runtime dependencies in local environment"
+pip install -e . > /dev/null
+
+echo "Installing developer dependencies in local environment"
+pip install -e .'[dev]' > /dev/null
+
+echo "Installing pre-commit"
+pre-commit install > /dev/null
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,8 +1,7 @@
-sphinx==6.1.3
-sphinx_rtd_theme==1.2.0
-sphinx-autoapi==2.0.1
+sphinx
+sphinx-rtd-theme
+sphinx-autoapi
 nbsphinx
-ipykernel
 ipython
 jupytext
 jupyter

diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "healpy",
     "hipscat >= 0.1.2",
     "ipykernel", # Support for Jupyter notebooks
-    "pandas",
+    "pandas < 2.1.0",
     "pyarrow",
     "tqdm",
     "numpy < 1.25",
@@ -36,9 +36,9 @@ dev = [
     "pytest",
     "pytest-cov",
     "pytest-timeout",
-    "sphinx==6.1.3", # Used to automatically generate documentation
-    "sphinx_rtd_theme==1.2.0", # Used to render documentation
-    "sphinx-autoapi==2.0.1", # Used to automatically generate api documentation
+    "sphinx", # Used to automatically generate documentation
+    "sphinx-rtd-theme", # Used to render documentation
+    "sphinx-autoapi", # Used to automatically generate api documentation
     "mypy", # Used for static type checking of files
     # if you add dependencies here while experimenting in a notebook and you
     # want that notebook to render in your documentation, please add the
@@ -53,7 +53,7 @@ dev = [
 
 [build-system]
 requires = [
-    "setuptools>=45", # Used to build and package the Python project
+    "setuptools>=62", # Used to build and package the Python project
     "setuptools_scm>=6.2", # Gets release version from git. Makes it available programmatically
 ]
 build-backend = "setuptools.build_meta"
@@ -69,6 +69,9 @@ timeout = 1
 markers = [
     "dask: mark tests as having a dask client runtime dependency",
 ]
+testpaths = [
+    "tests",
+]
 
 [tool.coverage.report]
 omit = [

diff --git a/src/hipscat_import/catalog/resume_plan.py b/src/hipscat_import/catalog/resume_plan.py
@@ -40,7 +40,9 @@ def __post_init__(self):
 
     def gather_plan(self):
         """Initialize the plan."""
-        with tqdm(total=5, desc="Planning ", disable=not self.progress_bar) as step_progress:
+        with tqdm(
+            total=4, desc=self.get_formatted_stage_name("Planning"), disable=not self.progress_bar
+        ) as step_progress:
             ## Make sure it's safe to use existing resume state.
             super().safe_to_resume()
             step_progress.update(1)
@@ -71,7 +73,6 @@ def gather_plan(self):
             step_progress.update(1)
 
             ## Gather keys for execution.
-            step_progress.update(1)
             if not mapping_done:
                 mapped_keys = set(self.read_log_keys(self.MAPPING_STAGE))
                 self.map_files = [

diff --git a/src/hipscat_import/catalog/run_import.py b/src/hipscat_import/catalog/run_import.py
@@ -12,6 +12,7 @@
 
 import hipscat_import.catalog.map_reduce as mr
 from hipscat_import.catalog.arguments import ImportArguments
+from hipscat_import.pipeline_resume_plan import PipelineResumePlan
 
 
 def _map_pixels(args, client):
@@ -106,7 +107,9 @@ def run(args, client):
         raise ValueError("args must be type ImportArguments")
     _map_pixels(args, client)
 
-    with tqdm(total=2, desc="Binning  ", disable=not args.progress_bar) as step_progress:
+    with tqdm(
+        total=2, desc=PipelineResumePlan.get_formatted_stage_name("Binning"), disable=not args.progress_bar
+    ) as step_progress:
         raw_histogram = args.resume_plan.read_histogram(args.mapping_healpix_order)
         step_progress.update(1)
         if args.constant_healpix_order >= 0:
@@ -141,7 +144,9 @@ def run(args, client):
         _reduce_pixels(args, destination_pixel_map, client)
 
     # All done - write out the metadata
-    with tqdm(total=6, desc="Finishing", disable=not args.progress_bar) as step_progress:
+    with tqdm(
+        total=6, desc=PipelineResumePlan.get_formatted_stage_name("Finishing"), disable=not args.progress_bar
+    ) as step_progress:
         catalog_info = args.to_catalog_info(int(raw_histogram.sum()))
         io.write_provenance_info(
             catalog_base_dir=args.catalog_path,

diff --git a/src/hipscat_import/index/run_index.py b/src/hipscat_import/index/run_index.py
@@ -5,6 +5,7 @@
 
 import hipscat_import.index.map_reduce as mr
 from hipscat_import.index.arguments import IndexArguments
+from hipscat_import.pipeline_resume_plan import PipelineResumePlan
 
 
 def run(args):
@@ -16,7 +17,9 @@ def run(args):
     rows_written = mr.create_index(args)
 
     # All done - write out the metadata
-    with tqdm(total=4, desc="Finishing", disable=not args.progress_bar) as step_progress:
+    with tqdm(
+        total=4, desc=PipelineResumePlan.get_formatted_stage_name("Finishing"), disable=not args.progress_bar
+    ) as step_progress:
         # pylint: disable=duplicate-code
         catalog_info = args.to_catalog_info(int(rows_written))
         write_metadata.write_provenance_info(

diff --git a/src/hipscat_import/pipeline.py b/src/hipscat_import/pipeline.py
@@ -59,7 +59,11 @@ def _send_failure_email(args: RuntimeArguments, exception: Exception):
     message = EmailMessage()
     message["Subject"] = "hipscat-import failure."
     message["To"] = args.completion_email_address
-    message.set_content(f"failed with message:\n{exception}")
+    message.set_content(
+        f"output_catalog_name: {args.output_catalog_name}"
+        "\n\nSee logs for more details"
+        f"\n\nFailed with message:\n\n{exception}"
+    )
 
     _send_email(message)
 

diff --git a/src/hipscat_import/pipeline_resume_plan.py b/src/hipscat_import/pipeline_resume_plan.py
@@ -110,9 +110,10 @@ def wait_for_futures(self, futures, stage_name):
             stage_name(str): name of the stage (e.g. mapping, reducing)
         """
         some_error = False
+        formatted_stage_name = self.get_formatted_stage_name(stage_name)
         for future in tqdm(
             as_completed(futures),
-            desc=stage_name,
+            desc=formatted_stage_name,
             total=len(futures),
             disable=(not self.progress_bar),
         ):
@@ -123,3 +124,16 @@ def wait_for_futures(self, futures, stage_name):
         if some_error:  # pragma: no cover
             raise RuntimeError(f"Some {stage_name} stages failed. See logs for details.")
         self.touch_done_file(stage_name)
+
+    @staticmethod
+    def get_formatted_stage_name(stage_name) -> str:
+        """Create a stage name of consistent minimum length. Ensures that the tqdm
+        progress bars can line up nicely when multiple stages must run.
+
+        Args:
+            stage_name (str): name of the stage (e.g. mapping, reducing)
+        """
+        if stage_name is None or len(stage_name) == 0:
+            stage_name = "progress"
+
+        return f"{stage_name.capitalize(): <10}"
diff --git a/src/hipscat_import/soap/resume_plan.py b/src/hipscat_import/soap/resume_plan.py
@@ -35,7 +35,9 @@ def __init__(self, args: SoapArguments):
 
     def gather_plan(self, args):
         """Initialize the plan."""
-        with tqdm(total=5, desc="Planning ", disable=not self.progress_bar) as step_progress:
+        with tqdm(
+            total=3, desc=self.get_formatted_stage_name("Planning"), disable=not self.progress_bar
+        ) as step_progress:
             ## Make sure it's safe to use existing resume state.
             super().safe_to_resume()
             step_progress.update(1)

diff --git a/src/hipscat_import/soap/run_soap.py b/src/hipscat_import/soap/run_soap.py
@@ -6,6 +6,7 @@
 from hipscat.io import file_io, write_metadata
 from tqdm import tqdm
 
+from hipscat_import.pipeline_resume_plan import PipelineResumePlan
 from hipscat_import.soap.arguments import SoapArguments
 from hipscat_import.soap.map_reduce import combine_partial_results, count_joins
 from hipscat_import.soap.resume_plan import SoapPlan
@@ -36,7 +37,9 @@ def run(args, client):
         resume_plan.wait_for_counting(futures)
 
     # All done - write out the metadata
-    with tqdm(total=4, desc="Finishing", disable=not args.progress_bar) as step_progress:
+    with tqdm(
+        total=4, desc=PipelineResumePlan.get_formatted_stage_name("Finishing"), disable=not args.progress_bar
+    ) as step_progress:
         # pylint: disable=duplicate-code
         # Very similar to /index/run_index.py
         combine_partial_results(args.tmp_path, args.catalog_path)

diff --git a/tests/hipscat_import/test_pipeline_resume_plan.py b/tests/hipscat_import/test_pipeline_resume_plan.py
@@ -74,3 +74,17 @@ def test_safe_to_resume(tmp_path):
     ## If there are no more intermediate files, we don't need to set resume.
     plan.clean_resume_files()
     plan.safe_to_resume()
+
+
+def test_formatted_stage_name():
+    formatted = PipelineResumePlan.get_formatted_stage_name(None)
+    assert formatted == "Progress  "
+
+    formatted = PipelineResumePlan.get_formatted_stage_name("")
+    assert formatted == "Progress  "
+
+    formatted = PipelineResumePlan.get_formatted_stage_name("stage")
+    assert formatted == "Stage     "
+
+    formatted = PipelineResumePlan.get_formatted_stage_name("very long stage name")
+    assert formatted == "Very long stage name"