Skip to content

Commit

Permalink
Make orchestrator skip deleting static files
Browse files Browse the repository at this point in the history
  • Loading branch information
zschira committed Feb 28, 2024
1 parent 893d201 commit b6e8cca
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 2 deletions.
2 changes: 2 additions & 0 deletions dataset_doi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ ferc60:
ferc714:
production_doi: 10.5281/zenodo.4127100
sandbox_doi: 10.5072/zenodo.3279
ferceqr:
production_doi: 10.5281/zenodo.10086108
mshamines:
production_doi: 10.5281/zenodo.7683517
sandbox_doi: 10.5072/zenodo.3242
Expand Down
6 changes: 6 additions & 0 deletions src/pudl_archiver/archivers/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class AbstractDatasetArchiver(ABC):
resumeable: bool = False
existing_files: list[str] = []

#: List of files that should not be deleted under any circumstance
files_not_to_delete: list[str] = []

# Configure which generic validation tests to run
fail_on_missing_files: bool = True
fail_on_empty_invalid_files: bool = True
Expand All @@ -104,6 +107,9 @@ def __init__(
"""
self.session = session

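# Never delete datapackage.json.
# NOTE(review): this appends to the class-level list, so the entry appears to be
# shared across instances and re-added on every instantiation — confirm intended.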
self.files_not_to_delete.append("datapackage.json")

# Create a temporary directory for downloading data

if download_directory is None:
Expand Down
17 changes: 16 additions & 1 deletion src/pudl_archiver/archivers/ferc/ferceqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,24 @@ class FercEQRArchiver(AbstractDatasetArchiver):
"""FERC EQR archiver."""

name = "ferceqr"
- concurrency_limit = 5
+ concurrency_limit = 1
directory_per_resource_chunk = True
max_wait_time = 36000
files_not_to_delete = [
"eqr_nontransaction.zip",
"ferceqr-2002.zip",
"ferceqr-2003.zip",
"ferceqr-2004.zip",
"ferceqr-2005.zip",
"ferceqr-2006.zip",
"ferceqr-2007.zip",
"ferceqr-2008.zip",
"ferceqr-2009.zip",
"ferceqr-2010.zip",
"ferceqr-2011.zip",
"ferceqr-2012.zip",
"ferceqr-2013.zip",
]

async def get_resources(self) -> ArchiveAwaitable:
"""Download FERC EQR resources."""
Expand Down
5 changes: 4 additions & 1 deletion src/pudl_archiver/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,10 @@ def _get_deletions(
# Delete files not included in new deposition
files_to_delete = []
for filename in draft.files_map:
- if filename not in resources and filename != "datapackage.json":
+ if (
+ filename not in resources
+ and filename not in self.downloader.files_not_to_delete
+ ):
logger.info(f"Deleting {filename} from deposition.")
files_to_delete.append(
_DepositionChange(_DepositionAction.DELETE, name=filename)
Expand Down

0 comments on commit b6e8cca

Please sign in to comment.