Skip to content

Commit

Permalink
Fix dependency upgrades (#44)
Browse files Browse the repository at this point in the history
* Add info on pinned dependencies

* Read inventory as chunk instead of full file

* Filter on Belgian only for testing

* Switch engine to c

* Parse datetime

* Add format of datetime

* Remove unused print

* Fix format bug

* fix typo

* Use minimal 1 process

* Add pinned requirements py310

* Remove unused code sections

* Fix dependency resolution on py3.10

* Prepare CI for py310

* Replace s3fs download to boto3 again (no async)

* Patch aiobotocore to handle mocked s3 calls

* run on py310

* Format with black

* Use py310 in ci

* Use py310 in ci
  • Loading branch information
stijnvanhoey committed May 22, 2023
1 parent d5e5fae commit 7fa695e
Show file tree
Hide file tree
Showing 14 changed files with 378 additions and 57 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ on:
tags: ['v[0-9]*', '[0-9]+.[0-9]+*'] # Match tags that resemble a version
pull_request: # Run in every PR
workflow_dispatch: # Allow manually triggering the workflow
schedule:
# Run roughly every 15 days at 00:00 UTC
# (useful to check if updates on dependencies break the package)
- cron: '0 0 1,16 * *'

concurrency:
group: >-
Expand All @@ -30,7 +26,7 @@ jobs:
- uses: actions/checkout@v3
with: {fetch-depth: 0} # deep clone for setuptools-scm
- uses: actions/setup-python@v4
with: {python-version: "3.9"}
with: {python-version: "3.10"}
- name: Build package distribution files
run: pipx run --spec tox==3.27.1 tox -e clean,build
- name: Record the path of wheel distribution
Expand All @@ -50,9 +46,7 @@ jobs:
strategy:
matrix:
python:
#- "3.7" # oldest Python supported by PSF
- "3.9"
#- "3.10"
- "3.10"
platform:
- ubuntu-latest
- macos-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.9']
python-version: ['3.9', '3.10']

steps:
- uses: actions/checkout@v2
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ tox -e publish # Publish the package you have been developing to a package ind
tox -av # List all available tasks
```

To create a pinned `requirements.txt` set of dependencies, [pip-tools](https://github.com/jazzband/pip-tools) is used:

```commandline
pip-compile --extra transfer --resolver=backtracking
```



<!-- pyscaffold-notes -->
## Notes

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,4 +298,4 @@
"pyscaffold": ("https://pyscaffold.org/en/stable", None),
}

print(f"loading configurations for {project} {version} ...", file=sys.stderr)
print(f"loading configurations for {project} {version} ...", file=sys.stderr)
175 changes: 175 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --extra=transfer
#
aiobotocore[boto3]==2.5.0
# via
# s3fs
# vptstools (setup.py)
aiohttp==3.8.4
# via
# aiobotocore
# s3fs
aioitertools==0.11.0
# via aiobotocore
aiosignal==1.3.1
# via aiohttp
async-timeout==4.0.2
# via aiohttp
attrs==23.1.0
# via
# aiohttp
# frictionless
# jsonschema
bcrypt==4.0.1
# via paramiko
boto3==1.26.76
# via aiobotocore
botocore==1.29.76
# via
# aiobotocore
# boto3
# s3transfer
certifi==2023.5.7
# via requests
cffi==1.15.1
# via
# cryptography
# pynacl
chardet==5.1.0
# via frictionless
charset-normalizer==3.1.0
# via
# aiohttp
# requests
click==8.1.3
# via
# typer
# vptstools (setup.py)
colorama==0.4.6
# via typer
cryptography==40.0.2
# via paramiko
decorator==5.1.1
# via validators
frictionless==5.13.1
# via vptstools (setup.py)
frozenlist==1.3.3
# via
# aiohttp
# aiosignal
fsspec==2023.5.0
# via
# s3fs
# vptstools (setup.py)
h5py==3.8.0
# via vptstools (setup.py)
humanize==4.6.0
# via frictionless
idna==3.4
# via
# requests
# yarl
isodate==0.6.1
# via frictionless
jinja2==3.1.2
# via frictionless
jmespath==1.0.1
# via
# boto3
# botocore
jsonschema==4.17.3
# via frictionless
markdown-it-py==2.2.0
# via rich
marko==1.3.0
# via frictionless
markupsafe==2.1.2
# via jinja2
mdurl==0.1.2
# via markdown-it-py
multidict==6.0.4
# via
# aiohttp
# yarl
numpy==1.24.3
# via
# h5py
# pandas
# pyarrow
pandas==2.0.1
# via vptstools (setup.py)
paramiko==3.1.0
# via vptstools (setup.py)
petl==1.7.12
# via frictionless
pyarrow==12.0.0
# via vptstools (setup.py)
pycparser==2.21
# via cffi
pydantic==1.10.7
# via frictionless
pygments==2.15.1
# via rich
pynacl==1.5.0
# via paramiko
pyrsistent==0.19.3
# via jsonschema
python-dateutil==2.8.2
# via
# botocore
# frictionless
# pandas
python-slugify==8.0.1
# via frictionless
pytz==2023.3
# via
# pandas
# vptstools (setup.py)
pyyaml==6.0
# via frictionless
requests==2.30.0
# via frictionless
rfc3986==2.0.0
# via frictionless
rich==13.3.5
# via typer
s3fs[boto3]==2023.5.0
# via vptstools (setup.py)
s3transfer==0.6.1
# via boto3
shellingham==1.5.0.post1
# via typer
simpleeval==0.9.13
# via frictionless
six==1.16.0
# via
# isodate
# python-dateutil
stringcase==1.2.0
# via frictionless
tabulate==0.9.0
# via frictionless
text-unidecode==1.3
# via python-slugify
typer[all]==0.9.0
# via frictionless
typing-extensions==4.5.0
# via
# frictionless
# pydantic
# typer
tzdata==2023.3
# via pandas
urllib3==1.26.15
# via
# botocore
# requests
validators==0.20.0
# via frictionless
wrapt==1.15.0
# via aiobotocore
yarl==1.9.2
# via aiohttp
3 changes: 1 addition & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,9 @@ exclude =
[options.extras_require]
# Requirements to work with the transfer functionalities (FTP/S3)
transfer =
boto3
s3fs[boto3]
paramiko
fsspec
s3fs
pyarrow

# Develop requirements (semicolon/line-separated)
Expand Down
23 changes: 16 additions & 7 deletions src/vptstools/bin/transfer_baltrad.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Simple Python script that:
# - Connects via SFTP to the BALTRAD server
# - For each vp file (pvol gets ignored), download the file from the server and upload it to the "aloft" S3 bucket
# - For each vp file (pvol gets ignored), download the file from the server and
# upload it to the "aloft" S3 bucket

# Designed to be executed daily via a simple cronjob (files disappear after a few days on the BALTRAD server)
# Use a simple config file named config.ini. Create one by copying config.template.ini and filling in the values.
# Designed to be executed daily via a simple cronjob (files disappear after a few
# days on the BALTRAD server)
# Use a simple config file named config.ini. Create one by copying config.template.ini
# and filling in the values.
# If file already exists at destination => do nothing
import os
import tempfile
Expand Down Expand Up @@ -37,9 +40,11 @@ def s3_key_exists(key: str, bucket: str, s3_client) -> bool:


def extract_metadata_from_filename(filename: str) -> tuple:
"""Extract the metadata from the filename (format such as 'fropo_vp_20220809T051000Z_0xb')
"""Extract the metadata from the filename (format
such as 'fropo_vp_20220809T051000Z_0xb')
All returned values are strings, month and days are 0-prefixed if they are single-digit.
All returned values are strings, month and days are 0-prefixed if
they are single-digit.
Parameters
----------
Expand Down Expand Up @@ -99,10 +104,14 @@ def cli():
radar_code, year, month_str, day_str = extract_metadata_from_filename(
entry.filename
)
destination_key = f"baltrad/hdf5/{radar_code}/{year}/{month_str}/{day_str}/{entry.filename}"
destination_key = (
f"baltrad/hdf5/{radar_code}/{year}/"
f"{month_str}/{day_str}/{entry.filename}"
)
if not s3_key_exists(destination_key, destination_bucket, s3_client):
click.echo(
f"{destination_key} does not exist at {destination_bucket}, transfer it...",
f"{destination_key} does not exist at {destination_bucket}, "
f"transfer it...",
end="",
)
with tempfile.TemporaryDirectory() as tmpdirname:
Expand Down
27 changes: 21 additions & 6 deletions src/vptstools/bin/vph5_to_vpts.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
default=2,
type=int,
help="Range of h5 vp files to include, i.e. files modified between now and N"
"modified-days-ago. If 0, all h5 files in the bucket will be included.",
"modified-days-ago. If 0, all h5 files in the bucket will be included.",
)
@click.option(
"--aws-profile",
Expand All @@ -40,21 +40,26 @@ def cli(modified_days_ago, aws_profile):
"""
if aws_profile:
storage_options = {"profile": aws_profile}
boto3_options = {"profile_name": aws_profile}
else:
storage_options = dict()
boto3_options = dict()

# Load the S3 manifest of today
click.echo(f"Load the S3 manifest of {date.today()}.")

manifest_parent_key = (
pd.Timestamp.now(tz="utc").date() - pd.Timedelta("1day")
).strftime(f"%Y-%m-%dT{MANIFEST_HOUR_OF_DAY}Z")
s3_url = f"{MANIFEST_URL}/{manifest_parent_key}/manifest.json" # define manifest of today
# define manifest of today
s3_url = f"{MANIFEST_URL}/{manifest_parent_key}/manifest.json"

click.echo(f"Extract coverage and days to recreate from manifest {s3_url}.")
if modified_days_ago == 0:
modified_days_ago = (pd.Timestamp.now(tz="utc") - S3_BUCKET_CREATION).days + 1
click.echo(
f"Recreate the full set of bucket files (files modified since {modified_days_ago}days). "
f"Recreate the full set of bucket files (files "
f"modified since {modified_days_ago}days). "
f"This will take a while!"
)

Expand All @@ -73,10 +78,14 @@ def cli(modified_days_ago, aws_profile):

# Run vpts daily conversion for each radar-day with modified files
inbo_s3 = s3fs.S3FileSystem(**storage_options)
# PATCH TO OVERCOME RECURSIVE s3fs in wrapped context
import boto3

session = boto3.Session(**boto3_options)
s3_client = session.client("s3")

click.echo(f"Create {days_to_create_vpts.shape[0]} daily vpts files.")
for j, daily_vpts in enumerate(days_to_create_vpts["directory"]):

# Enlist files of the day to rerun (all the given day)
source, _, radar_code, year, month, day = daily_vpts
odim_path = OdimFilePath(source, radar_code, "vp", year, month, day)
Expand All @@ -88,9 +97,15 @@ def cli(modified_days_ago, aws_profile):
# - download the files of the day
h5_file_local_paths = []
for i, file_key in enumerate(odim5_files):
h5_path = OdimFilePath.from_inventory(file_key)
h5_path = OdimFilePath.from_s3fs_enlisting(file_key)
h5_local_path = str(temp_folder_path / h5_path.file_name)
inbo_s3.download(file_key, h5_local_path)
# inbo_s3.get_file(file_key, h5_local_path)
# s3fs fails in wrapped moto environment; fall back to boto3
s3_client.download_file(
S3_BUCKET,
f"{h5_path.s3_folder_path_h5}/{h5_path.file_name}",
h5_local_path,
)
h5_file_local_paths.append(h5_local_path)

# - run vpts on all locally downloaded files
Expand Down
Loading

0 comments on commit 7fa695e

Please sign in to comment.