Skip to content

Commit

Permalink
Resolved #1060; the start of the azure utilities. (#1068)
Browse files Browse the repository at this point in the history
* Resolves #1060
* Minor fixups while I'm here
  • Loading branch information
andrewbolster committed Feb 21, 2024
1 parent c24aa6e commit e262b27
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 21 deletions.
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ Features

Data Sources
------------
* UK Companies House Listings
* NI House Price Index Data Wrangling
* Electoral Office for Northern Ireland (2016-2022 Assembly Election Results)
* `UK Companies House Listings <http://download.companieshouse.gov.uk/>`_
* `NI House Price Index Data Wrangling <https://www.nisra.gov.uk/statistics/housing-community-and-regeneration/northern-ireland-house-price-index>`_
* `Electoral Office for Northern Ireland (2016-2022 Assembly Election Results) <https://www.eoni.org.uk/>`_
* `NI Water Quality Data <https://www.niwater.com/>`_


Credits
-------
Expand Down
25 changes: 25 additions & 0 deletions src/bolster/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
"""
This module contains utility functions and classes that are used throughout the package.
"""
import datetime
import logging
import time
from functools import wraps

import tqdm

# Time elapsed since 1988-05-17 expressed in Julian years
# (365.25 days = 31,557,600 seconds), rendered with two decimal places.
# Evaluated once, when the module is imported.
version_no = format(
    (datetime.date.today() - datetime.date(1988, 5, 17)).total_seconds() / 31557600,
    ".2f",
)


class TqdmLoggingHandler(logging.Handler):
"""
Custom logging handler that uses tqdm to display log messages.
i.e. `logging.getLogger().addHandler(TqdmLoggingHandler())`
"""

def __init__(self, level=logging.NOTSET):
super().__init__(level)

Expand All @@ -17,3 +27,18 @@ def emit(self, record):
self.flush()
except Exception:
self.handleError(record)


def timed(func):
    """Decorator that logs the execution time of the decorated function.

    Before the call, ``Launching <name>`` is logged at INFO level via the
    root logger; after the call returns, ``<name> ran in <seconds>s`` is
    logged with the duration rounded to two decimal places.  If the wrapped
    function raises, the exception propagates and no duration is logged.

    :param func: the callable to wrap
    :return: a wrapper with the same signature/metadata (via ``functools.wraps``)
        that returns whatever ``func`` returns
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high resolution; time.time() follows
        # the system clock and can jump (NTP, manual changes), which would
        # produce wrong or even negative durations.
        start = time.perf_counter()
        logging.info(f"Launching {func.__name__}")
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        logging.info(f"{func.__name__} ran in {round(elapsed, 2)}s")
        return result

    return wrapper
40 changes: 40 additions & 0 deletions src/bolster/utils/azure/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Azure Utils
"""
from urllib.parse import urlparse


class _InvalidAzureFileUrl(ValueError, AssertionError):
    """Raised when a URL is not a well-formed Azure blob/dfs file URL.

    Derives from ``ValueError`` (the idiomatic type for bad input) and from
    ``AssertionError`` so that callers written against the earlier
    ``assert``-based validation keep catching it.
    """


def az_file_url_to_query_components(url: str):
    """
    Helper function to parse an Azure file URL into its components to then be used by `pandas`/`dask`/`fsspec` etc.

    >>> az_file_url_to_query_components("https://storageaccount.blob.core.windows.net/container/file_path.parquet")
    {'storage_account': 'storageaccount', 'container': 'container', 'file_path': 'file_path.parquet'}

    :param url: URL of the form
        ``https://<account>.<blob|dfs>.core.windows.net/<container>/<path...>``
        without params, query string or fragment
    :return: dict with the string keys ``storage_account``, ``container`` and
        ``file_path`` (``file_path`` is ``''`` when the URL names only a container)
    :raises ValueError: if the URL does not match the expected form (the
        raised exception is also an ``AssertionError``)
    """
    parsed = urlparse(url)

    # Explicit raises instead of `assert`: assertions are stripped when Python
    # runs with -O, which would silently disable all of this validation.
    if parsed.params:
        raise _InvalidAzureFileUrl(f"Invalid Params: {parsed.params}")
    if parsed.fragment:
        raise _InvalidAzureFileUrl(f"Invalid Fragment: {parsed.fragment}")
    if parsed.query:
        raise _InvalidAzureFileUrl(f"Invalid Query: {parsed.query}")

    netlocs = parsed.netloc.split(".")
    if len(netlocs) != 5:
        raise _InvalidAzureFileUrl(
            f"Invalid netlocs: {parsed.netloc}: "
            "expected <account>.<blob|dfs>.core.windows.net"
        )
    if netlocs[2:] != ["core", "windows", "net"]:
        raise _InvalidAzureFileUrl(
            f"Invalid netlocs: {parsed.netloc} should end in core.windows.net"
        )
    if netlocs[1] not in ("blob", "dfs"):
        raise _InvalidAzureFileUrl(
            f"Invalid netlocs: {parsed.netloc} should be one of blob/dfs"
        )

    storage_account = netlocs[0]

    # The path is either empty or starts with "/", so the first split element
    # is always '' and is discarded.
    _, *segments = parsed.path.split("/")
    if not segments or not segments[0]:
        # Covers both "https://host" (no path) and "https://host/" (empty container).
        raise _InvalidAzureFileUrl(
            f"Invalid path: {parsed.path!r} should start with /<container>"
        )
    container, *paths = segments
    file_path = "/".join(paths)

    return {
        "storage_account": storage_account,
        "container": container,
        "file_path": file_path,
    }
18 changes: 0 additions & 18 deletions src/bolster/utils/deco.py
Original file line number Diff line number Diff line change
@@ -1,18 +0,0 @@
import logging
import time
from functools import wraps


def timed(func):
    """Decorator that logs the execution time of the decorated function.

    Before the call, ``Launching <name>`` is logged at INFO level via the
    root logger; after the call returns, ``<name> ran in <seconds>s`` is
    logged with the duration rounded to two decimal places.  If the wrapped
    function raises, the exception propagates and no duration is logged.

    :param func: the callable to wrap
    :return: a wrapper with the same signature/metadata (via ``functools.wraps``)
        that returns whatever ``func`` returns
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high resolution; time.time() follows
        # the system clock and can jump (NTP, manual changes), which would
        # produce wrong or even negative durations.
        start = time.perf_counter()
        logging.info(f"Launching {func.__name__}")
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        logging.info(f"{func.__name__} ran in {round(elapsed, 2)}s")
        return result

    return wrapper

0 comments on commit e262b27

Please sign in to comment.