Skip to content

Commit

Permalink
Resolves #1060
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewbolster authored Feb 21, 2024
1 parent c24aa6e commit 8efa87a
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 21 deletions.
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ Features

Data Sources
------------
* UK Companies House Listings
* NI House Price Index Data Wrangling
* Electoral Office for Northern Ireland (2016-2022 Assembly Election Results)
* `UK Companies House Listings: <http://download.companieshouse.gov.uk/>`_
* `NI House Price Index Data Wrangling: <https://www.nisra.gov.uk/statistics/housing-community-and-regeneration/northern-ireland-house-price-index>`_
* `Electoral Office for Northern Ireland (2016-2022 Assembly Election Results): <https://www.eoni.org.uk/>`_
* `NI Water Quality Data: <https://www.niwater.com/>`_


Credits
-------
Expand Down
25 changes: 25 additions & 0 deletions src/bolster/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
"""
This module contains utility functions and classes that are used throughout the package.
"""
import datetime
import logging
import time
from functools import wraps

import tqdm

# Version string computed at import time: the time elapsed since 1988-05-17,
# expressed in years and formatted to two decimal places.
# 31557600 is the number of seconds in a 365.25-day year (365.25 * 86400).
version_no = f"{(datetime.date.today() - datetime.date(1988, 5, 17)).total_seconds() / 31557600:.2f}"


class TqdmLoggingHandler(logging.Handler):
"""
Custom logging handler that uses tqdm to display log messages.
i.e. `logging.getLogger().addHandler(TqdmLoggingHandler())`
"""

def __init__(self, level=logging.NOTSET):
super().__init__(level)

Expand All @@ -17,3 +27,18 @@ def emit(self, record):
self.flush()
except Exception:
self.handleError(record)


def timed(func):
    """
    Decorator that logs how long the decorated function takes to run.

    Two INFO messages are emitted through the root logger (`logging.info`):
    one just before the call and one after it returns, the latter carrying
    the elapsed time in seconds rounded to two decimal places.

    If the decorated function raises, the exception propagates unchanged and
    the second message is not logged.

    :param func: the callable to wrap
    :return: a wrapper with the same name/docstring (via `functools.wraps`)
        that returns whatever `func` returns
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed (or go negative) when the system clock is adjusted mid-call,
        # which can happen with the wall-clock time.time().
        start = time.perf_counter()
        logging.info(f"Launching {func.__name__}")
        result = func(*args, **kwargs)
        end = time.perf_counter()
        logging.info(f"{func.__name__} ran in {round(end - start, 2)}s")
        return result

    return wrapper
31 changes: 31 additions & 0 deletions src/bolster/utils/azure/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Azure Utils
"""
def az_file_url_to_query_components(url: str) -> dict:
    """
    Helper function to parse an Azure file URL into its components to then be used by `pandas`/`dask`/`fsspec` etc.

    >>> az_file_url_to_query_components("https://storageaccount.blob.core.windows.net/container/file_path.parquet")
    {'storage_account': 'storageaccount', 'container': 'container', 'file_path': 'file_path.parquet'}

    :param url: URL whose host is ``<account>.<blob|dfs>.core.windows.net`` and whose
        path is ``/<container>/<file path>``
    :return: dict with the keys ``storage_account``, ``container`` and ``file_path``;
        ``file_path`` is everything after the container, joined with ``/``
    :raises AssertionError: if the URL carries params, a query string or a fragment,
        or if its host is not ``<account>.<blob|dfs>.core.windows.net``
    :raises ValueError: if the URL has no path at all, so no container can be unpacked
    """
    # Imported locally because this module has no import block of its own;
    # without it the `urlparse` call below fails with a NameError.
    from urllib.parse import urlparse

    def _require(condition, message):
        # Raise explicitly instead of using `assert`, so the validation still
        # runs under `python -O`; AssertionError is kept so existing callers
        # see the same exception type as before.
        if not condition:
            raise AssertionError(message)

    p = urlparse(url)
    _require(not p.params, f"Invalid Params: {p.params}")
    _require(not p.fragment, f"Invalid Fragment: {p.fragment}")
    _require(not p.query, f"Invalid Query: {p.query}")

    netlocs = p.netloc.split('.')
    _require(
        len(netlocs) == 5,
        f"Invalid netlocs: {p.netloc}: expected 5 dot-separated parts, got {len(netlocs)}",
    )
    _require(
        netlocs[2:] == ['core', 'windows', 'net'],
        f"Invalid netlocs: {p.netloc} should end in core.windows.net",
    )
    _require(
        netlocs[1] in ('blob', 'dfs'),
        f"Invalid netlocs: {p.netloc} should be one of blob/dfs",
    )

    storage_account = netlocs[0]
    # The path starts with a "/", so the first element of the split is always ''.
    _, container, *paths = p.path.split('/')
    file_path = '/'.join(paths)

    return dict(
        storage_account=storage_account,
        container=container,
        file_path=file_path,
    )
18 changes: 0 additions & 18 deletions src/bolster/utils/deco.py
Original file line number Diff line number Diff line change
@@ -1,18 +0,0 @@
import logging
import time
from functools import wraps


def timed(func):
    """
    Decorator that logs how long the decorated function takes to run.

    Two INFO messages are emitted through the root logger (`logging.info`):
    one just before the call and one after it returns, the latter carrying
    the elapsed time in seconds rounded to two decimal places.

    If the decorated function raises, the exception propagates unchanged and
    the second message is not logged.

    :param func: the callable to wrap
    :return: a wrapper with the same name/docstring (via `functools.wraps`)
        that returns whatever `func` returns
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed (or go negative) when the system clock is adjusted mid-call,
        # which can happen with the wall-clock time.time().
        start = time.perf_counter()
        logging.info(f"Launching {func.__name__}")
        result = func(*args, **kwargs)
        end = time.perf_counter()
        logging.info(f"{func.__name__} ran in {round(end - start, 2)}s")
        return result

    return wrapper

0 comments on commit 8efa87a

Please sign in to comment.