diff --git a/CHANGELOG.md b/CHANGELOG.md index f0010af8..f8860aff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.5.0 - 2022-11-07 + - Fixed dataframe columns for derived data schemas (dropped `channel_id`) + - Fixed `batch.submit_job` requests for `dbz` encoding + - Updated `quickstart.ipynb` Jupyter notebook + ## 0.4.0 - 2022-09-14 - Upgraded `dbz-python` to `0.1.5` - Added `map_symbols` option for `.to_df()` (experimental) diff --git a/LICENSE b/LICENSE index 6596d4bd..91e18a62 100644 --- a/LICENSE +++ b/LICENSE @@ -172,4 +172,3 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - diff --git a/README.md b/README.md index 8fdd690e..340db423 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ To install the latest stable version of the package from PyPI: ## Usage The library needs to be configured with an API key from your account. [Sign up](https://databento.com/signup) for free and you will automatically -receive a set of API keys to start with. Each API key is a 28-character -string that can be found on the API Keys page of your [Databento user portal](https://databento.com/platform/keys). +receive a set of API keys to start with. Each API key is a 32-character +string starting with `db-`, which can be found on the API Keys page of your [Databento user portal](https://databento.com/platform/keys). A simple Databento application looks like this: @@ -75,7 +75,7 @@ array = data.to_ndarray() # to ndarray ``` Note that the API key was also passed as a parameter, which is -[not recommended for production applications](https://docs0.databento.com/knowledge-base/new-users/securing-your-api-keys?historical=python&live=python). +[not recommended for production applications](https://docs.databento.com/knowledge-base/kb-new-users/kb-new-security-managing-api-keys?historical=python&live=python). 
Instead, you can leave out this parameter to pass your API key via the `DATABENTO_API_KEY` environment variable: ```python diff --git a/databento/__init__.py b/databento/__init__.py index 8d66fb7c..d3b5138f 100644 --- a/databento/__init__.py +++ b/databento/__init__.py @@ -1,6 +1,22 @@ from typing import Optional from databento.common.bento import Bento, FileBento, MemoryBento +from databento.common.enums import ( + Compression, + Dataset, + Delivery, + Encoding, + FeedMode, + Flags, + HistoricalGateway, + LiveGateway, + Packaging, + RollRule, + Schema, + SplitDuration, + SType, + SymbologyResolution, +) from databento.historical.api import API_VERSION from databento.historical.client import Historical from databento.historical.error import ( @@ -19,9 +35,23 @@ "BentoError", "BentoHttpError", "BentoServerError", + "Compression", + "Dataset", + "Delivery", + "Encoding", + "FeedMode", "FileBento", + "Flags", "Historical", + "HistoricalGateway", + "LiveGateway", "MemoryBento", + "Packaging", + "RollRule", + "Schema", + "SplitDuration", + "SType", + "SymbologyResolution", ] # Set to either 'DEBUG' or 'INFO', controls console logging diff --git a/databento/common/bento.py b/databento/common/bento.py index f47a1c7b..f191dc48 100644 --- a/databento/common/bento.py +++ b/databento/common/bento.py @@ -1,22 +1,26 @@ import datetime as dt import io import os.path -from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Dict, List, Optional import numpy as np import pandas as pd import zstandard -from databento.common.data import DBZ_COLUMNS, DBZ_STRUCT_MAP, DERIV_SCHEMAS +from databento.common.data import COLUMNS, DERIV_SCHEMAS, STRUCT_MAP from databento.common.enums import Compression, Encoding, Schema, SType from databento.common.logging import log_debug from databento.common.metadata import MetadataDecoder from databento.common.symbology import ProductIdMappingInterval +if TYPE_CHECKING: + from databento.historical.client import Historical + + class Bento: """The abstract base class for all Bento I/O classes.""" - def __init__(self): + def __init__(self) -> None: self._metadata: Dict[str, Any] = {} self._dtype: Optional[np.dtype] = None self._product_id_index: Dict[dt.date, Dict[int, str]] = {} @@ -31,7 +35,7 @@ def __init__(self): self._limit: Optional[int] = None self._encoding: Optional[Encoding] = None self._compression: Optional[Compression] = None - self._shape: Optional[Tuple] = None + self._record_count: Optional[int] = None def _check_metadata(self) -> None: if not self._metadata: @@ -155,7 +159,7 @@ def dtype(self) -> np.dtype: """ if self._dtype is None: self._check_metadata() - self._dtype = np.dtype(DBZ_STRUCT_MAP[self.schema]) + self._dtype = np.dtype(STRUCT_MAP[self.schema]) return self._dtype @@ -336,24 +340,20 @@ def compression(self) -> Compression: return self._compression @property - def shape(self) -> Tuple: + def record_count(self) -> int: """ - Return the shape of the data. + Return the record count. Returns ------- - Tuple - The data shape. 
+ int """ - if self._shape is None: + if self._record_count is None: self._check_metadata() - self._shape = ( - self._metadata["record_count"], - len(DBZ_STRUCT_MAP[self.schema]), - ) + self._record_count = self._metadata["record_count"] - return self._shape + return self._record_count @property def mappings(self) -> Dict[str, List[Dict[str, Any]]]: @@ -404,7 +404,7 @@ def to_ndarray(self) -> np.ndarray: """ data: bytes = self.reader(decompress=True).read() - return np.frombuffer(data, dtype=DBZ_STRUCT_MAP[self.schema]) + return np.frombuffer(data, dtype=STRUCT_MAP[self.schema]) def to_df( self, @@ -437,20 +437,12 @@ def to_df( df.set_index(self._get_index_column(), inplace=True) # Cleanup dataframe - if self.schema == Schema.MBO: - df.drop("channel_id", axis=1, inplace=True) - df = df.reindex(columns=DBZ_COLUMNS[self.schema]) + df.drop(["length", "rtype"], axis=1, inplace=True) + if self.schema == Schema.MBO or self.schema in DERIV_SCHEMAS: + df = df.reindex(columns=COLUMNS[self.schema]) df["flags"] = df["flags"] & 0xFF # Apply bitmask df["side"] = df["side"].str.decode("utf-8") df["action"] = df["action"].str.decode("utf-8") - elif self.schema in DERIV_SCHEMAS: - df.drop(["nwords", "type", "depth"], axis=1, inplace=True) - df = df.reindex(columns=DBZ_COLUMNS[self.schema]) - df["flags"] = df["flags"] & 0xFF # Apply bitmask - df["side"] = df["side"].str.decode("utf-8") - df["action"] = df["action"].str.decode("utf-8") - else: - df.drop(["nwords", "type"], axis=1, inplace=True) if pretty_ts: df.index = pd.to_datetime(df.index, utc=True) @@ -493,7 +485,7 @@ def replay(self, callback: Callable[[Any], None]) -> None: The callback to the data handler. """ - dtype = DBZ_STRUCT_MAP[self.schema] + dtype = STRUCT_MAP[self.schema] reader: BinaryIO = self.reader(decompress=True) while True: raw: bytes = reader.read(self.struct_size) @@ -590,7 +582,7 @@ def to_json(self, path: str) -> None: """ self.to_df().to_json(path, orient="records", lines=True) - def request_symbology(self, client) -> Dict[str, Any]: + def request_symbology(self, client: "Historical") -> Dict[str, Any]: """ Request symbology resolution based on the metadata properties. 
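As a reviewer aid, here is a minimal sketch of how the reworked `Bento` accessors read after this change. It assumes a valid API key; the dataset, symbols, and time range are illustrative only:

```python
import databento as db

client = db.Historical("YOUR_API_KEY")
data = client.timeseries.stream(
    dataset="GLBX.MDP3",
    start="2022-06-10T14:30",
    end="2022-06-11",
    symbols="ESM2",
    schema="mbo",
    limit=1000,
)

# The tuple-valued `shape` property is replaced by a plain record count
print(data.record_count)  # e.g. 1000

# MBO frames now retain `channel_id`, and the renamed `length`/`rtype`
# header fields are dropped for every schema
df = data.to_df()
print(df.columns)
```
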
@@ -622,7 +614,7 @@ def request_symbology(self, client) -> Dict[str, Any]: def request_full_definitions( self, - client, + client: "Historical", path: Optional[str] = None, ) -> "Bento": """ diff --git a/databento/common/data.py b/databento/common/data.py index 2668e7f9..a6550df4 100644 --- a/databento/common/data.py +++ b/databento/common/data.py @@ -36,16 +36,16 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]: ) -DBZ_COMMON_HEADER: List[Tuple[str, Union[type, str]]] = [ - ("nwords", np.uint8), - ("type", np.uint8), +RECORD_HEADER: List[Tuple[str, Union[type, str]]] = [ + ("length", np.uint8), + ("rtype", np.uint8), ("publisher_id", np.uint16), ("product_id", np.uint32), ("ts_event", np.uint64), ] -DBZ_MBP_MSG: List[Tuple[str, Union[type, str]]] = [ +MBP_MSG: List[Tuple[str, Union[type, str]]] = [ ("price", np.int64), ("size", np.uint32), ("action", "S1"), # 1 byte chararray @@ -58,7 +58,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]: ] -DBZ_OHLCV_MSG: List[Tuple[str, Union[type, str]]] = [ +OHLCV_MSG: List[Tuple[str, Union[type, str]]] = [ ("open", np.int64), ("high", np.int64), ("low", np.int64), @@ -67,8 +67,8 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]: ] -DBZ_STRUCT_MAP: Dict[Schema, List[Tuple[str, Union[type, str]]]] = { - Schema.MBO: DBZ_COMMON_HEADER +STRUCT_MAP: Dict[Schema, List[Tuple[str, Union[type, str]]]] = { + Schema.MBO: RECORD_HEADER + [ ("order_id", np.uint64), ("price", np.int64), @@ -81,9 +81,9 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]: ("ts_in_delta", np.int32), ("sequence", np.uint32), ], - Schema.MBP_1: DBZ_COMMON_HEADER + DBZ_MBP_MSG + get_deriv_ba_types(0), # 1 - Schema.MBP_10: DBZ_COMMON_HEADER - + DBZ_MBP_MSG + Schema.MBP_1: RECORD_HEADER + MBP_MSG + get_deriv_ba_types(0), # 1 + Schema.MBP_10: RECORD_HEADER + + MBP_MSG + get_deriv_ba_types(0) # 1 + get_deriv_ba_types(1) # 2 + get_deriv_ba_types(2) # 3 @@ -94,13 +94,13 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]: + get_deriv_ba_types(7) # 8 + get_deriv_ba_types(8) # 9 + get_deriv_ba_types(9), # 10 - Schema.TBBO: DBZ_COMMON_HEADER + DBZ_MBP_MSG + get_deriv_ba_types(0), - Schema.TRADES: DBZ_COMMON_HEADER + DBZ_MBP_MSG, - Schema.OHLCV_1S: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG, - Schema.OHLCV_1M: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG, - Schema.OHLCV_1H: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG, - Schema.OHLCV_1D: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG, - Schema.STATUS: DBZ_COMMON_HEADER + Schema.TBBO: RECORD_HEADER + MBP_MSG + get_deriv_ba_types(0), + Schema.TRADES: RECORD_HEADER + MBP_MSG, + Schema.OHLCV_1S: RECORD_HEADER + OHLCV_MSG, + Schema.OHLCV_1M: RECORD_HEADER + OHLCV_MSG, + Schema.OHLCV_1H: RECORD_HEADER + OHLCV_MSG, + Schema.OHLCV_1D: RECORD_HEADER + OHLCV_MSG, + Schema.STATUS: RECORD_HEADER + [ ("ts_recv", np.uint64), ("group", "S1"), # 1 byte chararray @@ -108,7 +108,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]: ("halt_reason", np.uint8), ("trading_event", np.uint8), ], - Schema.DEFINITION: DBZ_COMMON_HEADER + Schema.DEFINITION: RECORD_HEADER + [ ("ts_recv", np.uint64), ("min_price_increment", np.int64), @@ -191,24 +191,26 @@ def get_deriv_ba_fields(level: int) -> List[str]: ] -DBZ_DERIV_HEADER_FIELDS = [ +DERIV_HEADER_FIELDS = [ "ts_event", "ts_in_delta", "publisher_id", "product_id", "action", "side", + "depth", "flags", "price", "size", "sequence", ] -DBZ_COLUMNS = { +COLUMNS = { Schema.MBO: [ "ts_event", "ts_in_delta", 
"publisher_id", + "channel_id", "product_id", "order_id", "action", @@ -218,8 +220,8 @@ def get_deriv_ba_fields(level: int) -> List[str]: "size", "sequence", ], - Schema.MBP_1: DBZ_DERIV_HEADER_FIELDS + get_deriv_ba_fields(0), - Schema.MBP_10: DBZ_DERIV_HEADER_FIELDS + Schema.MBP_1: DERIV_HEADER_FIELDS + get_deriv_ba_fields(0), + Schema.MBP_10: DERIV_HEADER_FIELDS + get_deriv_ba_fields(0) + get_deriv_ba_fields(1) + get_deriv_ba_fields(2) @@ -230,6 +232,6 @@ def get_deriv_ba_fields(level: int) -> List[str]: + get_deriv_ba_fields(7) + get_deriv_ba_fields(8) + get_deriv_ba_fields(9), - Schema.TBBO: DBZ_DERIV_HEADER_FIELDS + get_deriv_ba_fields(0), - Schema.TRADES: DBZ_DERIV_HEADER_FIELDS, + Schema.TBBO: DERIV_HEADER_FIELDS + get_deriv_ba_fields(0), + Schema.TRADES: DERIV_HEADER_FIELDS, } diff --git a/databento/common/enums.py b/databento/common/enums.py index 9fd3faf0..b04b9c43 100644 --- a/databento/common/enums.py +++ b/databento/common/enums.py @@ -5,7 +5,6 @@ class HistoricalGateway(Enum): """Represents a historical data center gateway location.""" - NEAREST = "nearest" BO1 = "bo1" @@ -14,7 +13,6 @@ class LiveGateway(Enum): """Represents a live data center gateway location.""" ORIGIN = "origin" - NEAREST = "nearest" NY4 = "ny4" DC3 = "dc3" @@ -72,8 +70,8 @@ class Compression(Enum): @unique -class Duration(Enum): - """Represents the duration interval for each batch data file.""" +class SplitDuration(Enum): + """Represents the duration before splitting for each batched data file.""" DAY = "day" WEEK = "week" @@ -92,7 +90,7 @@ class Packaging(Enum): @unique class Delivery(Enum): - """Represents the delivery mechanism for batch data.""" + """Represents the delivery mechanism for batched data.""" DOWNLOAD = "download" S3 = "s3" diff --git a/databento/common/logging.py b/databento/common/logging.py index 14a7da44..5a12664d 100644 --- a/databento/common/logging.py +++ b/databento/common/logging.py @@ -1,6 +1,7 @@ import logging import os import sys +from typing import Optional import databento @@ -14,7 +15,7 @@ _ERROR = "ERROR" -def _console_log_level(): +def _console_log_level() -> Optional[str]: if databento.log: databento.log = databento.log.upper() @@ -26,7 +27,7 @@ def _console_log_level(): return None -def log_debug(msg: str): +def log_debug(msg: str) -> None: """Log the given message with DEBUG level.""" log_level = _console_log_level() if log_level == [_DEBUG, _INFO, _ERROR]: @@ -34,7 +35,7 @@ def log_debug(msg: str): logger.debug(msg) -def log_info(msg: str): +def log_info(msg: str) -> None: """Log the given message with INFO level.""" log_level = _console_log_level() if log_level in [_INFO, _ERROR]: @@ -42,7 +43,7 @@ def log_info(msg: str): logger.info(msg) -def log_error(msg: str): +def log_error(msg: str) -> None: """Log the given message with ERROR level.""" log_level = _console_log_level() if log_level in [_ERROR]: diff --git a/databento/common/metadata.py b/databento/common/metadata.py index 7484d881..1cdf89f3 100644 --- a/databento/common/metadata.py +++ b/databento/common/metadata.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any, Callable, Dict from databento.common.parsing import int_to_compression, int_to_schema, int_to_stype from dbz_python import decode_metadata @@ -25,7 +25,7 @@ def decode_to_json(raw_metadata: bytes) -> Dict[str, Any]: """ - def enum_value(fn): + def enum_value(fn: Callable[[Any], Any]) -> Any: return lambda x: fn(x).value metadata = decode_metadata(raw_metadata) diff --git a/databento/common/parsing.py b/databento/common/parsing.py 
index c19696fe..98d07a2d 100644 --- a/databento/common/parsing.py +++ b/databento/common/parsing.py @@ -141,9 +141,7 @@ def maybe_symbols_list_to_string( raise TypeError(f"invalid symbols type, was {type(symbols)}") -def maybe_date_to_string( - value: Optional[Union[date, str]], -) -> Optional[str]: +def maybe_date_to_string(value: Optional[Union[date, str]]) -> Optional[str]: """ Return a valid date string from the given value (if not None). @@ -163,8 +161,25 @@ def maybe_date_to_string( return str(pd.to_datetime(value).date()) +def datetime_to_string(value: Union[pd.Timestamp, date, str, int]) -> str: + """ + Return a valid datetime string from the given value. + + Parameters + ---------- + value : pd.Timestamp or date or str + The value to parse. + + Returns + ------- + str + + """ + return str(pd.to_datetime(value)).replace(" ", "T") + + def maybe_datetime_to_string( - value: Optional[Union[pd.Timestamp, date, str]], + value: Optional[Union[pd.Timestamp, date, str, int]], ) -> Optional[str]: """ Return a valid datetime string from the given value (if not None). @@ -182,7 +197,7 @@ def maybe_datetime_to_string( if value is None: return None - return str(pd.to_datetime(value)).replace(" ", "T") + return datetime_to_string(value) def parse_flags(value: int, apply_bitmask: bool = False) -> List[str]: diff --git a/databento/common/symbology.py b/databento/common/symbology.py index 953f3bb8..63d67106 100644 --- a/databento/common/symbology.py +++ b/databento/common/symbology.py @@ -1,6 +1,8 @@ import datetime as dt +from dataclasses import dataclass +@dataclass(frozen=True) class ProductIdMappingInterval: """ Represents a product ID to native symbol mapping over a start and end date @@ -18,23 +20,7 @@ class ProductIdMappingInterval: The product ID value. """ - def __init__( - self, - start_date: dt.date, - end_date: dt.date, - native: str, - product_id: int, - ): - self.start_date = start_date - self.end_date = end_date - self.native = native - self.product_id = product_id - - def __repr__(self): - return ( - f"{type(self).__name__}(" - f"start_date={self.start_date}, " - f"end_date={self.end_date}, " - f"native='{self.native}', " - f"product_id={self.product_id})" - ) + start_date: dt.date + end_date: dt.date + native: str + product_id: int diff --git a/databento/historical/api/batch.py b/databento/historical/api/batch.py index de2f84bf..484b68e1 100644 --- a/databento/historical/api/batch.py +++ b/databento/historical/api/batch.py @@ -6,10 +6,10 @@ Compression, Dataset, Delivery, - Duration, Encoding, Packaging, Schema, + SplitDuration, SType, ) from databento.common.parsing import ( @@ -26,20 +26,20 @@ class BatchHttpAPI(BentoHttpAPI): Provides request methods for the batch HTTP API endpoints. 
""" - def __init__(self, key, gateway): + def __init__(self, key: str, gateway: str) -> None: super().__init__(key=key, gateway=gateway) self._base_url = gateway + f"/v{API_VERSION}/batch" def submit_job( self, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], + symbols: Optional[Union[List[str], str]], schema: Union[Schema, str], - symbols: Optional[Union[List[str], str]] = None, - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, encoding: Union[Encoding, str] = "dbz", compression: Optional[Union[Compression, str]] = None, - split_duration: Union[Duration, str] = "day", + split_duration: Union[SplitDuration, str] = "day", split_size: Optional[int] = None, packaging: Union[Packaging, str] = "none", delivery: Union[Delivery, str] = "download", @@ -48,7 +48,7 @@ def submit_job( limit: Optional[int] = None, ) -> Dict[str, Any]: """ - Request a new time series data batch job from Databento. + Request a new time series data batch download from Databento. Makes a `POST /batch.submit_job` HTTP request. @@ -56,26 +56,26 @@ def submit_job( ---------- dataset : Dataset or str The dataset code (string identifier) for the request. - symbols : List[Union[str, int]] or str, optional - The product symbols to filter for. Takes up to 2,000 symbols per request. - If more than 1 symbol is specified, the data is merged and sorted by time. - If `*` or ``None`` then will be for **all** symbols. - schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa - The data record schema for the request. - start : pd.Timestamp or date or str or int, optional + start : pd.Timestamp or date or str or int The start datetime of the request time range (inclusive). Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since UNIX epoch. - end : pd.Timestamp or date or str or int, optional + end : pd.Timestamp or date or str or int The end datetime of the request time range (exclusive). Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since UNIX epoch. + symbols : List[Union[str, int]] or str + The product symbols to filter for. Takes up to 2,000 symbols per request. + If more than 1 symbol is specified, the data is merged and sorted by time. + If `*` or ``None`` then will be for **all** symbols. + schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa + The data record schema for the request. encoding : Encoding or str {'dbz', 'csv', 'json'}, default 'dbz' The data encoding. compression : Compression or str {'none', 'zstd'}, optional The data compression format (if any). If encoding is 'dbz' then specifying a `compression` is invalid (already zstd compressed). - split_duration : Duration or str {'day', 'week', 'month', 'none'}, default 'day' + split_duration : SplitDuration or str {'day', 'week', 'month', 'none'}, default 'day' The maximum time duration before batched data is split into multiple files. A week starts on Sunday UTC. split_size : int, optional @@ -94,7 +94,7 @@ def submit_job( Returns ------- Dict[str, Any] - The job info for submitted batch data request. + The job info for batch download request. 
Warnings -------- @@ -107,39 +107,34 @@ def submit_job( validate_enum(schema, Schema, "schema") validate_enum(encoding, Encoding, "encoding") validate_enum(compression, Compression, "compression") - validate_enum(split_duration, Duration, "duration") + validate_enum(split_duration, SplitDuration, "duration") validate_enum(packaging, Packaging, "packaging") validate_enum(delivery, Delivery, "delivery") validate_enum(stype_in, SType, "stype_in") validate_enum(stype_out, SType, "stype_out") - schema = Schema(schema) - encoding = Encoding(encoding) - compression = Compression(compression) - split_duration = Duration(split_duration) - packaging = Packaging(packaging) - delivery = Delivery(delivery) - stype_in = SType(stype_in) - stype_out = SType(stype_out) - - params: List[Tuple[str, str]] = BentoHttpAPI._timeseries_params( + params: List[Tuple[str, Optional[str]]] = BentoHttpAPI._timeseries_params( dataset=dataset, - symbols=symbols, - schema=schema, start=start, end=end, + symbols=symbols, + schema=Schema(schema), limit=limit, - stype_in=stype_in, - stype_out=stype_out, + stype_in=SType(stype_in), + stype_out=SType(stype_out), ) - params.append(("encoding", encoding.value)) - params.append(("compression", compression.value)) - params.append(("split_duration", split_duration.value)) - params.append(("packaging", packaging.value)) - params.append(("delivery", delivery.value)) + params.append(("encoding", Encoding(encoding).value)) + if ( + Encoding(encoding) != Encoding.DBZ + or Compression(compression) != Compression.NONE + ): + params.append(("compression", Compression(compression).value)) + params.append(("split_duration", SplitDuration(split_duration).value)) if split_size is not None: params.append(("split_size", str(split_size))) + params.append(("packaging", Packaging(packaging).value)) + params.append(("delivery", Delivery(delivery).value)) return self._post( url=self._base_url + ".submit_job", @@ -172,12 +167,9 @@ def list_jobs( The batch job details. """ - states = maybe_values_list_to_string(states) - since = maybe_datetime_to_string(since) - - params: List[Tuple[str, str]] = [ - ("states", states), - ("since", since), + params: List[Tuple[str, Optional[str]]] = [ + ("states", maybe_values_list_to_string(states)), + ("since", maybe_datetime_to_string(since)), ] return self._get( diff --git a/databento/historical/api/metadata.py b/databento/historical/api/metadata.py index 5c447475..6ffc9323 100644 --- a/databento/historical/api/metadata.py +++ b/databento/historical/api/metadata.py @@ -21,7 +21,7 @@ class MetadataHttpAPI(BentoHttpAPI): Provides request methods for the metadata HTTP API endpoints. 
""" - def __init__(self, key, gateway): + def __init__(self, key: str, gateway: str) -> None: super().__init__(key=key, gateway=gateway) self._base_url = gateway + f"/v{API_VERSION}/metadata" @@ -69,12 +69,9 @@ def list_datasets( List[str] """ - start_date = maybe_date_to_string(start_date) - end_date = maybe_date_to_string(end_date) - - params: List[Tuple[str, str]] = [ - ("start_date", start_date), - ("end_date", end_date), + params: List[Tuple[str, Optional[str]]] = [ + ("start_date", maybe_date_to_string(start_date)), + ("end_date", maybe_date_to_string(end_date)), ] response: Response = self._get( @@ -86,7 +83,7 @@ def list_datasets( def list_schemas( self, - dataset: Union[Dataset, str], + dataset: Optional[Union[Dataset, str]] = None, start_date: Optional[Union[date, str]] = None, end_date: Optional[Union[date, str]] = None, ) -> List[str]: @@ -97,7 +94,7 @@ def list_schemas( Parameters ---------- - dataset : Dataset or str + dataset : Dataset or str, optional The dataset code (string identifier) for the request. start_date : date or str, optional The start date (UTC) for the request range. @@ -109,14 +106,10 @@ def list_schemas( List[str] """ - dataset = enum_or_str_lowercase(dataset, "dataset") - start_date = maybe_date_to_string(start_date) - end_date = maybe_date_to_string(end_date) - - params: List[Tuple[str, str]] = [ - ("dataset", dataset), - ("start_date", start_date), - ("end_date", end_date), + params: List[Tuple[str, Optional[str]]] = [ + ("dataset", enum_or_str_lowercase(dataset, "dataset")), + ("start_date", maybe_date_to_string(start_date)), + ("end_date", maybe_date_to_string(end_date)), ] response: Response = self._get( @@ -131,7 +124,7 @@ def list_fields( dataset: Union[Dataset, str], schema: Optional[Union[Schema, str]] = None, encoding: Optional[Union[Encoding, str]] = None, - ) -> Dict[str, Dict]: + ) -> Dict[str, Dict[str, str]]: """ Request all fields for a dataset, schema and encoding from Databento. @@ -151,21 +144,17 @@ def list_fields( Returns ------- - Dict[str, Any] + Dict[str, Dict[str, str]] A mapping of dataset to encoding to schema to field to data type. 
""" validate_maybe_enum(schema, Schema, "schema") validate_maybe_enum(encoding, Encoding, "encoding") - dataset = enum_or_str_lowercase(dataset, "dataset") - schema = maybe_enum_or_str_lowercase(schema, "schema") - encoding = maybe_enum_or_str_lowercase(encoding, "encoding") - params: List[Tuple[str, str]] = [ - ("dataset", dataset), - ("schema", schema), - ("encoding", encoding), + ("dataset", enum_or_str_lowercase(dataset, "dataset")), + ("schema", maybe_enum_or_str_lowercase(schema, "schema")), + ("encoding", maybe_enum_or_str_lowercase(encoding, "encoding")), ] response: Response = self._get( @@ -239,14 +228,10 @@ def list_unit_prices( validate_maybe_enum(schema, Schema, "schema") validate_maybe_enum(mode, FeedMode, "mode") - dataset = enum_or_str_lowercase(dataset, "dataset") - mode = maybe_enum_or_str_lowercase(mode, "mode") - schema = maybe_enum_or_str_lowercase(schema, "schema") - - params: List[Tuple[str, str]] = [ - ("dataset", dataset), - ("mode", mode), - ("schema", schema), + params: List[Tuple[str, Optional[str]]] = [ + ("dataset", enum_or_str_lowercase(dataset, "dataset")), + ("mode", maybe_enum_or_str_lowercase(mode, "mode")), + ("schema", maybe_enum_or_str_lowercase(schema, "schema")), ] response: Response = self._get( @@ -256,37 +241,37 @@ def list_unit_prices( ) return response.json() - def get_shape( + def get_record_count( self, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], symbols: Optional[Union[List[str], str]] = None, schema: Union[Schema, str] = "trades", - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, encoding: Union[Encoding, str] = "dbz", stype_in: Optional[Union[SType, str]] = "native", limit: Optional[int] = None, - ) -> Tuple: + ) -> int: """ - Request the shape of the time series data from Databento. + Request the count of data records from Databento. - Makes a GET `/metadata.get_shape` HTTP request. + Makes a GET `/metadata.get_record_count` HTTP request. Parameters ---------- dataset : Dataset or str The dataset code for the request. + start : pd.Timestamp or date or str or int + The start datetime for the request range. Assumes UTC as timezone unless otherwise specified. + If an integer is passed, then this represents nanoseconds since UNIX epoch. + end : pd.Timestamp or date or str or int + The end datetime for the request range. Assumes UTC as timezone unless otherwise specified. + If an integer is passed, then this represents nanoseconds since UNIX epoch. symbols : List[Union[str, int]] or str, optional The product symbols to filter for. Takes up to 2,000 symbols per request. If `*` or ``None`` then will be for **all** symbols. schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa The data record schema for the request. - start : pd.Timestamp or date or str or int, optional - The start datetime for the request range. Assumes UTC as timezone unless otherwise specified. - If an integer is passed, then this represents nanoseconds since UNIX epoch. - end : pd.Timestamp or date or str or int, optional - The end datetime for the request range. Assumes UTC as timezone unless otherwise specified. - If an integer is passed, then this represents nanoseconds since UNIX epoch. encoding : Encoding or str {'dbz', 'csv', 'json'}, optional The data encoding. 
stype_in : SType or str, default 'native' @@ -297,49 +282,40 @@ def get_shape( Returns ------- - Tuple - The shape of the data expressed as size per dimension. + int + The count of records. """ validate_enum(schema, Schema, "schema") validate_enum(stype_in, SType, "stype_in") - dataset = enum_or_str_lowercase(dataset, "dataset") - symbols = maybe_symbols_list_to_string(symbols) - schema = Schema(schema) - start = maybe_datetime_to_string(start) - end = maybe_datetime_to_string(end) - encoding = Encoding(encoding) - stype_in = SType(stype_in) - - params: List[Tuple[str, str]] = [ - ("dataset", dataset), - ("symbols", symbols), - ("schema", schema.value), - ("start", start), - ("end", end), - ("encoding", encoding.value), - ("stype_in", stype_in.value), + params: List[Tuple[str, Optional[str]]] = [ + ("dataset", enum_or_str_lowercase(dataset, "dataset")), + ("symbols", maybe_symbols_list_to_string(symbols)), + ("schema", Schema(schema).value), + ("start", maybe_datetime_to_string(start)), + ("end", maybe_datetime_to_string(end)), + ("encoding", Encoding(encoding).value), + ("stype_in", SType(stype_in).value), ] if limit is not None: params.append(("limit", str(limit))) response: Response = self._get( - url=self._base_url + ".get_shape", + url=self._base_url + ".get_record_count", params=params, basic_auth=True, ) - values = response.json() - return tuple(values) + return response.json() def get_billable_size( self, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], symbols: Optional[Union[List[str], str]] = None, schema: Union[Schema, str] = "trades", - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, stype_in: Optional[Union[SType, str]] = "native", limit: Optional[int] = None, ) -> int: @@ -353,17 +329,17 @@ def get_billable_size( ---------- dataset : Dataset or str The dataset code for the request. - symbols : List[Union[str, int]] or str, optional - The product symbols to filter for. Takes up to 2,000 symbols per request. - If `*` or ``None`` then will be for **all** symbols. - schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa - The data record schema for the request. start : pd.Timestamp or date or str or int, optional The start datetime for the request range. Assumes UTC as timezone unless otherwise specified. If an integer is passed, then this represents nanoseconds since UNIX epoch. end : pd.Timestamp or date or str or int, optional The end datetime for the request range. Assumes UTC as timezone unless otherwise specified. If an integer is passed, then this represents nanoseconds since UNIX epoch. + symbols : List[Union[str, int]] or str, optional + The product symbols to filter for. Takes up to 2,000 symbols per request. + If `*` or ``None`` then will be for **all** symbols. + schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa + The data record schema for the request. stype_in : SType or str, default 'native' The input symbology type to resolve from. 
limit : int, optional @@ -378,16 +354,13 @@ def get_billable_size( validate_enum(schema, Schema, "schema") validate_enum(stype_in, SType, "stype_in") - schema = Schema(schema) - stype_in = SType(stype_in) - - params: List[Tuple[str, str]] = super()._timeseries_params( + params: List[Tuple[str, Optional[str]]] = super()._timeseries_params( dataset=dataset, - symbols=symbols, - schema=schema, start=start, end=end, - stype_in=stype_in, + symbols=symbols, + schema=Schema(schema), + stype_in=SType(stype_in), limit=limit, ) @@ -402,11 +375,11 @@ def get_billable_size( def get_cost( self, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], mode: Union[FeedMode, str] = "historical-streaming", symbols: Optional[Union[List[str], str]] = None, schema: Union[Schema, str] = "trades", - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, stype_in: Optional[Union[SType, str]] = "native", limit: Optional[int] = None, ) -> float: @@ -420,6 +393,12 @@ def get_cost( ---------- dataset : Dataset or str The dataset code for the request. + start : pd.Timestamp or date or str or int + The start datetime for the request range. Assumes UTC as timezone unless otherwise specified. + If an integer is passed, then this represents nanoseconds since UNIX epoch. + end : pd.Timestamp or date or str or int + The end datetime for the request range. Assumes UTC as timezone unless otherwise specified. + If an integer is passed, then this represents nanoseconds since UNIX epoch. mode : FeedMode or str {'live', 'historical-streaming', 'historical'}, default 'historical-streaming' The data feed mode for the request. symbols : List[Union[str, int]] or str, optional @@ -427,12 +406,6 @@ def get_cost( If `*` or ``None`` then will be for **all** symbols. schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa The data record schema for the request. - start : pd.Timestamp or date or str or int, optional - The start datetime for the request range. Assumes UTC as timezone unless otherwise specified. - If an integer is passed, then this represents nanoseconds since UNIX epoch. - end : pd.Timestamp or date or str or int, optional - The end datetime for the request range. Assumes UTC as timezone unless otherwise specified. - If an integer is passed, then this represents nanoseconds since UNIX epoch. stype_in : SType or str, default 'native' The input symbology type to resolve from. 
limit : int, optional @@ -448,21 +421,17 @@ def get_cost( validate_enum(schema, Schema, "schema") validate_enum(stype_in, SType, "stype_in") - mode = FeedMode(mode) - schema = Schema(schema) - stype_in = SType(stype_in) - - params: List[Tuple[str, str]] = super()._timeseries_params( + params: List[Tuple[str, Optional[str]]] = super()._timeseries_params( dataset=dataset, - symbols=symbols, - schema=schema, start=start, end=end, - stype_in=stype_in, + symbols=symbols, + schema=Schema(schema), + stype_in=SType(stype_in), limit=limit, ) - params.append(("mode", mode.value)) + params.append(("mode", FeedMode(mode).value)) response: Response = self._get( url=self._base_url + ".get_cost", diff --git a/databento/historical/api/symbology.py b/databento/historical/api/symbology.py index 33352323..372d69cf 100644 --- a/databento/historical/api/symbology.py +++ b/databento/historical/api/symbology.py @@ -14,7 +14,7 @@ class SymbologyHttpAPI(BentoHttpAPI): Provides request methods for the symbology HTTP API endpoints. """ - def __init__(self, key, gateway): + def __init__(self, key: str, gateway: str) -> None: super().__init__(key=key, gateway=gateway) self._base_url = gateway + f"/v{API_VERSION}/symbology" @@ -57,20 +57,13 @@ def resolve( date range. """ - dataset = enum_or_str_lowercase(dataset, "dataset") - symbols = maybe_symbols_list_to_string(symbols) - stype_in = enum_or_str_lowercase(stype_in, "stype_in") - stype_out = enum_or_str_lowercase(stype_out, "stype_out") - start_date = str(pd.to_datetime(start_date).date()) - end_date = str(pd.to_datetime(end_date).date()) - - params: List[Tuple[str, str]] = [ - ("dataset", dataset), - ("symbols", symbols), - ("stype_in", stype_in), - ("stype_out", stype_out), - ("start_date", start_date), - ("end_date", end_date), + params: List[Tuple[str, Optional[str]]] = [ + ("dataset", enum_or_str_lowercase(dataset, "dataset")), + ("symbols", maybe_symbols_list_to_string(symbols)), + ("stype_in", enum_or_str_lowercase(stype_in, "stype_in")), + ("stype_out", enum_or_str_lowercase(stype_out, "stype_out")), + ("start_date", str(pd.to_datetime(start_date).date())), + ("end_date", str(pd.to_datetime(end_date).date())), ("default_value", default_value), ] diff --git a/databento/historical/api/timeseries.py b/databento/historical/api/timeseries.py index 70a5c71c..ace6fdf0 100644 --- a/databento/historical/api/timeseries.py +++ b/databento/historical/api/timeseries.py @@ -15,17 +15,17 @@ class TimeSeriesHttpAPI(BentoHttpAPI): Provides request methods for the time series HTTP API endpoints. """ - def __init__(self, key, gateway): + def __init__(self, key: str, gateway: str) -> None: super().__init__(key=key, gateway=gateway) self._base_url = gateway + f"/v{API_VERSION}/timeseries" def stream( self, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], symbols: Optional[Union[List[str], str]] = None, schema: Union[Schema, str] = "trades", - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, stype_in: Union[SType, str] = "native", stype_out: Union[SType, str] = "product_id", limit: Optional[int] = None, @@ -36,22 +36,28 @@ def stream( Makes a `GET /timeseries.stream` HTTP request. + Primary method for getting historical intraday market data, daily data, + instrument definitions and market status data directly into your application. + + This method only returns after all of the data has been downloaded, + which can take a long time. 
For large requests, consider using a batch download. + Parameters ---------- dataset : Dataset or str The dataset code (string identifier) for the request. + start : pd.Timestamp or date or str or int + The start datetime (UTC) of the request time range (inclusive). + If an integer is passed, then this represents nanoseconds since UNIX epoch. + end : pd.Timestamp or date or str or int + The end datetime (UTC) of the request time range (exclusive). + If an integer is passed, then this represents nanoseconds since UNIX epoch. symbols : List[Union[str, int]] or str, optional The product symbols to filter for. Takes up to 2,000 symbols per request. If more than 1 symbol is specified, the data is merged and sorted by time. If `*` or ``None`` then will be for **all** symbols. schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa The data record schema for the request. - start : pd.Timestamp or date or str or int, optional - The start datetime (UTC) of the request time range (inclusive). - If an integer is passed, then this represents nanoseconds since UNIX epoch. - end : pd.Timestamp or date or str or int, optional - The end datetime (UTC) of the request time range (exclusive). - If an integer is passed, then this represents nanoseconds since UNIX epoch. stype_in : SType or str, default 'native' The input symbology type to resolve from. stype_out : SType or str, default 'product_id' @@ -79,18 +85,14 @@ def stream( validate_enum(stype_in, SType, "stype_in") validate_enum(stype_out, SType, "stype_out") - schema = Schema(schema) - stype_in = SType(stype_in) - stype_out = SType(stype_out) - - params: List[Tuple[str, str]] = BentoHttpAPI._timeseries_params( + params: List[Tuple[str, Optional[str]]] = BentoHttpAPI._timeseries_params( dataset=dataset, - symbols=symbols, - schema=schema, start=start, end=end, - stype_in=stype_in, - stype_out=stype_out, + symbols=symbols, + schema=Schema(schema), + stype_in=SType(stype_in), + stype_out=SType(stype_out), limit=limit, ) @@ -98,7 +100,7 @@ def stream( self._pre_check_data_size( symbols=symbols, - schema=schema, + schema=Schema(schema), start=start, end=end, limit=limit, @@ -118,10 +120,10 @@ def stream( async def stream_async( self, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], symbols: Optional[Union[List[str], str]] = None, schema: Union[Schema, str] = "trades", - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, stype_in: Union[SType, str] = "native", stype_out: Union[SType, str] = "product_id", limit: Optional[int] = None, @@ -132,22 +134,28 @@ async def stream_async( Makes a `GET /timeseries.stream` HTTP request. + Primary method for getting historical intraday market data, daily data, + instrument definitions and market status data directly into your application. + + This coroutine will complete once all of the data has been downloaded, + which can take a long time. For large requests, consider using a batch download. + Parameters ---------- dataset : Dataset or str The dataset code (string identifier) for the request. + start : pd.Timestamp or date or str or int + The start datetime (UTC) of the request time range (inclusive). + If an integer is passed, then this represents nanoseconds since UNIX epoch. 
+ end : pd.Timestamp or date or str or int + The end datetime (UTC) of the request time range (exclusive). + If an integer is passed, then this represents nanoseconds since UNIX epoch. symbols : List[Union[str, int]] or str, optional The product symbols to filter for. Takes up to 2,000 symbols per request. If more than 1 symbol is specified, the data is merged and sorted by time. If `*` or ``None`` then will be for **all** symbols. schema : Schema or str {'mbo', 'mbp-1', 'mbp-10', 'trades', 'tbbo', 'ohlcv-1s', 'ohlcv-1m', 'ohlcv-1h', 'ohlcv-1d', 'definition', 'statistics', 'status'}, default 'trades' # noqa The data record schema for the request. - start : pd.Timestamp or date or str or int, optional - The start datetime (UTC) of the request time range (inclusive). - If an integer is passed, then this represents nanoseconds since UNIX epoch. - end : pd.Timestamp or date or str or int, optional - The end datetime (UTC) of the request time range (exclusive). - If an integer is passed, then this represents nanoseconds since UNIX epoch. stype_in : SType or str, default 'native' The input symbology type to resolve from. stype_out : SType or str, default 'product_id' @@ -175,18 +183,14 @@ async def stream_async( validate_enum(stype_in, SType, "stype_in") validate_enum(stype_out, SType, "stype_out") - schema = Schema(schema) - stype_in = SType(stype_in) - stype_out = SType(stype_out) - - params: List[Tuple[str, str]] = BentoHttpAPI._timeseries_params( + params: List[Tuple[str, Optional[str]]] = BentoHttpAPI._timeseries_params( dataset=dataset, - symbols=symbols, - schema=schema, start=start, end=end, - stype_in=stype_in, - stype_out=stype_out, + symbols=symbols, + schema=Schema(schema), + stype_in=SType(stype_in), + stype_out=SType(stype_out), limit=limit, ) @@ -194,7 +198,7 @@ async def stream_async( self._pre_check_data_size( symbols=symbols, - schema=schema, + schema=Schema(schema), start=start, end=end, limit=limit, @@ -211,14 +215,14 @@ async def stream_async( return bento - def _pre_check_data_size( + def _pre_check_data_size( # noqa (prefer not to make static) self, symbols: Optional[Union[List[str], str]], schema: Schema, start: Optional[Union[pd.Timestamp, date, str, int]], end: Optional[Union[pd.Timestamp, date, str, int]], limit: Optional[int], - ): + ) -> None: if limit and limit < 10**7: return @@ -232,7 +236,7 @@ def _pre_check_data_size( "\nThe size of the current streaming request is estimated " "to be 5 GB or greater. We recommend smaller " "individual streaming request sizes, or alternatively " - "submit a batch data request." + "submit a batch download request." 
"\nYou can check the uncompressed binary size of a request " "through the metadata API (from the client library, or over " "HTTP).\nThis warning can be suppressed " @@ -240,7 +244,7 @@ def _pre_check_data_size( ) -def _is_large_number_of_symbols(symbols: Optional[Union[List[str], str]]): +def _is_large_number_of_symbols(symbols: Optional[Union[List[str], str]]) -> bool: if not symbols: return True # All symbols @@ -253,11 +257,14 @@ def _is_large_number_of_symbols(symbols: Optional[Union[List[str], str]]): return False -def _is_large_data_size_schema(schema: Schema): +def _is_large_data_size_schema(schema: Schema) -> bool: return schema in (Schema.MBO, Schema.MBP_10) -def _is_greater_than_one_day(start, end): +def _is_greater_than_one_day( + start: Optional[Union[pd.Timestamp, date, str, int]], + end: Optional[Union[pd.Timestamp, date, str, int]], +) -> bool: if start is None or end is None: return True diff --git a/databento/historical/client.py b/databento/historical/client.py index a2e326e9..6504bbab 100644 --- a/databento/historical/client.py +++ b/databento/historical/client.py @@ -20,7 +20,7 @@ class Historical: key : str, optional The user API key for authentication. If ``None`` then the `DATABENTO_API_KEY` environment variable is used. - gateway : HistoricalGateway or str, default HistoricalGateway.NEAREST + gateway : HistoricalGateway or str, default HistoricalGateway.BO1 The API server gateway. If ``None`` then the default gateway is used. @@ -33,7 +33,7 @@ class Historical: def __init__( self, key: Optional[str] = None, - gateway: Union[HistoricalGateway, str] = HistoricalGateway.NEAREST, + gateway: Union[HistoricalGateway, str] = HistoricalGateway.BO1, ): if key is None: key = os.environ.get("DATABENTO_API_KEY") @@ -42,7 +42,7 @@ def __init__( # Configure data access gateway gateway = enum_or_str_lowercase(gateway, "gateway") - if gateway in ("nearest", "bo1"): + if gateway == "bo1": gateway = "https://hist.databento.com" self._key = key diff --git a/databento/historical/error.py b/databento/historical/error.py index 2f6b5dae..f6348dd7 100644 --- a/databento/historical/error.py +++ b/databento/historical/error.py @@ -1,3 +1,6 @@ +from typing import Any, Dict, Optional, Union + + class BentoError(Exception): """ Represents a Databento specific error. 
@@ -13,18 +16,18 @@ class BentoHttpError(BentoError): def __init__( self, - http_status=None, - http_body=None, - json_body=None, - message=None, - headers=None, - ): + http_status: int, + http_body: Optional[Union[bytes, str]] = None, + json_body: Optional[Dict[str, Any]] = None, + message: Optional[str] = None, + headers: Optional[Any] = None, + ) -> None: super(BentoHttpError, self).__init__(message) - if http_body and hasattr(http_body, "decode"): + if http_body and isinstance(http_body, bytes): try: http_body = http_body.decode("utf-8") - except BaseException: + except UnicodeDecodeError: http_body = ( "" @@ -37,7 +40,7 @@ def __init__( self.headers = headers or {} self.request_id = self.headers.get("request-id", None) - def __str__(self): + def __str__(self) -> str: msg = self.message or "" msg = f"{self.http_status} {msg}" if self.request_id is not None: @@ -45,7 +48,7 @@ def __str__(self): else: return msg - def __repr__(self): + def __repr__(self) -> str: return ( f"{type(self).__name__}(" f"request_id={self.request_id}, " @@ -61,15 +64,15 @@ class BentoServerError(BentoHttpError): def __init__( self, - http_status=None, - http_body=None, - json_body=None, - message=None, - headers=None, - ): + http_status: int, + http_body: Optional[Union[bytes, str]] = None, + json_body: Optional[Dict[str, Any]] = None, + message: Optional[str] = None, + headers: Optional[Any] = None, + ) -> None: super().__init__( - http_body=http_body, http_status=http_status, + http_body=http_body, json_body=json_body, message=message, headers=headers, @@ -83,15 +86,15 @@ class BentoClientError(BentoHttpError): def __init__( self, - http_status=None, - http_body=None, - json_body=None, - message=None, - headers=None, - ): + http_status: int, + http_body: Optional[Union[bytes, str]] = None, + json_body: Optional[Dict[str, Any]] = None, + message: Optional[str] = None, + headers: Optional[Any] = None, + ) -> None: super().__init__( - http_body=http_body, http_status=http_status, + http_body=http_body, json_body=json_body, message=message, headers=headers, diff --git a/databento/historical/http.py b/databento/historical/http.py index a539d5e4..15332d26 100644 --- a/databento/historical/http.py +++ b/databento/historical/http.py @@ -1,7 +1,7 @@ import sys from datetime import date from json.decoder import JSONDecodeError -from typing import Any, BinaryIO, List, Optional, Tuple, Union +from typing import Any, BinaryIO, List, Optional, Sequence, Tuple, Union import aiohttp import pandas as pd @@ -11,8 +11,8 @@ from databento.common.enums import Dataset, Schema, SType from databento.common.logging import log_info from databento.common.parsing import ( + datetime_to_string, enum_or_str_lowercase, - maybe_datetime_to_string, maybe_symbols_list_to_string, ) from databento.historical.error import BentoClientError, BentoServerError @@ -42,26 +42,20 @@ def __init__(self, key: str, gateway: str): def _timeseries_params( *, dataset: Union[Dataset, str], + start: Union[pd.Timestamp, date, str, int], + end: Union[pd.Timestamp, date, str, int], symbols: Optional[Union[List[str], str]] = None, schema: Schema, - start: Optional[Union[pd.Timestamp, date, str, int]] = None, - end: Optional[Union[pd.Timestamp, date, str, int]] = None, limit: Optional[int] = None, stype_in: SType, stype_out: SType = SType.PRODUCT_ID, - ) -> List[Tuple[str, str]]: - # Parse inputs - dataset = enum_or_str_lowercase(dataset, "dataset") - symbols = maybe_symbols_list_to_string(symbols) - start = maybe_datetime_to_string(start) - end = 
maybe_datetime_to_string(end) - + ) -> List[Tuple[str, Optional[str]]]: params: List[Tuple[str, Any]] = [ - ("dataset", dataset), - ("symbols", symbols), + ("dataset", enum_or_str_lowercase(dataset, "dataset")), + ("start", datetime_to_string(start)), + ("end", datetime_to_string(end)), + ("symbols", maybe_symbols_list_to_string(symbols) or "*"), ("schema", schema.value), - ("start", start), - ("end", end), ("stype_in", stype_in.value), ("stype_out", stype_out.value), ] @@ -78,7 +72,7 @@ def _create_bento(path: str) -> Union[MemoryBento, FileBento]: else: return FileBento(path=path) - def _check_api_key(self): + def _check_api_key(self) -> None: if self._key == "YOUR_API_KEY": raise ValueError( "The API key is currently set to 'YOUR_API_KEY'. " @@ -89,7 +83,7 @@ def _check_api_key(self): def _get( self, url: str, - params: Optional[List[Tuple[str, str]]] = None, + params: Optional[List[Tuple[str, Optional[str]]]] = None, basic_auth: bool = False, ) -> Response: self._check_api_key() @@ -109,7 +103,7 @@ def _get( async def _get_async( self, url: str, - params: Optional[List[Tuple[str, str]]] = None, + params: Optional[List[Tuple[str, Optional[str]]]] = None, basic_auth: bool = False, ) -> ClientResponse: self._check_api_key() @@ -129,7 +123,7 @@ async def _get_async( def _post( self, url: str, - params: Optional[List[Tuple[str, str]]] = None, + params: Optional[List[Tuple[str, Optional[str]]]] = None, basic_auth: bool = False, ) -> Response: self._check_api_key() @@ -149,7 +143,7 @@ def _post( def _stream( self, url: str, - params: List[Tuple[str, str]], + params: List[Tuple[str, Optional[str]]], basic_auth: bool, bento: Bento, ) -> None: @@ -185,7 +179,7 @@ def _stream( async def _stream_async( self, url: str, - params: List[Tuple[str, Optional[str]]], + params: Sequence[Tuple[str, Optional[str]]], basic_auth: bool, bento: Bento, ) -> None: diff --git a/databento/version.py b/databento/version.py index 6a9beea8..3d187266 100644 --- a/databento/version.py +++ b/databento/version.py @@ -1 +1 @@ -__version__ = "0.4.0" +__version__ = "0.5.0" diff --git a/examples/historical_metadata_get_shape.py b/examples/historical_metadata_get_record_count.py similarity index 79% rename from examples/historical_metadata_get_shape.py rename to examples/historical_metadata_get_record_count.py index e5066f5c..17b8b8b4 100644 --- a/examples/historical_metadata_get_shape.py +++ b/examples/historical_metadata_get_record_count.py @@ -1,5 +1,3 @@ -from typing import Tuple - import databento as db @@ -9,7 +7,7 @@ key = "YOUR_API_KEY" client = db.Historical(key=key) - shape: Tuple = client.metadata.get_shape( + count: int = client.metadata.get_record_count( dataset="GLBX.MDP3", symbols=["ESM2"], schema="mbo", @@ -18,4 +16,4 @@ encoding="csv", ) - print(shape) + print(count) diff --git a/examples/historical_timeseries_async.py b/examples/historical_timeseries_async.py index c1d8f3dd..5d060049 100644 --- a/examples/historical_timeseries_async.py +++ b/examples/historical_timeseries_async.py @@ -5,7 +5,7 @@ from databento import Bento -async def request_stream_async(): +async def request_stream_async() -> None: db.log = "debug" # optional debug logging key = "YOUR_API_KEY" diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..e4b64f58 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,11 @@ +[mypy] +python_version = 3.7 +disallow_untyped_defs = true +disallow_any_generics = true +disallow_subclassing_any = true +ignore_missing_imports = true +namespace_packages = true +no_strict_optional = true +warn_no_return = 
true +warn_unused_configs = true +warn_unused_ignores = true diff --git a/notebooks/quickstart.ipynb b/notebooks/quickstart.ipynb index c5dfe388..93fa7d75 100644 --- a/notebooks/quickstart.ipynb +++ b/notebooks/quickstart.ipynb @@ -29,16 +29,16 @@ "\n", "For information on our symbology, refer to https://docs.databento.com/reference-historical/basics/symbology. \n", "\n", - "For a more detailed API reference, refer to https://docs.databento.com/reference-historical.\n", + "For a more detailed API reference, refer to https://docs.databento.com/reference-historical.\n", "\n", "This tutorial covers the following:\n", "- Using the historical client to request metadata\n", "- Using the historical client to request time series market data\n", - "- Working with Bento data I/O helper objects\n", + "- Working with Bento data I/O helper objects\n", "- Using the historical client to make batch data requests\n", "- Querying batch job states\n", "\n", - "**Tip:** You can call help() on any class or method to see the 'docstring'." + "**Tip:** You can call help() on any class or method to see the 'docstring'." ] }, { @@ -79,14 +79,14 @@ "source": [ "## Historical data client\n", "\n", - "Once you've installed the Python client library, you can import it and initialize a historical client for requests. We'll use this `client` throughout the rest of the tutorial.\n", + "Once you've installed the Python client library, you can import it and initialize a historical client for requests. We'll use this `client` throughout the rest of the tutorial.\n", "\n", - "To initialize a client, you need to provide a valid API key. You can find these on the API Keys page of your Databento portal at https://databento.com." + "To initialize a client, you need to provide a valid API key. You can find these on the API Keys page of your Databento portal at https://databento.com." 
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "9c5f6d59-ef17-4a94-8ba2-b012cec3010d", "metadata": { "pycharm": { @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "c5a0a08d-962a-42ce-8fb8-3bf23741dc7c", "metadata": { "pycharm": { @@ -130,7 +130,7 @@ "['GLBX.MDP3', 'XNAS.ITCH']" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "18d41c7e-b82c-4236-b261-e49e089e3e23", "metadata": { "pycharm": { @@ -160,13 +160,10 @@ " 'ohlcv-1s',\n", " 'ohlcv-1m',\n", " 'ohlcv-1h',\n", - " 'ohlcv-1d',\n", - " 'definition',\n", - " 'statistics',\n", - " 'status']" + " 'ohlcv-1d']" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -177,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "id": "047e44d4-7081-4885-bb47-f09fb713c449", "metadata": { "pycharm": { @@ -201,7 +198,7 @@ " 'sequence': 'int'}}}}" ] }, - "execution_count": 11, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -212,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "id": "2ef363e9-7150-4ace-a2d8-dcc355d73f72", "metadata": { "pycharm": { @@ -226,7 +223,7 @@ "['dbz', 'csv', 'json']" ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -237,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "id": "25a67ca8-10f0-41cb-a932-95a5e8dcd5f1", "metadata": { "pycharm": { @@ -251,7 +248,7 @@ "['none', 'zstd']" ] }, - "execution_count": 13, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -276,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "id": "3b0eec87-6b2e-4498-83c4-60efb7a140e0", "metadata": { "pycharm": { @@ -287,21 +284,21 @@ { "data": { "text/plain": [ - "{'historical-streaming': {'mbo': 21.05,\n", - " 'mbp-1': 82.05,\n", - " 'mbp-10': 31.95,\n", - " 'tbbo': 22.56,\n", - " 'trades': 67.76,\n", - " 'ohlcv-1s': 78.68,\n", - " 'ohlcv-1m': 63.32,\n", - " 'ohlcv-1h': 52.91,\n", - " 'ohlcv-1d': 41.5,\n", - " 'definition': 66.8,\n", - " 'statistics': 97.92,\n", - " 'status': 62.72}}" + "{'historical-streaming': {'mbo': 1.1,\n", + " 'mbp-1': 2.42,\n", + " 'mbp-10': 0.45,\n", + " 'tbbo': 17.89,\n", + " 'trades': 24.8,\n", + " 'ohlcv-1s': 50.5,\n", + " 'ohlcv-1m': 63.5,\n", + " 'ohlcv-1h': 130.0,\n", + " 'ohlcv-1d': 175.0,\n", + " 'definition': 1.66,\n", + " 'statistics': 0.71,\n", + " 'status': 1300.0}}" ] }, - "execution_count": 14, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -326,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 47, "id": "367e6608-5dbe-4fcd-9104-b367e001e2bf", "metadata": { "pycharm": { @@ -340,8 +337,8 @@ " \"symbols\": \"ES.FUT\",\n", " \"stype_in\": \"smart\",\n", " \"schema\": \"mbo\",\n", - " \"start\": \"2020-12-27\",\n", - " \"end\": \"2020-12-30\",\n", + " \"start\": \"2022-06-10T14:30\",\n", + " \"end\": \"2022-06-11\",\n", "}" ] }, @@ -355,12 +352,12 @@ }, "source": [ "### Data cost\n", - "Before making a request for data, you can query the expected price in USD. The price is determined from the following formula: `unit_cost` * `uncompressed_size_GB`." 
+ "Before making a request for data, you can query the expected price in USD. The price is determined from the following formula: `unit_cost` * `uncompressed_size_GB`." ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 48, "id": "60a9422e-1da0-4f72-9ecc-bed622a9988c", "metadata": { "pycharm": { @@ -371,10 +368,10 @@ { "data": { "text/plain": [ - "13.778901880607009" + "0.7184975445270538" ] }, - "execution_count": 18, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -396,14 +393,14 @@ "\n", "The rest of this tutorial involves historical time series data. Here's how to request for this data.\n", "\n", - "The historical time series data is streamed into an in-memory buffer encapsulated by a `Bento` object, which we'll use later to work with the data.\n", + "The historical time series data is streamed into an in-memory buffer encapsulated by a `Bento` object, which we'll use later to work with the data.\n", "\n", - "The following code sample requests for all E-mini S&P500 futures contract outrights, active between 2020-12-27 and 2020-12-30 using `smart` symbology." + "The following code sample requests for all E-mini S&P500 Futures contracts, active between 2022-06-10T14:30 and 2022-06-11 using `smart` symbology." ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 49, "id": "fe88dd07-968a-421c-a946-1f73cb9576e5", "metadata": { "pycharm": { @@ -418,37 +415,21 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": 20, - "id": "5d867614-76ad-48f1-88e8-b839b6c0ffb6", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, { "cell_type": "markdown", "id": "33e7c239-6050-4d55-a47a-7366873a5eeb", "metadata": { - "tags": [], "pycharm": { "name": "#%% md\n" - } + }, + "tags": [] }, "source": [ "## Working with the Bento helper object\n", "\n", "All time series data requests include a metadata header with the following specifications:\n", "- The original query paramaters (these can be used to re-request the data)\n", - "- Data shape\n", - "- Symbology mappings\n", - "- Instrument 'mini-definitions'" + "- Symbology mappings" ] }, { @@ -465,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 50, "id": "41318aee-3ffe-45cd-8218-08777537e2d2", "metadata": { "pycharm": { @@ -479,7 +460,7 @@ "'GLBX.MDP3'" ] }, - "execution_count": 21, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -490,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 51, "id": "fd329859-dd1e-4315-b6e9-ae79f3761306", "metadata": { "pycharm": { @@ -504,7 +485,7 @@ "" ] }, - "execution_count": 22, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -515,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 52, "id": "9dd6aaf8-47f9-4153-bbcf-6403d435bb32", "metadata": { "pycharm": { @@ -529,7 +510,7 @@ "['ES.FUT']" ] }, - "execution_count": 23, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -540,7 +521,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 53, "id": "ad313e09-3887-4a64-9aba-fc49ebfd7761", "metadata": { "pycharm": { @@ -554,7 +535,7 @@ "" ] }, - "execution_count": 24, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -565,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 54, "id": "a5e2683e-fe9a-43f0-bcc5-dc5933ee37da", 
"metadata": { "pycharm": { @@ -579,7 +560,7 @@ "" ] }, - "execution_count": 25, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -590,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 55, "id": "2013de29-346a-4903-b304-280d1ef265d2", "metadata": { "pycharm": { @@ -601,10 +582,10 @@ { "data": { "text/plain": [ - "Timestamp('2020-12-27 00:00:00+0000', tz='UTC')" + "Timestamp('2022-06-10 14:30:00+0000', tz='UTC')" ] }, - "execution_count": 26, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -615,7 +596,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 56, "id": "f712b3f8-60ec-4ac1-baa9-d755fc046aef", "metadata": { "pycharm": { @@ -626,10 +607,10 @@ { "data": { "text/plain": [ - "Timestamp('2020-12-30 00:00:00+0000', tz='UTC')" + "Timestamp('2022-06-11 00:00:00+0000', tz='UTC')" ] }, - "execution_count": 27, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -640,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 57, "id": "057cf57d-9ab9-44bb-9327-c1f2df56c7fa", "metadata": { "pycharm": { @@ -654,7 +635,7 @@ "1000" ] }, - "execution_count": 28, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -665,32 +646,7 @@ }, { "cell_type": "code", - "execution_count": 29, - "id": "e6c68d7a-ab0d-4f41-8a31-7865783c5c50", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data.encoding" - ] - }, - { - "cell_type": "code", - "execution_count": 30, + "execution_count": 58, "id": "337b3de4-2d86-4772-a740-5dc302711bde", "metadata": { "pycharm": { @@ -704,7 +660,7 @@ "" ] }, - "execution_count": 30, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -715,7 +671,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 59, "id": "97bc0031-be8b-43cd-b9a4-1c5d2c388a28", "metadata": { "pycharm": { @@ -726,21 +682,21 @@ { "data": { "text/plain": [ - "(1000, 14)" + "1000" ] }, - "execution_count": 31, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data.shape" + "data.record_count" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 60, "id": "3075f725-b435-411d-97e0-1c29992923e3", "metadata": { "pycharm": { @@ -751,10 +707,10 @@ { "data": { "text/plain": [ - "dtype([('nwords', 'u1'), ('type', 'u1'), ('publisher_id', 'ts_event\n", " ts_in_delta\n", " publisher_id\n", + " channel_id\n", " product_id\n", " order_id\n", " action\n", @@ -997,288 +1144,309 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.000637392+00:00\n", + " 2022-06-10 14:30:00.000025147+00:00\n", + " 16157\n", " 1\n", - " 5482\n", - " 647773887134\n", - " B\n", - " A\n", " 0\n", - " 3634.00\n", + " 97042\n", + " 6410153029859\n", + " A\n", + " B\n", + " 128\n", + " 3903.50\n", " 10\n", - " 1124\n", + " 65509273\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.000758204+00:00\n", + " 2022-06-10 14:30:00.000644983+00:00\n", + " 18570\n", " 1\n", - " 5482\n", - " 647773887135\n", - " B\n", - " A\n", " 0\n", - " 3634.00\n", - " 10\n", - " 
1124\n", + " 97042\n", + " 6410153029860\n", + " A\n", + " B\n", + " 128\n", + " 3903.50\n", + " 1\n", + " 65509274\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003570400+00:00\n", + " 2022-06-10 14:30:00.003445373+00:00\n", + " 21408\n", " 1\n", - " 5482\n", - " 647773887136\n", - " B\n", - " A\n", " 0\n", - " 3634.25\n", - " 10\n", - " 1124\n", + " 3403\n", + " 6410153029861\n", + " A\n", + " A\n", + " 128\n", + " 3904.75\n", + " 1\n", + " 65509275\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003759884+00:00\n", + " 2022-06-10 14:30:00.003627981+00:00\n", + " 20756\n", " 1\n", - " 5482\n", - " 647773887137\n", - " B\n", - " A\n", " 0\n", - " 3634.25\n", - " 10\n", - " 1124\n", + " 3403\n", + " 6410153029862\n", + " A\n", + " B\n", + " 128\n", + " 3895.00\n", + " 1\n", + " 65509276\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003766076+00:00\n", + " 2022-06-10 14:30:00.003630375+00:00\n", + " 16130\n", " 1\n", - " 5482\n", - " 647773887138\n", - " B\n", - " A\n", " 0\n", - " 3634.50\n", - " 19\n", - " 1124\n", + " 3403\n", + " 6410153029863\n", + " A\n", + " A\n", + " 128\n", + " 3904.50\n", + " 1\n", + " 65509277\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003773116+00:00\n", + " 2022-06-10 14:30:00.003651355+00:00\n", + " 17362\n", " 1\n", - " 5482\n", - " 647773887139\n", - " B\n", - " A\n", " 0\n", - " 3634.50\n", + " 3403\n", + " 6410153029864\n", + " A\n", + " A\n", + " 128\n", + " 3904.75\n", " 1\n", - " 1124\n", + " 65509278\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003786908+00:00\n", + " 2022-06-10 14:30:00.003657773+00:00\n", + " 25327\n", " 1\n", - " 5482\n", - " 647773887140\n", - " B\n", - " A\n", " 0\n", - " 3634.75\n", - " 5\n", - " 1124\n", + " 3403\n", + " 6410153029865\n", + " A\n", + " B\n", + " 128\n", + " 3894.75\n", + " 1\n", + " 65509279\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003787568+00:00\n", + " 2022-06-10 14:30:00.003675667+00:00\n", + " 15046\n", " 1\n", - " 5482\n", - " 647773887141\n", - " B\n", - " A\n", " 0\n", - " 3634.75\n", - " 5\n", - " 1124\n", + " 3403\n", + " 6410153029866\n", + " A\n", + " B\n", + " 128\n", + " 3894.50\n", + " 1\n", + " 65509280\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003802408+00:00\n", + " 2022-06-10 14:30:00.003690029+00:00\n", + " 20556\n", " 1\n", - " 5482\n", - " 647773887142\n", - " B\n", - " A\n", " 0\n", - " 3634.75\n", - " 5\n", - " 1124\n", + " 3403\n", + " 6410153029867\n", + " A\n", + " A\n", + " 128\n", + " 3905.00\n", + " 1\n", + " 65509281\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003812108+00:00\n", + " 2022-06-10 14:30:00.003704327+00:00\n", + " 19123\n", " 1\n", - " 5482\n", - " 647773887143\n", - " B\n", - " A\n", " 0\n", - " 3634.75\n", - " 5\n", - " 1124\n", + " 3403\n", + " 6410153029868\n", + " A\n", + " A\n", + 
" 128\n", + " 3905.25\n", + " 1\n", + " 65509282\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003822544+00:00\n", + " 2022-06-10 14:30:00.003705023+00:00\n", + " 18341\n", " 1\n", - " 5482\n", - " 647773887144\n", - " B\n", - " A\n", " 0\n", - " 3635.00\n", - " 13\n", - " 1124\n", + " 3403\n", + " 6410153029869\n", + " A\n", + " B\n", + " 128\n", + " 3894.25\n", + " 1\n", + " 65509283\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003835852+00:00\n", + " 2022-06-10 14:30:00.003728203+00:00\n", + " 17751\n", " 1\n", - " 5482\n", - " 647773887145\n", - " B\n", - " A\n", " 0\n", - " 3635.00\n", - " 2\n", - " 1124\n", + " 3403\n", + " 6410153029870\n", + " A\n", + " A\n", + " 128\n", + " 3905.50\n", + " 1\n", + " 65509284\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003848452+00:00\n", + " 2022-06-10 14:30:00.003733117+00:00\n", + " 18962\n", " 1\n", - " 5482\n", - " 647773887146\n", - " B\n", - " A\n", " 0\n", - " 3635.00\n", - " 5\n", - " 1124\n", + " 3403\n", + " 6410153029871\n", + " A\n", + " B\n", + " 128\n", + " 3894.00\n", + " 1\n", + " 65509285\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003864076+00:00\n", + " 2022-06-10 14:30:00.003753393+00:00\n", + " 19764\n", " 1\n", - " 5482\n", - " 647773887147\n", - " B\n", - " A\n", " 0\n", - " 3635.25\n", - " 13\n", - " 1124\n", + " 3403\n", + " 6410153029872\n", + " A\n", + " B\n", + " 128\n", + " 3893.75\n", + " 1\n", + " 65509286\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003874684+00:00\n", + " 2022-06-10 14:30:00.003760453+00:00\n", + " 15824\n", " 1\n", - " 5482\n", - " 647773887148\n", - " B\n", - " A\n", " 0\n", - " 3635.25\n", - " 4\n", - " 1124\n", + " 3403\n", + " 6410153029873\n", + " A\n", + " A\n", + " 128\n", + " 3905.75\n", + " 1\n", + " 65509287\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003898096+00:00\n", + " 2022-06-10 14:30:00.003789553+00:00\n", + " 16624\n", " 1\n", - " 5482\n", - " 647773887149\n", - " B\n", - " A\n", " 0\n", - " 3635.25\n", - " 3\n", - " 1124\n", + " 3403\n", + " 6410153029874\n", + " A\n", + " A\n", + " 128\n", + " 3906.00\n", + " 1\n", + " 65509288\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003925516+00:00\n", + " 2022-06-10 14:30:00.003808893+00:00\n", + " 15815\n", " 1\n", - " 5482\n", - " 647773887150\n", - " B\n", - " A\n", " 0\n", - " 3635.50\n", - " 10\n", - " 1124\n", + " 3403\n", + " 6410153020825\n", + " C\n", + " A\n", + " 128\n", + " 3916.00\n", + " 1\n", + " 65509289\n", " \n", " \n", - " 2020-12-27 20:00:25.252293395+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 26179\n", + " 2022-06-10 14:30:00.003933688+00:00\n", + " 2022-06-10 14:30:00.003809399+00:00\n", + " 16531\n", " 1\n", - " 5482\n", - " 647773887151\n", - " B\n", + " 0\n", + " 3403\n", + " 6410153012258\n", + " C\n", " A\n", " 128\n", - " 3635.50\n", - " 10\n", - " 1124\n", + " 3915.00\n", + " 1\n", + " 65509290\n", " \n", " \n", 
- " 2020-12-27 20:00:25.252346963+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 25295\n", + " 2022-06-10 14:30:00.003942440+00:00\n", + " 2022-06-10 14:30:00.003810909+00:00\n", + " 19100\n", " 1\n", - " 5482\n", - " 647773887152\n", - " B\n", - " A\n", " 0\n", - " 3635.75\n", - " 20\n", - " 1125\n", + " 3403\n", + " 6410153020823\n", + " C\n", + " A\n", + " 128\n", + " 3915.50\n", + " 1\n", + " 65509291\n", " \n", " \n", - " 2020-12-27 20:00:25.252346963+00:00\n", - " 2020-12-27 20:00:25.061045683+00:00\n", - " 25295\n", + " 2022-06-10 14:30:00.003957015+00:00\n", + " 2022-06-10 14:30:00.003815319+00:00\n", + " 18502\n", " 1\n", - " 5482\n", - " 647773887153\n", - " B\n", - " A\n", " 0\n", - " 3636.00\n", - " 5\n", - " 1125\n", + " 3403\n", + " 6410153029875\n", + " A\n", + " A\n", + " 128\n", + " 3905.00\n", + " 1\n", + " 65509292\n", " \n", " \n", "\n", @@ -1287,103 +1455,107 @@ "text/plain": [ " ts_event \\\n", "ts_recv \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252293395+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252346963+00:00 2020-12-27 20:00:25.061045683+00:00 \n", - "2020-12-27 20:00:25.252346963+00:00 2020-12-27 20:00:25.061045683+00:00 \n", + "2022-06-10 14:30:00.000637392+00:00 2022-06-10 14:30:00.000025147+00:00 \n", + "2022-06-10 14:30:00.000758204+00:00 2022-06-10 14:30:00.000644983+00:00 \n", + "2022-06-10 14:30:00.003570400+00:00 2022-06-10 14:30:00.003445373+00:00 \n", + "2022-06-10 14:30:00.003759884+00:00 2022-06-10 14:30:00.003627981+00:00 \n", + "2022-06-10 14:30:00.003766076+00:00 2022-06-10 14:30:00.003630375+00:00 \n", + "2022-06-10 14:30:00.003773116+00:00 2022-06-10 14:30:00.003651355+00:00 \n", + "2022-06-10 14:30:00.003786908+00:00 2022-06-10 14:30:00.003657773+00:00 \n", + "2022-06-10 14:30:00.003787568+00:00 2022-06-10 14:30:00.003675667+00:00 \n", + "2022-06-10 14:30:00.003802408+00:00 2022-06-10 14:30:00.003690029+00:00 \n", + "2022-06-10 14:30:00.003812108+00:00 2022-06-10 14:30:00.003704327+00:00 \n", + "2022-06-10 14:30:00.003822544+00:00 2022-06-10 14:30:00.003705023+00:00 \n", + "2022-06-10 14:30:00.003835852+00:00 2022-06-10 14:30:00.003728203+00:00 \n", + "2022-06-10 14:30:00.003848452+00:00 
2022-06-10 14:30:00.003733117+00:00 \n", + "2022-06-10 14:30:00.003864076+00:00 2022-06-10 14:30:00.003753393+00:00 \n", + "2022-06-10 14:30:00.003874684+00:00 2022-06-10 14:30:00.003760453+00:00 \n", + "2022-06-10 14:30:00.003898096+00:00 2022-06-10 14:30:00.003789553+00:00 \n", + "2022-06-10 14:30:00.003925516+00:00 2022-06-10 14:30:00.003808893+00:00 \n", + "2022-06-10 14:30:00.003933688+00:00 2022-06-10 14:30:00.003809399+00:00 \n", + "2022-06-10 14:30:00.003942440+00:00 2022-06-10 14:30:00.003810909+00:00 \n", + "2022-06-10 14:30:00.003957015+00:00 2022-06-10 14:30:00.003815319+00:00 \n", "\n", - " ts_in_delta publisher_id product_id \\\n", - "ts_recv \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252293395+00:00 26179 1 5482 \n", - "2020-12-27 20:00:25.252346963+00:00 25295 1 5482 \n", - "2020-12-27 20:00:25.252346963+00:00 25295 1 5482 \n", + " ts_in_delta publisher_id channel_id \\\n", + "ts_recv \n", + "2022-06-10 14:30:00.000637392+00:00 16157 1 0 \n", + "2022-06-10 14:30:00.000758204+00:00 18570 1 0 \n", + "2022-06-10 14:30:00.003570400+00:00 21408 1 0 \n", + "2022-06-10 14:30:00.003759884+00:00 20756 1 0 \n", + "2022-06-10 14:30:00.003766076+00:00 16130 1 0 \n", + "2022-06-10 14:30:00.003773116+00:00 17362 1 0 \n", + "2022-06-10 14:30:00.003786908+00:00 25327 1 0 \n", + "2022-06-10 14:30:00.003787568+00:00 15046 1 0 \n", + "2022-06-10 14:30:00.003802408+00:00 20556 1 0 \n", + "2022-06-10 14:30:00.003812108+00:00 19123 1 0 \n", + "2022-06-10 14:30:00.003822544+00:00 18341 1 0 \n", + "2022-06-10 14:30:00.003835852+00:00 17751 1 0 \n", + "2022-06-10 14:30:00.003848452+00:00 18962 1 0 \n", + "2022-06-10 14:30:00.003864076+00:00 19764 1 0 \n", + "2022-06-10 14:30:00.003874684+00:00 15824 1 0 \n", + "2022-06-10 14:30:00.003898096+00:00 16624 1 0 \n", + "2022-06-10 14:30:00.003925516+00:00 15815 1 0 \n", + "2022-06-10 14:30:00.003933688+00:00 16531 1 0 \n", + "2022-06-10 14:30:00.003942440+00:00 19100 1 0 \n", + "2022-06-10 14:30:00.003957015+00:00 18502 1 0 \n", "\n", - " order_id action side flags price \\\n", - "ts_recv \n", - "2020-12-27 20:00:25.252293395+00:00 647773887134 B A 0 3634.00 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887135 B A 0 3634.00 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887136 B A 0 3634.25 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887137 B A 0 3634.25 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887138 B A 0 3634.50 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887139 B A 0 3634.50 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887140 B A 0 3634.75 \n", - "2020-12-27 20:00:25.252293395+00:00 
647773887141 B A 0 3634.75 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887142 B A 0 3634.75 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887143 B A 0 3634.75 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887144 B A 0 3635.00 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887145 B A 0 3635.00 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887146 B A 0 3635.00 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887147 B A 0 3635.25 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887148 B A 0 3635.25 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887149 B A 0 3635.25 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887150 B A 0 3635.50 \n", - "2020-12-27 20:00:25.252293395+00:00 647773887151 B A 128 3635.50 \n", - "2020-12-27 20:00:25.252346963+00:00 647773887152 B A 0 3635.75 \n", - "2020-12-27 20:00:25.252346963+00:00 647773887153 B A 0 3636.00 \n", + " product_id order_id action side \\\n", + "ts_recv \n", + "2022-06-10 14:30:00.000637392+00:00 97042 6410153029859 A B \n", + "2022-06-10 14:30:00.000758204+00:00 97042 6410153029860 A B \n", + "2022-06-10 14:30:00.003570400+00:00 3403 6410153029861 A A \n", + "2022-06-10 14:30:00.003759884+00:00 3403 6410153029862 A B \n", + "2022-06-10 14:30:00.003766076+00:00 3403 6410153029863 A A \n", + "2022-06-10 14:30:00.003773116+00:00 3403 6410153029864 A A \n", + "2022-06-10 14:30:00.003786908+00:00 3403 6410153029865 A B \n", + "2022-06-10 14:30:00.003787568+00:00 3403 6410153029866 A B \n", + "2022-06-10 14:30:00.003802408+00:00 3403 6410153029867 A A \n", + "2022-06-10 14:30:00.003812108+00:00 3403 6410153029868 A A \n", + "2022-06-10 14:30:00.003822544+00:00 3403 6410153029869 A B \n", + "2022-06-10 14:30:00.003835852+00:00 3403 6410153029870 A A \n", + "2022-06-10 14:30:00.003848452+00:00 3403 6410153029871 A B \n", + "2022-06-10 14:30:00.003864076+00:00 3403 6410153029872 A B \n", + "2022-06-10 14:30:00.003874684+00:00 3403 6410153029873 A A \n", + "2022-06-10 14:30:00.003898096+00:00 3403 6410153029874 A A \n", + "2022-06-10 14:30:00.003925516+00:00 3403 6410153020825 C A \n", + "2022-06-10 14:30:00.003933688+00:00 3403 6410153012258 C A \n", + "2022-06-10 14:30:00.003942440+00:00 3403 6410153020823 C A \n", + "2022-06-10 14:30:00.003957015+00:00 3403 6410153029875 A A \n", "\n", - " size sequence \n", - "ts_recv \n", - "2020-12-27 20:00:25.252293395+00:00 10 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 10 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 10 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 10 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 19 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 1 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 5 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 5 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 5 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 5 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 13 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 2 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 5 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 13 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 4 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 3 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 10 1124 \n", - "2020-12-27 20:00:25.252293395+00:00 10 1124 \n", - "2020-12-27 20:00:25.252346963+00:00 20 1125 \n", - "2020-12-27 20:00:25.252346963+00:00 5 1125 " + " flags price size sequence \n", + "ts_recv \n", + "2022-06-10 14:30:00.000637392+00:00 128 3903.50 10 65509273 \n", + "2022-06-10 14:30:00.000758204+00:00 128 3903.50 1 65509274 \n", + 
"2022-06-10 14:30:00.003570400+00:00 128 3904.75 1 65509275 \n", + "2022-06-10 14:30:00.003759884+00:00 128 3895.00 1 65509276 \n", + "2022-06-10 14:30:00.003766076+00:00 128 3904.50 1 65509277 \n", + "2022-06-10 14:30:00.003773116+00:00 128 3904.75 1 65509278 \n", + "2022-06-10 14:30:00.003786908+00:00 128 3894.75 1 65509279 \n", + "2022-06-10 14:30:00.003787568+00:00 128 3894.50 1 65509280 \n", + "2022-06-10 14:30:00.003802408+00:00 128 3905.00 1 65509281 \n", + "2022-06-10 14:30:00.003812108+00:00 128 3905.25 1 65509282 \n", + "2022-06-10 14:30:00.003822544+00:00 128 3894.25 1 65509283 \n", + "2022-06-10 14:30:00.003835852+00:00 128 3905.50 1 65509284 \n", + "2022-06-10 14:30:00.003848452+00:00 128 3894.00 1 65509285 \n", + "2022-06-10 14:30:00.003864076+00:00 128 3893.75 1 65509286 \n", + "2022-06-10 14:30:00.003874684+00:00 128 3905.75 1 65509287 \n", + "2022-06-10 14:30:00.003898096+00:00 128 3906.00 1 65509288 \n", + "2022-06-10 14:30:00.003925516+00:00 128 3916.00 1 65509289 \n", + "2022-06-10 14:30:00.003933688+00:00 128 3915.00 1 65509290 \n", + "2022-06-10 14:30:00.003942440+00:00 128 3915.50 1 65509291 \n", + "2022-06-10 14:30:00.003957015+00:00 128 3905.00 1 65509292 " ] }, - "execution_count": 38, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "import pandas as pd\n", + "\n", + "pd.set_option('display.max_columns', None)\n", + "\n", "df = data.to_df(pretty_px=True, pretty_ts=True)\n", "df.head(20)" ] @@ -1405,7 +1577,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "id": "23dbe271-0379-46f0-ae47-1f33a7b708e5", "metadata": { "pycharm": { @@ -1416,20 +1588,20 @@ { "data": { "text/plain": [ - "array([(14, 32, 1, 5482, 1609099225061045683, 647439984644, 315950000000000, 2, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647439984689, 310550000000000, 3, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647508324609, 330000000000000, 2, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647530969859, 287000000000000, 10, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647570749552, 321325000000000, 1, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647570749560, 321225000000000, 1, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647570749656, 321125000000000, 1, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647570749727, 321025000000000, 1, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647570749776, 320925000000000, 1, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098),\n", - " (14, 32, 1, 5482, 1609099225061045683, 647570749868, 320825000000000, 1, 0, 0, b'B', b'A', 1609099225250461359, 92701, 1098)],\n", - " dtype=[('nwords', 'u1'), ('type', 'u1'), ('publisher_id', '" + "" ] }, - "execution_count": 40, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1536,7 +1708,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 34, "id": "92c62480-5fac-4342-b091-0f5654605e20", "metadata": { "pycharm": { @@ -1550,7 +1722,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 35, "id": "16ae4a8b-5c77-4f2f-8ac3-9b21e4bc5187", 
"metadata": { "pycharm": { @@ -1588,7 +1760,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "e1002ece-da46-4aab-9434-556fab131f9a", "metadata": { "pycharm": { @@ -1596,12 +1768,20 @@ } }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('dataset', 'glbx.mdp3'), ('start', '2020-12-27T12:00:00'), ('end', '2020-12-29T00:00:00'), ('symbols', 'ESH1'), ('schema', 'trades'), ('stype_in', 'native'), ('stype_out', 'product_id'), ('limit', '1000'), ('encoding', 'dbz'), ('split_duration', 'day'), ('packaging', 'none'), ('delivery', 'download')]\n" + ] + }, { "data": { "text/plain": [ - "{'id': 'GLBX-20220720-BTW9J5HY5C',\n", + "{'id': 'GLBX-20221106-EUV99KMLFQ',\n", " 'user_id': '46PCMCVF',\n", - " 'bill_id': '3eaf1158',\n", + " 'bill_id': '57b69b21d9154fc3b047d740d548555d',\n", + " 'cost': 0.110865,\n", " 'dataset': 'GLBX.MDP3',\n", " 'symbols': 'ESH1',\n", " 'stype_in': 'native',\n", @@ -1609,27 +1789,31 @@ " 'schema': 'trades',\n", " 'start': '2020-12-27 12:00:00+00:00',\n", " 'end': '2020-12-29 00:00:00+00:00',\n", - " 'limit': 100,\n", + " 'limit': 1000,\n", " 'encoding': 'dbz',\n", " 'compression': 'zstd',\n", + " 'pretty_px': False,\n", + " 'pretty_ts': False,\n", " 'split_duration': 'day',\n", " 'split_size': None,\n", + " 'split_symbols': False,\n", " 'packaging': 'none',\n", " 'delivery': 'download',\n", + " 'is_full_book': False,\n", " 'is_example': False,\n", - " 'record_count': 100,\n", - " 'billed_size': 4800,\n", + " 'record_count': 1000,\n", + " 'billed_size': 48000,\n", " 'actual_size': None,\n", " 'package_size': None,\n", " 'state': 'queued',\n", - " 'ts_received': '2022-07-20 07:26:45.617296+00:00',\n", + " 'ts_received': '2022-11-06 21:38:51.679646+00:00',\n", " 'ts_queued': None,\n", " 'ts_process_start': None,\n", " 'ts_process_done': None,\n", " 'ts_expiration': None}" ] }, - "execution_count": 34, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -1643,7 +1827,6 @@ " end=\"2020-12-29\",\n", " encoding=\"dbz\",\n", " delivery=\"download\",\n", - " compression=\"zstd\",\n", " limit=1000, # <-- request limited to 1000 records\n", ")" ] @@ -1732,9 +1915,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.15" } }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/requirements_dev.txt b/requirements_dev.txt index 67b05289..14ffba1f 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,2 +1,3 @@ -pytest>=7.1.2 -pytest-mock>=3.8.1 +pytest>=7.2.0 +pytest-mock>=3.10.0 +types-requests diff --git a/setup.cfg b/setup.cfg index 9dcb1d18..46e23ecb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,3 +14,7 @@ ignore = C101, E203, W503 # C101: Coding magic comment not found (not required as utf-8 is the default for python3). 
# E203: whitespace before ':' (conflicts with Black) # W503: in error will be amended by flake8 soon (https://www.flake8rules.com/rules/W503.html) + +[tool:pytest] +testpaths = tests +asyncio_mode = auto diff --git a/setup.py b/setup.py index 80fcbbd5..6f91330e 100755 --- a/setup.py +++ b/setup.py @@ -56,6 +56,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Office/Business :: Financial", diff --git a/tests/fixtures.py b/tests/fixtures.py index 09fdfdf8..4aba7750 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -6,10 +6,10 @@ TESTS_ROOT = os.path.dirname(os.path.abspath(__file__)) -def get_test_data_path(schema: Schema): +def get_test_data_path(schema: Schema) -> str: return os.path.join(TESTS_ROOT, "data", f"test_data.{schema.value}.dbz") -def get_test_data(schema: Schema): +def get_test_data(schema: Schema) -> bytes: with open(get_test_data_path(schema=schema), "rb") as f: return f.read() diff --git a/tests/test_common_parsing.py b/tests/test_common_parsing.py index c4ecae00..a0f86ab9 100644 --- a/tests/test_common_parsing.py +++ b/tests/test_common_parsing.py @@ -1,4 +1,6 @@ -from datetime import date +import datetime as dt +from enum import Enum +from typing import Any, List, Optional, Union import numpy as np import pandas as pd @@ -15,6 +17,11 @@ ) +# Set the type to `Any` to disable mypy type checking. Used to test if functions +# will raise a `TypeError` when passed an incorrectly-typed argument. +INCORRECT_TYPE: Any = type + + class TestParsing: def test_enum_or_str_lowercase_given_none_raises_type_error(self) -> None: # Arrange, Act, Assert @@ -24,7 +31,7 @@ def test_enum_or_str_lowercase_given_none_raises_type_error(self) -> None: def test_enum_or_str_lowercase_given_incorrect_type_raises_type_error(self) -> None: # Arrange, Act, Assert with pytest.raises(TypeError): - enum_or_str_lowercase(type, "param") + enum_or_str_lowercase(INCORRECT_TYPE, "param") @pytest.mark.parametrize( "value, expected", @@ -35,7 +42,7 @@ def test_enum_or_str_lowercase_given_incorrect_type_raises_type_error(self) -> N ], ) def test_enum_or_str_lowercase_returns_expected_outputs( - self, value, expected + self, value: Union[Enum, str], expected: str ) -> None: # Arrange, Act, Assert assert enum_or_str_lowercase(value, "param") == expected @@ -45,7 +52,7 @@ def test_maybe_enum_or_str_lowercase_given_incorrect_types_raises_error( ) -> None: # Arrange, Act, Assert with pytest.raises(TypeError): - maybe_enum_or_str_lowercase(type, "param") + maybe_enum_or_str_lowercase(INCORRECT_TYPE, "param") @pytest.mark.parametrize( "value, expected", @@ -57,8 +64,8 @@ def test_maybe_enum_or_str_lowercase_given_incorrect_types_raises_error( ], ) def test_maybe_enum_or_str_lowercase_returns_expected_outputs( - self, value, expected - ): + self, value: Optional[Union[Enum, str]], expected: Optional[str] + ) -> None: # Arrange, Act, Assert assert maybe_enum_or_str_lowercase(value, "param") == expected @@ -67,7 +74,7 @@ def test_maybe_values_list_to_string_given_invalid_input_raises_type_error( ) -> None: # Arrange, Act, Assert with pytest.raises(TypeError): - maybe_values_list_to_string(type) + maybe_values_list_to_string(INCORRECT_TYPE) @pytest.mark.parametrize( "values, expected", @@ -82,11 +89,11 @@ def 
test_maybe_values_list_to_string_given_invalid_input_raises_type_error( ) def test_maybe_values_list_to_string_given_valid_inputs_returns_expected( self, - values, - expected, + values: Optional[List[str]], + expected: str, ) -> None: # Arrange, Act - result = maybe_values_list_to_string(values) + result: Optional[str] = maybe_values_list_to_string(values) # Assert assert result == expected @@ -96,7 +103,7 @@ def test_maybe_symbols_list_to_string_given_invalid_input_raises_type_error( ) -> None: # Arrange, Act, Assert with pytest.raises(TypeError): - maybe_symbols_list_to_string(type) + maybe_symbols_list_to_string(INCORRECT_TYPE) @pytest.mark.parametrize( "symbols, expected", @@ -111,11 +118,11 @@ def test_maybe_symbols_list_to_string_given_invalid_input_raises_type_error( ) def test_maybe_symbols_list_to_string_given_valid_inputs_returns_expected( self, - symbols, - expected, + symbols: Optional[List[str]], + expected: str, ) -> None: # Arrange, Act - result = maybe_symbols_list_to_string(symbols) + result: Optional[str] = maybe_symbols_list_to_string(symbols) # Assert assert result == expected @@ -124,19 +131,17 @@ def test_maybe_symbols_list_to_string_given_valid_inputs_returns_expected( "value, expected", [ [None, None], - [1604782791000000000, "2020-11-07"], - ["2020-11-07T20:59:51", "2020-11-07"], - [date(2020, 12, 28), "2020-12-28"], - [pd.Timestamp("2020-12-28T23:12:01.123"), "2020-12-28"], + ["2020-11-07", "2020-11-07"], + [dt.date(2020, 12, 28), "2020-12-28"], ], ) def test_maybe_date_to_string_give_valid_values_returns_expected_results( self, - value, - expected, + value: Union[dt.date, str], + expected: str, ) -> None: # Arrange, Act - result = maybe_date_to_string(value) + result: Optional[str] = maybe_date_to_string(value) # Assert assert result == expected @@ -147,17 +152,17 @@ def test_maybe_date_to_string_give_valid_values_returns_expected_results( [None, None], [1604782791000000000, "2020-11-07T20:59:51"], ["2020-11-07T20:59:51", "2020-11-07T20:59:51"], - [date(2020, 12, 28), "2020-12-28T00:00:00"], - [pd.Timestamp("2020-12-28T23:12:01.123"), "2020-12-28T23:12:01.123000"], + [dt.date(2020, 12, 28), "2020-12-28T00:00:00"], + [pd.to_datetime("2020-12-28T23:12:01.123"), "2020-12-28T23:12:01.123000"], ], ) def test_maybe_datetime_to_string_give_valid_values_returns_expected_results( self, - value, - expected, + value: Union[pd.Timestamp, dt.date, str, int], + expected: str, ) -> None: # Arrange, Act - result = maybe_datetime_to_string(value) + result: Optional[str] = maybe_datetime_to_string(value) # Assert assert result == expected @@ -173,11 +178,11 @@ def test_maybe_datetime_to_string_give_valid_values_returns_expected_results( ) def test_parse_flags_given_valid_values_returns_expected_results( self, - value, - expected, + value: int, + expected: List[str], ) -> None: # Arrange, Act - result = parse_flags(value) + result: List[str] = parse_flags(value) # Assert assert result == expected diff --git a/tests/test_common_validation.py b/tests/test_common_validation.py index 4b46bbbd..f8db74e4 100644 --- a/tests/test_common_validation.py +++ b/tests/test_common_validation.py @@ -1,3 +1,6 @@ +from enum import Enum +from typing import Any, Type, Union + import pytest from databento.common.enums import Compression, Encoding from databento.common.validation import validate_enum, validate_maybe_enum @@ -13,9 +16,9 @@ class TestValidation: ) def test_validate_enum_given_wrong_types_raises_type_error( self, - value, - enum, - param, + value: Any, + enum: Type[Enum], + param: str, ) 
-> None: # Arrange, Act, Assert with pytest.raises(TypeError): @@ -36,14 +39,14 @@ def test_validate_enum_given_invalid_value_raises_value_error(self) -> None: ) def test_validate_enum_given_valid_value_returns_expected_output( self, - value, - enum, - param, - expected, + value: Union[str, Enum], + enum: Type[Enum], + param: str, + expected: Union[str, Enum], ) -> None: # Arrange, Act, Assert assert validate_enum(value, enum, "param") == expected - def test_validate_maybe_enum_give_none_returns_none(self): + def test_validate_maybe_enum_give_none_returns_none(self) -> None: # Arrange, Act, Assert assert validate_maybe_enum(None, Encoding, "encoding") is None diff --git a/tests/test_historical_batch.py b/tests/test_historical_batch.py index ebc13214..f20126eb 100644 --- a/tests/test_historical_batch.py +++ b/tests/test_historical_batch.py @@ -3,10 +3,11 @@ import databento as db import pytest import requests +from pytest_mock import MockerFixture class TestHistoricalBatch: - def setup(self) -> None: + def setup_method(self) -> None: key = "DUMMY_API_KEY" self.client = db.Historical(key=key) @@ -49,7 +50,9 @@ def test_batch_submit_job_given_invalid_stype_in_raises_error(self) -> None: ) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_batch_submit_job_sends_expected_request(self, mocker) -> None: + def test_batch_submit_job_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.post") @@ -81,24 +84,26 @@ def test_batch_submit_job_sends_expected_request(self, mocker) -> None: ) assert call["params"] == [ ("dataset", "glbx.mdp3"), - ("symbols", "ESH1"), - ("schema", "trades"), ("start", "2020-12-28T12:00:00"), ("end", "2020-12-29T00:00:00"), + ("symbols", "ESH1"), + ("schema", "trades"), ("stype_in", "native"), ("stype_out", "product_id"), ("encoding", "csv"), ("compression", "zstd"), ("split_duration", "day"), + ("split_size", "10000000000"), ("packaging", "none"), ("delivery", "download"), - ("split_size", "10000000000"), ] assert call["timeout"] == (100, 100) assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_batch_list_jobs_sends_expected_request(self, mocker) -> None: + def test_batch_list_jobs_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") diff --git a/tests/test_historical_bento.py b/tests/test_historical_bento.py index ae95b01c..1733a743 100644 --- a/tests/test_historical_bento.py +++ b/tests/test_historical_bento.py @@ -2,12 +2,14 @@ import os import sys from pathlib import Path +from typing import List, Tuple, Union import numpy as np import pandas as pd import pytest from databento.common.bento import Bento, FileBento, MemoryBento from databento.common.enums import Compression, Encoding, Schema, SType + from tests.fixtures import get_test_data, get_test_data_path @@ -97,8 +99,8 @@ def test_bento_given_initial_nbytes_returns_expected_metadata(self) -> None: # Assert assert data.dtype == np.dtype( [ - ("nwords", "u1"), - ("type", "u1"), + ("length", "u1"), + ("rtype", "u1"), ("publisher_id", " None: assert data.end == pd.Timestamp("2020-12-29 00:00:00+0000", tz="UTC") assert data.limit == 2 assert data.compression == Compression.ZSTD - assert data.shape == (2, 15) + assert data.record_count == 2 assert data.mappings == { "ESH1": [ { @@ -200,7 +202,7 @@ def test_replay_with_stub_data_record_passes_to_callback(self) -> 
None: stub_data = get_test_data(schema=Schema.MBO) data = MemoryBento(initial_bytes=stub_data) - handler = [] + handler: List[Tuple[Union[int, bytes], ...]] = [] # Act data.replay(callback=handler.append) @@ -226,7 +228,9 @@ def test_replay_with_stub_data_record_passes_to_callback(self) -> None: ) ], ) - def test_to_df_across_schemas_returns_identical_dimension_dfs(self, schema) -> None: + def test_to_df_across_schemas_returns_identical_dimension_dfs( + self, schema: Schema + ) -> None: # Arrange stub_data = get_test_data(schema=schema) data = MemoryBento(initial_bytes=stub_data) @@ -257,7 +261,7 @@ def test_to_df_with_mbo_data_returns_expected_record(self) -> None: assert df.iloc[0].action == "C" assert df.iloc[0].side == "A" assert df.iloc[0].price == 3722750000000 - assert df.iloc[0].size == 11 + assert df.iloc[0].size == 12 assert df.iloc[0].sequence == 1170352 def test_to_df_with_stub_ohlcv_data_returns_expected_record(self) -> None: @@ -333,8 +337,8 @@ def test_to_df_with_pretty_ts_converts_timestamps_as_expected(self) -> None: ) def test_to_df_with_pretty_px_with_various_schemas_converts_prices_as_expected( self, - schema, - columns, + schema: Schema, + columns: List[str], ) -> None: # Arrange stub_data = get_test_data(schema=schema) @@ -365,9 +369,9 @@ def test_to_df_with_pretty_px_with_various_schemas_converts_prices_as_expected( ) def test_from_file_given_various_paths_returns_expected_metadata( self, - expected_schema, - expected_encoding, - expected_compression, + expected_schema: Schema, + expected_encoding: Encoding, + expected_compression: Compression, ) -> None: # Arrange, Act path = get_test_data_path(schema=expected_schema) @@ -377,7 +381,7 @@ def test_from_file_given_various_paths_returns_expected_metadata( assert data.schema == expected_schema assert data.compression == expected_compression - def test_to_csv_writes_expected_file_to_disk(self) -> None: + def test_mbo_to_csv_writes_expected_file_to_disk(self) -> None: # Arrange test_data_path = get_test_data_path(schema=Schema.MBO) data = FileBento(path=test_data_path) @@ -391,10 +395,11 @@ def test_to_csv_writes_expected_file_to_disk(self) -> None: written = open(path, mode="rb").read() assert os.path.isfile(path) expected = ( - b"ts_recv,ts_event,ts_in_delta,publisher_id,product_id,order_id,action,side,flags,pr" # noqa - b"ice,size,sequence\n1609160400000704060,1609160400000429831,22993,1,5482,6" # noqa - b"47784973705,C,A,128,3722750000000,1,1170352\n1609160400000711344,160916" # noqa - b"0400000431665,19621,1,5482,647784973631,C,A,128,3723000000000,1,1170353\n" # noqa + b"ts_recv,ts_event,ts_in_delta,publisher_id,channel_id,product_id,order_id,act" # noqa + b"ion,side,flags,price,size,sequence\n1609160400000704060,16091604000004298" # noqa + b"31,22993,1,0,5482,647784973705,C,A,128,3722750000000,1,1170352\n160916040" # noqa + b"0000711344,1609160400000431665,19621,1,0,5482,647784973631,C,A,128,372300000" # noqa + b"0000,1,1170353\n" ) if sys.platform == "win32": expected = expected.replace(b"\n", b"\r\n") @@ -403,7 +408,35 @@ def test_to_csv_writes_expected_file_to_disk(self) -> None: # Cleanup os.remove(path) - def test_to_json_writes_expected_file_to_disk(self) -> None: + def test_mbp_1_to_csv_writes_expected_file_to_disk(self) -> None: + # Arrange + test_data_path = get_test_data_path(schema=Schema.MBP_1) + data = FileBento(path=test_data_path) + + path = "test.my_mbo.csv" + + # Act + data.to_csv(path) + + # Assert + written = open(path, mode="rb").read() + assert os.path.isfile(path) + expected = ( + 
b"ts_recv,ts_event,ts_in_delta,publisher_id,product_id,action,side,depth,flags" # noqa + b",price,size,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_oq_00,ask_o" # noqa + b"q_00\n1609160400006136329,1609160400006001487,17214,1,5482,A,A,0,128,3720" # noqa + b"500000000,1,1170362,3720250000000,3720500000000,24,11,15,9\n1609160400006" # noqa + b"246513,1609160400006146661,18858,1,5482,A,A,0,128,3720500000000,1,1170364,37" # noqa + b"20250000000,3720500000000,24,12,15,10\n" + ) + if sys.platform == "win32": + expected = expected.replace(b"\n", b"\r\n") + assert written == expected + + # Cleanup + os.remove(path) + + def test_mbo_to_json_writes_expected_file_to_disk(self) -> None: # Arrange test_data_path = get_test_data_path(schema=Schema.MBO) data = FileBento(path=test_data_path) @@ -417,12 +450,39 @@ def test_to_json_writes_expected_file_to_disk(self) -> None: written = open(path, mode="rb").read() assert os.path.isfile(path) assert written == ( - b'{"ts_event":1609160400000429831,"ts_in_delta":22993,"publisher_id":1,"product_id":' # noqa - b'5482,"order_id":647784973705,"action":"C","side":"A","flags":128,"price":372' # noqa - b'2750000000,"size":1,"sequence":1170352}\n{"ts_event":160916040000043166' # noqa - b'5,"ts_in_delta":19621,"publisher_id":1,"product_id":5482,"order_id":647784973631,"' # noqa - b'action":"C","side":"A","flags":128,"price":3723000000000,"size":1,"sequenc' # noqa - b'e":1170353}\n' + b'{"ts_event":1609160400000429831,"ts_in_delta":22993,"publisher_id":1,"channe' # noqa + b'l_id":0,"product_id":5482,"order_id":647784973705,"action":"C","side":"A","f' # noqa + b'lags":128,"price":3722750000000,"size":1,"sequence":1170352}\n{"ts_event"' # noqa + b':1609160400000431665,"ts_in_delta":19621,"publisher_id":1,"channel_id":0,"pr' # noqa + b'oduct_id":5482,"order_id":647784973631,"action":"C","side":"A","flags":128,"' # noqa + b'price":3723000000000,"size":1,"sequence":1170353}\n' + ) + + # Cleanup + os.remove(path) + + def test_mbp_1_to_json_writes_expected_file_to_disk(self) -> None: + # Arrange + test_data_path = get_test_data_path(schema=Schema.MBP_1) + data = FileBento(path=test_data_path) + + path = "test.my_mbo.json" + + # Act + data.to_json(path) + + # Assert + written = open(path, mode="rb").read() + assert os.path.isfile(path) + assert written == ( + b'{"ts_event":1609160400006001487,"ts_in_delta":17214,"publisher_id":1,"produc' # noqa + b't_id":5482,"action":"A","side":"A","depth":0,"flags":128,"price":37205000000' # noqa + b'00,"size":1,"sequence":1170362,"bid_px_00":3720250000000,"ask_px_00":3720500' # noqa + b'000000,"bid_sz_00":24,"ask_sz_00":11,"bid_oq_00":15,"ask_oq_00":9}\n{"ts_' # noqa + b'event":1609160400006146661,"ts_in_delta":18858,"publisher_id":1,"product_id"' # noqa + b':5482,"action":"A","side":"A","depth":0,"flags":128,"price":3720500000000,"s' # noqa + b'ize":1,"sequence":1170364,"bid_px_00":3720250000000,"ask_px_00":372050000000' # noqa + b'0,"bid_sz_00":24,"ask_sz_00":12,"bid_oq_00":15,"ask_oq_00":10}\n' # noqa ) # Cleanup diff --git a/tests/test_historical_client.py b/tests/test_historical_client.py index 4ff14e4c..83e912e4 100644 --- a/tests/test_historical_client.py +++ b/tests/test_historical_client.py @@ -1,10 +1,13 @@ import sys +from typing import Union import databento as db import pytest import requests from databento import FileBento, Historical from databento.common.enums import HistoricalGateway, Schema +from pytest_mock import MockerFixture + from tests.fixtures import get_test_data_path @@ -30,23 +33,21 @@ def 
test_default_host_returns_expected(self) -> None: "gateway, expected", [ [HistoricalGateway.BO1, "https://hist.databento.com"], - [HistoricalGateway.NEAREST, "https://hist.databento.com"], ["bo1", "https://hist.databento.com"], - ["nearest", "https://hist.databento.com"], ], ) def test_gateway_nearest_and_bo1_map_to_hist_databento( self, - gateway, - expected, - ): + gateway: Union[HistoricalGateway, str], + expected: str, + ) -> None: # Arrange, Act client = db.Historical(key="DUMMY_API_KEY", gateway=gateway) # Assert assert client.gateway == expected - def test_custom_gateway_returns_expected(self): + def test_custom_gateway_returns_expected(self) -> None: # Arrange ny4_gateway = "ny4.databento.com" @@ -57,7 +58,9 @@ def test_custom_gateway_returns_expected(self): assert client.gateway == ny4_gateway @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_re_request_symbology_makes_expected_request(self, mocker) -> None: + def test_re_request_symbology_makes_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -93,7 +96,9 @@ def test_re_request_symbology_makes_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_request_full_definitions_expected_request(self, mocker) -> None: + def test_request_full_definitions_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -113,10 +118,10 @@ def test_request_full_definitions_expected_request(self, mocker) -> None: ) assert call["params"] == [ ("dataset", "glbx.mdp3"), - ("symbols", "ESH1"), - ("schema", "definition"), ("start", "2020-12-28T13:00:00+00:00"), ("end", "2020-12-29T00:00:00+00:00"), + ("symbols", "ESH1"), + ("schema", "definition"), ("stype_in", "native"), ("stype_out", "product_id"), ("encoding", "dbz"), diff --git a/tests/test_historical_metadata.py b/tests/test_historical_metadata.py index 00b7bcdc..fe0f5614 100644 --- a/tests/test_historical_metadata.py +++ b/tests/test_historical_metadata.py @@ -1,18 +1,22 @@ import sys +from typing import Union import databento as db import pytest import requests from databento.common.enums import Dataset, FeedMode, Schema +from pytest_mock import MockerFixture class TestHistoricalMetadata: - def setup(self) -> None: + def setup_method(self) -> None: key = "DUMMY_API_KEY" self.client = db.Historical(key=key) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_list_publishers_sends_expected_request(self, mocker) -> None: + def test_list_publishers_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -34,7 +38,7 @@ def test_list_publishers_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_list_datasets_sends_expected_request(self, mocker) -> None: + def test_list_datasets_sends_expected_request(self, mocker: MockerFixture) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -61,7 +65,7 @@ def test_list_datasets_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_list_schemas_sends_expected_request(self, 
mocker) -> None: + def test_list_schemas_sends_expected_request(self, mocker: MockerFixture) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -89,7 +93,7 @@ def test_list_schemas_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_list_fields_sends_expected_request(self, mocker) -> None: + def test_list_fields_sends_expected_request(self, mocker: MockerFixture) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -117,7 +121,7 @@ def test_list_fields_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_list_encodings_sends_expected_request(self, mocker) -> None: + def test_list_encodings_sends_expected_request(self, mocker: MockerFixture) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -139,7 +143,9 @@ def test_list_encodings_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_list_compressions_sends_expected_request(self, mocker) -> None: + def test_list_compressions_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -169,7 +175,11 @@ def test_list_compressions_sends_expected_request(self, mocker) -> None: ], ) def test_list_unit_price_sends_expected_request( - self, dataset, schema, mode, mocker + self, + dataset: Union[str, Dataset], + schema: Union[str, Schema], + mode: Union[str, FeedMode], + mocker: MockerFixture, ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -201,12 +211,14 @@ def test_list_unit_price_sends_expected_request( assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_get_shape_sends_expected_request(self, mocker) -> None: + def test_get_record_count_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") # Act - self.client.metadata.get_shape( + self.client.metadata.get_record_count( dataset="GLBX.MDP3", symbols=["ESH1"], schema="mbo", @@ -219,7 +231,7 @@ def test_get_shape_sends_expected_request(self, mocker) -> None: call = mocked_get.call_args.kwargs assert ( call["url"] - == f"https://hist.databento.com/v{db.API_VERSION}/metadata.get_shape" + == f"https://hist.databento.com/v{db.API_VERSION}/metadata.get_record_count" ) assert sorted(call["headers"].keys()) == ["accept", "user-agent"] assert call["headers"]["accept"] == "application/json" @@ -240,7 +252,9 @@ def test_get_shape_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_get_billable_size_sends_expected_request(self, mocker) -> None: + def test_get_billable_size_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -267,10 +281,10 @@ def test_get_billable_size_sends_expected_request(self, mocker) -> None: ) assert call["params"] == [ ("dataset", "glbx.mdp3"), - ("symbols", "ESH1"), - ("schema", "mbo"), ("start", "2020-12-28T12:00:00"), ("end", "2020-12-29T00:00:00"), + ("symbols", "ESH1"), + 
("schema", "mbo"), ("stype_in", "native"), ("stype_out", "product_id"), ("limit", "1000000"), @@ -279,7 +293,7 @@ def test_get_billable_size_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_get_cost_sends_expected_request(self, mocker) -> None: + def test_get_cost_sends_expected_request(self, mocker: MockerFixture) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -306,10 +320,10 @@ def test_get_cost_sends_expected_request(self, mocker) -> None: ) assert call["params"] == [ ("dataset", "glbx.mdp3"), - ("symbols", "ESH1"), - ("schema", "mbo"), ("start", "2020-12-28T12:00:00"), ("end", "2020-12-29T00:00:00"), + ("symbols", "ESH1"), + ("schema", "mbo"), ("stype_in", "native"), ("stype_out", "product_id"), ("limit", "1000000"), diff --git a/tests/test_historical_timeseries.py b/tests/test_historical_timeseries.py index 365890b0..26c62981 100644 --- a/tests/test_historical_timeseries.py +++ b/tests/test_historical_timeseries.py @@ -3,10 +3,11 @@ import databento as db import pytest import requests +from pytest_mock import MockerFixture class TestHistoricalTimeSeries: - def setup(self) -> None: + def setup_method(self) -> None: key = "DUMMY_API_KEY" self.client = db.Historical(key=key) @@ -46,7 +47,7 @@ def test_stream_given_invalid_stype_out_raises_error(self) -> None: ) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_stream_sends_expected_request(self, mocker) -> None: + def test_stream_sends_expected_request(self, mocker: MockerFixture) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -72,10 +73,10 @@ def test_stream_sends_expected_request(self, mocker) -> None: ) assert call["params"] == [ ("dataset", "glbx.mdp3"), - ("symbols", "ESH1"), - ("schema", "trades"), ("start", "2020-12-28T12:00:00"), ("end", "2020-12-29T00:00:00"), + ("symbols", "ESH1"), + ("schema", "trades"), ("stype_in", "native"), ("stype_out", "product_id"), ("encoding", "dbz"), @@ -84,7 +85,9 @@ def test_stream_sends_expected_request(self, mocker) -> None: assert isinstance(call["auth"], requests.auth.HTTPBasicAuth) @pytest.mark.skipif(sys.version_info < (3, 8), reason="incompatible mocking") - def test_stream_with_limit_sends_expected_request(self, mocker) -> None: + def test_stream_with_limit_sends_expected_request( + self, mocker: MockerFixture + ) -> None: # Arrange mocked_get = mocker.patch("requests.get") @@ -111,10 +114,10 @@ def test_stream_with_limit_sends_expected_request(self, mocker) -> None: ) assert call["params"] == [ ("dataset", "glbx.mdp3"), - ("symbols", "ESH1"), - ("schema", "trades"), ("start", "2020-12-28T12:00:00"), ("end", "2020-12-29T00:00:00"), + ("symbols", "ESH1"), + ("schema", "trades"), ("stype_in", "native"), ("stype_out", "product_id"), ("limit", "1000000"),