From 552e319e023d2db183f5de90290076e8dde2ebf9 Mon Sep 17 00:00:00 2001 From: Paco Valdez Date: Wed, 22 Oct 2025 15:12:01 -0700 Subject: [PATCH 1/4] fix utf8 issue --- .gitignore | 2 + CLAUDE.md | 136 ++++++++++++++++++++++++++++++++++++++++++++ QUICK_REFERENCE.md | 89 +++++++++++++++++++++++++++++ src/cube_dbt/dbt.py | 2 +- 4 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 CLAUDE.md create mode 100644 QUICK_REFERENCE.md diff --git a/.gitignore b/.gitignore index 4b3ab30..c5530d4 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +.DS_Store + diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..efd0745 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,136 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +**cube_dbt** is a Python package that converts dbt models and columns into Cube semantic layer definitions. It parses dbt manifest files and provides Jinja-compatible YAML output for integrating data models with Cube's semantic layer. 
+ +## Common Development Commands + +```bash +# Testing +pdm run test # Run all tests (34 unit tests) +pytest tests/ -v # Run tests with verbose output +pytest tests/test_dbt.py # Run specific test file +pytest -k "test_model" # Run tests matching pattern + +# Development Setup +pdm install # Install project with dev dependencies +pdm install --prod # Install production dependencies only +pdm lock # Update pdm.lock file +pdm update # Update all dependencies + +# Building & Publishing +pdm build # Build distribution packages +pdm publish # Publish to PyPI (requires credentials) + +# Development Workflow +pdm run python -m cube_dbt # Run the module directly +python -c "from cube_dbt import Dbt; print(Dbt.version())" # Check version +``` + +## High-Level Architecture + +The package consists of 4 core classes that work together: + +### Core Classes + +**Dbt (src/cube_dbt/dbt.py)** +- Entry point for loading dbt manifest files +- Supports file paths and URLs via `from_file()` and `from_url()` class methods +- Implements chainable filtering API: `filter(paths=[], tags=[], names=[])` +- Lazy initialization - models are only loaded when accessed +- Handles manifest v1-v12 formats + +**Model (src/cube_dbt/model.py)** +- Represents a single dbt model from the manifest +- Key method: `as_cube()` - exports model as Cube-compatible YAML +- Supports multiple primary keys via column tags +- Provides access to columns, description, database, schema, and alias +- Handles special characters in model names (spaces, dots, dashes) + +**Column (src/cube_dbt/column.py)** +- Represents dbt columns with comprehensive type mapping +- Maps 130+ database-specific types to 5 Cube dimension types: + - string, number, time, boolean, geo +- Database support: BigQuery, Snowflake, Redshift, generic SQL +- Primary key detection via `primary_key` tag in column metadata +- Raises RuntimeError for unknown column types (fail-fast approach) + +**Dump (src/cube_dbt/dump.py)** +- Custom YAML serialization 
utilities +- Returns Jinja SafeString for template compatibility +- Handles proper indentation for nested structures +- Used internally by Model.as_cube() for output formatting + +### Key Design Patterns + +1. **Lazy Loading**: Models are loaded only when first accessed via `dbt.models` property +2. **Builder Pattern**: Filter methods return self for chaining: `dbt.filter(tags=['tag1']).filter(paths=['path1'])` +3. **Factory Methods**: `Dbt.from_file()` and `Dbt.from_url()` for different data sources +4. **Type Mapping Strategy**: Centralized database type to Cube type conversion in Column class + +### Data Flow + +``` +manifest.json → Dbt.from_file() → filter() → models → Model.as_cube() → YAML output + ↓ + columns → Column.dimension_type() +``` + +## Testing Structure + +Tests use a real dbt manifest fixture (tests/manifest.json, ~397KB) with example models: + +- **test_dbt.py**: Tests manifest loading, filtering by paths/tags/names, version checking +- **test_model.py**: Tests YAML export, primary key handling, special character escaping +- **test_column.py**: Tests type mapping for different databases, primary key detection +- **test_dump.py**: Tests YAML formatting and Jinja compatibility + +Run specific test scenarios: +```bash +pytest tests/test_column.py::TestColumn::test_bigquery_types -v +pytest tests/test_model.py::TestModel::test_multiple_primary_keys -v +``` + +## Important Implementation Details + +### Primary Key Configuration +Primary keys are defined by putting the `primary_key` tag directly on a dbt column: +```yaml +# In dbt schema.yml +columns: + - name: id + # tags belong on the column itself, not nested under `meta` + tags: ['primary_key'] +``` + +### Type Mapping Behavior +- Unknown types raise RuntimeError immediately (fail-fast) +- Database-specific types are checked first, then generic SQL types +- Default mappings can be found in `src/cube_dbt/column.py` TYPE_MAP dictionaries + +### Jinja Template Integration +All output from `as_cube()` is wrapped in Jinja SafeString to prevent double-escaping in templates. 
Use the `safe` filter if needed in templates. + +### URL Loading Authentication +When using `Dbt.from_url()`, basic authentication is supported: +```python +dbt = Dbt.from_url("https://user:pass@example.com/manifest.json") +``` + +## Recent Changes (from git history) + +- Multiple primary key support (#15) +- Documentation of package properties (#14) +- Extended dbt contract data type support (#10) +- Jinja escaping protection for as_cube() (#2) + +## Package Metadata + +- **Version**: Defined in `src/cube_dbt/__init__.py` +- **Python Requirement**: >= 3.8 +- **Production Dependency**: PyYAML >= 6.0.1 +- **License**: MIT +- **Build System**: PDM with PEP 517/518 compliance \ No newline at end of file diff --git a/QUICK_REFERENCE.md b/QUICK_REFERENCE.md new file mode 100644 index 0000000..583782c --- /dev/null +++ b/QUICK_REFERENCE.md @@ -0,0 +1,89 @@ +# cube_dbt Quick Reference + +## What is cube_dbt? +A Python package that converts dbt models and columns into Cube semantic layer definitions. It parses dbt manifests and provides Jinja-compatible YAML output. 
+ +## Install & Run Tests +```bash +pdm install # Set up environment +pdm run test # Run all tests +``` + +## Basic Usage +```python +from cube_dbt import Dbt + +# Load and filter +dbt = Dbt.from_file('manifest.json').filter( + paths=['marts/'], + tags=['cube'], + names=['model_name'] +) + +# Access models +model = dbt.model('my_model') +print(model.name) +print(model.sql_table) +print(model.columns) + +# Export to Cube (YAML) +print(model.as_cube()) +print(model.as_dimensions()) +``` + +## Project Structure +``` +src/cube_dbt/ +├── dbt.py - Dbt class (manifest loading & filtering) +├── model.py - Model class (cube export) +├── column.py - Column class (type mapping) +├── dump.py - YAML utilities (Jinja-safe) +└── __init__.py - Public exports + +tests/ - 34 unit tests, all passing +``` + +## Key Classes + +### Dbt +- `from_file(path)` - Load from JSON +- `from_url(url)` - Load from remote URL +- `filter(paths=[], tags=[], names=[])` - Chainable filtering +- `.models` - Get all filtered models +- `.model(name)` - Get single model + +### Model +- `.name`, `.description`, `.sql_table` - Properties +- `.columns` - List of Column objects +- `.primary_key` - List of primary key columns +- `.as_cube()` - Export as Cube definition (YAML) +- `.as_dimensions()` - Export dimensions (YAML) + +### Column +- `.name`, `.description`, `.type`, `.meta` - Properties +- `.primary_key` - Boolean +- `.as_dimension()` - Export dimension (YAML) + +Type mapping: BigQuery, Snowflake, Redshift → Cube types (number, string, time, boolean, geo) + +## Dependencies +- Production: PyYAML >= 6.0.1 +- Development: pytest >= 7.4.2 +- Python: >= 3.8 + +## Common Tasks +| Task | Command | +|------|---------| +| Run tests | `pdm run test` | +| Run specific test | `pytest tests/test_dbt.py -v` | +| Install deps | `pdm install` | +| Lock deps | `pdm lock` | +| Build package | `pdm build` | + +## Recent Changes +- v0.6.2: Multiple primary keys support +- Type support for dbt contracts +- Jinja template 
safe rendering + +## Publishing +GitHub Actions auto-publishes to PyPI on release. diff --git a/src/cube_dbt/dbt.py b/src/cube_dbt/dbt.py index d7c7e25..a4350b8 100644 --- a/src/cube_dbt/dbt.py +++ b/src/cube_dbt/dbt.py @@ -14,7 +14,7 @@ def __init__(self, manifest: dict) -> None: @staticmethod def from_file(manifest_path: str) -> 'Dbt': - with open(manifest_path, 'r') as file: + with open(manifest_path, 'r', encoding='utf-8') as file: manifest = json.loads(file.read()) return Dbt(manifest) From d35fed1ff9b8f63f03fba7da849164bb1a511215 Mon Sep 17 00:00:00 2001 From: Paco Valdez Date: Wed, 22 Oct 2025 15:41:40 -0700 Subject: [PATCH 2/4] Support primary keys from dbt tests --- pyproject.toml | 3 +- src/cube_dbt/column.py | 19 +++-- src/cube_dbt/dbt.py | 51 +++++++++++++- src/cube_dbt/model.py | 46 ++++++++++-- tests/manifest.json | 145 +++++++++++++++++++++++++++++++++++++- tests/test_dbt.py | 28 +++++++- tests/test_model.py | 155 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 432 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eb180bc..105466f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,11 @@ [project] name = "cube_dbt" -version = "0.6.2" +version = "0.6.3" description = "dbt integration for Cube" authors = [ {name = "Artyom Keydunov", email = "artyom@cube.dev"}, {name = "Igor Lukanin", email = "igor@cube.dev"}, + {name = "Paco Valdez", email = "paco@cube.dev"}, ] dependencies = [ "PyYAML>=6.0.1", diff --git a/src/cube_dbt/column.py b/src/cube_dbt/column.py index 52e8647..ed90faa 100644 --- a/src/cube_dbt/column.py +++ b/src/cube_dbt/column.py @@ -194,9 +194,10 @@ class Column: - def __init__(self, model_name: str, column_dict: dict) -> None: + def __init__(self, model_name: str, column_dict: dict, tests: list = None) -> None: self._model_name = model_name self._column_dict = column_dict + self._tests = tests or [] pass def __repr__(self) -> str: @@ -239,10 +240,20 @@ def meta(self) -> dict: @property 
def primary_key(self) -> bool: """ - Convention: if the column is marked with the 'primary_key' tag, - it will be mapped to a primary key dimension + Detects if a column is a primary key using multiple methods: + 1. Column tagged with 'primary_key' tag (legacy cube_dbt convention) + 2. Column has both 'unique' and 'not_null' tests (standard dbt convention) + 3. Column has 'primary_key' constraint (checked at model level) """ - return 'primary_key' in self._column_dict['tags'] + # Method 1: Check for 'primary_key' tag (legacy approach) + if 'primary_key' in self._column_dict['tags']: + return True + + # Method 2: Check for unique + not_null tests (standard dbt approach) + if 'unique' in self._tests and 'not_null' in self._tests: + return True + + return False def _as_dimension(self) -> dict: data = {} diff --git a/src/cube_dbt/dbt.py b/src/cube_dbt/dbt.py index a4350b8..44f8fbe 100644 --- a/src/cube_dbt/dbt.py +++ b/src/cube_dbt/dbt.py @@ -10,6 +10,7 @@ def __init__(self, manifest: dict) -> None: self.tags = [] self.names = [] self._models = None + self._test_index = None pass @staticmethod @@ -30,10 +31,58 @@ def filter(self, paths: list[str]=[], tags: list[str]=[], names: list[str]=[]) - self.names = names return self + def _build_test_index(self): + """ + Build an index of tests by model and column for efficient lookup. + Returns a dict like: + { + 'model_name': { + 'column_name': ['unique', 'not_null', ...] 
+ } + } + """ + if self._test_index is not None: + return + + self._test_index = {} + + for key, node in self.manifest.get('nodes', {}).items(): + if node.get('resource_type') != 'test': + continue + + test_metadata = node.get('test_metadata') + if not test_metadata: + continue + + test_name = test_metadata.get('name') + kwargs = test_metadata.get('kwargs', {}) + column_name = kwargs.get('column_name') + + if not test_name or not column_name: + continue + + # Get the model this test depends on + depends_on = node.get('depends_on', {}).get('nodes', []) + for dep in depends_on: + if dep.startswith('model.'): + # Extract model name from unique_id like "model.project.model_name" + model_name = dep.split('.')[-1] + + if model_name not in self._test_index: + self._test_index[model_name] = {} + + if column_name not in self._test_index[model_name]: + self._test_index[model_name][column_name] = [] + + self._test_index[model_name][column_name].append(test_name) + def _init_models(self): if self._models == None: + # Build test index first + self._build_test_index() + self._models = list( - Model(node) for key, node in self.manifest['nodes'].items() + Model(node, self._test_index.get(node['name'], {})) for key, node in self.manifest['nodes'].items() if node['resource_type'] == 'model' and node['config']['materialized'] != 'ephemeral' and (any(node['path'].startswith(path) for path in self.paths) if self.paths else True) and diff --git a/src/cube_dbt/model.py b/src/cube_dbt/model.py index 4e3ff3b..9c85684 100644 --- a/src/cube_dbt/model.py +++ b/src/cube_dbt/model.py @@ -2,10 +2,12 @@ from cube_dbt.dump import dump, SafeString class Model: - def __init__(self, model_dict: dict) -> None: + def __init__(self, model_dict: dict, test_index: dict = None) -> None: self._model_dict = model_dict + self._test_index = test_index or {} self._columns = None self._primary_key = None + self._constraint_primary_keys = None pass def __repr__(self) -> str: @@ -14,15 +16,47 @@ def __repr__(self) 
-> str: def _init_columns(self) -> None: if self._columns == None: self._columns = list( - Column(self.name, column) for key, column in self._model_dict['columns'].items() + Column(self.name, column, self._test_index.get(column['name'], [])) + for key, column in self._model_dict['columns'].items() ) + self._detect_constraint_primary_keys() self._detect_primary_key() + def _detect_constraint_primary_keys(self) -> None: + """ + Detect primary keys defined via dbt 1.5+ constraints. + Example: constraints: [{type: 'primary_key', columns: ['id']}] + """ + self._constraint_primary_keys = [] + constraints = self._model_dict.get('constraints', []) + + for constraint in constraints: + if constraint.get('type') == 'primary_key': + columns = constraint.get('columns', []) + self._constraint_primary_keys.extend(columns) + def _detect_primary_key(self) -> None: - self._primary_key = list( - column for column in self._columns - if column.primary_key - ) + """ + Detect primary keys from multiple sources: + 1. Constraint-based primary keys (dbt 1.5+) + 2. Column tags (legacy cube_dbt approach) + 3. 
unique + not_null tests (standard dbt approach) + """ + primary_keys = [] + + # First check for constraint-based primary keys + if self._constraint_primary_keys: + for column in self._columns: + if column.name in self._constraint_primary_keys: + primary_keys.append(column) + else: + # Fall back to tag-based or test-based detection + primary_keys = [ + column for column in self._columns + if column.primary_key + ] + + self._primary_key = primary_keys @property def name(self) -> str: diff --git a/tests/manifest.json b/tests/manifest.json index a8839c8..affd550 100644 --- a/tests/manifest.json +++ b/tests/manifest.json @@ -390,7 +390,35 @@ "cube" ], "description": "", - "columns": {}, + "columns": { + "id": { + "name": "id", + "description": "Product ID", + "meta": {}, + "data_type": "integer", + "constraints": [], + "quote": null, + "tags": [] + }, + "name": { + "name": "name", + "description": "Product name", + "meta": {}, + "data_type": "string", + "constraints": [], + "quote": null, + "tags": [] + }, + "price": { + "name": "price", + "description": "Product price", + "meta": {}, + "data_type": "numeric", + "constraints": [], + "quote": null, + "tags": [] + } + }, "meta": {}, "group": null, "docs": { @@ -428,10 +456,123 @@ "checksum": null }, "access": "protected", - "constraints": [], + "constraints": [ + { + "type": "primary_key", + "columns": [ + "id" + ] + } + ], "version": null, "latest_version": null, "deprecation_date": null + }, + "test.jaffle_shop.unique_orders_copy_id": { + "database": "ecom", + "schema": "public_dbt_test__audit", + "name": "unique_orders_copy_id", + "resource_type": "test", + "package_name": "jaffle_shop", + "path": "unique_orders_copy_id.sql", + "original_file_path": "models/schema.yml", + "unique_id": "test.jaffle_shop.unique_orders_copy_id", + "fqn": [ + "jaffle_shop", + "unique_orders_copy_id" + ], + "alias": "unique_orders_copy_id", + "checksum": { + "name": "none", + "checksum": "" + }, + "config": { + "enabled": true, + "alias": 
null, + "schema": "dbt_test__audit", + "database": null, + "tags": [], + "meta": {}, + "materialized": "test", + "severity": "ERROR", + "store_failures": null, + "where": null, + "limit": null, + "fail_calc": "count(*)", + "warn_if": "!= 0", + "error_if": "!= 0" + }, + "tags": [], + "description": "", + "columns": {}, + "meta": {}, + "depends_on": { + "macros": [], + "nodes": [ + "model.jaffle_shop.orders_copy" + ] + }, + "test_metadata": { + "name": "unique", + "kwargs": { + "column_name": "id", + "model": "{{ get_where_subquery(ref('orders_copy')) }}" + }, + "namespace": null + } + }, + "test.jaffle_shop.not_null_orders_copy_id": { + "database": "ecom", + "schema": "public_dbt_test__audit", + "name": "not_null_orders_copy_id", + "resource_type": "test", + "package_name": "jaffle_shop", + "path": "not_null_orders_copy_id.sql", + "original_file_path": "models/schema.yml", + "unique_id": "test.jaffle_shop.not_null_orders_copy_id", + "fqn": [ + "jaffle_shop", + "not_null_orders_copy_id" + ], + "alias": "not_null_orders_copy_id", + "checksum": { + "name": "none", + "checksum": "" + }, + "config": { + "enabled": true, + "alias": null, + "schema": "dbt_test__audit", + "database": null, + "tags": [], + "meta": {}, + "materialized": "test", + "severity": "ERROR", + "store_failures": null, + "where": null, + "limit": null, + "fail_calc": "count(*)", + "warn_if": "!= 0", + "error_if": "!= 0" + }, + "tags": [], + "description": "", + "columns": {}, + "meta": {}, + "depends_on": { + "macros": [], + "nodes": [ + "model.jaffle_shop.orders_copy" + ] + }, + "test_metadata": { + "name": "not_null", + "kwargs": { + "column_name": "id", + "model": "{{ get_where_subquery(ref('orders_copy')) }}" + }, + "namespace": null + } } }, "sources": {}, diff --git a/tests/test_dbt.py b/tests/test_dbt.py index 24c1f9b..531eded 100644 --- a/tests/test_dbt.py +++ b/tests/test_dbt.py @@ -188,4 +188,30 @@ def test_model(self): } } dbt = Dbt(manifest) - assert dbt.model('users_copy_2').name == 
'users_copy_2' \ No newline at end of file + assert dbt.model('users_copy_2').name == 'users_copy_2' + + def test_primary_key_detection_from_tests(self): + """ + Integration test: Load manifest with test nodes and verify + primary key detection from unique+not_null tests + """ + directory_path = os.path.dirname(os.path.realpath(__file__)) + dbt = Dbt.from_file(directory_path + '/manifest.json') + + # orders_copy should have id as primary key (from unique+not_null tests) + orders_model = dbt.model('orders_copy') + assert len(orders_model.primary_key) == 1 + assert orders_model.primary_key[0].name == 'id' + + def test_primary_key_detection_from_constraints(self): + """ + Integration test: Load manifest and verify primary key detection + from model constraints + """ + directory_path = os.path.dirname(os.path.realpath(__file__)) + dbt = Dbt.from_file(directory_path + '/manifest.json') + + # products_copy should have id as primary key (from constraints) + products_model = dbt.model('products_copy') + assert len(products_model.primary_key) == 1 + assert products_model.primary_key[0].name == 'id' \ No newline at end of file diff --git a/tests/test_model.py b/tests/test_model.py index 60f086a..3b1f721 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -320,3 +320,158 @@ def test_as_dimensions_render_two_primary_keys(self): primary_key: true """ ) + + def test_detect_primary_key_from_tests(self): + """ + If a column has both unique and not_null tests, + it should be detected as a primary key + """ + model_dict = { + 'name': 'model', + 'columns': { + 'id': { + 'name': 'id', + 'data_type': 'numeric', + 'tags': [] + }, + 'status': { + 'name': 'status', + 'data_type': None, + 'tags': [] + } + } + } + test_index = { + 'id': ['unique', 'not_null'], + 'status': [] + } + model = Model(model_dict, test_index) + assert len(model.primary_key) == 1 + assert model.primary_key[0].name == "id" + + def test_detect_primary_key_from_tests_multiple(self): + """ + Multiple columns 
with unique+not_null tests should all be detected + """ + model_dict = { + 'name': 'model', + 'columns': { + 'id': { + 'name': 'id', + 'data_type': 'numeric', + 'tags': [] + }, + 'account_id': { + 'name': 'account_id', + 'data_type': 'numeric', + 'tags': [] + } + } + } + test_index = { + 'id': ['unique', 'not_null'], + 'account_id': ['unique', 'not_null'] + } + model = Model(model_dict, test_index) + assert len(model.primary_key) == 2 + assert model.primary_key[0].name == "id" + assert model.primary_key[1].name == "account_id" + + def test_detect_primary_key_from_constraints(self): + """ + If a model has constraints with type='primary_key', + those columns should be detected as primary keys + """ + model_dict = { + 'name': 'model', + 'constraints': [ + { + 'type': 'primary_key', + 'columns': ['id'] + } + ], + 'columns': { + 'id': { + 'name': 'id', + 'data_type': 'numeric', + 'tags': [] + }, + 'status': { + 'name': 'status', + 'data_type': None, + 'tags': [] + } + } + } + model = Model(model_dict) + assert len(model.primary_key) == 1 + assert model.primary_key[0].name == "id" + + def test_detect_primary_key_from_constraints_composite(self): + """ + Composite primary keys from constraints should be detected + """ + model_dict = { + 'name': 'model', + 'constraints': [ + { + 'type': 'primary_key', + 'columns': ['customer_id', 'order_id'] + } + ], + 'columns': { + 'customer_id': { + 'name': 'customer_id', + 'data_type': 'numeric', + 'tags': [] + }, + 'order_id': { + 'name': 'order_id', + 'data_type': 'numeric', + 'tags': [] + }, + 'status': { + 'name': 'status', + 'data_type': None, + 'tags': [] + } + } + } + model = Model(model_dict) + assert len(model.primary_key) == 2 + assert model.primary_key[0].name == "customer_id" + assert model.primary_key[1].name == "order_id" + + def test_primary_key_priority_constraints_over_tests(self): + """ + Constraint-based primary keys should take priority over test-based detection + """ + model_dict = { + 'name': 'model', + 
'constraints': [ + { + 'type': 'primary_key', + 'columns': ['id'] + } + ], + 'columns': { + 'id': { + 'name': 'id', + 'data_type': 'numeric', + 'tags': [] + }, + 'account_id': { + 'name': 'account_id', + 'data_type': 'numeric', + 'tags': [] + } + } + } + test_index = { + 'id': ['unique', 'not_null'], + 'account_id': ['unique', 'not_null'] + } + model = Model(model_dict, test_index) + # Should only detect id from constraints, not account_id from tests + assert len(model.primary_key) == 1 + assert model.primary_key[0].name == "id" From d5f12e9360dd0d25667a9173143e72ad016c9e4a Mon Sep 17 00:00:00 2001 From: Paco Valdez Date: Wed, 22 Oct 2025 15:47:17 -0700 Subject: [PATCH 3/4] Added orjson for perf --- pdm.lock | 89 ++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + src/cube_dbt/dbt.py | 26 ++++++++++--- 3 files changed, 109 insertions(+), 7 deletions(-) diff --git a/pdm.lock b/pdm.lock index 92c121c..a8b161e 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "test"] strategy = ["cross_platform"] lock_version = "4.5.0" -content_hash = "sha256:a589f1f5c7d1c0e5d8aebda64d675370839c23a9245f624cb9e694c86e77a473" +content_hash = "sha256:626fa5c8b3f826f87dc7c098c37f2aa564c08d970be1dc473a6db7efe4c7f232" [[metadata.targets]] requires_python = ">=3.8" @@ -40,6 +40,93 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "orjson" +version = "3.10.15" +requires_python = ">=3.8" +summary = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +files = [ + {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"}, + {file = 
"orjson-3.10.15-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c2c79fa308e6edb0ffab0a31fd75a7841bf2a79a20ef08a3c6e3b26814c8ca8"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cb85490aa6bf98abd20607ab5c8324c0acb48d6da7863a51be48505646c814"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763dadac05e4e9d2bc14938a45a2d0560549561287d41c465d3c58aec818b164"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a330b9b4734f09a623f74a7490db713695e13b67c959713b78369f26b3dee6bf"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a61a4622b7ff861f019974f73d8165be1bd9a0855e1cad18ee167acacabeb061"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:acd271247691574416b3228db667b84775c497b245fa275c6ab90dc1ffbbd2b3"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:e4759b109c37f635aa5c5cc93a1b26927bfde24b254bcc0e1149a9fada253d2d"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e992fd5cfb8b9f00bfad2fd7a05a4299db2bbe92e6440d9dd2fab27655b3182"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f95fb363d79366af56c3f26b71df40b9a583b07bbaaf5b317407c4d58497852e"}, + {file = "orjson-3.10.15-cp310-cp310-win32.whl", hash = "sha256:f9875f5fea7492da8ec2444839dcc439b0ef298978f311103d0b7dfd775898ab"}, + {file = "orjson-3.10.15-cp310-cp310-win_amd64.whl", hash = "sha256:17085a6aa91e1cd70ca8533989a18b5433e15d29c574582f76f821737c8d5806"}, + {file = "orjson-3.10.15-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c4cc83960ab79a4031f3119cc4b1a1c627a3dc09df125b27c4201dff2af7eaa6"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ddbeef2481d895ab8be5185f2432c334d6dec1f5d1933a9c83014d188e102cef"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9e590a0477b23ecd5b0ac865b1b907b01b3c5535f5e8a8f6ab0e503efb896334"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6be38bd103d2fd9bdfa31c2720b23b5d47c6796bcb1d1b598e3924441b4298d"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ff4f6edb1578960ed628a3b998fa54d78d9bb3e2eb2cfc5c2a09732431c678d0"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0482b21d0462eddd67e7fce10b89e0b6ac56570424662b685a0d6fccf581e13"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bb5cc3527036ae3d98b65e37b7986a918955f85332c1ee07f9d3f82f3a6899b5"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d569c1c462912acdd119ccbf719cf7102ea2c67dd03b99edcb1a3048651ac96b"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:1e6d33efab6b71d67f22bf2962895d3dc6f82a6273a965fab762e64fa90dc399"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c33be3795e299f565681d69852ac8c1bc5c84863c0b0030b2b3468843be90388"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:eea80037b9fae5339b214f59308ef0589fc06dc870578b7cce6d71eb2096764c"}, + {file = "orjson-3.10.15-cp311-cp311-win32.whl", hash = "sha256:d5ac11b659fd798228a7adba3e37c010e0152b78b1982897020a8e019a94882e"}, + {file = "orjson-3.10.15-cp311-cp311-win_amd64.whl", hash = "sha256:cf45e0214c593660339ef63e875f32ddd5aa3b4adc15e662cdb80dc49e194f8e"}, + {file = "orjson-3.10.15-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9d11c0714fc85bfcf36ada1179400862da3288fc785c30e8297844c867d7505a"}, + {file = 
"orjson-3.10.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dba5a1e85d554e3897fa9fe6fbcff2ed32d55008973ec9a2b992bd9a65d2352d"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7723ad949a0ea502df656948ddd8b392780a5beaa4c3b5f97e525191b102fff0"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fd9bc64421e9fe9bd88039e7ce8e58d4fead67ca88e3a4014b143cec7684fd4"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dadba0e7b6594216c214ef7894c4bd5f08d7c0135f4dd0145600be4fbcc16767"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48f59114fe318f33bbaee8ebeda696d8ccc94c9e90bc27dbe72153094e26f41"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:035fb83585e0f15e076759b6fedaf0abb460d1765b6a36f48018a52858443514"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d13b7fe322d75bf84464b075eafd8e7dd9eae05649aa2a5354cfa32f43c59f17"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7066b74f9f259849629e0d04db6609db4cf5b973248f455ba5d3bd58a4daaa5b"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88dc3f65a026bd3175eb157fea994fca6ac7c4c8579fc5a86fc2114ad05705b7"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b342567e5465bd99faa559507fe45e33fc76b9fb868a63f1642c6bc0735ad02a"}, + {file = "orjson-3.10.15-cp312-cp312-win32.whl", hash = "sha256:0a4f27ea5617828e6b58922fdbec67b0aa4bb844e2d363b9244c47fa2180e665"}, + {file = "orjson-3.10.15-cp312-cp312-win_amd64.whl", hash = "sha256:ef5b87e7aa9545ddadd2309efe6824bd3dd64ac101c15dae0f2f597911d46eaa"}, + {file = "orjson-3.10.15-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:bae0e6ec2b7ba6895198cd981b7cca95d1487d0147c8ed751e5632ad16f031a6"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f93ce145b2db1252dd86af37d4165b6faa83072b46e3995ecc95d4b2301b725a"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c203f6f969210128af3acae0ef9ea6aab9782939f45f6fe02d05958fe761ef9"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8918719572d662e18b8af66aef699d8c21072e54b6c82a3f8f6404c1f5ccd5e0"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f71eae9651465dff70aa80db92586ad5b92df46a9373ee55252109bb6b703307"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e117eb299a35f2634e25ed120c37c641398826c2f5a3d3cc39f5993b96171b9e"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13242f12d295e83c2955756a574ddd6741c81e5b99f2bef8ed8d53e47a01e4b7"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7946922ada8f3e0b7b958cc3eb22cfcf6c0df83d1fe5521b4a100103e3fa84c8"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:b7155eb1623347f0f22c38c9abdd738b287e39b9982e1da227503387b81b34ca"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:208beedfa807c922da4e81061dafa9c8489c6328934ca2a562efa707e049e561"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eca81f83b1b8c07449e1d6ff7074e82e3fd6777e588f1a6632127f286a968825"}, + {file = "orjson-3.10.15-cp313-cp313-win32.whl", hash = "sha256:c03cd6eea1bd3b949d0d007c8d57049aa2b39bd49f58b4b2af571a5d3833d890"}, + {file = "orjson-3.10.15-cp313-cp313-win_amd64.whl", hash = "sha256:fd56a26a04f6ba5fb2045b0acc487a63162a958ed837648c5781e1fe3316cfbf"}, + {file = 
"orjson-3.10.15-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:5e8afd6200e12771467a1a44e5ad780614b86abb4b11862ec54861a82d677746"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da9a18c500f19273e9e104cca8c1f0b40a6470bcccfc33afcc088045d0bf5ea6"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb00b7bfbdf5d34a13180e4805d76b4567025da19a197645ca746fc2fb536586"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:33aedc3d903378e257047fee506f11e0833146ca3e57a1a1fb0ddb789876c1e1"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0099ae6aed5eb1fc84c9eb72b95505a3df4267e6962eb93cdd5af03be71c98"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c864a80a2d467d7786274fce0e4f93ef2a7ca4ff31f7fc5634225aaa4e9e98c"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c25774c9e88a3e0013d7d1a6c8056926b607a61edd423b50eb5c88fd7f2823ae"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e78c211d0074e783d824ce7bb85bf459f93a233eb67a5b5003498232ddfb0e8a"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:43e17289ffdbbac8f39243916c893d2ae41a2ea1a9cbb060a56a4d75286351ae"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:781d54657063f361e89714293c095f506c533582ee40a426cb6489c48a637b81"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6875210307d36c94873f553786a808af2788e362bd0cf4c8e66d976791e7b528"}, + {file = "orjson-3.10.15-cp38-cp38-win32.whl", hash = "sha256:305b38b2b8f8083cc3d618927d7f424349afce5975b316d33075ef0f73576b60"}, + {file = "orjson-3.10.15-cp38-cp38-win_amd64.whl", hash = 
"sha256:5dd9ef1639878cc3efffed349543cbf9372bdbd79f478615a1c633fe4e4180d1"}, + {file = "orjson-3.10.15-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ffe19f3e8d68111e8644d4f4e267a069ca427926855582ff01fc012496d19969"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d433bf32a363823863a96561a555227c18a522a8217a6f9400f00ddc70139ae2"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da03392674f59a95d03fa5fb9fe3a160b0511ad84b7a3914699ea5a1b3a38da2"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a63bb41559b05360ded9132032239e47983a39b151af1201f07ec9370715c82"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3766ac4702f8f795ff3fa067968e806b4344af257011858cc3d6d8721588b53f"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a1c73dcc8fadbd7c55802d9aa093b36878d34a3b3222c41052ce6b0fc65f8e8"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b299383825eafe642cbab34be762ccff9fd3408d72726a6b2a4506d410a71ab3"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:abc7abecdbf67a173ef1316036ebbf54ce400ef2300b4e26a7b843bd446c2480"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:3614ea508d522a621384c1d6639016a5a2e4f027f3e4a1c93a51867615d28829"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:295c70f9dc154307777ba30fe29ff15c1bcc9dfc5c48632f37d20a607e9ba85a"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:63309e3ff924c62404923c80b9e2048c1f74ba4b615e7584584389ada50ed428"}, + {file = "orjson-3.10.15-cp39-cp39-win32.whl", hash = "sha256:a2f708c62d026fb5340788ba94a55c23df4e1869fec74be455e0b2f5363b8507"}, + {file = 
"orjson-3.10.15-cp39-cp39-win_amd64.whl", hash = "sha256:efcf6c735c3d22ef60c4aa27a5238f1a477df85e9b15f2142f9d669beb2d13fd"}, + {file = "orjson-3.10.15.tar.gz", hash = "sha256:05ca7fe452a2e9d8d9d706a2984c95b9c2ebc5db417ce0b7a49b91d50642a23e"}, +] + [[package]] name = "packaging" version = "23.1" diff --git a/pyproject.toml b/pyproject.toml index 105466f..a4da81c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ authors = [ ] dependencies = [ "PyYAML>=6.0.1", + "orjson>=3.10.15", ] requires-python = ">=3.8" readme = "README.md" diff --git a/src/cube_dbt/dbt.py b/src/cube_dbt/dbt.py index 44f8fbe..1a0559c 100644 --- a/src/cube_dbt/dbt.py +++ b/src/cube_dbt/dbt.py @@ -1,4 +1,10 @@ -import json +try: + import orjson as json + # orjson.loads() requires bytes, returns dict + _USE_ORJSON = True +except ImportError: + import json + _USE_ORJSON = False from urllib.request import urlopen from cube_dbt.model import Model @@ -15,15 +21,23 @@ def __init__(self, manifest: dict) -> None: @staticmethod def from_file(manifest_path: str) -> 'Dbt': - with open(manifest_path, 'r', encoding='utf-8') as file: - manifest = json.loads(file.read()) - return Dbt(manifest) + if _USE_ORJSON: + with open(manifest_path, 'rb') as file: + manifest = json.loads(file.read()) + else: + with open(manifest_path, 'r', encoding='utf-8') as file: + manifest = json.load(file) + return Dbt(manifest) @staticmethod def from_url(manifest_url: str) -> 'Dbt': with urlopen(manifest_url) as file: - manifest = json.loads(file.read()) - return Dbt(manifest) + data = file.read() + if _USE_ORJSON: + manifest = json.loads(data) + else: + manifest = json.loads(data.decode('utf-8')) + return Dbt(manifest) def filter(self, paths: list[str]=[], tags: list[str]=[], names: list[str]=[]) -> 'Dbt': self.paths = paths From a86d25472bf47365865a98752d0fb356f343f579 Mon Sep 17 00:00:00 2001 From: Francisco Valdez de la Fuente Date: Wed, 22 Oct 2025 15:50:57 -0700 Subject: [PATCH 4/4] Update QUICK_REFERENCE.md 
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- QUICK_REFERENCE.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/QUICK_REFERENCE.md b/QUICK_REFERENCE.md index 583782c..d9aad90 100644 --- a/QUICK_REFERENCE.md +++ b/QUICK_REFERENCE.md @@ -67,7 +67,8 @@ tests/ - 34 unit tests, all passing Type mapping: BigQuery, Snowflake, Redshift → Cube types (number, string, time, boolean, geo) ## Dependencies -- Production: PyYAML >= 6.0.1 +- Production: PyYAML >= 6.0.1, orjson >= 3.10.15 + - Note: orjson is used for fast JSON parsing. If orjson cannot be imported, the package falls back to Python's standard-library `json` module. - Development: pytest >= 7.4.2 - Python: >= 3.8