diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f72bc5a15..4efaba0c5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -148,10 +148,11 @@ The suites marked `[not documented]` require additional configuration which will
 
 SQLAlchemy provides reusable tests for testing dialect implementations.
 
-To run these tests, assuming the environment variables needed for e2e tests are set:
+To run these tests, assuming the environment variables needed for e2e tests are set, do the following:
 
 ```
-poetry run python -m pytest tests/sqlalchemy_dialect_compliance --dburi \
+cd src/databricks/sqlalchemy
+poetry run python -m pytest test/test_suite.py --dburi \
 "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema"
 ```
diff --git a/examples/sqlalchemy.py b/examples/sqlalchemy.py
index 2d6b796bc..650fb293b 100644
--- a/examples/sqlalchemy.py
+++ b/examples/sqlalchemy.py
@@ -34,7 +34,7 @@
 # Known Gaps
 - MAP, ARRAY, and STRUCT types: this dialect can read these types out as strings. But you cannot
-  define a SQLAlchemy model with databricks.sqlalchemy.dialect.types.DatabricksMap (e.g.) because
+  define a SQLAlchemy model with databricks.sqlalchemy.types.DatabricksMap (e.g.) because
   we haven't implemented them yet.
 - Constraints: with the addition of information_schema to Unity Catalog, Databricks SQL supports
   foreign key and primary key constraints. This dialect can write these constraints but the ability
diff --git a/poetry.lock b/poetry.lock
index 432907c58..44046b2bf 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2,13 +2,13 @@
 
 [[package]]
 name = "alembic"
-version = "1.11.2"
+version = "1.12.0"
 description = "A database migration tool for SQLAlchemy."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "alembic-1.11.2-py3-none-any.whl", hash = "sha256:7981ab0c4fad4fe1be0cf183aae17689fe394ff874fd2464adb774396faf0796"},
-    {file = "alembic-1.11.2.tar.gz", hash = "sha256:678f662130dc540dac12de0ea73de9f89caea9dbea138f60ef6263149bf84657"},
+    {file = "alembic-1.12.0-py3-none-any.whl", hash = "sha256:03226222f1cf943deee6c85d9464261a6c710cd19b4fe867a3ad1f25afda610f"},
+    {file = "alembic-1.12.0.tar.gz", hash = "sha256:8e7645c32e4f200675e69f0745415335eb59a3663f5feb487abfa0b30c45888b"},
 ]
 
 [package.dependencies]
@@ -172,13 +172,13 @@ files = [
 
 [[package]]
 name = "click"
-version = "8.1.6"
+version = "8.1.7"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"},
-    {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"},
+    {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
+    {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
 ]
 
 [package.dependencies]
@@ -223,13 +223,13 @@ files = [
 
 [[package]]
 name = "exceptiongroup"
-version = "1.1.2"
+version = "1.1.3"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"},
-    {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"},
+    {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash =
"sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, ] [package.extras] @@ -571,34 +571,35 @@ files = [ [[package]] name = "mypy" -version = "0.950" +version = "0.981" description = "Optional static typing for Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, - {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"}, - {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"}, - {file = "mypy-0.950-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb"}, - {file = "mypy-0.950-cp310-cp310-win_amd64.whl", hash = "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334"}, - {file = "mypy-0.950-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f"}, - {file = "mypy-0.950-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc"}, - {file = "mypy-0.950-cp36-cp36m-win_amd64.whl", hash = "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2"}, - {file = "mypy-0.950-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed"}, - {file = "mypy-0.950-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075"}, - {file = "mypy-0.950-cp37-cp37m-win_amd64.whl", hash = "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b"}, - {file = "mypy-0.950-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d"}, - {file = "mypy-0.950-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a"}, - {file = "mypy-0.950-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605"}, - {file = "mypy-0.950-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2"}, - {file = "mypy-0.950-cp38-cp38-win_amd64.whl", hash = "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff"}, - {file = "mypy-0.950-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8"}, - {file = "mypy-0.950-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038"}, - {file = "mypy-0.950-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2"}, - {file = "mypy-0.950-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519"}, - 
{file = "mypy-0.950-cp39-cp39-win_amd64.whl", hash = "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef"}, - {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, - {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, + {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"}, + {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"}, + {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"}, + {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"}, + {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"}, + {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"}, + {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"}, + {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"}, + {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"}, + {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"}, + {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"}, + {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"}, + {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"}, + {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"}, + {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"}, + {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"}, + {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"}, + {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"}, + {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"}, + {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"}, + {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"}, + {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"}, + {file = 
"mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"}, + {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"}, ] [package.dependencies] @@ -935,6 +936,47 @@ files = [ [package.dependencies] numpy = ">=1.16.6" +[[package]] +name = "pyarrow" +version = "13.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, + {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ce69f7bf01de2e2764e14df45b8404fc6f1a5ed9871e8e08a12169f87b7a26"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:588f0d2da6cf1b1680974d63be09a6530fd1bd825dc87f76e162404779a157dc"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6241afd72b628787b4abea39e238e3ff9f34165273fad306c7acf780dd850956"}, + {file = "pyarrow-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:fda7857e35993673fcda603c07d43889fca60a5b254052a462653f8656c64f44"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:aac0ae0146a9bfa5e12d87dda89d9ef7c57a96210b899459fc2f785303dcbb67"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7759994217c86c161c6a8060509cfdf782b952163569606bb373828afdd82e8"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868a073fd0ff6468ae7d869b5fc1f54de5c4255b37f44fb890385eb68b68f95d"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be67e29f3cfcde263a113c28e96aa04362ed8229cb7c6e5f5c719003659d33"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d1b4e7176443d12610874bb84d0060bf080f000ea9ed7c84b2801df851320295"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:69b6f9a089d116a82c3ed819eea8fe67dae6105f0d81eaf0fdd5e60d0c6e0944"}, + {file = "pyarrow-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ab1268db81aeb241200e321e220e7cd769762f386f92f61b898352dd27e402ce"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ee7490f0f3f16a6c38f8c680949551053c8194e68de5046e6c288e396dccee80"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3ad79455c197a36eefbd90ad4aa832bece7f830a64396c15c61a0985e337287"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68fcd2dc1b7d9310b29a15949cdd0cb9bc34b6de767aff979ebf546020bf0ba0"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc6fd330fd574c51d10638e63c0d00ab456498fc804c9d01f2a61b9264f2c5b2"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e66442e084979a97bb66939e18f7b8709e4ac5f887e636aba29486ffbf373763"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:0f6eff839a9e40e9c5610d3ff8c5bdd2f10303408312caf4c8003285d0b49565"}, + {file = 
"pyarrow-13.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b30a27f1cddf5c6efcb67e598d7823a1e253d743d92ac32ec1eb4b6a1417867"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:09552dad5cf3de2dc0aba1c7c4b470754c69bd821f5faafc3d774bedc3b04bb7"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3896ae6c205d73ad192d2fc1489cd0edfab9f12867c85b4c277af4d37383c18c"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6647444b21cb5e68b593b970b2a9a07748dd74ea457c7dadaa15fd469c48ada1"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47663efc9c395e31d09c6aacfa860f4473815ad6804311c5433f7085415d62a7"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b9ba6b6d34bd2563345488cf444510588ea42ad5613df3b3509f48eb80250afd"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:d00d374a5625beeb448a7fa23060df79adb596074beb3ddc1838adb647b6ef09"}, + {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, + {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + [[package]] name = "pylint" version = "2.13.9" @@ -961,13 +1003,13 @@ testutil = ["gitpython (>3)"] [[package]] name = "pytest" -version = "7.4.0" +version = "7.4.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, - {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, + {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, + {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, ] [package.dependencies] @@ -1027,13 +1069,13 @@ cli = ["click (>=5.0)"] [[package]] name = "pytz" -version = "2023.3" +version = "2023.3.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, - {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] [[package]] @@ -1259,13 +1301,13 @@ files = [ [[package]] name = "urllib3" -version = "2.0.4" +version = "2.0.5" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false
python-versions = ">=3.7"
files = [
-    {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
-    {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
+    {file = "urllib3-2.0.5-py3-none-any.whl", hash = "sha256:ef16afa8ba34a1f989db38e1dbbe0c302e4289a47856990d0682e374563ce35e"},
+    {file = "urllib3-2.0.5.tar.gz", hash = "sha256:13abf37382ea2ce6fb744d4dad67838eec857c9f4f57009891805e0b5e123594"},
 ]
 
 [package.extras]
@@ -1376,4 +1418,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7.1"
-content-hash = "8e61ec31838813ee794b06670a32118e6089b13c99b86a8da6850066cedbac2c"
+content-hash = "f0408e32f442f13bdabd8013cadcd7993c8d6ff33a9eaa3f1dba0d33be4ce98e"
diff --git a/pyproject.toml b/pyproject.toml
index a250274d9..fcf5a41cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ urllib3 = ">=1.0"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.1.2"
-mypy = "^0.950"
+mypy = "^0.981"
 pylint = ">=2.12.0"
 black = "^22.3.0"
 pytest-dotenv = "^0.5.2"
@@ -62,5 +62,3 @@ log_cli = "false"
 log_cli_level = "INFO"
 testpaths = ["tests"]
 env_files = ["test.env"]
-addopts = "--ignore=tests/sqlalchemy_dialect_compliance"
-
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 8cc37d920..000000000
--- a/setup.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-[sqla_testing]
-requirement_cls=databricks.sqlalchemy.dialect.requirements:Requirements
-profile_file=profiles.txt
diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py
index 918bd47a6..713a7dbb8 100644
--- a/src/databricks/sql/__init__.py
+++ b/src/databricks/sql/__init__.py
@@ -5,7 +5,7 @@
 # PEP 249 module globals
 apilevel = "2.0"
 threadsafety = 1  # Threads may share the module, but not connections.
-paramstyle = "pyformat"  # Python extended format codes, e.g. ...WHERE name=%(name)s
+paramstyle = "named"  # Named style, e.g. ...WHERE name=:name
 
 
 class DBAPITypeObject(object):
diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py
index 1df1e1d4d..b2eaa3785 100644
--- a/src/databricks/sqlalchemy/__init__.py
+++ b/src/databricks/sqlalchemy/__init__.py
@@ -1 +1,341 @@
-from databricks.sqlalchemy.dialect import DatabricksDialect
+"""This module's layout loosely follows the example of SQLAlchemy's postgres dialect
+"""
+
+import decimal, re, datetime
+from dateutil.parser import parse
+
+import sqlalchemy
+from sqlalchemy import types, event
+from sqlalchemy.engine import default, Engine
+from sqlalchemy.exc import DatabaseError, SQLAlchemyError
+from sqlalchemy.engine import reflection
+
+from databricks import sql
+
+
+from databricks.sqlalchemy.base import (
+    DatabricksDDLCompiler,
+    DatabricksIdentifierPreparer,
+)
+from databricks.sqlalchemy.compiler import DatabricksTypeCompiler
+
+try:
+    import alembic
+except ImportError:
+    pass
+else:
+    from alembic.ddl import DefaultImpl
+
+    class DatabricksImpl(DefaultImpl):
+        __dialect__ = "databricks"
+
+
+class DatabricksDecimal(types.TypeDecorator):
+    """Translates strings to decimals"""
+
+    impl = types.DECIMAL
+
+    def process_result_value(self, value, dialect):
+        if value is not None:
+            return decimal.Decimal(value)
+        else:
+            return None
+
+
+class DatabricksTimestamp(types.TypeDecorator):
+    """Translates timestamp strings to datetime objects"""
+
+    impl = types.TIMESTAMP
+
+    def process_result_value(self, value, dialect):
+        return value
+
+    def adapt(self, impltype, **kwargs):
+        return self.impl
+
+
+class DatabricksDate(types.TypeDecorator):
+    """Translates date strings to date objects"""
+
+    impl = types.DATE
+
+    def process_result_value(self, value, dialect):
+        return value
+
+    def adapt(self, impltype, **kwargs):
+        return self.impl
+
+
+class DatabricksDialect(default.DefaultDialect):
+    """This dialect implements only those methods required to pass our e2e tests"""
+
+    # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect
+    name: str = "databricks"
+    driver: str = "databricks"
+    default_schema_name: str = "default"
+
+    preparer = DatabricksIdentifierPreparer  # type: ignore
+    type_compiler = DatabricksTypeCompiler
+    ddl_compiler = DatabricksDDLCompiler
+    supports_statement_cache: bool = True
+    supports_multivalues_insert: bool = True
+    supports_native_decimal: bool = True
+    supports_sane_rowcount: bool = False
+    non_native_boolean_check_constraint: bool = False
+    paramstyle: str = "named"
+
+    @classmethod
+    def dbapi(cls):
+        return sql
+
+    def create_connect_args(self, url):
+        # TODO: can schema be provided after HOST?
+        # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***
+
+        kwargs = {
+            "server_hostname": url.host,
+            "access_token": url.password,
+            "http_path": url.query.get("http_path"),
+            "catalog": url.query.get("catalog"),
+            "schema": url.query.get("schema"),
+        }
+
+        self.schema = kwargs["schema"]
+        self.catalog = kwargs["catalog"]
+
+        return [], kwargs
+
+    def get_columns(self, connection, table_name, schema=None, **kwargs):
+        """Return information about columns in `table_name`.
+
+        Given a :class:`_engine.Connection`, a string
+        `table_name`, and an optional string `schema`, return column
+        information as a list of dictionaries with these keys:
+
+        name
+          the column's name
+
+        type
+          [sqlalchemy.types#TypeEngine]
+
+        nullable
+          boolean
+
+        default
+          the column's default value
+
+        autoincrement
+          boolean
+
+        sequence
+          a dictionary of the form
+              {'name': str, 'start': int, 'increment': int, 'minvalue': int,
+               'maxvalue': int, 'nominvalue': bool, 'nomaxvalue': bool,
+               'cycle': bool, 'cache': int, 'order': bool}
+
+        Additional column attributes may be present.
+        """
+
+        _type_map = {
+            "boolean": types.Boolean,
+            "smallint": types.SmallInteger,
+            "int": types.Integer,
+            "bigint": types.BigInteger,
+            "float": types.Float,
+            "double": types.Float,
+            "string": types.String,
+            "varchar": types.String,
+            "char": types.String,
+            "binary": types.String,
+            "array": types.String,
+            "map": types.String,
+            "struct": types.String,
+            "uniontype": types.String,
+            "decimal": DatabricksDecimal,
+            "timestamp": DatabricksTimestamp,
+            "date": DatabricksDate,
+        }
+
+        with self.get_connection_cursor(connection) as cur:
+            resp = cur.columns(
+                catalog_name=self.catalog,
+                schema_name=schema or self.schema,
+                table_name=table_name,
+            ).fetchall()
+
+        columns = []
+
+        for col in resp:
+
+            # Taken from PyHive. This removes added type info from decimals and maps
+            _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0)
+            this_column = {
+                "name": col.COLUMN_NAME,
+                "type": _type_map[_col_type.lower()],
+                "nullable": bool(col.NULLABLE),
+                "default": col.COLUMN_DEF,
+                "autoincrement": False if col.IS_AUTO_INCREMENT == "NO" else True,
+            }
+            columns.append(this_column)
+
+        return columns
+
+    def get_pk_constraint(self, connection, table_name, schema=None, **kw):
+        """Return information about the primary key constraint on
+        `table_name`.
+
+        Given a :class:`_engine.Connection`, a string
+        `table_name`, and an optional string `schema`, return primary
+        key information as a dictionary with these keys:
+
+        constrained_columns
+          a list of column names that make up the primary key
+
+        name
+          optional name of the primary key constraint.
+
+        """
+        # TODO: implement this behaviour
+        return {"constrained_columns": []}
+
+    def get_foreign_keys(self, connection, table_name, schema=None, **kw):
+        """Return information about foreign_keys in `table_name`.
+
+        Given a :class:`_engine.Connection`, a string
+        `table_name`, and an optional string `schema`, return foreign
+        key information as a list of dicts with these keys:
+
+        name
+          the constraint's name
+
+        constrained_columns
+          a list of column names that make up the foreign key
+
+        referred_schema
+          the name of the referred schema
+
+        referred_table
+          the name of the referred table
+
+        referred_columns
+          a list of column names in the referred table that correspond to
+          constrained_columns
+        """
+        # TODO: Implement this behaviour
+        return []
+
+    def get_indexes(self, connection, table_name, schema=None, **kw):
+        """Return information about indexes in `table_name`.
+
+        Given a :class:`_engine.Connection`, a string
+        `table_name` and an optional string `schema`, return index
+        information as a list of dictionaries with these keys:
+
+        name
+          the index's name
+
+        column_names
+          list of column names in order
+
+        unique
+          boolean
+        """
+        # TODO: Implement this behaviour
+        return []
+
+    def get_table_names(self, connection, schema=None, **kwargs):
+        TABLE_NAME = 1
+        with self.get_connection_cursor(connection) as cur:
+            sql_str = "SHOW TABLES FROM {}".format(
+                ".".join([self.catalog, schema or self.schema])
+            )
+            data = cur.execute(sql_str).fetchall()
+            _tables = [i[TABLE_NAME] for i in data]
+
+        return _tables
+
+    def get_view_names(self, connection, schema=None, **kwargs):
+        VIEW_NAME = 1
+        with self.get_connection_cursor(connection) as cur:
+            sql_str = "SHOW VIEWS FROM {}".format(
+                ".".join([self.catalog, schema or self.schema])
+            )
+            data = cur.execute(sql_str).fetchall()
+            _tables = [i[VIEW_NAME] for i in data]
+
+        return _tables
+
+    def do_rollback(self, dbapi_connection):
+        # Databricks SQL does not support transactions
+        pass
+
+    def has_table(
+        self, connection, table_name, schema=None, catalog=None, **kwargs
+    ) -> bool:
+        """SQLAlchemy docstrings say dialect providers must implement this method"""
+
+        _schema = schema or self.schema
+        _catalog = catalog or self.catalog
+
+        # DBR >12.x uses underscores in error messages
+        DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found"
+        DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND"
+
+        try:
+            res = connection.execute(
+                f"DESCRIBE TABLE {_catalog}.{_schema}.{table_name}"
+            )
+            return True
+        except DatabaseError as e:
+            if DBR_GT_12_NOT_FOUND_STRING in str(
+                e
+            ) or DBR_LTE_12_NOT_FOUND_STRING in str(e):
+                return False
+            else:
+                raise e
+
+    def get_connection_cursor(self, connection):
+        """Added for backwards compatibility with 1.3.x"""
+        if hasattr(connection, "_dbapi_connection"):
+            return connection._dbapi_connection.dbapi_connection.cursor()
+        elif hasattr(connection, "raw_connection"):
+            return connection.raw_connection().cursor()
+        elif hasattr(connection, "connection"):
+            return connection.connection.cursor()
+
+        raise SQLAlchemyError(
+            "Databricks dialect can't obtain a cursor context manager from the dbapi"
+        )
+
+    @reflection.cache
+    def get_schema_names(self, connection, **kw):
+        # Equivalent to SHOW DATABASES
+
+        # TODO: replace with call to cursor.schemas() once its performance matches raw SQL
+        return [row[0] for row in connection.execute("SHOW SCHEMAS")]
+
+
+@event.listens_for(Engine, "do_connect")
+def receive_do_connect(dialect, conn_rec, cargs, cparams):
+    """Helpful for DS on traffic from clients using SQLAlchemy in particular"""
+
+    # Ignore connect invocations that don't use our dialect
+    if not dialect.name == "databricks":
+        return
+
+    if "_user_agent_entry" in cparams:
+        new_user_agent = f"sqlalchemy + {cparams['_user_agent_entry']}"
+    else:
+        new_user_agent = "sqlalchemy"
+
+    cparams["_user_agent_entry"] = new_user_agent
+
+    if sqlalchemy.__version__.startswith("1.3"):
+        # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
+        # These should be passed in as connect_args when building the Engine
+
+        if "schema" in cparams:
+            dialect.schema = cparams["schema"]
+
+        if "catalog" in cparams:
+            dialect.catalog = cparams["catalog"]
diff --git a/src/databricks/sqlalchemy/dialect/base.py b/src/databricks/sqlalchemy/base.py
similarity index 100%
rename from src/databricks/sqlalchemy/dialect/base.py
rename to
src/databricks/sqlalchemy/base.py diff --git a/src/databricks/sqlalchemy/dialect/compiler.py b/src/databricks/sqlalchemy/compiler.py similarity index 100% rename from src/databricks/sqlalchemy/dialect/compiler.py rename to src/databricks/sqlalchemy/compiler.py diff --git a/src/databricks/sqlalchemy/dialect/__init__.py b/src/databricks/sqlalchemy/dialect/__init__.py deleted file mode 100644 index d2acac897..000000000 --- a/src/databricks/sqlalchemy/dialect/__init__.py +++ /dev/null @@ -1,340 +0,0 @@ -"""This module's layout loosely follows example of SQLAlchemy's postgres dialect -""" - -import decimal, re, datetime -from dateutil.parser import parse - -import sqlalchemy -from sqlalchemy import types, event -from sqlalchemy.engine import default, Engine -from sqlalchemy.exc import DatabaseError, SQLAlchemyError -from sqlalchemy.engine import reflection - -from databricks import sql - - -from databricks.sqlalchemy.dialect.base import ( - DatabricksDDLCompiler, - DatabricksIdentifierPreparer, -) -from databricks.sqlalchemy.dialect.compiler import DatabricksTypeCompiler - -try: - import alembic -except ImportError: - pass -else: - from alembic.ddl import DefaultImpl - - class DatabricksImpl(DefaultImpl): - __dialect__ = "databricks" - - -class DatabricksDecimal(types.TypeDecorator): - """Translates strings to decimals""" - - impl = types.DECIMAL - - def process_result_value(self, value, dialect): - if value is not None: - return decimal.Decimal(value) - else: - return None - - -class DatabricksTimestamp(types.TypeDecorator): - """Translates timestamp strings to datetime objects""" - - impl = types.TIMESTAMP - - def process_result_value(self, value, dialect): - return value - - def adapt(self, impltype, **kwargs): - return self.impl - - -class DatabricksDate(types.TypeDecorator): - """Translates date strings to date objects""" - - impl = types.DATE - - def process_result_value(self, value, dialect): - return value - - def adapt(self, impltype, **kwargs): - return self.impl - - -class DatabricksDialect(default.DefaultDialect): - """This dialect implements only those methods required to pass our e2e tests""" - - # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect - name: str = "databricks" - driver: str = "databricks" - default_schema_name: str = "default" - - preparer = DatabricksIdentifierPreparer # type: ignore - type_compiler = DatabricksTypeCompiler - ddl_compiler = DatabricksDDLCompiler - supports_statement_cache: bool = True - supports_multivalues_insert: bool = True - supports_native_decimal: bool = True - supports_sane_rowcount: bool = False - non_native_boolean_check_constraint: bool = False - - @classmethod - def dbapi(cls): - return sql - - def create_connect_args(self, url): - # TODO: can schema be provided after HOST? - # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/*** - - kwargs = { - "server_hostname": url.host, - "access_token": url.password, - "http_path": url.query.get("http_path"), - "catalog": url.query.get("catalog"), - "schema": url.query.get("schema"), - } - - self.schema = kwargs["schema"] - self.catalog = kwargs["catalog"] - - return [], kwargs - - def get_columns(self, connection, table_name, schema=None, **kwargs): - """Return information about columns in `table_name`. 
- - Given a :class:`_engine.Connection`, a string - `table_name`, and an optional string `schema`, return column - information as a list of dictionaries with these keys: - - name - the column's name - - type - [sqlalchemy.types#TypeEngine] - - nullable - boolean - - default - the column's default value - - autoincrement - boolean - - sequence - a dictionary of the form - {'name' : str, 'start' :int, 'increment': int, 'minvalue': int, - 'maxvalue': int, 'nominvalue': bool, 'nomaxvalue': bool, - 'cycle': bool, 'cache': int, 'order': bool} - - Additional column attributes may be present. - """ - - _type_map = { - "boolean": types.Boolean, - "smallint": types.SmallInteger, - "int": types.Integer, - "bigint": types.BigInteger, - "float": types.Float, - "double": types.Float, - "string": types.String, - "varchar": types.String, - "char": types.String, - "binary": types.String, - "array": types.String, - "map": types.String, - "struct": types.String, - "uniontype": types.String, - "decimal": DatabricksDecimal, - "timestamp": DatabricksTimestamp, - "date": DatabricksDate, - } - - with self.get_connection_cursor(connection) as cur: - resp = cur.columns( - catalog_name=self.catalog, - schema_name=schema or self.schema, - table_name=table_name, - ).fetchall() - - columns = [] - - for col in resp: - - # Taken from PyHive. This removes added type info from decimals and maps - _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0) - this_column = { - "name": col.COLUMN_NAME, - "type": _type_map[_col_type.lower()], - "nullable": bool(col.NULLABLE), - "default": col.COLUMN_DEF, - "autoincrement": False if col.IS_AUTO_INCREMENT == "NO" else True, - } - columns.append(this_column) - - return columns - - def get_pk_constraint(self, connection, table_name, schema=None, **kw): - """Return information about the primary key constraint on - table_name`. - - Given a :class:`_engine.Connection`, a string - `table_name`, and an optional string `schema`, return primary - key information as a dictionary with these keys: - - constrained_columns - a list of column names that make up the primary key - - name - optional name of the primary key constraint. - - """ - # TODO: implement this behaviour - return {"constrained_columns": []} - - def get_foreign_keys(self, connection, table_name, schema=None, **kw): - """Return information about foreign_keys in `table_name`. - - Given a :class:`_engine.Connection`, a string - `table_name`, and an optional string `schema`, return foreign - key information as a list of dicts with these keys: - - name - the constraint's name - - constrained_columns - a list of column names that make up the foreign key - - referred_schema - the name of the referred schema - - referred_table - the name of the referred table - - referred_columns - a list of column names in the referred table that correspond to - constrained_columns - """ - # TODO: Implement this behaviour - return [] - - def get_indexes(self, connection, table_name, schema=None, **kw): - """Return information about indexes in `table_name`. 
- - Given a :class:`_engine.Connection`, a string - `table_name` and an optional string `schema`, return index - information as a list of dictionaries with these keys: - - name - the index's name - - column_names - list of column names in order - - unique - boolean - """ - # TODO: Implement this behaviour - return [] - - def get_table_names(self, connection, schema=None, **kwargs): - TABLE_NAME = 1 - with self.get_connection_cursor(connection) as cur: - sql_str = "SHOW TABLES FROM {}".format( - ".".join([self.catalog, schema or self.schema]) - ) - data = cur.execute(sql_str).fetchall() - _tables = [i[TABLE_NAME] for i in data] - - return _tables - - def get_view_names(self, connection, schema=None, **kwargs): - VIEW_NAME = 1 - with self.get_connection_cursor(connection) as cur: - sql_str = "SHOW VIEWS FROM {}".format( - ".".join([self.catalog, schema or self.schema]) - ) - data = cur.execute(sql_str).fetchall() - _tables = [i[VIEW_NAME] for i in data] - - return _tables - - def do_rollback(self, dbapi_connection): - # Databricks SQL Does not support transactions - pass - - def has_table( - self, connection, table_name, schema=None, catalog=None, **kwargs - ) -> bool: - """SQLAlchemy docstrings say dialect providers must implement this method""" - - _schema = schema or self.schema - _catalog = catalog or self.catalog - - # DBR >12.x uses underscores in error messages - DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found" - DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND" - - try: - res = connection.execute( - f"DESCRIBE TABLE {_catalog}.{_schema}.{table_name}" - ) - return True - except DatabaseError as e: - if DBR_GT_12_NOT_FOUND_STRING in str( - e - ) or DBR_LTE_12_NOT_FOUND_STRING in str(e): - return False - else: - raise e - - def get_connection_cursor(self, connection): - """Added for backwards compatibility with 1.3.x""" - if hasattr(connection, "_dbapi_connection"): - return connection._dbapi_connection.dbapi_connection.cursor() - elif hasattr(connection, "raw_connection"): - return connection.raw_connection().cursor() - elif hasattr(connection, "connection"): - return connection.connection.cursor() - - raise SQLAlchemyError( - "Databricks dialect can't obtain a cursor context manager from the dbapi" - ) - - @reflection.cache - def get_schema_names(self, connection, **kw): - # Equivalent to SHOW DATABASES - - # TODO: replace with call to cursor.schemas() once its performance matches raw SQL - return [row[0] for row in connection.execute("SHOW SCHEMAS")] - - -@event.listens_for(Engine, "do_connect") -def receive_do_connect(dialect, conn_rec, cargs, cparams): - """Helpful for DS on traffic from clients using SQLAlchemy in particular""" - - # Ignore connect invocations that don't use our dialect - if not dialect.name == "databricks": - return - - if "_user_agent_entry" in cparams: - new_user_agent = f"sqlalchemy + {cparams['_user_agent_entry']}" - else: - new_user_agent = "sqlalchemy" - - cparams["_user_agent_entry"] = new_user_agent - - if sqlalchemy.__version__.startswith("1.3"): - # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string - # These should be passed in as connect_args when building the Engine - - if "schema" in cparams: - dialect.schema = cparams["schema"] - - if "catalog" in cparams: - dialect.catalog = cparams["catalog"] diff --git a/src/databricks/sqlalchemy/dialect/requirements.py b/src/databricks/sqlalchemy/requirements.py similarity index 100% rename from src/databricks/sqlalchemy/dialect/requirements.py rename to 
src/databricks/sqlalchemy/requirements.py
diff --git a/src/databricks/sqlalchemy/setup.cfg b/src/databricks/sqlalchemy/setup.cfg
new file mode 100644
index 000000000..ab89d17db
--- /dev/null
+++ b/src/databricks/sqlalchemy/setup.cfg
@@ -0,0 +1,4 @@
+
+[sqla_testing]
+requirement_cls=databricks.sqlalchemy.requirements:Requirements
+profile_file=profiles.txt
diff --git a/tests/sqlalchemy_dialect_compliance/conftest.py b/src/databricks/sqlalchemy/test/conftest.py
similarity index 100%
rename from tests/sqlalchemy_dialect_compliance/conftest.py
rename to src/databricks/sqlalchemy/test/conftest.py
diff --git a/tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py b/src/databricks/sqlalchemy/test/test_suite.py
similarity index 99%
rename from tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py
rename to src/databricks/sqlalchemy/test/test_suite.py
index a5ac2ad3b..7a840404b 100644
--- a/tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py
+++ b/src/databricks/sqlalchemy/test/test_suite.py
@@ -1,3 +1,4 @@
+# type: ignore
 from sqlalchemy.testing.suite import *
 
 import pytest
diff --git a/tests/e2e/sqlalchemy/test_basic.py b/tests/e2e/sqlalchemy/test_basic.py
index f17828eb1..c730b21e4 100644
--- a/tests/e2e/sqlalchemy/test_basic.py
+++ b/tests/e2e/sqlalchemy/test_basic.py
@@ -115,6 +115,7 @@ def test_connect_args(db_engine):
 
 
 @pytest.mark.skipif(sqlalchemy_1_3(), reason="Pandas requires SQLAlchemy >= 1.4")
+@pytest.mark.skip(reason="DBR is currently limited to 256 parameters per call to .execute(). Test cannot pass.")
 def test_pandas_upload(db_engine, metadata_obj):
 
     import pandas as pd
@@ -170,7 +171,8 @@ def test_bulk_insert_with_core(db_engine, metadata_obj, session):
 
     import random
 
-    num_to_insert = random.choice(range(10_000, 20_000))
+    # The maximum number of parameters is 256. 256/4 == 64
+    num_to_insert = 64
 
     table_name = "PySQLTest_{}".format(datetime.datetime.utcnow().strftime("%s"))
 
@@ -181,7 +183,7 @@
     )
 
     rows = [
-        {"name": names[i % 3], "number": random.choice(range(10000))}
+        {"name": names[i % 3], "number": random.choice(range(64))}
        for i in range(num_to_insert)
     ]
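
A note on the `paramstyle` change in this diff: switching the DB-API module and the dialect to the `named` style is user-visible, since bound parameters are now written as `:name` rather than pyformat's `%(name)s`. Below is a minimal sketch of exercising the relocated dialect end to end through SQLAlchemy. The URL shape follows the `create_connect_args` docstring above; the hostname, token, warehouse path, catalog, and schema are placeholder values, not real credentials.

```
from sqlalchemy import create_engine, text

# Placeholder connection values -- substitute your own workspace details.
engine = create_engine(
    "databricks://token:dapi...@example.cloud.databricks.com"
    "?http_path=/sql/1.0/warehouses/abc123&catalog=main&schema=default"
)

with engine.connect() as conn:
    # With paramstyle = "named", SQLAlchemy compiles bound parameters
    # to :name placeholders instead of pyformat's %(name)s.
    result = conn.execute(text("SELECT :x AS x"), {"x": 1})
    print(result.fetchall())
```

On SQLAlchemy 1.3.x, pass `http_path`, `catalog`, and `schema` through `connect_args` instead, per the comment in `receive_do_connect` above.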