From a5b0eb4cf27fc05f3b971fed091b0a663058aef2 Mon Sep 17 00:00:00 2001 From: David Gallagher Date: Sun, 20 Nov 2022 15:21:49 -0500 Subject: [PATCH] feat(backends/mssql): add backend support for Microsoft SQL Server --- .github/workflows/ibis-backends.yml | 46 ++++++--- ci/schema/mssql.sql | 74 ++++++++++++++ docker-compose.yml | 19 ++++ ibis/backends/conftest.py | 8 +- ibis/backends/mssql/__init__.py | 39 ++++++++ ibis/backends/mssql/compiler.py | 35 +++++++ ibis/backends/mssql/registry.py | 124 ++++++++++++++++++++++++ ibis/backends/mssql/tests/__init__.py | 0 ibis/backends/mssql/tests/conftest.py | 106 ++++++++++++++++++++ ibis/backends/tests/test_aggregation.py | 58 ++++++++--- ibis/backends/tests/test_api.py | 7 +- ibis/backends/tests/test_array.py | 4 +- ibis/backends/tests/test_client.py | 24 ++--- ibis/backends/tests/test_column.py | 2 + ibis/backends/tests/test_dot_sql.py | 19 ++-- ibis/backends/tests/test_generic.py | 51 +++++++--- ibis/backends/tests/test_json.py | 2 +- ibis/backends/tests/test_map.py | 1 + ibis/backends/tests/test_numeric.py | 47 +++++++-- ibis/backends/tests/test_param.py | 8 +- ibis/backends/tests/test_pretty.py | 8 +- ibis/backends/tests/test_set_ops.py | 4 +- ibis/backends/tests/test_string.py | 80 ++++++++++----- ibis/backends/tests/test_struct.py | 2 +- ibis/backends/tests/test_temporal.py | 72 +++++++++----- ibis/backends/tests/test_timecontext.py | 1 + ibis/backends/tests/test_window.py | 4 + poetry-overrides.nix | 5 + poetry.lock | 74 +++++++++++++- pyproject.toml | 4 + shell.nix | 1 + 31 files changed, 781 insertions(+), 148 deletions(-) create mode 100644 ci/schema/mssql.sql create mode 100644 ibis/backends/mssql/__init__.py create mode 100644 ibis/backends/mssql/compiler.py create mode 100644 ibis/backends/mssql/registry.py create mode 100644 ibis/backends/mssql/tests/__init__.py create mode 100644 ibis/backends/mssql/tests/conftest.py diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 39750a0e0991..d18e62d6a791 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -32,6 +32,9 @@ jobs: test_backends: name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} runs-on: ${{ matrix.os }} + env: + MSSQL_SA_PASSWORD: "1bis_Testing!"
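+      # must match the MSSQL_SA_PASSWORD set on the mssql service in docker-compose.yml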
strategy: fail-fast: false matrix: @@ -82,6 +84,13 @@ jobs: sys-deps: - cmake - ninja-build + - name: mssql + title: MS SQL Server + services: + - mssql + sys-deps: + - libkrb5-dev + - krb5-config exclude: - os: windows-latest backend: @@ -121,6 +130,15 @@ jobs: sys-deps: - cmake - ninja-build + - os: windows-latest + backend: + name: mssql + title: MS SQL Server + services: + - mssql + sys-deps: + - libkrb5-dev + - krb5-config steps: - name: update and install system dependencies if: ${{ matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null }} @@ -143,6 +161,13 @@ jobs: - name: checkout uses: actions/checkout@v3 + - uses: extractions/setup-just@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: download backend data + run: just download-data + - name: start services if: ${{ matrix.backend.services != null }} run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} @@ -173,13 +198,6 @@ jobs: if: ${{ matrix.backend.has_geo }} run: poetry install --without dev --without docs --extras ${{ matrix.backend.name }} --extras geospatial - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - name: "run parallel tests: ${{ matrix.backend.name }}" if: ${{ matrix.backend.name != 'pyspark' && matrix.backend.name != 'impala' }} run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup @@ -264,6 +282,13 @@ jobs: if: ${{ matrix.backend.name == 'postgres' }} run: sudo apt-get install -qq -y build-essential libgeos-dev + - uses: extractions/setup-just@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: download backend data + run: just download-data + - name: start services if: ${{ matrix.backend.services != null }} run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} @@ -288,10 +313,6 @@ jobs: # without updating anything except the requested versions run: poetry lock --no-update - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: install ibis if: ${{ matrix.backend.name != 'postgres' }} run: poetry install --without dev --without docs --extras ${{ matrix.backend.name }} @@ -300,9 +321,6 @@ jobs: if: ${{ matrix.backend.name == 'postgres' }} run: poetry install --without dev --without docs --extras ${{ matrix.backend.name }} --extras geospatial - - name: download backend data - run: just download-data - - name: run tests run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup diff --git a/ci/schema/mssql.sql b/ci/schema/mssql.sql new file mode 100644 index 000000000000..1821d3fdf3bd --- /dev/null +++ b/ci/schema/mssql.sql @@ -0,0 +1,74 @@ +DROP TABLE IF EXISTS diamonds; + +CREATE TABLE diamonds ( + carat FLOAT, + cut VARCHAR(MAX), + color VARCHAR(MAX), + clarity VARCHAR(MAX), + depth FLOAT, + "table" FLOAT, + price BIGINT, + x FLOAT, + y FLOAT, + z FLOAT +); + +DROP TABLE IF EXISTS batting; + +CREATE TABLE batting ( + "playerID" VARCHAR(MAX), + "yearID" BIGINT, + stint BIGINT, + "teamID" VARCHAR(MAX), + "lgID" VARCHAR(MAX), + "G" BIGINT, + "AB" BIGINT, + "R" BIGINT, + "H" BIGINT, + "X2B" BIGINT, + "X3B" BIGINT, + "HR" BIGINT, + "RBI" BIGINT, + "SB" BIGINT, + "CS" BIGINT, + "BB" BIGINT, + "SO" BIGINT, + "IBB" BIGINT, + "HBP" BIGINT, + "SH" BIGINT, + "SF" BIGINT, + "GIDP" BIGINT +); + +DROP TABLE IF EXISTS awards_players; + +CREATE TABLE awards_players ( + "playerID" VARCHAR(MAX), + "awardID" VARCHAR(MAX), + "yearID" BIGINT, + "lgID" 
VARCHAR(MAX), + tie VARCHAR(MAX), + notes VARCHAR(MAX) +); + +DROP TABLE IF EXISTS functional_alltypes; + +CREATE TABLE functional_alltypes ( + "index" BIGINT, + "Unnamed: 0" BIGINT, + id INTEGER, + bool_col BIT, + tinyint_col SMALLINT, + smallint_col SMALLINT, + int_col INTEGER, + bigint_col BIGINT, + float_col REAL, + double_col DOUBLE PRECISION, + date_string_col VARCHAR(MAX), + string_col VARCHAR(MAX), + timestamp_col DATETIME2, + year INTEGER, + month INTEGER +); + +CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index"); diff --git a/docker-compose.yml b/docker-compose.yml index 8c0f62bcdc25..f285b9e4bf6d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -112,9 +112,28 @@ services: - 5432:5432 networks: - postgres + mssql: + environment: + MSSQL_SA_PASSWORD: 1bis_Testing! + ACCEPT_EULA: "Y" + healthcheck: + interval: 10s + retries: 3 + test: + - CMD-SHELL + - /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "$MSSQL_SA_PASSWORD" -Q "SELECT 1 AS one" + timeout: 10s + image: mcr.microsoft.com/mssql/server:2022-latest + ports: + - 1433:1433 + networks: + - mssql + volumes: + - $PWD/ci/ibis-testing-data:/data:ro networks: impala: mysql: + mssql: clickhouse: postgres: diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index b7c2579d4d3c..51ed778f17b7 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -131,7 +131,7 @@ def recreate_database( database: str, **kwargs: Any, ) -> None: - """Drop the {database} at {url}, if it exists. + """Drop the `database` at `url`, if it exists. Create a new, blank database with the same name. @@ -142,7 +142,7 @@ def recreate_database( database : str Name of the database to be dropped. """ - engine = sa.create_engine(url, **kwargs) + engine = sa.create_engine(url.set(database=""), **kwargs) if url.database is not None: with engine.connect() as conn: @@ -157,9 +157,9 @@ def init_database( recreate: bool = True, **kwargs: Any, ) -> sa.engine.Engine: - """Initialise {database} at {url} with {schema}. + """Initialise `database` at `url` with `schema`. - If {recreate}, drop the {database} at {url}, if it exists. + If `recreate`, drop the `database` at `url`, if it exists. 
Parameters ---------- diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py new file mode 100644 index 000000000000..4e15ddb6585a --- /dev/null +++ b/ibis/backends/mssql/__init__.py @@ -0,0 +1,39 @@ +"""The Microsoft SQL Server backend.""" + +from __future__ import annotations + +from typing import Literal + +import sqlalchemy as sa + +from ibis.backends.base.sql.alchemy import BaseAlchemyBackend +from ibis.backends.mssql.compiler import MsSqlCompiler + + +class Backend(BaseAlchemyBackend): + name = "mssql" + compiler = MsSqlCompiler + + def do_connect( + self, + host: str = "localhost", + user: str | None = None, + password: str | None = None, + port: int = 1433, + database: str | None = None, + url: str | None = None, + driver: Literal["pymssql"] = "pymssql", + ) -> None: + if driver != "pymssql": + raise NotImplementedError("pymssql is currently the only supported driver") + alchemy_url = self._build_alchemy_url( + url=url, + host=host, + port=port, + user=user, + password=password, + database=database, + driver=f'mssql+{driver}', + ) + self.database_name = alchemy_url.database + super().do_connect(sa.create_engine(alchemy_url)) diff --git a/ibis/backends/mssql/compiler.py b/ibis/backends/mssql/compiler.py new file mode 100644 index 000000000000..6facf28a8bec --- /dev/null +++ b/ibis/backends/mssql/compiler.py @@ -0,0 +1,36 @@ +import sqlalchemy as sa +from sqlalchemy.dialects import mssql + +import ibis.expr.datatypes as dt +from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator +from ibis.backends.mssql.registry import operation_registry + + +class MsSqlExprTranslator(AlchemyExprTranslator): + _registry = operation_registry + _rewrites = AlchemyExprTranslator._rewrites.copy() + _type_map = AlchemyExprTranslator._type_map.copy() + _type_map.update( + { + dt.Boolean: mssql.BIT, + dt.Int8: mssql.TINYINT, + dt.Int16: mssql.SMALLINT, + dt.Int32: mssql.INTEGER, + dt.Int64: mssql.BIGINT, + # T-SQL's REAL is a 32-bit float; FLOAT defaults to 53-bit precision + dt.Float16: mssql.REAL, + dt.Float32: mssql.REAL, + dt.Float64: mssql.FLOAT, + dt.String: mssql.NVARCHAR, + } + ) + _bool_aggs_need_cast_to_int32 = True + integer_to_timestamp = sa.func.from_unixtime + native_json_type = False + + +rewrites = MsSqlExprTranslator.rewrites + + +class MsSqlCompiler(AlchemyCompiler): + translator_class = MsSqlExprTranslator diff --git a/ibis/backends/mssql/registry.py b/ibis/backends/mssql/registry.py new file mode 100644 index 000000000000..c644895bd15a --- /dev/null +++ b/ibis/backends/mssql/registry.py @@ -0,0 +1,124 @@ +import sqlalchemy as sa + +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sql.alchemy import ( + fixed_arity, + sqlalchemy_operation_registry, + unary, +) + + +def _reduction(func, cast_type='int32'): + def reduction_compiler(t, op): + arg, where = op.args + + if arg.output_dtype.is_boolean(): + nullable = arg.output_dtype.nullable + arg = ops.Cast(arg, dt.dtype(cast_type)(nullable=nullable)) + + if where is not None: + arg = ops.Where(where, arg, None) + return func(t.translate(arg)) + + return reduction_compiler + + +# String +# TODO: substr and find are copied from SQLite, we should really have a +# "base" set of SQL functions that are the most common APIs across the major +# RDBMS +def _substr(t, op): + f = sa.func.substring + + arg, start, length = op.args + + sa_arg = t.translate(arg) + sa_start = t.translate(start) + + if length is None: + return f(sa_arg, sa_start + 1) + else: + sa_length = t.translate(length) + return f(sa_arg, sa_start + 1,
sa_length) + + +def _string_find(t, op): + arg, substr, start, _ = op.args + + sa_arg = t.translate(arg) + sa_substr = t.translate(substr) + + if start is not None: + sa_start = t.translate(start) + return sa.func.charindex(sa_substr, sa_arg, sa_start) - 1 + + return sa.func.charindex(sa_substr, sa_arg) - 1 + + +# Numerical +def _floor_divide(t, op): + left, right = map(t.translate, op.args) + return sa.func.floor(left / right) + + +def _extract(fmt): + def translator(t, op): + (arg,) = op.args + sa_arg = t.translate(arg) + # sa.literal_column is used because it makes the argument render as a + # literal, NOT as a bound parameter + return sa.cast(sa.func.datepart(sa.literal_column(fmt), sa_arg), sa.SMALLINT) + + return translator + + +operation_registry = sqlalchemy_operation_registry.copy() + +operation_registry.update( + { + # aggregate methods + ops.Count: _reduction(sa.func.count), + ops.Max: _reduction(sa.func.max), + ops.Min: _reduction(sa.func.min), + ops.Sum: _reduction(sa.func.sum), + ops.Mean: _reduction(sa.func.avg, 'float64'), + ops.Where: fixed_arity(sa.func.iif, 3), + # string methods + ops.LStrip: unary(sa.func.ltrim), + ops.Lowercase: unary(sa.func.lower), + ops.RStrip: unary(sa.func.rtrim), + ops.Repeat: fixed_arity(sa.func.replicate, 2), + ops.Reverse: unary(sa.func.reverse), + ops.StringFind: _string_find, + ops.StringLength: unary(sa.func.datalength), + ops.StringReplace: fixed_arity(sa.func.replace, 3), + ops.Strip: unary(sa.func.trim), + ops.Substring: _substr, + ops.Uppercase: unary(sa.func.upper), + # math + ops.Abs: unary(sa.func.abs), + ops.Acos: unary(sa.func.acos), + ops.Asin: unary(sa.func.asin), + ops.Atan2: fixed_arity(sa.func.atn2, 2), + ops.Atan: unary(sa.func.atan), + ops.Ceil: unary(sa.func.ceiling), + ops.Cos: unary(sa.func.cos), + ops.Floor: unary(sa.func.floor), + ops.FloorDivide: _floor_divide, + ops.Power: fixed_arity(sa.func.power, 2), + ops.Sign: unary(sa.func.sign), + ops.Sin: unary(sa.func.sin), + ops.Sqrt: unary(sa.func.sqrt), + ops.Tan: unary(sa.func.tan), + # timestamp methods + ops.TimestampNow: fixed_arity(sa.func.GETDATE, 0), + ops.ExtractYear: _extract('year'), + ops.ExtractMonth: _extract('month'), + ops.ExtractDay: _extract('day'), + ops.ExtractHour: _extract('hour'), + ops.ExtractMinute: _extract('minute'), + ops.ExtractSecond: _extract('second'), + ops.ExtractMillisecond: _extract('millisecond'), + } +) diff --git a/ibis/backends/mssql/tests/__init__.py b/ibis/backends/mssql/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ibis/backends/mssql/tests/conftest.py b/ibis/backends/mssql/tests/conftest.py new file mode 100644 index 000000000000..d1053a06af17 --- /dev/null +++ b/ibis/backends/mssql/tests/conftest.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import concurrent.futures +import os +from pathlib import Path +from typing import Any + +import pytest +import sqlalchemy as sa + +import ibis +from ibis.backends.conftest import TEST_TABLES, init_database +from ibis.backends.tests.base import BackendTest, RoundHalfToEven + +MSSQL_USER = os.environ.get('IBIS_TEST_MSSQL_USER', 'sa') +MSSQL_PASS = os.environ.get('IBIS_TEST_MSSQL_PASSWORD', '1bis_Testing!') +MSSQL_HOST = os.environ.get('IBIS_TEST_MSSQL_HOST', 'localhost') +MSSQL_PORT = int(os.environ.get('IBIS_TEST_MSSQL_PORT', 1433)) +IBIS_TEST_MSSQL_DB = os.environ.get('IBIS_TEST_MSSQL_DATABASE', 'ibis_testing') + + +class TestConf(BackendTest, RoundHalfToEven): + # MSSQL has the same rounding behavior as postgres + check_dtype = False
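+    # the flags below disable test suites for features MSSQL lacks or that aren't wired up yet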
+    supports_window_operations = False + returned_timestamp_unit = 's' + supports_arrays = False + supports_arrays_outside_of_select = supports_arrays + supports_structs = False + supports_json = False + + def __init__(self, data_directory: Path) -> None: + super().__init__(data_directory) + + @staticmethod + def _load_data( + data_dir: Path, + script_dir: Path, + user: str = MSSQL_USER, + password: str = MSSQL_PASS, + host: str = MSSQL_HOST, + port: int = MSSQL_PORT, + database: str = IBIS_TEST_MSSQL_DB, + **_: Any, + ) -> None: + """Load test data into an MSSQL backend instance. + + Parameters + ---------- + data_dir + Location of test data + script_dir + Location of scripts defining schemas + """ + with open(script_dir / 'schema' / 'mssql.sql') as schema: + engine = init_database( + url=sa.engine.make_url( + f"mssql+pymssql://{user}:{password}@{host}:{port:d}/{database}" + ), + database=database, + schema=schema, + isolation_level="AUTOCOMMIT", + ) + + futures = [] + with concurrent.futures.ThreadPoolExecutor() as e: + for table in TEST_TABLES: + # /data is a volume mount to the ibis testing data + # used for snappy test data loading + # DataFrame.to_sql is unusably slow for loading CSVs + query = f""" + BULK INSERT {table} + FROM '/data/{table}.csv' + WITH ( + FORMAT = 'CSV', + FIELDTERMINATOR = ',', + ROWTERMINATOR = '\\n', + FIRSTROW = 2 + ) + """ + futures.append(e.submit(engine.execute, query)) + + for future in concurrent.futures.as_completed(futures): + future.result() + + @staticmethod + def connect(_: Path): + return ibis.mssql.connect( + host=MSSQL_HOST, + user=MSSQL_USER, + password=MSSQL_PASS, + database=IBIS_TEST_MSSQL_DB, + port=MSSQL_PORT, + ) + + +@pytest.fixture(scope='session') +def con(): + return ibis.mssql.connect( + host=MSSQL_HOST, + user=MSSQL_USER, + password=MSSQL_PASS, + database=IBIS_TEST_MSSQL_DB, + port=MSSQL_PORT, + ) diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 77f16462691d..a5561b3bc725 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -34,6 +34,7 @@ def mean_udf(s): "duckdb", "polars", "snowflake", + "mssql", ] ), pytest.mark.never(["sqlite", "mysql"], reason="no udf support"), @@ -62,6 +63,7 @@ def mean_udf(s): "mysql", "pyspark", "sqlite", + "mssql", ] ), ), @@ -86,6 +88,7 @@ "sqlite", "snowflake", "polars", + "mssql", ] argidx_grouped_marks = ["dask"] + argidx_not_grouped_marks @@ -165,9 +168,10 @@ def test_aggregate_grouped(backend, alltypes, df, result_fn, expected_fn): "sqlite", "snowflake", "polars", + "mssql", ] ) -def test_aggregate_multikey_group_reduction(backend, alltypes, df): +def test_aggregate_multikey_group_reduction_udf(backend, alltypes, df): """Tests .aggregate() on a multi-key group_by with a reduction operation.""" @@ -217,13 +221,13 @@ def mean_and_std(v): lambda t, _: t.bool_col.notany(), lambda t, _: ~t.bool_col.any(), id='notany', - marks=pytest.mark.notimpl(["polars", "datafusion"]), + marks=pytest.mark.notimpl(["polars", "datafusion", "mssql"]), ), param( lambda t, _: -t.bool_col.any(), lambda t, _: ~t.bool_col.any(), id='any_negate', - marks=pytest.mark.notimpl(["polars", "datafusion"]), + marks=pytest.mark.notimpl(["polars", "datafusion", "mssql"]), ), param( lambda t, _: t.bool_col.all(), @@ -235,13 +239,13 @@ def mean_and_std(v): lambda t, _: t.bool_col.notall(), lambda t, _: ~t.bool_col.all(), id='notall', - marks=pytest.mark.notimpl(["polars", "datafusion"]), +
marks=pytest.mark.notimpl(["polars", "datafusion", "mssql"]), ), param( lambda t, _: -t.bool_col.all(), lambda t, _: ~t.bool_col.all(), id='all_negate', - marks=pytest.mark.notimpl(["polars", "datafusion"]), + marks=pytest.mark.notimpl(["polars", "datafusion", "mssql"]), ), param( lambda t, where: t.double_col.sum(where=where), @@ -276,6 +280,7 @@ def mean_and_std(v): "mysql", "pyspark", "sqlite", + "mssql", ] ), ), @@ -293,6 +298,7 @@ def mean_and_std(v): "snowflake", "polars", "datafusion", + "mssql", ] ), ), @@ -310,6 +316,7 @@ def mean_and_std(v): "snowflake", "polars", "datafusion", + "mssql", ] ), ), @@ -317,25 +324,25 @@ def mean_and_std(v): lambda t, where: t.double_col.std(how='sample', where=where), lambda t, where: t.double_col[where].std(ddof=1), id='std', - marks=[mark.notimpl(["datafusion"])], + marks=[mark.notimpl(["datafusion", "mssql"])], ), param( lambda t, where: t.double_col.var(how='sample', where=where), lambda t, where: t.double_col[where].var(ddof=1), id='var', - marks=[mark.notimpl(["datafusion"])], + marks=[mark.notimpl(["datafusion", "mssql"])], ), param( lambda t, where: t.double_col.std(how='pop', where=where), lambda t, where: t.double_col[where].std(ddof=0), id='std_pop', - marks=[mark.notimpl(["datafusion"])], + marks=[mark.notimpl(["datafusion", "mssql"])], ), param( lambda t, where: t.double_col.var(how='pop', where=where), lambda t, where: t.double_col[where].var(ddof=0), id='var_pop', - marks=[mark.notimpl(["datafusion"])], + marks=[mark.notimpl(["datafusion", "mssql"])], ), param( lambda t, where: t.string_col.approx_nunique(where=where), @@ -356,6 +363,7 @@ def mean_and_std(v): 'snowflake', 'polars', 'datafusion', + "mssql", ] ), ), @@ -372,6 +380,7 @@ def mean_and_std(v): 'snowflake', 'polars', 'datafusion', + "mssql", ] ), ), @@ -393,6 +402,7 @@ def mean_and_std(v): "sqlite", "snowflake", "polars", + "mssql", ], ), ), @@ -401,7 +411,9 @@ def mean_and_std(v): lambda t, where: np.bitwise_and.reduce(t.bigint_col[where].values), id='bit_and', marks=[ - pytest.mark.notimpl(["dask", "snowflake", "polars", "datafusion"]), + pytest.mark.notimpl( + ["dask", "snowflake", "polars", "datafusion", "mssql"] + ), pytest.mark.notyet(["impala", "pyspark"]), ], ), @@ -410,7 +422,9 @@ def mean_and_std(v): lambda t, where: np.bitwise_or.reduce(t.bigint_col[where].values), id='bit_or', marks=[ - pytest.mark.notimpl(["dask", "snowflake", "polars", "datafusion"]), + pytest.mark.notimpl( + ["dask", "snowflake", "polars", "datafusion", "mssql"] + ), pytest.mark.notyet(["impala", "pyspark"]), ], ), @@ -419,7 +433,9 @@ def mean_and_std(v): lambda t, where: np.bitwise_xor.reduce(t.bigint_col[where].values), id='bit_xor', marks=[ - pytest.mark.notimpl(["dask", "snowflake", "polars", "datafusion"]), + pytest.mark.notimpl( + ["dask", "snowflake", "polars", "datafusion", "mssql"] + ), pytest.mark.notyet(["impala", "pyspark"]), ], ), @@ -434,7 +450,15 @@ def mean_and_std(v): id="collect", marks=[ mark.notimpl( - ["dask", "impala", "mysql", "snowflake", "sqlite", "datafusion"] + [ + "dask", + "impala", + "mysql", + "snowflake", + "sqlite", + "datafusion", + "mssql", + ] ) ], ), @@ -569,6 +593,7 @@ def test_reduction_ops( ), ], ) +@pytest.mark.notimpl(["mssql"]) def test_corr_cov( batting, batting_df, @@ -599,6 +624,7 @@ def test_corr_cov( "postgres", "sqlite", "snowflake", + "mssql", ] ) def test_approx_median(alltypes): @@ -664,7 +690,7 @@ def test_approx_median(alltypes): ), ], ) -@mark.notimpl(["datafusion", "snowflake", "polars"]) +@mark.notimpl(["datafusion", "snowflake", 
"polars", "mssql"]) def test_group_concat( backend, alltypes, @@ -752,6 +778,7 @@ def test_topk_filter_op(alltypes, df, result_fn, expected_fn): "sqlite", "snowflake", "polars", + "mssql", ] ) def test_aggregate_list_like(backend, alltypes, df, agg_fn): @@ -784,9 +811,10 @@ def test_aggregate_list_like(backend, alltypes, df, agg_fn): "sqlite", "snowflake", "polars", + "mssql", ] ) -def test_aggregate_mixed(backend, alltypes, df): +def test_aggregate_mixed_udf(backend, alltypes, df): """Tests .aggregate() with multiple aggregations with mixed result types. (In particular, one aggregation that results in an array, and other diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 7963f960abe6..9acbf4ff73de 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -17,7 +17,7 @@ def test_version(backend): # 2. list_databases() returns directories which don't make sense as HDF5 # databases @pytest.mark.never(["dask", "pandas"], reason="pass") -@pytest.mark.notimpl(["datafusion", "duckdb", "polars"]) +@pytest.mark.notimpl(["datafusion", "duckdb", "polars", "mssql"]) def test_database_consistency(backend, con): # every backend has a different set of databases, not testing the # exact names for now @@ -52,9 +52,10 @@ def test_tables_accessor_mapping(con): con.tables["doesnt_exist"] tables = con.list_tables() + con_tables = con.tables - assert len(con.tables) == len(tables) - assert sorted(con.tables) == sorted(tables) + assert len(con_tables) == len(tables) + assert sorted(con_tables) == sorted(tables) def test_tables_accessor_getattr(con): diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 92a6d676ba37..c5fb8b53923e 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -15,7 +15,9 @@ except ImportError: duckdb = None -pytestmark = [pytest.mark.never(["sqlite", "mysql"], reason="No array support")] +pytestmark = [ + pytest.mark.never(["sqlite", "mysql", "mssql"], reason="No array support") +] @pytest.mark.notimpl(["impala", "datafusion", "snowflake"]) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index d718eecebd85..6edf1f9c144c 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -89,7 +89,7 @@ def test_query_schema(ddl_backend, ddl_con, expr_fn, expected): assert schema.equals(expected) -@pytest.mark.notimpl(["datafusion", "snowflake", "polars"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "polars", "mssql"]) @pytest.mark.notyet(["sqlite"]) @pytest.mark.never( ["dask", "pandas"], @@ -124,6 +124,7 @@ def test_create_table_from_schema(con, new_schema, temp_table): "sqlite", "snowflake", "polars", + "mssql", ] ) def test_rename_table(con, temp_table, new_schema): @@ -170,6 +171,7 @@ def test_nullable_input_output(con, temp_table): "sqlite", "snowflake", "polars", + "mssql", ] ) @mark.notyet(["pyspark"]) @@ -597,24 +599,10 @@ def test_invalid_connect(): ibis.connect(url) -@pytest.mark.never( - [ - "clickhouse", - "dask", - "datafusion", - "impala", - "mysql", - "pandas", - "postgres", - "pyspark", - "snowflake", - "polars", - ], - reason="backend isn't file-based", -) -def test_deprecated_path_argument(backend, tmp_path): +@pytest.mark.parametrize("backend_name", ["sqlite", "duckdb"]) +def test_deprecated_path_argument(backend_name, tmp_path): with pytest.warns(UserWarning, match="The `path` argument is deprecated"): - getattr(ibis, backend.name()).connect(path=str(tmp_path / "test.db")) 
+ getattr(ibis, backend_name).connect(path=str(tmp_path / "test.db")) @pytest.mark.parametrize( diff --git a/ibis/backends/tests/test_column.py b/ibis/backends/tests/test_column.py index eab1c6917cd0..bf5e81a392c6 100644 --- a/ibis/backends/tests/test_column.py +++ b/ibis/backends/tests/test_column.py @@ -17,6 +17,7 @@ "postgres", "pyspark", "polars", + "mssql", ] ) def test_rowid(con): @@ -43,6 +44,7 @@ def test_rowid(con): "postgres", "pyspark", "polars", + "mssql", ] ) def test_named_rowid(con): diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 4c81adcbafa1..76438f2808b7 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -1,11 +1,12 @@ import pandas as pd import pytest +from pytest import param import ibis from ibis import util REQUIRES_EXPLICIT_SCHEMA = {"sqlite"} -table_dot_sql_notimpl = pytest.mark.notimpl(["sqlite", "clickhouse", "impala"]) +table_dot_sql_notimpl = pytest.mark.notimpl(["sqlite", "clickhouse", "impala", "mssql"]) dot_sql_notimpl = pytest.mark.notimpl(["datafusion"]) dot_sql_notyet = pytest.mark.notyet( ["snowflake"], @@ -22,14 +23,16 @@ @dot_sql_notimpl @dot_sql_notyet @dot_sql_never -@pytest.mark.parametrize("explicit_schema", [False, True]) -def test_con_dot_sql(backend, con, explicit_schema): - if not explicit_schema and con.name in REQUIRES_EXPLICIT_SCHEMA: +@pytest.mark.parametrize( + "schema", + [ + param(None, marks=pytest.mark.notimpl(["mssql"]), id="implicit_schema"), + param({"s": "string", "new_col": "double"}, id="explicit_schema"), + ], +) +def test_con_dot_sql(backend, con, schema): + if schema is None and con.name in REQUIRES_EXPLICIT_SCHEMA: pytest.xfail(f"{con.name} requires an explicit schema for .sql") - if explicit_schema: - schema = {"s": "string", "new_col": "double"} - else: - schema = None alltypes = con.table("functional_alltypes") t = ( con.sql( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 3351f860c5d4..7aa5802826c1 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -18,10 +18,14 @@ @pytest.mark.parametrize( ('expr', 'expected'), [ - (ibis.NA.fillna(5), 5), - (L(5).fillna(10), 5), - (L(5).nullif(5), None), - (L(10).nullif(5), 10), + param( + ibis.NA.fillna(5), 5, marks=pytest.mark.notimpl(["mssql"]), id="na_fillna" + ), + param( + L(5).fillna(10), 5, marks=pytest.mark.notimpl(["mssql"]), id="non_na_fillna" + ), + param(L(5).nullif(5), None, id="nullif_null"), + param(L(10).nullif(5), 10, id="nullif_not_null"), ], ) @pytest.mark.notimpl(["datafusion"]) @@ -69,6 +73,7 @@ def test_scalar_fillna_nullif(con, expr, expected): @na_none_cols +@pytest.mark.notimpl(["mssql"]) def test_isna(backend, alltypes, col): table = alltypes.mutate(na_col=np.nan) table = table.mutate(none_col=None) @@ -102,7 +107,7 @@ def test_isna(backend, alltypes, col): ), ], ) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_column_fillna(backend, alltypes, value): table = alltypes.mutate(missing=ibis.literal(value).cast("float64")) pd_table = table.execute() @@ -142,7 +147,7 @@ def test_coalesce(con, expr, expected): # TODO(dask) - identicalTo - #2553 -@pytest.mark.notimpl(["clickhouse", "datafusion", "polars", "dask", "pyspark"]) +@pytest.mark.notimpl(["clickhouse", "datafusion", "polars", "dask", "pyspark", "mssql"]) def test_identical_to(backend, alltypes, sorted_df): sorted_alltypes = alltypes.order_by('id') df = sorted_df @@ -171,6 +176,7 @@ def 
test_identical_to(backend, alltypes, sorted_df): ('int_col', frozenset({1})), ], ) +@pytest.mark.notimpl(["mssql"]) def test_isin(backend, alltypes, sorted_df, column, elements): sorted_alltypes = alltypes.order_by('id') expr = sorted_alltypes[ @@ -194,6 +200,7 @@ def test_isin(backend, alltypes, sorted_df, column, elements): ('int_col', frozenset({1})), ], ) +@pytest.mark.notimpl(["mssql"]) def test_notin(backend, alltypes, sorted_df, column, elements): sorted_alltypes = alltypes.order_by('id') expr = sorted_alltypes[ @@ -254,6 +261,7 @@ def test_filter(backend, alltypes, sorted_df, predicate_fn, expected_fn): "sqlite", "snowflake", "polars", + "mssql", ] ) def test_filter_with_window_op(backend, alltypes, sorted_df): @@ -298,7 +306,7 @@ def test_case_where(backend, alltypes, df): # TODO: some of these are notimpl (datafusion) others are probably never -@pytest.mark.notimpl(["datafusion", "mysql", "sqlite"]) +@pytest.mark.notimpl(["datafusion", "mysql", "sqlite", "mssql"]) @pytest.mark.min_version(duckdb="0.3.3", reason="isnan/isinf unsupported") def test_select_filter_mutate(backend, alltypes, df): """Test that select, filter and mutate are executed in right order. @@ -352,7 +360,7 @@ def test_table_fillna_invalid(alltypes): {"double_col": -1.5, "string_col": "missing"}, ], ) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_table_fillna_mapping(backend, alltypes, replacements): table = alltypes.mutate( int_col=alltypes.int_col.nullif(1), @@ -367,7 +375,7 @@ def test_table_fillna_mapping(backend, alltypes, replacements): backend.assert_frame_equal(result, expected, check_dtype=False) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_table_fillna_scalar(backend, alltypes): table = alltypes.mutate( int_col=alltypes.int_col.nullif(1), @@ -468,7 +476,7 @@ def test_order_by(backend, alltypes, df, key, df_kwargs): backend.assert_frame_equal(result, expected) -@pytest.mark.notimpl(["dask", "datafusion", "impala", "pandas", "polars"]) +@pytest.mark.notimpl(["dask", "datafusion", "impala", "pandas", "polars", "mssql"]) @pytest.mark.notyet( ["clickhouse"], reason="clickhouse doesn't have a [0.0, 1.0) random implementation", @@ -579,17 +587,30 @@ def test_isin_notin_column_expr(backend, alltypes, df, ibis_op, pandas_op): [ param(True, True, toolz.identity, id="true_noop"), param(False, False, toolz.identity, id="false_noop"), - param(True, False, invert, id="true_invert"), - param(False, True, invert, id="false_invert"), - param(True, False, neg, id="true_negate"), - param(False, True, neg, id="false_negate"), + param( + True, False, invert, id="true_invert", marks=pytest.mark.notimpl(["mssql"]) + ), + param( + False, True, invert, id="false_invert", marks=pytest.mark.notimpl(["mssql"]) + ), + param(True, False, neg, id="true_negate", marks=pytest.mark.notimpl(["mssql"])), + param( + False, True, neg, id="false_negate", marks=pytest.mark.notimpl(["mssql"]) + ), ], ) def test_logical_negation_literal(con, expr, expected, op): assert con.execute(op(ibis.literal(expr))) == expected -@pytest.mark.parametrize("op", [toolz.identity, invert, neg]) +@pytest.mark.parametrize( + "op", + [ + toolz.identity, + param(invert, marks=pytest.mark.notimpl(["mssql"])), + param(neg, marks=pytest.mark.notimpl(["mssql"])), + ], +) def test_logical_negation_column(backend, alltypes, df, op): result = op(alltypes["bool_col"]).execute() expected = op(df["bool_col"]) diff --git a/ibis/backends/tests/test_json.py 
b/ibis/backends/tests/test_json.py index c6de446a2f0e..5b2a5625dfec 100644 --- a/ibis/backends/tests/test_json.py +++ b/ibis/backends/tests/test_json.py @@ -5,7 +5,7 @@ from pytest import param -@pytest.mark.notimpl(["datafusion", "pyspark"]) +@pytest.mark.notimpl(["datafusion", "pyspark", "mssql"]) @pytest.mark.notyet(["clickhouse"], reason="upstream is broken") @pytest.mark.never(["impala"], reason="doesn't support JSON and never will") @pytest.mark.parametrize( diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index 8f37f790bde1..1e9ba6b61587 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -15,6 +15,7 @@ "pyspark", "snowflake", "polars", + "mssql", ], reason="Not implemented yet", ), diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index fb1739b767a5..6ea99d9468cc 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -51,7 +51,7 @@ param(operator.methodcaller('isinf'), np.isinf, id='isinf'), ], ) -@pytest.mark.notimpl(["mysql", "sqlite", "datafusion"]) +@pytest.mark.notimpl(["mysql", "sqlite", "datafusion", "mssql"]) @pytest.mark.xfail( duckdb is not None and vparse(duckdb.__version__) < vparse("0.3.3"), reason="<0.3.3 does not support isnan/isinf properly", @@ -98,7 +98,12 @@ def test_isnan_isinf( id='greatest', marks=pytest.mark.notimpl(["datafusion"]), ), - param(L(5.5).round(), 6.0, id='round'), + param( + L(5.5).round(), + 6.0, + id='round', + marks=pytest.mark.notimpl(["mssql"]), + ), param( L(5.556).round(2), 5.56, @@ -136,11 +141,26 @@ def test_isnan_isinf( L(5.556).log(2), math.log(5.556, 2), id='log-base', - marks=pytest.mark.notimpl(["datafusion"]), + marks=pytest.mark.notimpl(["datafusion", "mssql"]), + ), + param( + L(5.556).ln(), + math.log(5.556), + id='ln', + marks=pytest.mark.notimpl(["mssql"]), + ), + param( + L(5.556).log2(), + math.log(5.556, 2), + id='log2', + marks=pytest.mark.notimpl(["mssql"]), + ), + param( + L(5.556).log10(), + math.log10(5.556), + id='log10', + marks=pytest.mark.notimpl(["mssql"]), ), - param(L(5.556).ln(), math.log(5.556), id='ln'), - param(L(5.556).log2(), math.log(5.556, 2), id='log2'), - param(L(5.556).log10(), math.log10(5.556), id='log10'), param( L(5.556).radians(), math.radians(5.556), @@ -282,17 +302,19 @@ def test_simple_math_functions_columns( lambda t: t.double_col.add(1).log(2), lambda t: np.log2(t.double_col + 1), id='log2', - marks=pytest.mark.notimpl(["datafusion"]), + marks=pytest.mark.notimpl(["datafusion", "mssql"]), ), param( lambda t: t.double_col.add(1).ln(), lambda t: np.log(t.double_col + 1), id='ln', + marks=pytest.mark.notimpl(["mssql"]), ), param( lambda t: t.double_col.add(1).log10(), lambda t: np.log10(t.double_col + 1), id='log10', + marks=pytest.mark.notimpl(["mssql"]), ), param( lambda t: (t.double_col + 1).log( @@ -305,7 +327,7 @@ def test_simple_math_functions_columns( np.log(t.double_col + 1) / np.log(np.maximum(9_000, t.bigint_col)) ), id="log_base_bigint", - marks=pytest.mark.notimpl(["clickhouse", "datafusion", "polars"]), + marks=pytest.mark.notimpl(["clickhouse", "datafusion", "polars", "mssql"]), ), ], ) @@ -325,6 +347,7 @@ def test_complex_math_functions_columns( lambda be, t: t.double_col.round(), lambda be, t: be.round(t.double_col), id='round', + marks=pytest.mark.notimpl(["mssql"]), ), param( lambda be, t: t.double_col.add(0.05).round(3), @@ -412,6 +435,7 @@ def test_mod(backend, alltypes, df): backend.assert_series_equal(result, expected, 
check_dtype=False) +@pytest.mark.notimpl(["mssql"]) def test_floating_mod(backend, alltypes, df): expr = operator.mod(alltypes.double_col, alltypes.smallint_col + 1).name('tmp') @@ -449,6 +473,7 @@ def test_floating_mod(backend, alltypes, df): "pyspark", "sqlite", "snowflake", + "mssql", ] ) @pytest.mark.parametrize('denominator', [0, 0.0]) def test_divide_by_zero(backend, alltypes, df, column, denominator): @@ -471,7 +496,7 @@ ) ], ) -@pytest.mark.notimpl(["sqlite", "duckdb"]) +@pytest.mark.notimpl(["sqlite", "duckdb", "mssql"]) @pytest.mark.never( [ "clickhouse", @@ -525,7 +550,9 @@ def test_sa_default_numeric_precision_and_scale( con.drop_table(table_name, force=True) -@pytest.mark.notimpl(["dask", "datafusion", "impala", "pandas", "sqlite", "polars"]) +@pytest.mark.notimpl( + ["dask", "datafusion", "impala", "pandas", "sqlite", "polars", "mssql"] +) @pytest.mark.notyet( ["clickhouse"], reason="backend doesn't implement a [0.0, 1.0) or [0.0, 1.0] RANDOM() function", diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index a9fc5ea14dec..0c6fd3a03e43 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -27,7 +27,7 @@ def test_floating_scalar_parameter(backend, alltypes, df, column, raw_value): ('start_string', 'end_string'), [('2009-03-01', '2010-07-03'), ('2014-12-01', '2017-01-05')], ) -@pytest.mark.notimpl(["datafusion", "pyspark"]) +@pytest.mark.notimpl(["datafusion", "pyspark", "mssql"]) def test_date_scalar_parameter(backend, alltypes, start_string, end_string): start, end = ibis.param(dt.date), ibis.param(dt.date) @@ -61,7 +61,7 @@ def test_timestamp_accepts_date_literals(backend, alltypes): ] ) @pytest.mark.never( - ["mysql", "sqlite"], - reason="mysql and sqlite will never implement array types", + ["mysql", "sqlite", "mssql"], + reason="mysql, sqlite and mssql will never implement array types", ) def test_scalar_param_array(backend, con): @@ -82,7 +82,7 @@ ] ) @pytest.mark.never( - ["mysql", "sqlite"], - reason="mysql and sqlite will never implement struct types", + ["mysql", "sqlite", "mssql"], + reason="mysql, sqlite and mssql will never implement struct types", ) def test_scalar_param_struct(backend, con): @@ -105,7 +105,7 @@ ] ) @pytest.mark.never( - ["mysql", "sqlite"], - reason="mysql and sqlite will never implement map types", + ["mysql", "sqlite", "mssql"], + reason="mysql, sqlite and mssql will never implement map types", ) @pytest.mark.notyet(["postgres"]) diff --git a/ibis/backends/tests/test_pretty.py b/ibis/backends/tests/test_pretty.py index 32cb57866c9d..509a8ce87a38 100644 --- a/ibis/backends/tests/test_pretty.py +++ b/ibis/backends/tests/test_pretty.py @@ -21,6 +21,7 @@ reason="Not clear how to extract SQL from the backend", raises=(exc.OperationNotDefinedError, NotImplementedError, AssertionError), ) +@mark.notimpl(["mssql"], raises=ValueError, reason="no sqlglot dialect for mssql") def test_table(con): expr = con.tables.functional_alltypes.select(c=_.int_col + 1) buf = io.StringIO() @@ -30,13 +31,14 @@ def test_table(con): simple_literal = param( ibis.literal(1), + marks=[pytest.mark.notimpl(["mssql"], reason="no sqlglot dialect for mssql")], id="simple_literal", ) array_literal = param( ibis.array([1]), marks=[ mark.never( - ["mysql", "sqlite"], + ["mysql", "sqlite", "mssql"], raises=sa.exc.CompileError, reason="arrays not supported in the backend", ), @@ -59,12 +61,12 @@ id="array_literal", ) no_structs = mark.never( - ["impala", "mysql", "sqlite"], + ["impala", "mysql", "sqlite", "mssql"], raises=(NotImplementedError, sa.exc.CompileError), reason="structs not supported in the backend", )
no_struct_literals = mark.notimpl( - ["postgres", "snowflake"], + ["postgres", "snowflake", "mssql"], reason="struct literals are not yet implemented", ) not_sql = mark.never( diff --git a/ibis/backends/tests/test_set_ops.py b/ibis/backends/tests/test_set_ops.py index 1c9e83e1efad..0051a026c32e 100644 --- a/ibis/backends/tests/test_set_ops.py +++ b/ibis/backends/tests/test_set_ops.py @@ -56,7 +56,7 @@ def test_union_mixed_distinct(backend, union_subsets): param( False, marks=pytest.mark.notyet( - ["dask", "pandas", "sqlite", "snowflake"], + ["dask", "pandas", "sqlite", "snowflake", "mssql"], reason="backend doesn't support INTERSECT ALL", ), id="all", @@ -94,7 +94,7 @@ def test_intersect(backend, alltypes, df, distinct): param( False, marks=pytest.mark.notyet( - ["dask", "pandas", "sqlite", "snowflake"], + ["dask", "pandas", "sqlite", "snowflake", "mssql"], reason="backend doesn't support EXCEPT ALL", ), id="all", diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index d3f091016122..ef4dd5970749 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -24,73 +24,97 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.string_col.contains('6'), lambda t: t.string_col.str.contains('6'), id='contains', - marks=pytest.mark.notimpl(["datafusion"]), + marks=pytest.mark.notimpl(["datafusion", "mssql"]), ), param( lambda t: t.string_col.like('6%'), lambda t: t.string_col.str.contains('6.*'), id='like', - marks=pytest.mark.notimpl(["datafusion", "polars"]), + marks=[ + pytest.mark.notimpl(["datafusion", "polars"]), + pytest.mark.notyet( + ["mssql"], reason="mssql doesn't allow like outside of filters" + ), + ], ), param( lambda t: t.string_col.like('6^%'), lambda t: t.string_col.str.contains('6%'), id='complex_like_escape', - marks=pytest.mark.notimpl(["datafusion", "polars"]), + marks=[ + pytest.mark.notimpl(["datafusion", "polars"]), + pytest.mark.notyet( + ["mssql"], reason="mssql doesn't allow like outside of filters" + ), + ], ), param( lambda t: t.string_col.like('6^%%'), lambda t: t.string_col.str.contains('6%.*'), id='complex_like_escape_match', - marks=pytest.mark.notimpl(["datafusion", "polars"]), + marks=[ + pytest.mark.notimpl(["datafusion", "polars"]), + pytest.mark.notyet( + ["mssql"], reason="mssql doesn't allow like outside of filters" + ), + ], ), param( lambda t: t.string_col.ilike('6%'), lambda t: t.string_col.str.contains('6.*'), id='ilike', - marks=pytest.mark.notimpl(["datafusion", "impala", "pyspark", "polars"]), + marks=[ + pytest.mark.notimpl(["datafusion", "impala", "pyspark", "polars"]), + pytest.mark.notyet( + ["mssql"], reason="mssql doesn't allow like outside of filters" + ), + ], ), param( lambda t: t.string_col.re_search(r'\d+'), lambda t: t.string_col.str.contains(r'\d+'), id='re_search', - marks=pytest.mark.notimpl(["impala", "datafusion", "snowflake"]), + marks=pytest.mark.notimpl(["impala", "datafusion", "snowflake", "mssql"]), ), param( lambda t: t.string_col.re_search(r'[[:digit:]]+'), lambda t: t.string_col.str.contains(r'\d+'), id='re_search_posix', - marks=pytest.mark.notimpl(["datafusion", "pyspark", "snowflake"]), + marks=pytest.mark.notimpl(["datafusion", "pyspark", "snowflake", "mssql"]), ), param( lambda t: t.string_col.re_extract(r'(\d+)', 1), lambda t: t.string_col.str.extract(r'(\d+)', expand=False), id='re_extract', - marks=pytest.mark.notimpl(["impala", "mysql", "snowflake"]), + marks=pytest.mark.notimpl(["impala", "mysql", "snowflake", "mssql"]), ), param( lambda t: 
t.string_col.re_extract(r'([[:digit:]]+)', 1), lambda t: t.string_col.str.extract(r'(\d+)', expand=False), id='re_extract_posix', - marks=pytest.mark.notimpl(["mysql", "pyspark", "snowflake"]), + marks=pytest.mark.notimpl(["mysql", "pyspark", "snowflake", "mssql"]), ), param( lambda t: (t.string_col + "1").re_extract(r'\d(\d+)', 0), lambda t: (t.string_col + "1").str.extract(r'(\d+)', expand=False), id='re_extract_whole_group', - marks=pytest.mark.notimpl(["impala", "mysql", "snowflake"]), + marks=pytest.mark.notimpl(["impala", "mysql", "snowflake", "mssql"]), ), param( lambda t: t.string_col.re_replace(r'[[:digit:]]+', 'a'), lambda t: t.string_col.str.replace(r'\d+', 'a', regex=True), id='re_replace_posix', - marks=pytest.mark.notimpl(['datafusion', "mysql", "pyspark", "snowflake"]), + marks=pytest.mark.notimpl( + ['datafusion', "mysql", "pyspark", "snowflake", "mssql"] + ), ), param( lambda t: t.string_col.re_replace(r'\d+', 'a'), lambda t: t.string_col.str.replace(r'\d+', 'a', regex=True), id='re_replace', - marks=pytest.mark.notimpl(["impala", "datafusion", "mysql", "snowflake"]), + marks=pytest.mark.notimpl( + ["impala", "datafusion", "mysql", "snowflake", "mssql"] + ), ), param( lambda t: t.string_col.repeat(2), @@ -111,7 +135,9 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.string_col.translate('0', 'a'), lambda t: t.string_col.str.translate(str.maketrans('0', 'a')), id='translate', - marks=pytest.mark.notimpl(["clickhouse", "datafusion", "mysql", "polars"]), + marks=pytest.mark.notimpl( + ["clickhouse", "datafusion", "mysql", "polars", "mssql"] + ), ), param( lambda t: t.string_col.find('a'), @@ -123,18 +149,20 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.string_col.lpad(10, 'a'), lambda t: t.string_col.str.pad(10, fillchar='a', side='left'), id='lpad', + marks=pytest.mark.notimpl(["mssql"]), ), param( lambda t: t.string_col.rpad(10, 'a'), lambda t: t.string_col.str.pad(10, fillchar='a', side='right'), id='rpad', + marks=pytest.mark.notimpl(["mssql"]), ), param( lambda t: t.string_col.find_in_set(['1']), lambda t: t.string_col.str.find('1'), id='find_in_set', marks=pytest.mark.notimpl( - ["datafusion", "pyspark", "sqlite", "snowflake", "polars"] + ["datafusion", "pyspark", "sqlite", "snowflake", "polars", "mssql"] ), ), param( @@ -142,7 +170,7 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.string_col.str.find('a'), id='find_in_set_all_missing', marks=pytest.mark.notimpl( - ["datafusion", "pyspark", "sqlite", "snowflake", "polars"] + ["datafusion", "pyspark", "sqlite", "snowflake", "polars", "mssql"] ), ), param( @@ -179,7 +207,9 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.int_col == 1, id='startswith', # pyspark doesn't support `cases` yet - marks=pytest.mark.notimpl(["dask", "datafusion", "pyspark", "pandas"]), + marks=pytest.mark.notimpl( + ["dask", "datafusion", "pyspark", "pandas", "mssql"] + ), ), param( lambda t: t.int_col.cases([(1, "abcd"), (2, "ABCD")], "dabc").endswith( @@ -188,19 +218,21 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.int_col == 1, id='endswith', # pyspark doesn't support `cases` yet - marks=pytest.mark.notimpl(["dask", "datafusion", "pyspark", "pandas"]), + marks=pytest.mark.notimpl( + ["dask", "datafusion", "pyspark", "pandas", "mssql"] + ), ), param( lambda t: t.date_string_col.startswith("2010-01"), lambda t: t.date_string_col.str.startswith("2010-01"), id='startswith-simple', - marks=pytest.mark.notimpl(["dask", "datafusion", "pandas"]), + marks=pytest.mark.notimpl(["dask", 
"datafusion", "pandas", "mssql"]), ), param( lambda t: t.date_string_col.endswith("100"), lambda t: t.date_string_col.str.endswith("100"), id='endswith-simple', - marks=pytest.mark.notimpl(["dask", "datafusion", "pandas"]), + marks=pytest.mark.notimpl(["dask", "datafusion", "pandas", "mssql"]), ), param( lambda t: t.string_col.strip(), @@ -221,7 +253,7 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.string_col.capitalize(), lambda t: t.string_col.str.capitalize(), id='capitalize', - marks=pytest.mark.notimpl(["clickhouse", "duckdb"]), + marks=pytest.mark.notimpl(["clickhouse", "duckdb", "mssql"]), ), param( lambda t: t.date_string_col.substr(2, 3), @@ -232,7 +264,10 @@ def test_string_col_is_unicode(alltypes, df): lambda t: t.date_string_col.substr(2), lambda t: t.date_string_col.str[2:], id='substr-start-only', - marks=pytest.mark.notimpl(["datafusion", "polars", "pyspark"]), + marks=[ + pytest.mark.notimpl(["datafusion", "polars", "pyspark"]), + pytest.mark.notyet(["mssql"], reason="substr requires 3 arguments"), + ], ), param( lambda t: t.date_string_col.left(2), @@ -292,6 +327,7 @@ def test_string_col_is_unicode(alltypes, df): "mysql", "sqlite", "snowflake", + "mssql", ] ), ), @@ -332,7 +368,7 @@ def test_string(backend, alltypes, df, result_func, expected_func): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_substr_with_null_values(backend, alltypes, df): table = alltypes.mutate( substr_col_null=ibis.case() diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 6511e3b76073..67c00454ec0d 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -8,7 +8,7 @@ import ibis.expr.datatypes as dt pytestmark = [ - pytest.mark.never(["mysql", "sqlite"], reason="No struct support"), + pytest.mark.never(["mysql", "sqlite", "mssql"], reason="No struct support"), pytest.mark.notyet(["impala"]), pytest.mark.notimpl( [ diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 12e65577f1b7..ffa7bcd4e3a7 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -43,8 +43,8 @@ def test_date_extract(backend, alltypes, df, attr, expr_fn): 'year', 'month', 'day', - param('day_of_year', marks=pytest.mark.notimpl(["impala"])), - 'quarter', + param('day_of_year', marks=pytest.mark.notimpl(["impala", "mssql"])), + param('quarter', marks=pytest.mark.notimpl(["mssql"])), 'hour', 'minute', 'second', @@ -82,11 +82,17 @@ def test_timestamp_extract(backend, alltypes, df, attr): ), ], ), - param(lambda x: x.day_of_week.index(), 1, id='day_of_week_index'), + param( + lambda x: x.day_of_week.index(), + 1, + id='day_of_week_index', + marks=pytest.mark.notimpl(["mssql"]), + ), param( lambda x: x.day_of_week.full_name(), 'Tuesday', id='day_of_week_full_name', + marks=pytest.mark.notimpl(["mssql"]), ), ], ) @@ -107,7 +113,7 @@ def test_timestamp_extract_milliseconds(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_timestamp_extract_epoch_seconds(backend, alltypes, df): expr = alltypes.timestamp_col.epoch_seconds().name('tmp') result = expr.execute() @@ -118,7 +124,7 @@ def test_timestamp_extract_epoch_seconds(backend, alltypes, df): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) 
def test_timestamp_extract_week_of_year(backend, alltypes, df): expr = alltypes.timestamp_col.week_of_year().name('tmp') result = expr.execute() @@ -194,7 +200,7 @@ def test_timestamp_extract_week_of_year(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_timestamp_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.truncate(unit).name('tmp') @@ -230,7 +236,7 @@ def test_timestamp_truncate(backend, alltypes, df, unit): ), ], ) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_date_truncate(backend, alltypes, df, unit): expr = alltypes.timestamp_col.date().truncate(unit).name('tmp') @@ -281,7 +287,7 @@ def test_date_truncate(backend, alltypes, df, unit): ), ], ) -@pytest.mark.notimpl(["datafusion", "pyspark", "sqlite", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "pyspark", "sqlite", "snowflake", "mssql"]) def test_integer_to_interval_timestamp( backend, con, alltypes, df, unit, displacement_type ): @@ -318,6 +324,7 @@ def convert_to_offset(offset, displacement_type=displacement_type): "sqlite", "snowflake", "polars", + "mssql", ] ) def test_integer_to_interval_date(backend, con, alltypes, df, unit): @@ -407,7 +414,7 @@ def convert_to_offset(x): ), ], ) -@pytest.mark.notimpl(["datafusion", "sqlite"]) +@pytest.mark.notimpl(["datafusion", "sqlite", "mssql"]) def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): expr = expr_fn(alltypes, backend).name('tmp') expected = expected_fn(df, backend) @@ -442,7 +449,7 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): ], ) @pytest.mark.notimpl( - ["clickhouse", "datafusion", "impala", "sqlite", "snowflake", "polars"] + ["clickhouse", "datafusion", "impala", "sqlite", "snowflake", "polars", "mssql"] ) def test_temporal_binop_pandas_timedelta( backend, con, alltypes, df, timedelta, temporal_fn @@ -467,6 +474,7 @@ def test_temporal_binop_pandas_timedelta( operator.ne, ], ) +@pytest.mark.notimpl(["mssql"]) def test_timestamp_comparison_filter(backend, con, alltypes, df, comparison_fn): ts = pd.Timestamp('20100302', tz="UTC").to_pydatetime() expr = alltypes.filter( @@ -480,7 +488,7 @@ def test_timestamp_comparison_filter(backend, con, alltypes, df, comparison_fn): backend.assert_frame_equal(result, expected) -@pytest.mark.notimpl(["datafusion", "sqlite", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "sqlite", "snowflake", "mssql"]) def test_interval_add_cast_scalar(backend, alltypes): timestamp_date = alltypes.timestamp_col.date() delta = ibis.literal(10).cast("interval('D')") @@ -493,7 +501,7 @@ def test_interval_add_cast_scalar(backend, alltypes): @pytest.mark.never( ['pyspark'], reason="PySpark does not support casting columns to intervals" ) -@pytest.mark.notimpl(["datafusion", "sqlite", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "sqlite", "snowflake", "mssql"]) def test_interval_add_cast_column(backend, alltypes, df): timestamp_date = alltypes.timestamp_col.date() delta = alltypes.bigint_col.cast("interval('D')") @@ -547,7 +555,7 @@ def test_interval_add_cast_column(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): expr = expr_fn(alltypes) expected = df.timestamp_col.dt.strftime(pandas_pattern).rename("formatted") @@ -577,7 +585,9 @@ def test_strftime(backend, alltypes, df, expr_fn, pandas_pattern): ), ], ) 
-@pytest.mark.notimpl(["datafusion", "mysql", "postgres", "sqlite", "snowflake"]) +@pytest.mark.notimpl( + ["datafusion", "mysql", "postgres", "sqlite", "snowflake", "mssql"] +) def test_integer_to_timestamp(backend, con, unit): backend_unit = backend.returned_timestamp_unit factor = unit_factors[unit] @@ -627,6 +637,7 @@ def test_integer_to_timestamp(backend, con, unit): 'impala', 'datafusion', 'snowflake', + 'mssql', ] ) def test_string_to_timestamp(alltypes, fmt, timezone): @@ -653,6 +664,7 @@ def test_string_to_timestamp(alltypes, fmt, timezone): 'datafusion', 'snowflake', 'polars', + 'mssql', ] ) def test_string_to_timestamp_tz_error(alltypes): @@ -676,7 +688,7 @@ def test_string_to_timestamp_tz_error(alltypes): param('2017-01-07', 5, 'Saturday', id="saturday"), ], ) -@pytest.mark.notimpl(["datafusion", "impala", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "impala", "snowflake", "mssql"]) def test_day_of_week_scalar(con, date, expected_index, expected_day): expr = ibis.literal(date).cast(dt.date) result_index = con.execute(expr.day_of_week.index()) @@ -686,7 +698,7 @@ def test_day_of_week_scalar(con, date, expected_index, expected_day): assert result_day.lower() == expected_day.lower() -@pytest.mark.notimpl(["datafusion", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "mssql"]) def test_day_of_week_column(backend, alltypes, df): expr = alltypes.timestamp_col.day_of_week @@ -717,7 +729,7 @@ def test_day_of_week_column(backend, alltypes, df): ), ], ) -@pytest.mark.notimpl(["datafusion"]) +@pytest.mark.notimpl(["datafusion", "mssql"]) def test_day_of_week_column_group_by( backend, alltypes, df, day_of_week_expr, day_of_week_pandas ): @@ -739,7 +751,7 @@ def test_day_of_week_column_group_by( backend.assert_frame_equal(result, expected, check_dtype=False) -@pytest.mark.notimpl(["datafusion", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "mssql"]) def test_now(con): expr = ibis.now() result = con.execute(expr) @@ -769,7 +781,9 @@ def test_now_from_projection(alltypes): tm.assert_series_equal(ts.dt.year, year_expected) -@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark", "snowflake"]) +@pytest.mark.notimpl( + ["pandas", "datafusion", "mysql", "dask", "pyspark", "snowflake", "mssql"] +) @pytest.mark.notyet(["clickhouse", "impala"]) def test_date_literal(con): expr = ibis.date(2022, 2, 4) @@ -777,7 +791,9 @@ def test_date_literal(con): assert result.strftime('%Y-%m-%d') == '2022-02-04' -@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark", "snowflake"]) +@pytest.mark.notimpl( + ["pandas", "datafusion", "mysql", "dask", "pyspark", "snowflake", "mssql"] +) @pytest.mark.notyet(["clickhouse", "impala"]) def test_timestamp_literal(con): expr = ibis.timestamp(2022, 2, 4, 16, 20, 0) @@ -787,7 +803,9 @@ def test_timestamp_literal(con): assert result == '2022-02-04 16:20:00' -@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark", "polars"]) +@pytest.mark.notimpl( + ["pandas", "datafusion", "mysql", "dask", "pyspark", "polars", "mssql"] +) @pytest.mark.notyet(["clickhouse", "impala"]) def test_time_literal(con): expr = ibis.time(16, 20, 0) @@ -797,7 +815,9 @@ def test_time_literal(con): assert result == '16:20:00' -@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark", "snowflake"]) +@pytest.mark.notimpl( + ["pandas", "datafusion", "mysql", "dask", "pyspark", "snowflake", "mssql"] +) @pytest.mark.notyet(["clickhouse", "impala"]) def test_date_column_from_ymd(con, alltypes, df): c = 
@@ -820,7 +840,7 @@ def test_date_scalar_from_iso(con):
     assert result.strftime('%Y-%m-%d') == '2022-02-24'


-@pytest.mark.notimpl(["datafusion", "impala", "pyspark"])
+@pytest.mark.notimpl(["datafusion", "impala", "pyspark", "mssql"])
 def test_date_column_from_iso(con, alltypes, df):
     expr = (
         alltypes.year.cast('string')
@@ -846,7 +866,7 @@ def test_timestamp_extract_milliseconds_with_big_value(con):
     assert result == 333


-@pytest.mark.notimpl(["datafusion"])
+@pytest.mark.notimpl(["datafusion", "mssql"])
 @pytest.mark.broken(
     ["dask", "pandas"],
     reason="Pandas and Dask interpret integers as nanoseconds since epoch",
@@ -873,7 +893,7 @@ def test_integer_cast_to_timestamp(backend, alltypes, df):
     ["pyspark"],
     reason="PySpark doesn't handle big timestamps",
 )
-@pytest.mark.notimpl(["snowflake"])
+@pytest.mark.notimpl(["snowflake", "mssql"])
 def test_big_timestamp(con):
     # TODO: test with a timezone
     value = ibis.timestamp("2419-10-11 10:10:25")
@@ -895,7 +915,7 @@ def build_date_col(t):
     ).cast("date")


-@pytest.mark.notimpl(["datafusion"])
+@pytest.mark.notimpl(["datafusion", "mssql"])
 @pytest.mark.notyet(["impala"], reason="impala doesn't support dates")
 @pytest.mark.parametrize(
     ("left_fn", "right_fn"),
diff --git a/ibis/backends/tests/test_timecontext.py b/ibis/backends/tests/test_timecontext.py
index c1501fba5daa..62c36b0b5b5a 100644
--- a/ibis/backends/tests/test_timecontext.py
+++ b/ibis/backends/tests/test_timecontext.py
@@ -18,6 +18,7 @@
         "sqlite",
         "snowflake",
         "polars",
+        "mssql",
     ]
 )
diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py
index 7e417db8092c..641be7be607f 100644
--- a/ibis/backends/tests/test_window.py
+++ b/ibis/backends/tests/test_window.py
@@ -7,6 +7,10 @@
 import ibis.expr.datatypes as dt
 from ibis.udf.vectorized import analytic, reduction

+pytestmark = [
+    pytest.mark.notimpl(["mssql"], reason="window functions aren't implemented")
+]
+

 @reduction(input_type=[dt.double], output_type=dt.double)
 def mean_udf(s):
diff --git a/poetry-overrides.nix b/poetry-overrides.nix
index 2fcee8cefc23..6baa33219647 100644
--- a/poetry-overrides.nix
+++ b/poetry-overrides.nix
@@ -176,4 +176,9 @@ in
       set +u
     '';
   });
+
+  pymssql = super.pymssql.overridePythonAttrs (attrs: {
+    nativeBuildInputs = attrs.nativeBuildInputs or [ ] ++ [ self.setuptools ];
+    buildInputs = attrs.buildInputs or [ ] ++ [ pkgs.libkrb5 ];
+  });
 }
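The override above supplies the build-time inputs pymssql needs under Nix
(setuptools for its build backend, libkrb5 for Kerberos support). At runtime,
pairing pymssql with SQLAlchemy implies the mssql+pymssql dialect; a hedged
sketch of the kind of URL the backend would build (host, port, and database
name are assumptions, the password matches the MSSQL_SA_PASSWORD used
elsewhere in this patch):

    import sqlalchemy as sa

    url = sa.engine.url.make_url(
        "mssql+pymssql://sa:1bis_Testing!@localhost:1433/ibis_testing"
    )
    print(url.get_dialect().name)  # -> mssql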
diff --git a/poetry.lock b/poetry.lock
index ab864c314e54..45ef748d7614 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1774,6 +1774,14 @@ python-versions = ">=3.7"
 [package.dependencies]
 markdown = ">=3.2"

+[[package]]
+name = "pymssql"
+version = "2.2.7"
+description = "DB-API interface to Microsoft SQL Server for Python. (new Cython-based version)"
+category = "main"
+optional = true
+python-versions = "*"
+
 [[package]]
 name = "pymysql"
 version = "1.0.2"
@@ -2556,6 +2564,7 @@ decompiler = ["black"]
 duckdb = ["duckdb", "duckdb-engine", "pyarrow", "sqlalchemy", "sqlglot"]
 geospatial = ["GeoAlchemy2", "geopandas", "Shapely"]
 impala = ["fsspec", "impyla", "requests", "sqlglot", "sqlalchemy"]
+mssql = ["sqlalchemy", "pymssql", "sqlglot"]
 mysql = ["sqlalchemy", "pymysql", "sqlglot"]
 pandas = []
 polars = ["polars", "pyarrow"]
@@ -2568,7 +2577,7 @@ visualization = ["graphviz"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.8"
-content-hash = "20909063fc436ca922c611bd38e9513aba29197e3ad419a253089d04ca9fd43c"
+content-hash = "975416ea2a8da7a5cc7a9b242f4842121e87b172f3932c1772764c8d0a666b1b"

 [metadata.files]
 absolufy-imports = [
@@ -3959,6 +3968,69 @@ pymdown-extensions = [
     {file = "pymdown_extensions-9.7-py3-none-any.whl", hash = "sha256:767d07d9dead0f52f5135545c01f4ed627f9a7918ee86c646d893e24c59db87d"},
     {file = "pymdown_extensions-9.7.tar.gz", hash = "sha256:651b0107bc9ee790aedea3673cb88832c0af27d2569cf45c2de06f1d65292e96"},
 ]
+pymssql = [
+    {file = "pymssql-2.2.7-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:9a883def0ded86dc93cdb45dcbe924f79bd141e6bc39975d6077f88e156f3741"},
+    {file = "pymssql-2.2.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:83ee4914bacecc715fcdb3cc22aedc8d9bf22f62e75802799fe9773b718fd41b"},
+    {file = "pymssql-2.2.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2a8b1b903527f0f8c287582bfe01b28180f173583b8501914c1134659ead3c1d"},
+    {file = "pymssql-2.2.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4aa12c836c307c80c1148eb190362bbbe178abc311e6715316b9950327af7a14"},
+    {file = "pymssql-2.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70260e05717cd6d72a622ee29d06375fa44d58fe825d4964a63344ae34d80223"},
+    {file = "pymssql-2.2.7-cp310-cp310-manylinux_2_24_i686.whl", hash = "sha256:c42a03cab7edd2bf6c4e075a9f1f7252151a4022216d7c85af4e4e4751f3bb14"},
+    {file = "pymssql-2.2.7-cp310-cp310-manylinux_2_24_x86_64.whl", hash = "sha256:9bfb8f04b26d398f2fb7741733a33c7cfe418bbbf922703e5c4c409e86891785"},
+    {file = "pymssql-2.2.7-cp310-cp310-win32.whl", hash = "sha256:0dbb905655f5976b94b6f899d4675ffdd460e7cb5516fba332cf0d77c15c2e9e"},
+    {file = "pymssql-2.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:2ce4f9fd604b9c7f9efad56afb3dcb2331c3c87bada172388f69d91297f20939"},
+    {file = "pymssql-2.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8fe96bcbb26e7603ef63696f59fa19364c793aab25f2b606dc04d50917c7b35f"},
+    {file = "pymssql-2.2.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628611bc8cab379f8353ad29b93a07162254c9b75efb5fe5255ac855a8d3abe4"},
+    {file = "pymssql-2.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:045029bed7cea6fcbc630e18f956f7ec6d1bde25c570019ff1f8f0e2b9abd5f0"},
+    {file = "pymssql-2.2.7-cp311-cp311-manylinux_2_24_i686.whl", hash = "sha256:ad3c2e67fd04fb860ffb3affd068e109ef92488a74274347235df45664de4a27"},
+    {file = "pymssql-2.2.7-cp311-cp311-manylinux_2_24_x86_64.whl", hash = "sha256:7099e45e91460ffec10e551830c722c27f207a41fd2267446a9b1a798e89d3bc"},
+    {file = "pymssql-2.2.7-cp311-cp311-win32.whl", hash = "sha256:4dbe67d60472e18d01bcfba139f404f017ab9e9bd1b558d527befbb47dbd6486"},
"sha256:a9a40bf77792532fe643ee07ae0de930f6386c8593348baef07d76d1b2f48967"}, + {file = "pymssql-2.2.7-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:e6116a0389939bba789fb3fccdd976773cfce7d9cc48bf2eb933cdc2c8ce2b19"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2b0415e6063b06234921d2d7d2b226cc68d197976f05b1547555ebfb3c446d01"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d84a0fe84dda22dd50efd9ef9f68349a9df88edeb1c719e1545961e7bb74c27c"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9eeff70c3af730fee19406dda11a7cef0e70e397d53f7c2edb13bd11d8e3b1b5"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd8ae8b5bc7dd78af8b886721c9b586b5269fea4f0e425c64ee2444356cb292"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_24_i686.whl", hash = "sha256:a061af4df57863abee1a8a87cad357f660294e038ef9a3375e258c10f982164c"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_24_x86_64.whl", hash = "sha256:1b723fccf11caf57cb44051e83955f170d2cad8ad931cbb4ab97d263691c4bd5"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:016d1f903b0bd9a7f094712668bcf9fa86ef305fba4b933d182c152043706191"}, + {file = "pymssql-2.2.7-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:bf4640b04663e0296d8562ba835bd8636ca763495ece0fc023a2192adcfacdb2"}, + {file = "pymssql-2.2.7-cp36-cp36m-win32.whl", hash = "sha256:9a5a554af18e803a2532a8232817b0904cb7cb6d8c1a1cf716fe6a5f568a1111"}, + {file = "pymssql-2.2.7-cp36-cp36m-win_amd64.whl", hash = "sha256:1c0b7ed54b38ba2a59695dd9d0adba6a144ac37de459d514668b18e45f5a232d"}, + {file = "pymssql-2.2.7-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:f26de948303c2146089c1a5f8c4c5c46e6fd21b8b6b550c19c1f056d87ab112d"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:97760db6df17327ebedd58a93d7cd5c2c453faa517bc9bdfbe19ad1ff66b96a5"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce2089b5b88a56eb599118b4f9a1b119e9056e85f8c6cb3002e44493181dd76"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:597563146e4ab088ee907c836075b9300541c16eef9791f4fbdfe6100894d512"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cd3ee322daf8fcbb6e58deb21daa4adceea658e433eef3d3cae8c5be5049086"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_24_i686.whl", hash = "sha256:18b611ee72c5f4095cd8e942047982e92ab4d2d2ce5a457b85ef03bb8e385e7e"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_24_x86_64.whl", hash = "sha256:2d97127604bfde669cfc6e14f03536925e1a446d2bf4b7f3c7d671be07801361"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16a281b556975d4c79cad6d41e902aba32017351aebfa4ede30581e00e89b1c1"}, + {file = "pymssql-2.2.7-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7e9a277352a5a081a20107e112c7b820ecb76c2320779d1fc15b783110a2c1f5"}, + {file = "pymssql-2.2.7-cp37-cp37m-win32.whl", hash = "sha256:e06e6c189821fe259764dd8c61551ebcc2e5ec3752d06f850e79b520c2e92998"}, + {file = "pymssql-2.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:4306f74b4b19acc367b4bf6afb5ef961d35362f416622ae24a73035f75cfcdee"}, + {file = "pymssql-2.2.7-cp38-cp38-macosx_10_15_x86_64.whl", hash = 
"sha256:26eb3bb6f4b6a57e2f7e2639179914aa5c962522ccd68f5aecb0190e8d34893f"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:28151def07dc86e3b44dc0759ce130e56ebbab17b41c01458fc217678eccce31"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:51855d2f63e20f4d2ed6986d0f10cc03f341f959638e60d041a1ddb5a95d00fd"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbf6717d85b62b95b9c96f3dd12166297dc9cef4f0887534d62c6a00c85bba4e"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:429491158fbee5309bd18b15d6fb29ad986b91afef4d05db5640fa7206d0d338"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_24_i686.whl", hash = "sha256:3067423fb4cbf476c8d465fe5b7f081d3508524c1f4907b961a4c69af4280454"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_24_x86_64.whl", hash = "sha256:1fc8a4b7d1a4146db70b5fbec3511bcfccb7b34d22a2aba89427bf55f8e44e23"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f873e481f7175bed246f756e250778ca723e52ec14bd9cb2bb0cfaeea237868"}, + {file = "pymssql-2.2.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0da65a89c95dc6336281c6a84f67abece9d50350dfd9b1c574b04aeb7148967d"}, + {file = "pymssql-2.2.7-cp38-cp38-win32.whl", hash = "sha256:4c9b337972377cabe4782e3cb4fae95b328305b0815392004a330314f3441fd8"}, + {file = "pymssql-2.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:46271abb5a657004c220a4675f4365978e1b67e826de5b98a2c06855e9816e17"}, + {file = "pymssql-2.2.7-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:ec7889c696f2cc27d17af86e21062d032d795bf81e48802820a69cfeb740667c"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:25c330fab365174a29f7b5d77b03c05836ee3d39e135fad7d66380b5d5b99911"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c638398a023471ebde4774e2f8e5237bed07e7f934c4142c6d8e63ed42a86db1"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa413e4fa34c53b6cfaaf294ca9070bbce1c52e5b284b35ce8e2bfbfaeae9d96"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:319e0dabd35ddb3e20798e4dc1ed6a8f8038101deafd7aabf531c0c6eaedeb5d"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_24_i686.whl", hash = "sha256:8d8a13e89483891afabf67211453eab7c8d5f73379ed77c21160a672d3a818fb"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_24_x86_64.whl", hash = "sha256:56916753f74ffa1e3b89483ce529ba13fd42944636558099b173b5343815fb0e"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:084a1573a5e4a10e7ad6e978f98ad3cc9704fc844beec4275aab1ff691533712"}, + {file = "pymssql-2.2.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ed58a251e3aaffe4c731adad7d1468593dcd45f19375f1501f2bf8a54e1e355"}, + {file = "pymssql-2.2.7-cp39-cp39-win32.whl", hash = "sha256:78884588abfc44e99e3eaec46e19f5b08854af66eae9719a87a63b4645cf49b1"}, + {file = "pymssql-2.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:cfa2bf7b8f7f462f72b2fa78b7753fc6c86a660dbea57d663993716afbb05072"}, + {file = "pymssql-2.2.7.tar.gz", hash = "sha256:ff95b910532ec7b02e4322231c117d3d6af0abab667e6fbf15442db873943045"}, +] pymysql = [ {file = "PyMySQL-1.0.2-py3-none-any.whl", hash = "sha256:41fc3a0c5013d5f039639442321185532e3e2c8924687abe6537de157d403641"}, {file 
= "PyMySQL-1.0.2.tar.gz", hash = "sha256:816927a350f38d56072aeca5dfb10221fe1dc653745853d30a216637f5d7ad36"}, diff --git a/pyproject.toml b/pyproject.toml index e0d05dee3cd6..81a48c852a6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ lz4 = { version = ">=3.1.10,<5", optional = true } polars = { version = ">=0.14.18,<1", optional = true } psycopg2 = { version = ">=2.8.4,<3", optional = true } pyarrow = { version = ">=1,<10", optional = true } +pymssql = { version = ">=2.2.7,<3", optional = true } pymysql = { version = ">=1,<2", optional = true } pyspark = { version = ">=3,<4", optional = true } requests = { version = ">=2,<3", optional = true } @@ -119,6 +120,7 @@ datafusion = ["datafusion"] duckdb = ["duckdb", "duckdb-engine", "pyarrow", "sqlalchemy", "sqlglot"] geospatial = ["geoalchemy2", "geopandas", "shapely"] impala = ["fsspec", "impyla", "requests", "sqlglot", "sqlalchemy"] +mssql = ["sqlalchemy", "pymssql", "sqlglot"] mysql = ["sqlalchemy", "pymysql", "sqlglot"] pandas = [] polars = ["polars", "pyarrow"] @@ -136,6 +138,7 @@ datafusion = "ibis.backends.datafusion" duckdb = "ibis.backends.duckdb" impala = "ibis.backends.impala" mysql = "ibis.backends.mysql" +mssql = "ibis.backends.mssql" pandas = "ibis.backends.pandas" polars = "ibis.backends.polars" postgres = "ibis.backends.postgres" @@ -252,6 +255,7 @@ markers = [ "duckdb: DuckDB tests", "impala: Apache Impala tests", "mysql: MySQL tests", + "mssql: MS SQL Server tests", "pandas: Pandas tests", "polars: Polars tests", "postgres: PostgreSQL tests", diff --git a/shell.nix b/shell.nix index 284f7e818af1..08d3e5273a0d 100644 --- a/shell.nix +++ b/shell.nix @@ -66,4 +66,5 @@ pkgs.mkShell { PGPASSWORD = "postgres"; MYSQL_PWD = "ibis"; + MSSQL_SA_PASSWORD = "1bis_Testing!"; }