In [None]:
!pip install  -q 'ibis-framework[mssql,datafusion,pyspark,duckdb,deltalake]'
%pip install  -q altair

In [2]:
# Notebook configuration.
# `engine` selects the backend: "duckdb", "datafusion", "pyspark" or "mssql".
# NOTE: the original author states that Spark SQL ("pyspark") requires the
# lakehouse to be attached to the notebook.
workspace = 'pythononly'
lakehouse = 'NY'
engine    = "mssql"

**_<u><mark>Get connection</mark></u>_**

In [3]:
import ibis, sqlglot
def get_ibis_connection(workspace, lakehouse, engine):
    """Return an ibis backend connected to a Fabric lakehouse.

    Parameters
    ----------
    workspace : str
        Workspace name, resolved to an id with ``sempy.fabric``.
    lakehouse : str
        Lakehouse name inside that workspace.
    engine : str
        ``'mssql'``   -> connect to the lakehouse SQL endpoint through pyodbc.
        ``'pyspark'`` -> return ``ibis.pyspark.connect()``.
        anything else -> treated as the name of an ibis backend module
        (``getattr(ibis, engine)``); every Delta table found under the
        lakehouse ``Tables`` folder is registered and exposed as a view
        ``<schema>.<table>``.

    Returns
    -------
    An ibis backend object.

    Raises
    ------
    AttributeError
        If ``engine`` is not one of the above and ibis has no attribute of
        that name.

    Notes
    -----
    Relies on the ``notebookutils`` global provided by the notebook runtime.
    """
    import ibis
    import sempy.fabric as fabric

    workspaceID = fabric.resolve_workspace_id(workspace)
    data = notebookutils.lakehouse.getWithProperties(name=lakehouse, workspaceId=workspaceID)
    lakehouseID = data['id']

    if engine == 'mssql':
        import struct, pyodbc, ibis.backends.mssql

        # pyodbc pre-connect attribute id used to pass an access token
        # (SQL_COPT_SS_ACCESS_TOKEN in the Microsoft ODBC driver).
        SQL_COPT_SS_ACCESS_TOKEN = 1256

        dwh_backend = ibis.backends.mssql.Backend()
        sql_endpoint = data['properties']['sqlEndpointProperties']['connectionString']
        token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
        # The token is handed over as a little-endian 4-byte length followed
        # by the UTF-16-LE encoded token bytes.
        token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
        dwh_backend.con = pyodbc.connect(
            f"Driver={{ODBC Driver 18 for SQL Server}};Server={sql_endpoint},1433;Encrypt=Yes;TrustServerCertificate=No",
            attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct},
        )
        # Switch the session to the lakehouse database; the cursor returned by
        # raw_sql is not needed afterwards, so release it immediately.
        dwh_backend.raw_sql(f" use {lakehouse};").close()
        return dwh_backend

    if engine == 'pyspark':
        return ibis.pyspark.connect()

    # Generic path: an in-process engine reading Delta tables straight from OneLake.
    access_token = notebookutils.credentials.getToken('storage')
    storage_options = {"bearer_token": access_token, "use_fabric_endpoint": "true"}
    con = getattr(ibis, engine).connect()
    base_path = f'abfss://{workspaceID}@onelake.dfs.fabric.microsoft.com/{lakehouseID}/Tables'

    # Each first-level folder under Tables becomes a database; each folder
    # below it is read as a Delta table.
    for db in [item.name for item in notebookutils.fs.ls(base_path)]:
        con.create_database(db, force=False)
        for table in [item.name for item in notebookutils.fs.ls(f'{base_path}/{db}')]:
            # table_name is passed by keyword: it works on every ibis version,
            # whereas the positional form is rejected where it is keyword-only.
            t = con.read_delta(
                f'{base_path}/{db}/{table}',
                table_name=table,
                storage_options=storage_options,
            )
            con.create_view(table, t, database=db)
    return con
def sql_to_df(query, engine, dialect):
    """Run a SQL query on the chosen engine and return a pyarrow Table.

    The query text is transpiled with sqlglot from ``dialect`` to the dialect
    of ``engine`` before execution. Only the first statement produced by the
    transpilation is executed.

    Parameters
    ----------
    query : str
        SQL text written in ``dialect``.
    engine : str
        Engine name passed to ``get_ibis_connection``; ``'mssql'`` is mapped
        to the sqlglot dialect ``'tsql'`` for transpilation.
    dialect : str
        sqlglot dialect name the query is written in.

    Returns
    -------
    pyarrow.Table

    Notes
    -----
    Uses the notebook-level ``workspace`` and ``lakehouse`` variables, and
    opens a new connection on every call.
    """
    con = get_ibis_connection(workspace=workspace, lakehouse=lakehouse, engine=engine)

    # Keep `engine` untouched; only the sqlglot target name differs for mssql.
    target_dialect = 'tsql' if engine == 'mssql' else engine
    standard_sql = sqlglot.transpile(query, read=dialect, write=target_dialect)[0]

    if engine == 'mssql':
        import pyarrow as pa

        cursor = con.raw_sql(standard_sql)
        try:
            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
        finally:
            # Always release the cursor, even if fetching fails.
            cursor.close()

        # Build one array per column by index rather than zip(*rows): with an
        # empty result zip() yields no columns at all, and from_arrays would
        # then fail because the number of names and arrays differ.
        arrays = [pa.array([row[i] for row in rows]) for i in range(len(columns))]
        return pa.Table.from_arrays(arrays, names=columns)

    return con.sql(standard_sql).to_pyarrow()

# SQL

In [4]:
# Per-date total and average of fare_amount from ny.taxi.
sql = """ 
             SELECT  date , SUM(fare_amount) AS totalfares ,
             AVG(fare_amount) AS avgfares from
             ny.taxi group by date
      """
# `dialect` is the dialect the query text above is written in; sql_to_df
# transpiles it to the dialect of the selected engine before running it.
data = sql_to_df(sql,engine, dialect="mssql")

# Viz

In [5]:
import altair as alt

# Interval selection shared by both charts: it is attached to `details`
# and used as the filter of `summary`.
brush = alt.selection_interval()

# Bar chart of total fares per date; carries the brush selection.
details = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        alt.X('date:T'),
        alt.Y('totalfares:Q'),
        tooltip=[alt.Tooltip('date:T', format='%Y-%m-%d %H'), 'totalfares:Q'],
    )
    .properties(width=1400, height=400)
    .add_params(brush)
)

# Average fares per date, restricted to the range selected with the brush.
summary = (
    alt.Chart(data)
    .mark_square()
    .encode(
        alt.X('date:T'),
        alt.Y('avgfares:Q'),
        tooltip=['avgfares:Q'],
    )
    .properties(width=1400, height=400)
    .transform_filter(brush)
)

# Stack the two charts vertically and display them as the cell output.
details & summary

# Dataframe API

In [6]:
# Open a connection with the workspace / lakehouse / engine chosen in the
# configuration cell; `con` is used by the cells that follow.
con = get_ibis_connection(workspace =workspace,lakehouse = lakehouse,engine = engine )

In [7]:
# Show the databases visible through the connection (displayed as cell output).
con.list_databases()

['INFORMATION_SCHEMA',
 '_rsc',
 'db_accessadmin',
 'db_backupoperator',
 'db_datareader',
 'db_datawriter',
 'db_ddladmin',
 'db_denydatareader',
 'db_denydatawriter',
 'db_owner',
 'db_securityadmin',
 'dbo',
 'guest',
 'ny',
 'queryinsights',
 'sys']

In [8]:
# Table expression for ny.taxi.
taxi = con.table(name="taxi", database='ny')

# Per-date total and average fare for rows whose year is 2024.
agg = (
    taxi
    .filter(taxi.year == 2024)
    .group_by("date")
    .aggregate(
        totalfares=taxi.fare_amount.sum(),
        avgfares=taxi.fare_amount.mean(),
    )
)

# Execute the expression and materialise the result as a pyarrow Table.
data = agg.to_pyarrow()