In [1]:
# Uncomment to install the dependencies (%pip targets the running kernel's environment).
#%pip install     pyodbc
#%pip install     cachetools

In [2]:
# Host name of the SQL endpoint; passed to get_ibis_connection() further down.
endpoint = '2ddwq77iortevbfx7it6gysrxa-flcbzbaq54pulgpa4k3xjby76m.datawarehouse.fabric.microsoft.com'

**_<u><mark>Get connection</mark></u>_**

In [3]:
from cachetools import cached, LRUCache
import pyodbc 
import struct 
import configparser
import pyarrow as pa

In [4]:
def get_ibis_connection(endpoint):
    """Open a pyodbc connection to the SQL endpoint at ``endpoint``.

    Two authentication paths are tried, in order:

    1. Access token: when ``notebookutils`` is available in the kernel, a
       token is requested from it and handed to the ODBC driver through the
       ``attrs_before`` argument of ``pyodbc.connect``.
    2. Service principal: if no token can be obtained, ``appId`` and
       ``secret`` are read from the ``[myvars]`` section of
       ``C:/KV/variable.ini`` and placed in the connection string.

    Parameters
    ----------
    endpoint : str
        Server host name of the SQL endpoint.

    Returns
    -------
    pyodbc.Connection
        An open connection; the caller is responsible for closing it.

    Raises
    ------
    configparser.Error
        If the fallback path is taken and the ini file, section or keys are missing.
    pyodbc.Error
        If the driver cannot connect.
    """
    # ODBC pre-connect attribute id under which the driver expects the access token.
    SQL_COPT_SS_ACCESS_TOKEN = 1256

    try:
        token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    except Exception:
        # notebookutils is not defined (NameError) or could not issue a token:
        # fall back to service-principal credentials instead of failing here.
        token = None

    if token is not None:
        # The driver wants the token as a 4-byte little-endian length followed by the bytes.
        token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
        # The port is attached to the server with a comma; the token itself must
        # NOT be part of the connection string, it is passed via attrs_before.
        connection_string = f"Driver={{ODBC Driver 18 for SQL Server}};Server={endpoint},1433;Encrypt=Yes;TrustServerCertificate=No"
        return pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct})

    config = configparser.ConfigParser()
    # NOTE: ConfigParser.read() silently ignores a missing file; the get() calls
    # below then raise NoSectionError.
    config.read("C:/KV/variable.ini")
    client_id = config.get("myvars", "appId")
    client_secret = config.get("myvars", "secret")
    connection_string = f"Driver={{ODBC Driver 18 for SQL Server}};Server={endpoint};Authentication=ActiveDirectoryServicePrincipal;UID={client_id};PWD={client_secret};ConnectRetryCount=0"
    return pyodbc.connect(connection_string)
@cached(cache=LRUCache(maxsize=32))
def sql_to_df(query,con):
    """Run ``query`` on ``con`` and return the full result as a pyarrow Table.

    Despite the name, the return value is an Arrow table; call ``.to_pandas()``
    on it to get a DataFrame.

    Results are memoized by the ``cached`` decorator (LRU, up to 32 entries)
    on the ``(query, con)`` arguments, so repeating the same query text on the
    same connection returns the stored table without contacting the server.
    Call ``sql_to_df.cache_clear()`` to force fresh reads.

    Parameters
    ----------
    query : str
        SQL text that produces a result set.
    con : pyodbc.Connection
        Open connection to execute against.

    Returns
    -------
    pyarrow.Table
        One column per result-set column, named from ``cursor.description``.
        A query that matches no rows yields a zero-row table with those columns.
    """
    cursor = con.execute(query)
    try:
        columns = [column[0] for column in cursor.description]
        data = cursor.fetchall()
    finally:
        # All rows are already in memory; release the server-side cursor.
        cursor.close()
    # Build each column by position rather than zip(*data): with zero rows,
    # zip(*data) yields no columns at all and no longer matches `columns`.
    arrays = [pa.array([row[i] for row in data]) for i in range(len(columns))]
    arrow_table = pa.Table.from_arrays(arrays, columns)
    return arrow_table

# Analysis

In [5]:
# Open one connection that the query cells below reuse.
con = get_ibis_connection(endpoint)
# Drop any memoized query results so the next sql_to_df call goes to the endpoint.
sql_to_df.cache_clear()


In [6]:
%%time
# Total and average fare per date, newest dates first, capped at 5000 rows.
# The cache was cleared in the previous cell, so this call runs against the endpoint.
sql = """   
            SELECT  top 5000
            date ,
            SUM(fare_amount) AS totalfares ,
            AVG(fare_amount) AS avgfares
            from NYT.ny.taxi 
            group by date
            order by date desc
      """
sql_to_df(sql,con).to_pandas()

CPU times: total: 125 ms
Wall time: 29.8 s


Unnamed: 0,date,totalfares,avgfares
0,2024-06-30,91916.48,19.213311
1,2024-06-29,1760749.29,19.429815
2,2024-06-28,1987240.99,20.890838
3,2024-06-27,2366851.00,21.807863
4,2024-06-26,2250307.29,20.010380
...,...,...,...
4925,2011-01-05,4166602.43,9.526951
4926,2011-01-04,3949301.41,9.743615
4927,2011-01-03,3710331.64,9.935603
4928,2011-01-02,1652479.92,10.484214


In [7]:
%%time
# Same query string and connection object as the previous cell, so the
# @cached wrapper on sql_to_df returns the stored Arrow table without re-querying.
sql_to_df(sql,con).to_pandas()

CPU times: total: 0 ns
Wall time: 1.01 ms


Unnamed: 0,date,totalfares,avgfares
0,2024-06-30,91916.48,19.213311
1,2024-06-29,1760749.29,19.429815
2,2024-06-28,1987240.99,20.890838
3,2024-06-27,2366851.00,21.807863
4,2024-06-26,2250307.29,20.010380
...,...,...,...
4925,2011-01-05,4166602.43,9.526951
4926,2011-01-04,3949301.41,9.743615
4927,2011-01-03,3710331.64,9.935603
4928,2011-01-02,1652479.92,10.484214


Queries sent through the SQL endpoint respect data masking policies as well as row-level and column-level security (RLS/CLS).

In [8]:
# Read every column of the employee table through the same connection;
# the values displayed are whatever the endpoint returns for this identity.
sql = """   
            SELECT * FROM dwh.dbo.EmployeeData;
      """
sql_to_df(sql,con).to_pandas()

Unnamed: 0,EmployeeID,FirstName,LastName,SSN,email
0,2,F-me,xxxx,XXX-XX-0000,email2@youremail2.com
1,1,T-me,xxxx,XXX-XX-6789,email@youremail.com
