### Parameters to update
- **FabricDWWorkspaceName**: The name of the workspace in which the warehouse exists.
- **FabricDWName**: The name of the warehouse.
- **ConcurrencyNum**: The number of queries that will be executing in parallel.
- **CapacityMetricsWorkspace**: The name of the workspace that the capacity metrics semantic model exists.
- **CapacityMetricsDataset**: The name of the capacity metrics app semantic model.
- **StoreQueryResults**: Flag to set if the results of the queries will be stored in the query results table.
- **QueryRepeatCount**: Number of times each query will run (should be between 1 and 4), e.g. QueryRepeatCount = 4 and queryList = [query1, query2] will become [query1, query1, query1, query1, query2, query2, query2, query2]
- **RunName**: The name of the run. If not specified, one will be generated with the following format. '*Run_{yyyyMMdd}_{hhmmss}*'
- **QueryList**: A list of queries to be executed against the sql endpoint.

In [None]:
# Notebook parameters: edit these values (or override them when the notebook is invoked) before running.
FabricDWWorkspaceName = ''
FabricDWName = 'WH_SampleData'
ConcurrencyNum = 1 # The number of workers that will be executing queries at once. Every worker will execute each query defined in the query list.
CapacityMetricsWorkspace = 'Microsoft Fabric Capacity Metrics'
CapacityMetricsDataset = 'Fabric Capacity Metrics'
StoreQueryResults = False
QueryRepeatCount = 4 # Number of times each query will run, e.g. QueryRepeatCount = 4, queryList = [query1, query2] will become [query1, query1, query1, query1, query2, query2, query2, query2]
RunName = '' # The name of the run. If not specified, one will be generated with the following format. '*Run_{yyyyMMdd}_{hhmmss}*'
QueryList = '' # A list of queries to be executed, passed as the string form of a Python list. Eg. "['SELECT COUNT(*) FROM tblA', 'SELECT 1 AS a']"

In [None]:
import datetime
# Fall back to a UTC-timestamped run name when the caller left RunName empty.
RunName = RunName or f"Run_{datetime.datetime.now(datetime.timezone.utc):%Y%m%d_%H%M%S}"

# Echo the effective parameter values (as name=repr(value)) so they are recorded in the cell output.
for _paramName in (
    'FabricDWWorkspaceName',
    'FabricDWName',
    'ConcurrencyNum',
    'CapacityMetricsWorkspace',
    'CapacityMetricsDataset',
    'StoreQueryResults',
    'RunName',
    'QueryList',
):
    print(f"{_paramName}={globals()[_paramName]!r}")

In [None]:
import ast

# Build queryList, the list of SQL statements every worker will execute.
# When the QueryList parameter is empty (or None / whitespace only) the built-in
# examples below are used; otherwise QueryList is parsed as a Python literal.
if not QueryList or (isinstance(QueryList, str) and not QueryList.strip()):
    queryList = [
        # Example - Query
        'SELECT COUNT(*) FROM FactTransaction'
        ,'''SELECT	COUNT(*) AS TotalTransactions
            FROM	FactTransaction AS ft
            JOIN	DimDate AS d
            ON		d.Date = ft.DateKey
            JOIN	DimPaymentMethod AS pm
            ON		pm.PaymentMethodKey= ft.PaymentMethodKey
            JOIN	DimTransactionType AS tt
            ON		tt.TransactionTypeKey = ft.TransactionTypeKey
            JOIN	DimSupplier AS s
            ON		s.SupplierKey = ft.SupplierKey
            JOIN	DimCustomer AS cu
            ON		cu.CustomerKey = ft.CustomerKey
            JOIN	DimCustomer AS cuBill
            ON		cuBill.CustomerKey = ft.BillToCustomerKey'''
        # Example - Stored Procedures
        ,'''EXEC sp_Ingest'''
        ,'''EXEC sp_Query'''
        # Example - Query with multiple statements
        ,'''IF OBJECT_ID('dbo.DimDate', 'U') IS NOT NULL DROP TABLE dbo.DimDate; CREATE TABLE dbo.DimDate AS SELECT * FROM LH_SampleData.dbo.DimDate'''
    ]
else:
    if isinstance(QueryList, str):
        # Escaped "\n" / "\t" sequences in the parameter text are flattened to spaces
        # before parsing so that every query ends up on a single line.
        # ast.literal_eval only accepts Python literals, so the text is never executed.
        parsedQueryList = ast.literal_eval(QueryList.replace('\\n', ' ').replace('\\t', ' '))
    else:
        # Already a Python sequence (for example when passed in by a calling notebook).
        parsedQueryList = QueryList

    # A single quoted statement would otherwise be iterated character by character later on.
    if isinstance(parsedQueryList, str):
        parsedQueryList = [parsedQueryList]

    queryList = list(parsedQueryList)
    if not all(isinstance(queryText, str) for queryText in queryList):
        raise TypeError(f'QueryList must be a list of SQL strings, got: {queryList!r}')

In [None]:
import math

# Work out how many queries the current Spark session can run at the same time.
executorCoreCnt = int(spark.conf.get('spark.executor.cores', '4'))
# One entry is subtracted from the status tracker's list - presumably the driver; TODO confirm.
executorInstances = len(spark._jsc.sc().statusTracker().getExecutorInfos()) - 1

sessionTaskSlots = executorCoreCnt * executorInstances
if sessionTaskSlots <= 0:
    # Without this guard the division below fails with a bare ZeroDivisionError.
    raise RuntimeError(
        f'No executors are available yet ({executorInstances=}, {executorCoreCnt=}). '
        'Wait for the session to allocate executors and re-run this cell.'
    )

# NOTE(review): this divides by the slots of ALL current executors, not by the cores of one
# executor, so it may under-count the executors needed for ConcurrencyNum - confirm the intent.
executorsRequired = math.ceil(ConcurrencyNum / sessionTaskSlots)
# The effective concurrency is capped by the slots the session actually has.
maxConcurrency = min(ConcurrencyNum, sessionTaskSlots)

# Expand queryList so that every query appears QueryRepeatCount times in a row.
# Example with QueryRepeatCount = 4: [query_1, query_2] becomes
# [query_1, query_1, query_1, query_1, query_2, query_2, query_2, query_2]
queryListWithRepeat = [
    {'QueryUniqueNum': queryNum, 'QueryRepeatNum': repeatNum, 'Query': queryText}
    for queryNum, queryText in enumerate(queryList, start=1)
    for repeatNum in range(1, QueryRepeatCount + 1)
]

# The pool holds one full copy of the repeated list per unit of concurrency, so every
# worker executes every query.
# Eg. If the repeated list has 10 queries and the maxConcurrency is 5, then the queryPool will have 50 queries.
queryPool = queryListWithRepeat * maxConcurrency

# One partition per unit of concurrency; each row is keyed by its unique id and the key
# itself is used as the partitioning function.
rddQueries = sc.parallelize(queryPool, maxConcurrency)
rddQueriesWithId = rddQueries.zipWithUniqueId().map(
    lambda pair: [pair[1], (pair[1], pair[0]['QueryUniqueNum'], pair[0]['QueryRepeatNum'], pair[0]['Query'])]
)
rddQueriesWithId = rddQueriesWithId.partitionBy(maxConcurrency, lambda uniqueId: uniqueId)

# Print the number of rows in each partition to confirm the queries are evenly distributed.
print(rddQueriesWithId.glom().map(len).collect())

# Render a summary of the session's capacity, the requested concurrency, the concurrency
# that will actually be used and the total number of queries in the pool.
displayHTML(f"""
<p><span style="font-size:20px;"><strong>Max concurrency of spark session is </strong><i><strong>{executorCoreCnt * executorInstances}</strong></i></span></p>
<p><span style="font-size:20px;"><strong>Defined concurrency is </strong><i><strong>{ConcurrencyNum}</strong></i></span></p>
<p><span style="font-size:20px;"><strong>Will run </strong><i><strong>{maxConcurrency}</strong></i><strong> queries concurrently for this spark session</strong></span></p>
<p><span style="font-size:20px;"><strong>A total of <i>{len(queryPool)}</i> queries will be executed (queryList size <i>{len(queryList)}</i> * max concurrency <i>{maxConcurrency}</i> * query repeat count <i>{QueryRepeatCount}</i>)</strong></span></p>
""")

# When fewer executors are required than are currently attached, show two ready-to-paste cells:
# a %%configure cell setting spark.dynamicAllocation.minExecutors, and a polling loop that waits
# (up to 10 minutes) until the executor count equals the required number.
# NOTE(review): the hint is shown only when executorsRequired < executorInstances; if it is meant
# to help when MORE executors are needed, this comparison looks inverted - confirm.
if executorsRequired < executorInstances:
    displayHTML(f"""
    <p><span style="font-size:17px;"><strong><i>cell 1</i><br><pre><code>%%configure -f<br>{{"conf": {{"spark.dynamicAllocation.minExecutors": {executorsRequired}}}}}</code></pre>
        <br><hr style="border-bottom: dotted 1px #000" /><i>cell 2</i><br><br><pre><code>import time, datetime

ExecutorsRequired = {executorsRequired}
endDateTime = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(minutes=10)
executorsReadyIterCnt = 0
while datetime.datetime.now(datetime.timezone.utc) < endDateTime:
    executorAvailableCnt = len([executor.host() for executor in spark._jsc.sc().statusTracker().getExecutorInfos()]) -1
    print(f'{{datetime.datetime.now().replace(microsecond=0)}} | {{ExecutorsRequired=}} | {{executorAvailableCnt=}} | {{executorsReadyIterCnt=}}', end='\\r')
    if executorAvailableCnt == ExecutorsRequired: # check if the required number of executors are ready
        executorsReadyIterCnt += 1
        if executorsReadyIterCnt == int(30/5)+1: # check 6 times (30 seconds) for executors to be ready and stable
            break
    else:
        executorsReadyIterCnt = 0
    time.sleep(5)</code></pre>
    </strong></span></p>
    """)

##### Get the workspace Id and sql endpoint name to run the queries against

In [None]:
import requests

# Resolve the id of the workspace named FabricDWWorkspaceName, following the paging links of the
# workspace listing until the workspace is found or no further page is offered.
header = {'Authorization': f'Bearer {mssparkutils.credentials.getToken("pbi")}'
          ,"Content-Type": "application/json"
          }

response = requests.request(method='get', url='https://api.fabric.microsoft.com/v1/workspaces', headers=header)

# Reset first so a value left over from an earlier run of this cell can never be reused silently.
fabricDWWorkspaceId = None
workspaceFound = False

while True:
    workspacePage = response.json()
    for workspace in workspacePage.get('value') or []:
        if workspace.get('displayName') == FabricDWWorkspaceName:
            fabricDWWorkspaceId = workspace.get('id')
            workspaceFound = True
            break

    if workspaceFound:
        break

    if workspacePage.get('continuationUri'):
        # The next page must replace `response`; otherwise the same page is scanned forever.
        response = requests.request(method='get', url=workspacePage.get('continuationUri'), headers=header)
    else:
        raise ValueError(
            f"Workspace '{FabricDWWorkspaceName}' was not found and no continuation token found - {workspacePage}"
        )

print(f'{fabricDWWorkspaceId = }\n{FabricDWWorkspaceName = }')

##### Get the artifact type of the sql endpoint (lakehouse or warehouse)

In [None]:
import requests, json

# Look up the item type (e.g. Lakehouse or Warehouse) of the item named FabricDWName.
response = requests.request(method='get', url=f'https://api.fabric.microsoft.com/v1/workspaces/{fabricDWWorkspaceId}/items', headers=header)

# Reset first so a value from an earlier run of this cell cannot leak through when nothing matches.
itemType = None
for item in response.json().get('value') or []:
    # NOTE(review): every item with this display name is visited and the last one wins; if several
    # item kinds can share a display name, confirm that the last match is the intended one.
    if item.get('displayName') == FabricDWName:
        itemType = item.get('type')

if itemType is None:
    raise ValueError(f"No item named '{FabricDWName}' was found in workspace '{FabricDWWorkspaceName}' - {response.text}")

print(f'{itemType = }')

##### The capacity information that the sql endpoint is associated to

In [None]:
import requests

# Collect the workspace name, the capacity the workspace runs on, and the SQL connection
# details of the lakehouse / warehouse named FabricDWName.
header = {'Authorization': f'Bearer {mssparkutils.credentials.getToken("pbi")}'
          ,"Content-Type": "application/json"
          }

# A single workspace lookup supplies both the workspace name and its capacity assignment.
response = requests.request(method='get', url=f'https://api.fabric.microsoft.com/v1/workspaces/{fabricDWWorkspaceId}', headers=header)
workspaceInfo = response.json()
workspaceName = workspaceInfo.get('displayName')
capacityId = workspaceInfo.get('capacityId')
capacityRegion = workspaceInfo.get('capacityRegion')

# The capacity's own name and SKU come from the capacity listing. They keep these defaults when
# the capacity is not in the list returned to the caller.
capacityName = None
capacitySku = 'F0' #Default value of F0

response = requests.request(method='get', url='https://api.fabric.microsoft.com/v1/capacities', headers=header)
for capacity in response.json().get('value') or []:
    if capacity.get('id') == capacityId:
        capacitySku = capacity.get('sku')
        # Previously the workspace's displayName was recorded as the capacity name.
        capacityName = capacity.get('displayName')

# Lakehouses and warehouses are listed by different endpoints.
isLakehouse = itemType == 'Lakehouse'
endpointCollection = 'lakehouses' if isLakehouse else 'warehouses'
response = requests.request(method='get', url=f'https://api.fabric.microsoft.com/v1/workspaces/{fabricDWWorkspaceId}/{endpointCollection}', headers=header)

matchingEndpoints = [endpoint for endpoint in response.json().get('value') or [] if endpoint.get('displayName') == FabricDWName]
if not matchingEndpoints:
    raise ValueError(f"'{FabricDWName}' was not found in the {endpointCollection} of workspace '{workspaceName}' - {response.text}")
warehouse = matchingEndpoints[0]

# The connection string sits at a different place in the properties of each item kind.
if isLakehouse:
    fabricDWServer = warehouse.get('properties').get('sqlEndpointProperties').get('connectionString')
else:
    fabricDWServer = warehouse.get('properties').get('connectionString')

warehouseId = warehouse.get('id')

print(f'{warehouseId = }\n{fabricDWServer = }\n{workspaceName = }\n{capacityId = }\n{capacityRegion = }\n{capacityName = }\n{capacitySku = }')

##### Get the latest Fabric capacity cost for the region the sql endpoint exists

In [None]:
# Look up the Fabric capacity price for the capacity's region and derive per-CU-second costs.
# The request is sent WITHOUT the Authorization header used for the Fabric / Power BI APIs:
# the retail prices endpoint is public, so the bearer token must not be sent to this host.
priceRegionKey = capacityRegion.replace(' ', '').lower()

# Reset first so values from an earlier run of this cell cannot leak through when nothing matches.
costReserved = None
costPayGo = None

response = requests.request(method='get', url="https://prices.azure.com/api/retail/prices?$filter=skuName eq 'Fabric Capacity'")
for capacity in response.json().get('Items') or []:
    if capacity.get('armRegionName') == priceRegionKey:
        costReserved = capacity.get('retailPrice') / 12 / 730 / 60 / 60 # get the amount per CU second
        costPayGo = costReserved / (156.334/262.80) # constant saving of ~41%. 156.334 is the reserved price of a region. 262.80 is the paygo price of a region

if costReserved is None:
    # NOTE(review): only the first page of the price list is inspected; confirm whether paging is needed.
    raise ValueError(f"No 'Fabric Capacity' price was found for region '{capacityRegion}' ({priceRegionKey})")

print(f'{costReserved = :.10f}\n{costPayGo = :.10f}') # per CU

##### Define the queries to be executed. These are single line queries so use /* */ for commenting out code vs --

In [None]:
from notebookutils import mssparkutils  
from pyspark.sql import functions as F
from pyspark.sql import Row
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, TimestampType, DecimalType
import pyodbc, struct, itertools, time, datetime, re, uuid, json

def sqlendpoint_get_token():
    """Return the current user's 'pbi' access token packed for the ODBC connection.

    The token is UTF-8 encoded, a zero byte is inserted after every byte, and the result
    is prefixed with its length as a little-endian 32-bit integer.
    """
    # Use the credentials of the user executing the notebook
    utf8Token = mssparkutils.credentials.getToken('pbi').encode("UTF-8")

    # Token bytes go to the even positions; the odd positions keep their initial zero.
    widenedToken = bytearray(2 * len(utf8Token))
    widenedToken[0::2] = utf8Token

    return struct.pack("<i", len(widenedToken)) + bytes(widenedToken)

# ODBC connection string for the SQL endpoint; APP tags the sessions opened by this notebook.
connectionString = ';'.join([
    'DRIVER={ODBC Driver 18 for SQL Server}',
    f'SERVER={fabricDWServer}',
    f'Database={FabricDWName}',
    'APP=QueryCostAnalyzer',
])

# Upper-cased GUID identifying this run in the results tables.
runId = f'{uuid.uuid4()}'.upper()

print(f'{runId = }')

##### Get information from the sql endpoint and add it into the RunResults table

In [None]:
from delta.tables import *
from pyspark.sql.functions import col
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, TimestampType, DecimalType

# Read server-level and database-level metadata from the SQL endpoint and merge both
# single-row results into one dictionary, exposed as the one-row DataFrame `df`.
tokenstruct = sqlendpoint_get_token()

# 1256 is passed as a pre-connect attribute carrying the packed access token
# (presumably the driver's access-token option - TODO confirm against the ODBC driver docs).
with pyodbc.connect(connectionString, attrs_before = { 1256:tokenstruct }) as conn:
    with conn.cursor() as cursor:
        cursor.execute('''SELECT @@VERSION AS DWVersion
                            ,@@SERVERNAME AS ServerGuid
                            ,DB_NAME() AS DWName
        ''')
        resultList = cursor.fetchall()
        resultColumns = [column[0] for column in cursor.description]
        cursor.commit()
        # Every value is stringified so the resulting DataFrame has string columns only.
        serverInfoRows = [dict(zip(resultColumns, [str(value) for value in row])) for row in resultList]

        # The database name is bound as a parameter instead of being spliced into the SQL text.
        cursor.execute('''SELECT [is_vorder_enabled] AS IsVOrderEnabled, [data_lake_log_publishing_desc] AS DataLakeLogPublishingDesc
                            ,[data_lake_log_publishing] AS DataLakeLogPublishing, [create_date] AS DWCreateDate, [compatibility_level] AS CompatibilityLevel
                            FROM sys.databases 
                            WHERE [name] = ?
            ''', FabricDWName)

        resultList = cursor.fetchall()
        resultColumns = [column[0] for column in cursor.description]
        cursor.commit()
        databaseInfoRows = [dict(zip(resultColumns, [str(value) for value in row])) for row in resultList]

        if not serverInfoRows or not databaseInfoRows:
            raise ValueError(f"No metadata was returned by the sql endpoint for database '{FabricDWName}'")

        resultSet = serverInfoRows[0] | databaseInfoRows[0]
        df = spark.createDataFrame([resultSet])

# Capacity units (CUs) of the capacity, taken from the digits of the SKU name (e.g. 'F64' -> 64).
# A SKU whose digits are exactly '1' is mapped to 64 CUs. The original compared the digit STRING
# with the integer 1, which is never equal, so that mapping never applied.
capacitySkuDigits = ''.join(ch for ch in capacitySku if ch.isdigit())
capacityCUs = 64 if capacitySkuDigits == '1' else int(capacitySkuDigits)

# One row describing this run. The Run* measures are created as typed NULLs here and are
# filled in by later cells once the run has finished.
dfRunOrdered = df.select(
    F.lit(RunName).alias('RunName').cast(StringType())
    ,F.lit(runId).alias('RunId').cast(StringType())
    ,F.lit(len(queryPool)).alias('QueriesExecutedCnt').cast(IntegerType())
    ,F.lit(maxConcurrency).alias('RunConcurrency').cast(IntegerType())
    ,F.lit(QueryRepeatCount).alias('QueryRepeatCount').cast(IntegerType())
    ,F.lit(StoreQueryResults).alias('StoreQueryResults').cast(StringType())
    ,F.lit(itemType).alias('ItemType').cast(StringType())
    ,col('DWName').cast(StringType())
    ,col('ServerGuid').cast(StringType())
    ,F.lit(warehouseId).alias('DWGuid').cast(StringType())
    ,F.lit(fabricDWServer).alias('DWConnectionString').cast(StringType())
    ,col('DWVersion').cast(StringType())
    ,col('CompatibilityLevel').cast(StringType())
    ,col('DWCreateDate').cast(StringType())
    ,col('DataLakeLogPublishing').cast(StringType())
    ,col('DataLakeLogPublishingDesc').cast(StringType())
    ,col('IsVOrderEnabled').cast(StringType())
    ,F.lit(workspaceName).alias('WorkspaceName').cast(StringType())
    ,F.lit(fabricDWWorkspaceId).alias('WorkspaceGuid').cast(StringType())
    ,F.lit(capacityName).alias('CapacityName').cast(StringType())
    ,F.lit(capacityId).alias('CapacityGuid').cast(StringType())
    ,F.lit(capacitySku).alias('CapacitySKU').cast(StringType())
    ,F.lit(capacityRegion).alias('CapacityRegion').cast(StringType())
    ,F.lit(None).alias('RunStartDateTimeUTC').cast(TimestampType())
    ,F.lit(None).alias('RunStartDateTimeEpochMS').cast(LongType())
    ,F.lit(None).alias('RunEndDateTimeUTC').cast(TimestampType())
    ,F.lit(None).alias('RunEndDateTimeEpochMS').cast(LongType())
    ,F.lit(None).alias('RunDurationMS').cast(LongType())
    ,F.lit(None).alias('RunCUSeconds').cast(DecimalType(38,19))
    ,F.lit(None).alias('RunCostPayGo').cast(DecimalType(38,19))
    ,F.lit(None).alias('RunCostReserved').cast(DecimalType(38,19))
    ,F.lit(None).alias('RunDataScannedDiskMB').cast(DecimalType(38,19))
    ,F.lit(None).alias('RunDataScannedMemoryMB').cast(DecimalType(38,19))
    ,F.lit(None).alias('RunDataScannedRemoteStorageMB').cast(DecimalType(38,19))
    ,F.lit(None).alias('RunAllocatedCpuTimeMS').cast(LongType())
    # Daily figures: seconds per day * CUs, and the same priced at the per-CU-second cost.
    ,F.lit(60*60*24*capacityCUs).alias('CapacityDailyCUSeconds').cast(IntegerType())
    ,F.lit(costPayGo * 60*60*24*capacityCUs).alias('CapacityDailyCostPayGo').cast(DecimalType(38,19))
    ,F.lit(costReserved * 60*60*24*capacityCUs).alias('CapacityDailyCostReserved').cast(DecimalType(38,19))
)

# Location of the runresults table in the LH_QueryResults lakehouse of the notebook's workspace.
runResultsPath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/runresults'

if not mssparkutils.fs.exists(runResultsPath):
    # First run: create the table by writing the run row.
    dfRunOrdered.write.format('delta').mode('append').save(runResultsPath)
else:
    # Table already exists: insert the run row only when its RunId is not present yet.
    dtRunResults = DeltaTable.forPath(spark, runResultsPath)
    insertNewRun = (
        dtRunResults.alias('t')
        .merge(dfRunOrdered.alias('s'), 't.runId = s.RunId')
        .whenNotMatchedInsertAll()
    )
    insertNewRun.execute()

##### Run the queries against the sql endpoint and store results into the QueryResults table

In [None]:
from pyspark import SparkContext, SparkConf
import pyodbc 
from notebookutils import mssparkutils

# Fetch a fresh packed access token; execute_query below reads this module-level value
# when it opens its connection.
tokenstruct = sqlendpoint_get_token()

def get_result_set(cursor):
    """Fetch the cursor's current result set.

    Returns a tuple ``(rows, row_count)``. ``rows`` holds one dict per fetched row,
    mapping column name to the stringified value; when StoreQueryResults is falsy no
    column names are used, so every dict is empty. A cursor without a description
    (no result set) yields ``([], 0)``.
    """
    if not cursor.description:
        return [], 0

    fetchedRows = cursor.fetchall()
    # Column names are only needed when the row contents are going to be stored.
    columnNames = [meta[0] for meta in cursor.description] if StoreQueryResults else []

    rowDicts = []
    for fetchedRow in fetchedRows:
        rowDicts.append(dict(zip(columnNames, map(str, fetchedRow))))

    return rowDicts, len(fetchedRows)

def execute_query(partition_index, iterator):
    """Run every query of one partition sequentially over a single connection.

    ``partition_index`` is the zero-based partition number (reported as the one-based worker
    number). ``iterator`` yields ``[key, (id, QueryUniqueNum, QueryRepeatNum, Query)]`` pairs.
    Returns a list with one metrics row per executed query, in the column order expected by
    the queryresults schema.
    """
    statementIdPattern = r"Statement ID: \{([A-F0-9\-]+)\}"
    queryHashPattern = r"Query hash: (0x[A-F0-9]+)"
    distributionRequestIdPattern = r"Distributed request ID: \{([A-F0-9\-]+)\}"

    # NOTE: workerNum, workerQueryNum, queryUniqueNum and queryRepeatNum must keep these exact
    # names - the SQL comment below is built with the `{name=}` f-string form.
    workerNum = partition_index + 1
    queryMetrics = []

    with pyodbc.connect(connectionString, attrs_before = { 1256:tokenstruct }) as conn:
        for workerQueryNum, (_, queryDetails) in enumerate(iterator, start=1):
            _, queryUniqueNum, queryRepeatNum, queryStatement = queryDetails

            with conn.cursor() as cursor:
                queryStartDateTimeUTC = datetime.datetime.now(datetime.timezone.utc)
                startTime = int(round(time.time() * 1000))

                # The leading comment tags the statement with its worker / query numbers.
                cursor.execute(f'/* {workerNum=} | {workerQueryNum=} | {queryUniqueNum=} | {queryRepeatNum=} */ {queryStatement}')

                # Collect the messages and the result set of the first statement ...
                firstMessages = cursor.messages
                queryMessage = str(firstMessages) if firstMessages else ""
                firstResultSet, firstRowCnt = get_result_set(cursor)
                resultSetList = [firstResultSet]
                resultRowCntList = [firstRowCnt]

                # ... and of every further result set produced by a multi-statement query.
                while cursor.nextset():
                    nextMessages = cursor.messages
                    if nextMessages:
                        queryMessage += str(nextMessages)
                    nextResultSet, nextRowCnt = get_result_set(cursor)
                    resultSetList.append(nextResultSet)
                    resultRowCntList.append(nextRowCnt)

                endTime = int(round(time.time() * 1000))
                queryEndDateTimeUTC = datetime.datetime.now(datetime.timezone.utc)

                # Ids are extracted from the message text; several matches are comma-joined,
                # no match gives an empty string.
                statementId = ','.join(re.findall(statementIdPattern, queryMessage))
                queryHash = ','.join(re.findall(queryHashPattern, queryMessage))
                distributionRequestId = ','.join(re.findall(distributionRequestIdPattern, queryMessage))
                resultSetJsonString = json.dumps(resultSetList)

                cursor.commit()

                queryId = str(uuid.uuid4()).upper()
                queryDurationMS = endTime - startTime
                queryMetrics.append([
                    queryId, workerNum, workerQueryNum, queryUniqueNum, queryRepeatNum,
                    queryStatement, queryStartDateTimeUTC, queryEndDateTimeUTC,
                    queryMessage, startTime, endTime, queryDurationMS,
                    statementId, queryHash, distributionRequestId, resultSetJsonString, resultRowCntList,
                ])

    return queryMetrics

# Attach execute_query to every partition. Nothing is executed yet; the queries only run
# when collect() is called inside createDataFrame below.
queriesExecuted = rddQueriesWithId.mapPartitionsWithIndex(execute_query)

# Run start is captured immediately before the collect() that triggers the execution.
runStartDateTimeUTC = datetime.datetime.now(datetime.timezone.utc)
runStartTimeEpoch = int(runStartDateTimeUTC.timestamp()*1000)

# collect() executes all queries; the field order matches the rows built by execute_query.
dfQueriesExecuted = spark.createDataFrame(queriesExecuted.collect(), schema=StructType([
    StructField("QueryId", StringType(), False),
    StructField("WorkerNum", IntegerType(), False),
    StructField("WorkerQueryNum", IntegerType(), False),
    StructField("QueryUniqueNum", IntegerType(), False),
    StructField("QueryRepeatNum", IntegerType(), False),
    StructField("QueryStatement", StringType(), False),
    StructField("QueryStartDateTimeUTC", TimestampType(), False),
    StructField("QueryEndDateTimeUTC", TimestampType(), False),
    StructField("ReturnMessage", StringType(), False),
    StructField("QueryStartDateTimeEpochMS", LongType(), False),
    StructField("QueryEndDateTimeEpochMS", LongType(), False),
    StructField("QueryDurationMS", LongType(), False),
    StructField("StatementId", StringType(), False),
    StructField("QueryHash", StringType(), False),
    StructField("DistributionRequestId", StringType(), False),
    StructField("ResultSet", StringType(), False),
    StructField("ResultRowCnt", StringType(), False)
    ]))

# Run end is captured right after all queries have returned.
runEndDateTimeUTC = datetime.datetime.now(datetime.timezone.utc)
runEndTimeEpoch = int(runEndDateTimeUTC.timestamp()*1000)

# Prefix every row with the run identifiers and add typed NULL placeholders for the
# cost / scan metrics that are not known at this point.
dfFinal = dfQueriesExecuted.select(F.lit(RunName).alias("RunName"), F.lit(runId).alias("RunId"), "*")\
    .withColumns(
        {
            "QueryCUSeconds": F.lit(None).cast(DecimalType(38, 19))
            ,"QueryCostPayGo": F.lit(None).cast(DecimalType(38, 19))
            ,"QueryCostReserved": F.lit(None).cast(DecimalType(38, 19))

            ,"DataScannedDiskMB": F.lit(None).cast(DecimalType(38, 19))
            ,"DataScannedMemoryMB": F.lit(None).cast(DecimalType(38, 19))
            ,"DataScannedRemoteStorageMB": F.lit(None).cast(DecimalType(38, 19))
            ,"ResultCacheHit": F.lit(None).cast(IntegerType())
            ,"AllocatedCpuTimeMS": F.lit(None).cast(LongType())
        }
    )

# Append the per-query rows to the queryresults table of the LH_QueryResults lakehouse.
dfFinal.write.format('delta').mode('append').save(f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryresults')

In [None]:
# Expose the queryresults table as the temp view vwqueryresults.
spark.read.format('delta').load(f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryresults').createOrReplaceTempView('vwqueryresults')
# statementList: the distinct statement ids of this run, each wrapped in double quotes and joined with ", ".
statementList = spark.sql(f'SELECT ARRAY_JOIN(COLLECT_SET(CONCAT("\\"", StatementId, "\\"")), ", ") AS Statements FROM (SELECT EXPLODE(SPLIT(StatementId, ",")) AS StatementId FROM vwqueryresults WHERE runId = "{runId}") AS a ').collect()[0].asDict().get('Statements')
# We have to explode by statement ids since a sql query may have multiple queries within it
# NOTE(review): this counts every exploded id (not only distinct ones), whereas statementList is de-duplicated.
queriesExecutedCnt = spark.sql(f'SELECT COUNT(StatementId) AS QueryCnt FROM (SELECT EXPLODE(SPLIT(StatementId, ",")) AS StatementId FROM vwqueryresults WHERE runId = "{runId}") AS a ').collect()[0].asDict().get('QueryCnt') 
print(f'{runId = }\n{statementList = }\n{queriesExecutedCnt = }')

##### Update the RunResults table with the end time of the run

In [None]:
from delta.tables import *

# Open the runresults table of the LH_QueryResults lakehouse.
runResultsTablePath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/runresults'
dtRunResults = DeltaTable.forPath(spark, runResultsTablePath)

# Single-row source holding the start / end timestamps captured around the run.
runTimingRow = (runStartDateTimeUTC, runStartTimeEpoch, runEndDateTimeUTC, runEndTimeEpoch, )
runTimingColumns = ['runStartDateTimeUTC', 'runStartDateTimeEpochMS', 'RunEndDateTimeUTC', 'RunEndDateTimeEpochMS']
df_final = spark.createDataFrame(data=[runTimingRow], schema=runTimingColumns)

# Target column -> source expression; the duration is derived from the two epoch values.
runTimingUpdates = {
    'RunStartDateTimeUTC': 's.RunStartDateTimeUTC',
    'RunStartDateTimeEpochMS': 's.RunStartDateTimeEpochMS',
    'RunEndDateTimeUTC': 's.RunEndDateTimeUTC',
    'RunEndDateTimeEpochMS': 's.RunEndDateTimeEpochMS',
    'RunDurationMS': 's.RunEndDateTimeEpochMS - s.RunStartDateTimeEpochMS',
}

# Update the row of this run only (matched on RunId).
runTimingMerge = dtRunResults.alias('t').merge(df_final.alias('s'), f't.RunId = "{runId}"')
runTimingMerge.whenMatchedUpdate(set=runTimingUpdates).execute()

##### Define the get_capacity_metrics_usage function 

In [None]:
import requests, datetime
from pyspark.sql.functions import to_timestamp, lit, col
from pyspark.sql.types import DoubleType

def get_capacity_metrics_usage(time_point: datetime.datetime, operation_id_list: str):
    """Query the capacity metrics semantic model for the CU usage of the given operations.

    Args:
        time_point: The point in time passed to the model's 'TimePoint' parameter
            (year, month, day, hour, minute and second are used).
        operation_id_list: Comma separated, double-quoted operation (statement) ids,
            e.g. '"ID-1", "ID-2"'. It is placed verbatim inside a DAX table constructor.

    Returns:
        A DataFrame with the columns TimePoint, ItemId, ItemKind, ItemName, StartTime,
        EndTime, OperationId, Sum_CUs and Sum_Duration.

    Raises:
        Exception: When the executeQueries call does not return HTTP 200.
        IndexError: When the metrics workspace or the semantic model is not found by name.
    """
    schema = StructType([
        StructField("Items[ItemId]", StringType(), True),
        StructField("Items[ItemKind]", StringType(), True),
        StructField("Items[ItemName]", StringType(), True),
        StructField("TimePointBackgroundDetail[OperationStartTime]", StringType(), True),
        StructField("TimePointBackgroundDetail[OperationEndTime]", StringType(), True),
        StructField("TimePointBackgroundDetail[OperationId]", StringType(), True),
        StructField("[Sum_CUs]", DoubleType(), True),
        StructField("[Sum_Duration]", IntegerType(), True)
    ])

    header = {'Authorization': f'Bearer {mssparkutils.credentials.getToken("pbi")}'
                ,"Content-Type": "application/json"
                }

    # Resolve the workspace and semantic model ids from their display names.
    response = requests.get('https://api.fabric.microsoft.com/v1/workspaces', headers=header)

    capacityWorkspaceId = [workspace.get('id') for workspace in response.json().get('value') if workspace.get('displayName') == CapacityMetricsWorkspace][0]

    response = requests.get(f"https://api.powerbi.com/v1.0/myorg/groups/{capacityWorkspaceId}/datasets", headers=header)

    datasetId = [dataset.get('id') for dataset in response.json().get('value') if dataset.get('name') == CapacityMetricsDataset][0]

    # The operation filter uses the operation_id_list argument. The original ignored the
    # parameter and read the notebook-level statementList variable instead.
    body = {
        "queries": [
        {
            "query": f"""
                DEFINE
                    MPARAMETER 'CapacityID' 	= "{capacityId}"
                    MPARAMETER 'TimePoint' 		= (DATE({time_point.year}, {time_point.month}, {time_point.day}) + TIME({time_point.hour}, {time_point.minute}, {time_point.second}))

                    VAR __Var_CapacityId	= {{"{capacityId}"}}
                    VAR __Var_OperationId	= {{{operation_id_list}}}

                    VAR __Filter_OperationId 	= TREATAS(__Var_OperationId, 'TimePointBackgroundDetail'[OperationId])
                    VAR __Filter_CapacityId 	= TREATAS(__Var_CapacityId, 'Capacities'[capacityId])

                    VAR OperationCUs = 
                        SUMMARIZECOLUMNS(
                            'Items'[ItemId],
                            'Items'[ItemKind],
                            'Items'[ItemName],
                            'TimePointBackgroundDetail'[OperationStartTime],
                            'TimePointBackgroundDetail'[OperationEndTime],
                            'TimePointBackgroundDetail'[OperationId],
                            __Filter_OperationId,
                            __Filter_CapacityId,
                            "Sum_CUs", CALCULATE(SUM('TimePointBackgroundDetail'[Total CU (s)])),
                            "Sum_Duration", CALCULATE(SUM('TimePointBackgroundDetail'[Duration (s)]))
                        )

                EVALUATE
                    OperationCUs
                """
        }
        ]
    }

    response = requests.post(f'https://api.powerbi.com/v1.0/myorg/datasets/{datasetId}/executeQueries', headers=header, json=body )

    if response.status_code != 200:
        raise Exception(f'{response.json()}\nCheck that user has as least contributor access to the workspace')

    rowsList = response.json()["results"][0]["tables"][0]["rows"]
    df_dax = spark.createDataFrame(data=rowsList, schema=schema)

    # Rename the model's bracketed column names to plain ones. The start / end times stay
    # strings; the original's extra to_timestamp columns were never selected and are dropped.
    df = (df_dax.select(
        lit(time_point).alias("TimePoint")
        ,col("Items[ItemId]").alias("ItemId")
        ,col("Items[ItemKind]").alias("ItemKind")
        ,col("Items[ItemName]").alias("ItemName")
        ,col("TimePointBackgroundDetail[OperationStartTime]").alias("StartTime")
        ,col("TimePointBackgroundDetail[OperationEndTime]").alias("EndTime")
        ,col("TimePointBackgroundDetail[OperationId]").alias("OperationId")
        ,col("[Sum_CUs]").cast(DecimalType(38, 19)).alias("Sum_CUs")
        ,col("[Sum_Duration]").cast(IntegerType()).alias("Sum_Duration"))
    )

    return df

##### Define the model_refresh function 

In [None]:
import requests, time

def model_refresh():
    """Trigger a refresh of the capacity metrics semantic model and wait for it to finish.

    The workspace and semantic model are resolved from CapacityMetricsWorkspace and
    CapacityMetricsDataset. After the refresh request is accepted (HTTP 202) the latest
    refresh entry is polled up to 12 times. Progress and the final outcome are printed;
    nothing is returned.
    """
    header = {'Authorization': f'Bearer {mssparkutils.credentials.getToken("pbi")}'
            ,"Content-Type": "application/json"
            }

    response = requests.get('https://api.fabric.microsoft.com/v1/workspaces', headers=header)

    capacityWorkspaceId = [workspace.get('id') for workspace in response.json().get('value') if workspace.get('displayName') == CapacityMetricsWorkspace][0]

    response = requests.get(f"https://api.powerbi.com/v1.0/myorg/groups/{capacityWorkspaceId}/datasets", headers=header)

    datasetId = [dataset.get('id') for dataset in response.json().get('value') if dataset.get('name') == CapacityMetricsDataset][0]

    response = requests.post(f"https://api.powerbi.com/v1.0/myorg/groups/{capacityWorkspaceId}/datasets/{datasetId}/refreshes", headers=header)

    refreshId = response.headers.get('RequestId')
    print(f'{refreshId = } | {response.status_code = }')

    if response.status_code != 202:
        print(f'Refreshed failed - {response.text}')
        return

    for attempt in range(12):
        # https://learn.microsoft.com/en-us/power-bi/connect-data/asynchronous-refresh#get-refreshes
        response = requests.get(f"https://api.powerbi.com/v1.0/myorg/groups/{capacityWorkspaceId}/datasets/{datasetId}/refreshes?$top=1", headers=header)
        if response.status_code != 200:
            time.sleep(10)
            continue

        refreshHistory = response.json().get('value') or []
        # An empty history is treated like a refresh that has not been registered yet.
        refreshStatus = refreshHistory[0].get('status') if refreshHistory else 'Unknown'

        if refreshStatus == 'Unknown':
            # 'Unknown' is the status reported while the refresh is still in progress.
            print(f'Refreshing tables ...')
            time.sleep(20)
        elif refreshStatus == 'Completed':
            print(f'Refresh Complete')
            break
        else:
            # Any other final status (e.g. Failed) is reported as such, not as a success.
            print(f'Refresh ended with status {refreshStatus} - {response.text}')
            break
    else:
        print('Refresh did not finish within the polling window')

##### Query the capacity metrics app to get the CUs consumed for each query

In [None]:
from pyspark.sql.functions import min, max, sum

# Keep querying the metrics app until every executed statement is found. Data can be delayed
# by a few minutes, so up to 15 attempts are made: a one minute sleep follows most of them,
# and the 10th attempt triggers a refresh of the semantic model instead of sleeping.
for retryCnt in range(15):
    # Two time points are queried (run start and run start + 23 hours) and combined.
    df_today = get_capacity_metrics_usage(runStartDateTimeUTC, statementList)
    df_tomorrow = get_capacity_metrics_usage(runStartDateTimeUTC + datetime.timedelta(hours = 23), statementList)
    df_all_days = df_today.unionAll(df_tomorrow)
    df_count = df_all_days.select('OperationId').distinct()

    # Count once per attempt; every count() runs a Spark job.
    statementsFoundCnt = df_count.count()
    print(f'{statementsFoundCnt} statements of the {queriesExecutedCnt} that have been found in the capacity metrics model. ', end='')
    if statementsFoundCnt == queriesExecutedCnt:
        break

    if retryCnt%10 == (10-1):
        print('Refreshing the capacity metircs app semantic model...')
        model_refresh()
        continue
    print('Sleeping for a minute...')
    time.sleep(60)
else:
    print(f'Giving up after {retryCnt + 1} attempts; continuing with the statements found so far.')

# Always aggregate what was found. Otherwise a timeout would leave df_final pointing at the
# run-timing DataFrame created by an earlier cell.
df_final = df_all_days.groupBy('ItemId', 'ItemKind', 'ItemName', 'OperationId').agg(min("StartTime").alias("StartTime"), max("EndTime").alias("EndTime"), sum("Sum_CUs").alias("QueryCUSeconds"), sum("Sum_Duration").alias("SumDuration"))

##### Query the sql endpoint queryinsights exec_requests_history view to get CPU and memory usage per query

In [None]:
from delta.tables import *

from pyspark.sql.types import StringType, StructField, StructType

# statementList is a comma separated string of quoted statement ids; switch to single quotes for T-SQL.
statementIdList = [statement.replace('"', "'") for statement in statementList.split(', ')]

# Query the queryinsights view until all statement ids have been populated.
# After 5 attempts (a minute apart), stop and log what has been found.
for retryCnt in range(5):
        
    tokenstruct = sqlendpoint_get_token()

    with pyodbc.connect(connectionString, attrs_before = { 1256:tokenstruct }) as conn:
        with conn.cursor() as cursor:
            cursor.execute(f'''SELECT 
                                distributed_statement_id
                                ,submit_time
                                ,start_time
                                ,end_time
                                ,total_elapsed_time_ms
                                ,login_name
                                ,row_count
                                ,status
                                ,session_id
                                ,connection_id
                                ,program_name
                                ,batch_id
                                ,root_batch_id
                                ,query_hash
                                ,label
                                ,result_cache_hit
                                ,allocated_cpu_time_ms
                                ,data_scanned_remote_storage_mb
                                ,data_scanned_memory_mb
                                ,data_scanned_disk_mb
                                ,command 
                        FROM    queryinsights.exec_requests_history
                        WHERE   submit_time BETWEEN '{runStartDateTimeUTC}' AND '{runEndDateTimeUTC}'
                        AND     distributed_statement_id IN
                        (
                            {','.join(statementIdList)}
                        )
                ''')

            resultList = cursor.fetchall()
            resultColumns = [column[0] for column in cursor.description]
            cursor.commit()
            # Every value is carried as a string here and cast to its real type in the select that follows.
            resultSet = [dict(zip(resultColumns, [str(col) for col in row])) for row in resultList]
            # An explicit all-string schema is required: with no rows (normal while queryinsights is
            # still catching up) Spark cannot infer a schema and createDataFrame raises.
            queryInsightsSchema = StructType([StructField(column, StringType(), True) for column in resultColumns])
            df_queryinsights = spark.createDataFrame(resultSet, schema=queryInsightsSchema)

    # One DataFrame row per fetched row, so the fetched list length is the row count (no Spark job needed).
    foundCnt = len(resultList)
    if foundCnt == len(statementIdList):
        print(f'{foundCnt} out of {len(statementIdList)} found.')
        break
    else:
        print(f'{foundCnt} out of {len(statementIdList)} found. Sleeping for a minute...')
        time.sleep(60)      

# Target Spark type for each queryinsights column, listed in output order.
# The values arrive as strings and are cast to these types below.
queryInsightsColumnTypes = [
    ('allocated_cpu_time_ms', LongType()),
    ('batch_id', StringType()),
    ('connection_id', StringType()),
    ('data_scanned_disk_mb', DecimalType(18, 3)),
    ('data_scanned_memory_mb', DecimalType(18, 3)),
    ('data_scanned_remote_storage_mb', DecimalType(18, 3)),
    ('distributed_statement_id', StringType()),
    ('end_time', TimestampType()),
    ('label', StringType()),
    ('login_name', StringType()),
    ('program_name', StringType()),
    ('query_hash', StringType()),
    ('result_cache_hit', IntegerType()),
    ('root_batch_id', StringType()),
    ('row_count', LongType()),
    ('session_id', IntegerType()),
    ('start_time', TimestampType()),
    ('status', StringType()),
    ('submit_time', TimestampType()),
    ('total_elapsed_time_ms', IntegerType()),
    ('command', StringType()),
]

# Prefix every row with the run identifiers, then apply the casts while keeping the column names.
df_queryinsights = df_queryinsights.select(
    F.lit(RunName).alias('RunName'),
    F.lit(runId).alias('RunId'),
    *[F.col(columnName).cast(columnType).alias(columnName) for columnName, columnType in queryInsightsColumnTypes]
)

# OneLake location of the queryinsightsresults delta table in the LH_QueryResults lakehouse.
queryInsightsResultsPath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryinsightsresults'

if not mssparkutils.fs.exists(queryInsightsResultsPath):
    # The table does not exist yet: writing the DataFrame creates it.
    df_queryinsights.write.format('delta').mode('append').save(queryInsightsResultsPath)
else:
    # The table exists: insert only source rows whose RunId matches no existing row.
    dtQueryInsightsResults = DeltaTable.forPath(spark, queryInsightsResultsPath)
    queryInsightsMerge = dtQueryInsightsResults.alias('t').merge(
        df_queryinsights.alias('s'),
        't.runId = s.RunId'
    )
    queryInsightsMerge.whenNotMatchedInsertAll().execute()

In [None]:
# Register the three inputs of the join as temp views: capacity metrics usage,
# queryinsights rows, and the persisted queryresults delta table.
queryResultsViewPath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryresults'

df_final.createOrReplaceTempView('vwCapacityMetricsApp')
df_queryinsights.createOrReplaceTempView('vwQueryInsights')
df_queryResultsTable = spark.read.format('delta').load(queryResultsViewPath)
df_queryResultsTable.createOrReplaceTempView('vwQueryResults')

# The inner query pairs each capacity metrics operation with its queryinsights row.
# The outer query attaches those pairs to every queryresults row of this run whose
# StatementId contains the operation id as a substring, and aggregates per row.
queryResultsJoinSql = f"""
SELECT qr.RunId, qr.StatementId
        ,SUM(b.QueryCUSeconds) AS QueryCUSeconds
        ,SUM(b.DataScannedMemoryMB) AS DataScannedMemoryMB, SUM(b.DataScannedDiskMB) AS DataScannedDiskMB
        ,SUM(b.DataScannedRemoteStorageMB) AS DataScannedRemoteStorageMB, MAX(b.ResultCacheHit) AS ResultCacheHit
        ,SUM(b.AllocatedCpuTimeMS) AS AllocatedCpuTimeMS
FROM vwQueryResults AS qr
JOIN 
(
    SELECT cma.OperationId AS StatementId, cma.QueryCUSeconds
            ,qi.data_scanned_memory_mb AS DataScannedMemoryMB, qi.data_scanned_disk_mb AS DataScannedDiskMB
            ,qi.data_scanned_remote_storage_mb AS DataScannedRemoteStorageMB
            ,qi.result_cache_hit AS ResultCacheHit, qi.allocated_cpu_time_ms AS AllocatedCpuTimeMS
    FROM vwCapacityMetricsApp AS cma
    JOIN vwQueryInsights AS qi
    ON qi.distributed_statement_id = cma.OperationId
) AS b
ON qr.StatementId LIKE '%'||b.StatementId||'%'
WHERE qr.RunId = '{runId}'
GROUP BY qr.RunId, qr.StatementId
"""

dfQueryResultsCleansedWithQueryInsights = spark.sql(queryResultsJoinSql)


##### Update the QueryResults table with the CUSeconds and QueryCost derived from the Capacity Metrics App

In [None]:
from delta.tables import *

# Open the queryresults delta table in the LH_QueryResults lakehouse.
queryResultsTablePath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryresults'
dtQueryResults = DeltaTable.forPath(spark, queryResultsTablePath)

# Target column -> source expression. Both cost columns are the CU seconds multiplied by the respective rate.
queryResultsUpdates = {
    'QueryCUSeconds': 's.QueryCUSeconds',
    'QueryCostPayGo': f's.QueryCUSeconds * {costPayGo}',
    'QueryCostReserved': f's.QueryCUSeconds * {costReserved}',
    'DataScannedDiskMB': 's.DataScannedDiskMB',
    'DataScannedMemoryMB': 's.DataScannedMemoryMB',
    'DataScannedRemoteStorageMB': 's.DataScannedRemoteStorageMB',
    'ResultCacheHit': 's.ResultCacheHit',
    'AllocatedCpuTimeMS': 's.AllocatedCpuTimeMS',
}

# Only rows that already exist for the same run and statement are updated; nothing is inserted.
queryResultsMerge = dtQueryResults.alias('t').merge(
    dfQueryResultsCleansedWithQueryInsights.alias('s'),
    't.runId = s.RunId AND t.StatementId = s.StatementId'
)
queryResultsMerge.whenMatchedUpdate(set=queryResultsUpdates).execute()

##### Update the RunResults table with the cost of the run

In [None]:
from delta.tables import *

# Both tables live under the Tables folder of the LH_QueryResults lakehouse.
runCostTablesRoot = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables'

dtRunResults = DeltaTable.forPath(spark, f'{runCostTablesRoot}/runresults')

df_runQueryResults = spark.read.format('delta').load(f'{runCostTablesRoot}/queryresults')
df_runQueryResults.createOrReplaceTempView('vwqueryresults')

# Collapse this run's query rows into a single row of totals; missing values count as 0.
runTotalsSql = f'''SELECT SUM(COALESCE(QueryCUSeconds, 0)) AS RunCUSeconds
        ,SUM(COALESCE(QueryCostPayGo, 0)) AS RunCostPayGo
        ,SUM(COALESCE(QueryCostReserved, 0)) AS RunCostReserved 
        ,SUM(COALESCE(DataScannedDiskMB, 0)) AS RunDataScannedDiskMB
        ,SUM(COALESCE(DataScannedMemoryMB, 0)) AS RunDataScannedMemoryMB
        ,SUM(COALESCE(DataScannedRemoteStorageMB, 0)) AS RunDataScannedRemoteStorageMB 
        ,SUM(COALESCE(AllocatedCpuTimeMS, 0)) AS RunAllocatedCpuTimeMS 
        FROM vwqueryresults WHERE RunId = "{runId}"
    '''
dtRunResultsCleansed = spark.sql(runTotalsSql)

# Target column -> source expression. The run costs are recomputed from the summed CU seconds
# instead of using the summed per-query costs, so the two tables can differ slightly due to rounding.
runResultsUpdates = {
    'RunCUSeconds': 's.RunCUSeconds',
    'RunCostPayGo': f's.RunCUSeconds * {costPayGo}',
    'RunCostReserved': f's.RunCUSeconds * {costReserved}',
    'RunDataScannedDiskMB': 's.RunDataScannedDiskMB',
    'RunDataScannedMemoryMB': 's.RunDataScannedMemoryMB',
    'RunDataScannedRemoteStorageMB': 's.RunDataScannedRemoteStorageMB',
    'RunAllocatedCpuTimeMS': 's.RunAllocatedCpuTimeMS',
}

# The condition only filters the target, so the totals row is applied to every runresults row of this run.
runResultsMerge = dtRunResults.alias('t').merge(
    dtRunResultsCleansed.alias('s'),
    f't.RunId = "{runId}"'
)
runResultsMerge.whenMatchedUpdate(set=runResultsUpdates).execute()

In [None]:
# Show the runresults row(s) recorded for this run.
runResultsDisplayPath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/runresults'
df_runResultsDisplay = spark.read.format('delta').load(runResultsDisplayPath)
df_runResultsDisplay.createOrReplaceTempView('vwrunresults')
runResultsDisplaySql = f"SELECT * FROM vwrunresults WHERE RunId = '{runId}'"
display(spark.sql(runResultsDisplaySql))

In [None]:
# Show the queryresults rows recorded for this run.
queryResultsDisplayPath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryresults'
df_queryResultsDisplay = spark.read.format('delta').load(queryResultsDisplayPath)
df_queryResultsDisplay.createOrReplaceTempView('vwqueryresults')
queryResultsDisplaySql = f"SELECT * FROM vwqueryresults WHERE RunId = '{runId}'"
display(spark.sql(queryResultsDisplaySql))

In [None]:
# Show the queryinsightsresults rows recorded for this run.
queryInsightsDisplayPath = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables/queryinsightsresults'
df_queryInsightsDisplay = spark.read.format('delta').load(queryInsightsDisplayPath)
df_queryInsightsDisplay.createOrReplaceTempView('vwqueryinsightsresults')
queryInsightsDisplaySql = f"SELECT * FROM vwqueryinsightsresults WHERE RunId = '{runId}'"
display(spark.sql(queryInsightsDisplaySql))

In [None]:
from delta.tables import *

# Turn off Delta's retention duration check so the short vacuum retention below is accepted.
spark.conf.set('spark.databricks.delta.retentionDurationCheck.enabled', False)

maintenanceTablesRoot = f'abfss://{notebookutils.mssparkutils.env.getWorkspaceName()}@onelake.dfs.fabric.microsoft.com/LH_QueryResults.Lakehouse/Tables'

# Compact each results table, then vacuum it with a retention of 1 hour.
for tableName in ('queryresults', 'runresults', 'queryinsightsresults'):
    dt = DeltaTable.forPath(spark, f'{maintenanceTablesRoot}/{tableName}')
    dt.optimize().executeCompaction()
    dt.vacuum(1)

##### Trigger the sql endpoint metadata sync process

In [None]:
import json, requests
import time
from notebookutils import mssparkutils

# Trigger a metadata refresh of the LH_QueryResults SQL endpoint and wait (up to ~2 minutes) for it to finish.
header = {'Authorization': f'Bearer {mssparkutils.credentials.getToken("pbi")}'
          ,"Content-Type": "application/json"
          }

workspaceId = spark.conf.get("trident.workspace.id")

# Resolve the lakehouse id by display name, then the id of its SQL endpoint.
response = requests.request(method='get', url=f'https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/lakehouses', headers=header)

lakehouseId = [lakehouse.get('id') for lakehouse in response.json().get('value') if lakehouse.get('displayName') == 'LH_QueryResults'][0]
sqlendpointId = requests.request(method='get', url=f"https://api.fabric.microsoft.com/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}", headers=header).json()['properties']['sqlEndpointProperties']['id']

# Submit the refresh command; the reply carries the batch id to poll and the initial state.
payload = {"commands":[{"$type":"MetadataRefreshExternalCommand"}]}
response = requests.request(method='post', url=f"https://api.fabric.microsoft.com/v1.0/myorg/lhdatamarts/{sqlendpointId}", data=json.dumps(payload), headers=header)

syncRequest = response.json()
batchId = syncRequest["batchId"]
progressState = syncRequest["progressState"]

batchStatusUrl = f"https://api.fabric.microsoft.com/v1.0/myorg/lhdatamarts/{sqlendpointId}/batches/{batchId}"

# A sync should not take more than 2 minutes: 24 polls, 5 seconds apart.
# The counter is incremented on every poll so the limit is actually enforced.
maxPolls = 24
statusresponsedata = None
iterationCnt = 0
while progressState == 'inProgress' and iterationCnt < maxPolls:
    time.sleep(5)
    statusresponsedata = requests.request(method='get', url=batchStatusUrl, headers=header)
    progressState = statusresponsedata.json()["progressState"]
    print(f"Sync state: {progressState}")
    iterationCnt += 1

if progressState == 'inProgress':
    print(f'Sync is still in progress after {maxPolls * 5} seconds. No longer waiting.')

if statusresponsedata is None:
    # The batch was already out of the inProgress state when submitted, so the loop never ran;
    # fetch the batch status once so the per-table details below are available.
    statusresponsedata = requests.request(method='get', url=batchStatusUrl, headers=header)
    print(f"Sync state: {statusresponsedata.json()['progressState']}")

# Report the sync state of every table in the batch.
table_details = [
        {
        'tableName': table['tableName'],
         'sqlSyncState':  table['sqlSyncState']
        }
        for table in statusresponsedata.json()['operationInformation'][0]['progressDetail']['tablesSyncStatus']
    ]

print(json.dumps(table_details, indent=2))

In [1]:
# Render a reference T-SQL query as formatted HTML in the notebook output.
# The query is only displayed here, never executed by this cell. It lists the requests
# currently running on the warehouse (joined to their sessions, connections and database)
# so the executing statements and the concurrency level can be checked by hand.
# Syntax highlighting comes from the code-prettify loader script referenced in the first line.
# NOTE(review): the f-string has no placeholders, so the `f` prefix has no effect.
displayHTML(f"""<script src="https://cdn.rawgit.com/google/code-prettify/master/loader/run_prettify.js"></script>
<p style="margin-bottom:0"><span style="font-size:20px;"><strong>/*<br>Reference T-SQL - </strong></span><span style="font-size:20px;"><strong>See running sql statements on the DW. Used to verify query(s) are executing and that the concurrency is working correctly.<br>*/</strong></span></p>
<pre class="prettyprint"><p style="margin-top:0;">SELECT	d.name AS 'database_name'
	,s.login_name
	,r.[session_id]
	,r.start_time
	,r.STATUS
	,r.total_elapsed_time
	,r.command
	,CASE /* Uses statement start and end offset to figure out what statement is running */
		WHEN r.[statement_start_offset] > 0
			THEN
				/* The start of the active command is not at the beginning of the full command text */
				CASE r.[statement_end_offset]
					WHEN - 1
						THEN
							/* The end of the full command is also the end of the active statement */
							SUBSTRING(t.TEXT, (r.[statement_start_offset] / 2) + 1, 2147483647)
					ELSE
						/* The end of the active statement is not at the end of the full command */
						SUBSTRING(t.TEXT, (r.[statement_start_offset] / 2) + 1, (r.[statement_end_offset] - r.[statement_start_offset]) / 2)
					END
		ELSE
			/* 1st part of full command is running */
			CASE r.[statement_end_offset]
				WHEN - 1
					THEN
						/* The end of the full command is also the end of the active statement */
						RTRIM(LTRIM(t.[text]))
				ELSE
					/* The end of the active statement is not at the end of the full command */
					LEFT(t.TEXT, (r.[statement_end_offset] / 2) + 1)
				END
		END AS [executing_statement]
	,t.[text] AS [parent_batch]
	,s.[program_name]
	,r.query_hash
	,r.query_plan_hash
	,r.dist_statement_id
	,r.[label]
	,s.client_interface_name
	,r.[sql_handle]
	,c.client_net_address
	,c.connection_id
FROM	sys.dm_exec_requests r
CROSS APPLY sys.[dm_exec_sql_text](r.[sql_handle]) t
JOIN	sys.dm_exec_sessions s ON r.session_id = s.session_id
JOIN	sys.dm_exec_connections c ON s.session_id = c.session_id
JOIN	sys.databases d ON d.database_id = r.database_id
WHERE	r.dist_statement_id != '00000000-0000-0000-0000-000000000000'
AND	r.session_id <> @@SPID
AND	s.program_name NOT IN ('QueryInsights','DMS')
</pre></p>
""")


StatementMeta(, 56ed4fc1-00c2-4248-bba5-123781108efd, 3, Finished, Available, Finished)