In [17]:
## Parameters to be passed in from pipeline
minSku = 'F2' # Smallest SKU the capacity may be scaled down to
maxSku = 'F128' # Largest SKU the capacity may be scaled up to
utilizationTolerance = 90 # Percentage of a SKU's CU that consumption must stay within, i.e. we scale to a SKU where consumed CU <= SKU CU * utilizationTolerance / 100
capacityName = 'fabricbamdemo' # Name of the capacity to monitor. May also need the region or capacity Id, since a capacity name is only unique in combination with its region
subscriptionId = '' # Azure subscription that the capacity is in
metricsAppWorkspaceName = 'WS_FabricCapacityMetrics' # Name of the workspace that holds the Capacity Metrics App semantic model. Could default to the default workspace
metricsAppModelName = 'Fabric Capacity Metrics' # Name of the Capacity Metrics App semantic model. Could default to the default app name

StatementMeta(, 11683cc5-d1f3-4332-b77e-69f05d57db27, 19, Finished, Available)

In [18]:
# Key Vault endpoint that stores the service principal details
keyVaultEndpoint = ''

# Read the tenant id, client id and client secret (in that order) from Key Vault
tenantId, clientId, secret = [
    mssparkutils.credentials.getSecret(keyVaultEndpoint, secretName)
    for secretName in ('secretName_tenantId', 'secretName_clientId', 'secretName_clientSecret')
]

StatementMeta(, 11683cc5-d1f3-4332-b77e-69f05d57db27, 20, Finished, Available)

##### Token Audiences and tokens

In [None]:
from azure.identity import ClientSecretCredential

# Token audiences (scopes): Power BI / Fabric REST APIs and Azure management
api_pbi = 'https://analysis.windows.net/powerbi/api/.default'
api_azuremgmt = 'https://management.core.windows.net/.default'

# Service principal credential built from the tenant id, client id and secret
auth = ClientSecretCredential(
    tenant_id=tenantId,
    client_id=clientId,
    client_secret=secret,
)

# One bearer-token request header per audience: Power BI first, then Azure management
header_pbi, header_azuremgmt = [
    {'Authorization': f'Bearer {auth.get_token(scope).token}', 'Content-type': 'application/json'}
    for scope in (api_pbi, api_azuremgmt)
]

##### Refresh only the required imported tables

In [19]:
import requests, json, time

# Resolve the workspace that hosts the Capacity Metrics App semantic model
response = requests.get('https://api.fabric.microsoft.com/v1/workspaces', headers=header_pbi)
response.raise_for_status()

workspaceIds = [workspace.get('id') for workspace in response.json().get('value', []) if workspace.get('displayName') == metricsAppWorkspaceName]
if not workspaceIds:
    raise ValueError(f'Workspace "{metricsAppWorkspaceName}" was not found (or is not visible to the service principal).')
workspaceId = workspaceIds[0]
print(f'{workspaceId = }')

# Resolve the semantic model (dataset) inside that workspace
response = requests.get(f"https://api.powerbi.com/v1.0/myorg/groups/{workspaceId}/datasets", headers=header_pbi)
response.raise_for_status()

datasetIds = [dataset.get('id') for dataset in response.json().get('value', []) if dataset.get('name') == metricsAppModelName]
if not datasetIds:
    raise ValueError(f'Semantic model "{metricsAppModelName}" was not found in workspace "{metricsAppWorkspaceName}".')
datasetId = datasetIds[0]
print(f'{datasetId = }')

# https://learn.microsoft.com/en-us/power-bi/connect-data/asynchronous-refresh#post-refreshes
tableList = [{"table": "Capacities"}
            ,{"table": "TimePoints"}
            ,{"table": "Items"}
            ]
body = {"objects": tableList} # TODO: confirm with Pat which tables are import and which are direct query
response = requests.post(f"https://api.powerbi.com/v1.0/myorg/groups/{workspaceId}/datasets/{datasetId}/refreshes", headers=header_pbi, data=json.dumps(body))
# Surface a rejected refresh request instead of polling for a refresh that never started
response.raise_for_status()

refreshId = response.headers.get('RequestId')
print(f'{refreshId = }')

# Check the status of the most recent refresh, attempting 12 times (~60 seconds)
for attempt in range(12):
    # https://learn.microsoft.com/en-us/power-bi/connect-data/asynchronous-refresh#get-refreshes
    response = requests.get(f"https://api.powerbi.com/v1.0/myorg/groups/{workspaceId}/datasets/{datasetId}/refreshes?$top=1", headers=header_pbi)
    response.raise_for_status()
    refreshHistory = response.json().get('value') or []
    # 'Unknown' means the refresh is still in progress; an empty history is treated the same way
    refreshStatus = refreshHistory[0].get('status') if refreshHistory else 'Unknown'

    if refreshStatus == 'Completed':
        print('Refresh Complete')
        break
    if refreshStatus != 'Unknown':
        # Any other terminal state (e.g. Failed, Cancelled, Disabled) means the model holds stale data,
        # so stop here rather than make a scaling decision on it
        raise RuntimeError(f'Refresh of "{metricsAppModelName}" ended with status "{refreshStatus}".')

    print('Refreshing tables ...')
    time.sleep(5)
else:
    # The loop ran out of attempts without seeing a terminal status
    print('WARNING: refresh did not finish within ~60 seconds; later cells may read stale data.')

StatementMeta(, 11683cc5-d1f3-4332-b77e-69f05d57db27, 21, Finished, Available)

workspaceId = '1f0eccd7-c5e9-494c-a5a6-37d5e75b6792'
datasetId = '6bc16852-d48e-45ea-9ce9-b8f6ec78c4ee'
refreshId = '87817462-5f92-4026-a145-3a18852df650'
Refreshing tables ...
Refresh Complete


##### Create a dictionary that maps each Fabric capacity SKU to its total capacity unit (CU) seconds over a 24-hour period

In [20]:
# Map every F SKU (F2, F4, ... F2048) to the CU-seconds it provides per day:
# the SKU's capacity units multiplied by the number of seconds in 24 hours.
skuDict = {
    f'F{capacityUnits}': capacityUnits * 24 * 60 * 60
    for capacityUnits in (2 ** exponent for exponent in range(1, 12))
}
print(f'{skuDict = }')

StatementMeta(, 11683cc5-d1f3-4332-b77e-69f05d57db27, 22, Finished, Available)

skuDict = {'F2': 172800, 'F4': 345600, 'F8': 691200, 'F16': 1382400, 'F32': 2764800, 'F64': 5529600, 'F128': 11059200, 'F256': 22118400, 'F512': 44236800, 'F1024': 88473600, 'F2048': 176947200}


##### Get the capacity's current SKU, its total capacity unit (CU) seconds per 24 hours, and its capacity ID

In [21]:
import requests

# List the capacities visible to the service principal and pick the monitored one by display name
response = requests.get("https://api.fabric.microsoft.com/v1/capacities", headers=header_pbi)
response.raise_for_status()

matchingCapacities = [capacity for capacity in response.json().get('value', []) if capacity.get('displayName') == capacityName]
if not matchingCapacities:
    raise ValueError(f'Capacity "{capacityName}" was not found (or is not visible to the service principal).')
# NOTE(review): display names may repeat across regions; the first match is used
monitoredCapacity = matchingCapacities[0]

currentSku = monitoredCapacity.get('sku')
print(f'{currentSku = }')
if currentSku not in skuDict:
    # Only the F SKUs listed in skuDict can be sized by this notebook
    raise ValueError(f'Capacity "{capacityName}" has SKU "{currentSku}", which is not one of: {", ".join(skuDict)}.')
currentSkuCUTotalPerDay = skuDict[currentSku]
print(f'{currentSkuCUTotalPerDay = }')
# Upper-cased because the value is passed to the metrics model as the capacity id filter
capacityId = monitoredCapacity.get('id').upper()
print(f'{capacityId = }')

StatementMeta(, 11683cc5-d1f3-4332-b77e-69f05d57db27, 23, Finished, Available)

currentSku = 'F64'
currentSkuCUTotalPerDay = 5529600
capacityId = '10C012A4-33EB-480E-A2AD-EE63C29E86BE'


##### Query the Fabric Capacity Metrics App semantic model for the total capacity unit (CU) seconds consumed over the last 24 hours

In [23]:
import requests, json, math

# DAX query: per capacity name and billable type, sum the interactive, background and total CUs
# for the monitored capacity over the last 24 hours (TimePoint >= NOW() - 1).
body = {
  "queries": [
    {
      "query": f"""
        DEFINE
            MPARAMETER 'CapacityID' = "{capacityId}"

            VAR __DS0FilterTable2 = TREATAS({{"{capacityId}"}}, 'Capacities'[capacityId])
            
            VAR __DS0FilterTable3 = 
                      FILTER(
                        KEEPFILTERS(VALUES('TimePoints'[TimePoint])),
                        'TimePoints'[TimePoint] >= NOW() - 1
                      )
                      
            VAR __DS0Core = 
              SELECTCOLUMNS(
                SUMMARIZECOLUMNS(
                'Capacities'[Capacity Name],
                Items[Billable type],
                  __DS0FilterTable2,
                  __DS0FilterTable3,
                  "SumInteractive", SUM('CUDetail'[Interactive]),
                  "SumBackground", SUM('CUDetail'[Background]),
                  "SumCUs", SUM('CUDetail'[CUs])
                ),
                "CapacityName", 'Capacities'[Capacity Name],
                "BillType", Items[Billable type],
                "SumInteractive", [SumInteractive],
                "SumBackground", [SumBackground],
                "SumCUs", [SumCUs]
              )

          EVALUATE
            __DS0Core
    """
    }
  ]
}

results = requests.post(f'https://api.powerbi.com/v1.0/myorg/datasets/{datasetId}/executeQueries', headers=header_pbi, json=body )
results.raise_for_status()

# Fail with a clear message when the model returned no rows, instead of an obscure Spark/TypeError later
payload = results.json()
resultRows = [row
              for result in payload.get('results', [])
              for table in result.get('tables', [])
              for row in table.get('rows', [])]
if not resultRows:
    raise ValueError(f'The metrics model returned no consumption rows for capacity "{capacityName}" in the last 24 hours.')

# Hand Spark real JSON text; a raw dict would be read via str(dict), which is not valid JSON
# once it contains None/True/False
df = spark.read.json(sc.parallelize([json.dumps(payload)]))

# Flatten results -> tables -> rows. The SQL explode() inside selectExpr needs no extra import.
df_final = (
    df.selectExpr("explode(results) AS resultsExploded")
      .selectExpr("explode(resultsExploded.tables) AS tablesExploded")
      .selectExpr("explode(tablesExploded.rows) AS rowsExploded")
      .select('rowsExploded.*')
      .selectExpr("`[CapacityName]` AS CapacityName", "`[BillType]` AS BillType", "`[SumInteractive]` AS SumInteractive", "`[SumBackground]` AS SumBackground", "`[SumCUs]` AS SumCUs")
)

# Spark's sum() yields None when every value in a column is null, so treat that as zero
consumedCU = df_final.groupBy().sum("SumInteractive", "SumBackground").collect()[0]
totalConsumedCULast24Hours = math.ceil((consumedCU[0] or 0) + (consumedCU[1] or 0))
print(f'{totalConsumedCULast24Hours = }')

StatementMeta(, 11683cc5-d1f3-4332-b77e-69f05d57db27, 25, Finished, Available)

200 {"results":[{"tables":[{"rows":[{"[BillType]":"Billable","[CapacityName]":"fabricbamdemo","[SumInteractive]":4506.288,"[SumBackground]":5730.174,"[SumCUs]":10236.462}]}]}]}
totalConsumedCULast24Hours = 10237


##### Decide whether the capacity should be scaled and, if so, to which SKU. The decision is based on the current SKU, the consumption over the last 24 hours, and the utilization tolerance, and the result is kept within the defined min/max SKU

In [43]:
import math

utilizationTolerancePercentage = utilizationTolerance/100

print(f'{totalConsumedCULast24Hours = }')
print(f'{utilizationTolerancePercentage = }')
print(f'{currentSkuCUTotalPerDay = }')

# One tuple per SKU: (sku, daily CU, consumed CU, daily CU usable at the tolerance).
# Sorted by daily CU so "first match" is always the smallest SKU that fits,
# regardless of the dictionary's insertion order.
skuCandidates = [(sku, cu, totalConsumedCULast24Hours, math.ceil(cu*utilizationTolerancePercentage))
                 for sku, cu in sorted(skuDict.items(), key=lambda item: item[1])]

# Smallest SKU whose tolerance-adjusted capacity covers the consumption. When consumption
# exceeds even the largest SKU, fall back to the largest one; it is clamped to maxSku below.
skuNeeded = next((candidate for candidate in skuCandidates if candidate[3] >= totalConsumedCULast24Hours), skuCandidates[-1])
print(f'{skuNeeded = }')

# Compare SKUs by their numeric size (the number after the 'F') and clamp to [minSku, maxSku]
skuNeededSize = int(skuNeeded[0].replace('F', ''))
minSkuSize = int(minSku.replace('F', ''))
maxSkuSize = int(maxSku.replace('F', ''))

scaleSku = ''
if skuNeededSize < minSkuSize:
    scaleSku = minSku
elif skuNeededSize > maxSkuSize:
    scaleSku = maxSku
else:
    scaleSku = skuNeeded[0]

print(f'{scaleSku = }')

StatementMeta(, c6095258-f9a6-47f2-bea4-b51542e59e1f, 45, Finished, Available)

totalConsumedCULast24Hours = 294886
utilizationTolerancePercentage = 0.9
currentSkuCUTotalPerDay = 345600
skuNeeded = ('F4', 345600, 294886, 311040)
scaleSku = 'F4'


##### Perform the scaling operation within Azure

In [36]:
import requests, json

# One last validation to check if the sku to scale to is different than the current sku
if scaleSku != currentSku:
    print(f'\nScaling from {currentSku} to {scaleSku}')

    # Find the capacity's resource group by listing the Fabric capacities in the subscription
    response = requests.get(f'https://management.azure.com/subscriptions/{subscriptionId}/providers/Microsoft.Fabric/capacities?api-version=2022-07-01-preview', headers=header_azuremgmt)
    response.raise_for_status()
    responseList = response.json().get('value', [])
    capacityResourceIds = [resource.get('id') for resource in responseList if resource.get('name') == capacityName]
    if not capacityResourceIds:
        raise ValueError(f'Capacity "{capacityName}" was not found in subscription "{subscriptionId}".')
    # Resource id looks like /subscriptions/<id>/resourceGroups/<name>/providers/...; take <name>
    resourceGroupName = capacityResourceIds[0].split("resourceGroups/")[-1].split("/")[0]

    url = f'https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Fabric/capacities/{capacityName}?api-version=2022-07-01-preview'
    body = {"sku": {"name": f"{scaleSku}", "tier": "Fabric"}}

    # Use the Azure management token header; the bare name `header` is not defined in this notebook
    response = requests.patch(url, headers=header_azuremgmt, data=json.dumps(body))
    print(response, response.text)
    # Raise after printing so the error body is visible when the scale request is rejected
    response.raise_for_status()

else:
    print(f'The capacity {currentSku} is already under the utilization tolerance setting of {utilizationTolerance}%.')

StatementMeta(, c6095258-f9a6-47f2-bea4-b51542e59e1f, 38, Finished, Available)


Scaling from F16 to F4
<Response [200]> {"properties":{"provisioningState":"Succeeded","state":"Paused"},"id":"/subscriptions/18bc4af3-8099-4e70-86d8-eba06dd5bac8/resourceGroups/RG-FabricLoadTesting/providers/Microsoft.Fabric/capacities/fabricloadtesting","name":"fabricloadtesting","type":"Microsoft.Fabric/capacities","location":"East US 2","sku":{"name":"F4","tier":"Fabric"}}
