## Get subscription

In [None]:
import requests
import json
import pandas as pd
from datetime import datetime
from azure.mgmt.subscription import SubscriptionClient
from azure.identity import DefaultAzureCredential

# Build a credential and a subscription client that uses it.
credential = DefaultAzureCredential()
sub_client = SubscriptionClient(credential)
# Take the first subscription the client lists (None when the listing is empty).
subscription = next(sub_client.subscriptions.list(), None)
if not subscription:
    # No subscription was returned; point the user at the az CLI sign-in.
    raise Exception("Authenticate using the az cli")
# Every management URL built in later cells is scoped to this subscription id.
subscriptionId = subscription.subscription_id
print(f"Using subscription {subscriptionId}")

## Getting regions

In [None]:
# Request a token for the management endpoint and wrap it in the
# Authorization header that the REST calls in this notebook pass to requests.
# NOTE(review): the token is fetched once here and never refreshed - confirm
# it outlives the notebook session.
token = credential.get_token('https://management.azure.com/.default')
headers = {'Authorization': 'Bearer ' + token.token}

# Region names selected by the geography filter further down in this cell.
unique_region_names = set()

def get_regions():
    """Return the location records listed for the current subscription.

    Calls the management ``locations`` endpoint with the module-level
    ``headers`` and returns the ``value`` array of the JSON payload.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status
            (for example an expired token), instead of failing later with
            an opaque ``KeyError`` on the error payload.
        requests.Timeout: if the service does not answer within 30 seconds.
    """
    locations_request = f"https://management.azure.com/subscriptions/{subscriptionId}/locations?api-version=2021-04-01"
    # A timeout keeps a stalled connection from hanging the notebook cell.
    response = requests.get(locations_request, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()["value"]

#get_regions()

# Collect every geography group that appears in the location list, and the
# names of the regions whose group is 'US'.
geographyGroups = set()
for item in get_regions():
    metadata = item['metadata']
    geographyGroup = metadata.get('geographyGroup')
    if not geographyGroup:
        # Locations without a geography group contribute to neither set.
        continue
    geographyGroups.add(geographyGroup)
    if geographyGroup == 'US':
        unique_region_names.add(item["name"])

regions_list = list(unique_region_names)

print(regions_list)
print(geographyGroups)

## Helper to parallelize calls per region

Most of the API calls we'll need to make going forward are per region. The following helper lets us run those queries in parallel, one call per region.

In [None]:
def parallel_map(fn, *iterables, executor=None, **kwargs):
    """Run ``fn`` on every item of ``iterables`` in parallel, yielding results.

    Each item of each iterable is submitted as ``fn(item)``; results are
    yielded in completion order, not submission order. A tqdm progress bar
    is shown when tqdm is installed.

    Does not support timeout or chunksize as executor.submit is used
    internally.

    Args:
        fn: Callable taking a single item.
        *iterables: Iterables whose items are all submitted (concatenated,
            not zipped).
        executor: Optional executor to submit to. It is used as a context
            manager and is therefore shut down when iteration finishes.
            When omitted, a thread pool sized to the number of items
            (capped at 32 workers) is created.
        **kwargs: Passed to tqdm; ignored when tqdm is not installed.

    Yields:
        The return value of each ``fn(item)`` call. An exception raised by
        ``fn`` is re-raised when its result is reached.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    try:
        from tqdm import tqdm
    except ImportError:
        # The progress bar is cosmetic; keep working without it.
        tqdm = None

    items = [item for iterable in iterables for item in iterable]
    if executor is None:
        if not items:
            # ThreadPoolExecutor rejects max_workers=0, and there is no work.
            return
        # Size the pool from the actual workload rather than a global list.
        executor = ThreadPoolExecutor(max_workers=min(32, len(items)))

    with executor as ex:
        futures = [ex.submit(fn, item) for item in items]
        completed = as_completed(futures)
        if tqdm is not None:
            completed = tqdm(completed, total=len(futures), **kwargs)
        for future in completed:
            yield future.result()

## Find regions where Azure AI is supported

In [None]:
# Regions whose models endpoint answered 200, and those that answered anything else.
regions_successful, regions_failed = [], []

def get_models_response(region):
    """Request the Cognitive Services model list for ``region``.

    Returns a ``(region, response)`` tuple holding the raw ``requests``
    response, so the caller can inspect the status code itself.
    """
    url = (
        f"https://management.azure.com/subscriptions/{subscriptionId}"
        f"/providers/Microsoft.CognitiveServices/locations/{region}"
        "/models?api-version=2023-05-01"
    )
    response = requests.get(url, headers=headers)
    return region, response

# Query every candidate region in parallel and sort it into one of the two
# lists according to whether the models endpoint answered 200.
for region, result in parallel_map(get_models_response, regions_list):
    bucket = regions_successful if result.status_code == 200 else regions_failed
    bucket.append(region)

print("Potential Azure OpenAI regions based on control plane response:", regions_successful, sep="\n")

print("Azure OpenAI Not Supported:", regions_failed, sep="\n")

In [None]:
def get_models(region):
    """Return the model records the control plane lists for ``region``.

    Args:
        region: Region name, as accepted by ``get_models_response``.

    Returns:
        The ``value`` array of the models response payload.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status,
            rather than failing with a ``KeyError`` on the error payload.
    """
    _, response = get_models_response(region)
    response.raise_for_status()
    return response.json()['value']

#get_models('eastus2')

In [None]:
# Maps region name -> list of {"Model Name", "Version", "SKU Name"} records
# for the models that can be deployed with the Standard SKU in that region.
region_model_data = {}

# Model names that are never reported, even when a region lists them.
excluded_models = [
    'text-similarity-ada-001',
    'text-babbage-001',
    'text-curie-001',
    'text-similarity-curie-001',
    'text-davinci-002',
    'text-davinci-003',
    'text-davinci-fine-tune-002',
    'code-davinci-002',
    'code-davinci-fine-tune-002',
    'text-ada-001',
    'text-search-ada-doc-001',
    'text-search-ada-query-001',
    'code-search-ada-code-001',
    'code-search-ada-text-001',
    'text-similarity-babbage-001',
    'text-search-babbage-doc-001',
    'text-search-babbage-query-001',
    'code-search-babbage-code-001',
    'code-search-babbage-text-001',
    'text-search-curie-doc-001',
    'text-search-curie-query-001',
    'text-davinci-001',
    'text-similarity-davinci-001',
    'text-search-davinci-doc-001',
    'text-search-davinci-query-001',
    'code-cushman-001',
]

for region in regions_successful:
    data_test = []

    for item in get_models(region):
        model = item["model"]
        if model["capabilities"].get("scaleType") == "Manual":  # skip legacy models
            continue
        model_name = model["name"]
        if model_name in excluded_models:  # explicitly excluded
            continue
        # This example only targets Standard deployments. Look at every SKU
        # the model offers: checking only the last one would drop models that
        # list Standard alongside other SKUs. A missing/None SKU list counts
        # as "no SKUs".
        skus = model.get("skus") or []
        if any(sku["name"] == "Standard" for sku in skus):
            data_test.append({"Model Name": model_name, "Version": model["version"], "SKU Name": "Standard"})

    region_model_data[region] = data_test  # store the model data under corresponding region name

# Print result
for region, model_data in region_model_data.items():
    print(f'{region}: {model_data}')


In [None]:
# Flatten {region: [model records]} into one row per (region, model record).
rows = []
for region, models in region_model_data.items():
    rows.extend({**model, 'Region': region} for model in models)

pd.set_option('display.max_rows', None)

df = pd.DataFrame(rows)
df = df[['Region', 'Model Name', 'Version', 'SKU Name']]

# Marker column: after pivoting, a cell is True where the region offers that
# model/version and False (the fill value) where it does not.
df['Exist'] = True
pivot_df = df.pivot_table(
    index='Region',
    columns=['Model Name', 'Version'],
    values='Exist',
    aggfunc='any',
    fill_value=False,
).reset_index()

pivot_df

In [None]:
def infer_type(usage_name_localized_value):
    """Classify a usage record by a phrase found in its localized name.

    Returns 'tokens-per-minute', 'requests-per-minute' or 'enqueued-tokens'
    for the first matching phrase (checked in that order, case-sensitive),
    or None when no phrase is present.
    """
    phrase_to_type = (
        ('Tokens Per Minute', 'tokens-per-minute'),
        ('Requests Per Minute', 'requests-per-minute'),
        ('Enqueued tokens', 'enqueued-tokens'),
    )
    for phrase, usage_type in phrase_to_type:
        if phrase in usage_name_localized_value:
            return usage_type
    return None

import re

def extract_model_info(usage):
    '''Split a usage name such as OpenAI.Standard.gpt-35-turbo into its parts.

    Returns a dict with 'vendor', 'SKU', 'name' (any trailing '-finetune'
    removed) and 'workload' ('finetune' or 'inference'). Returns None when
    the localized name mentions neither tokens nor requests, or when the
    usage name does not have the vendor.SKU.name shape.'''
    usage_name = usage['name']
    label = usage_name['localizedValue'].lower()
    if not ('tokens' in label or 'requests' in label):
        # Not a token/request quota record.
        return None

    value = usage_name['value']
    parsed = re.match(r'(?P<vendor>\w+)\.(?P<SKU>\w+)\.(?P<name>[\w-]+?)(?:-finetune)?$', value)
    if parsed is None:
        return None

    info = parsed.groupdict()
    # The suffix is excluded from 'name', so record it as the workload instead.
    info['workload'] = 'finetune' if value.endswith('-finetune') else 'inference'
    return info

def extract_usage_details(region, usage):
    '''Build one flat record describing a usage entry for the given region.

    The record carries the raw and localized names, the inferred type, the
    parsed model fields (None when the name could not be parsed) and the
    current / remaining / limit figures. Figures are multiplied by 1000 when
    the localized name contains "thousands".'''
    model_info = extract_model_info(usage)
    current, limit = usage['currentValue'], usage['limit']
    name = usage['name']
    if 'thousands' in name['localizedValue']:
        current, limit = current * 1000, limit * 1000
    remaining = limit - current
    # An empty mapping makes every model field fall back to None.
    parsed = model_info if model_info else {}
    return {
        'region': region,
        'value': name['value'],
        'localizedValue': name['localizedValue'],
        'type': infer_type(name['localizedValue']),
        'modelName': parsed.get('name'),
        'vendor': parsed.get('vendor'),
        'SKU': parsed.get('SKU'),
        'workload': parsed.get('workload'),
        'current': current,
        'remaining': remaining,
        'limit': limit,
        'unit': usage['unit'],
    }

In [None]:
# NOTE(review): nothing in the visible cells fills or reads this dict - confirm it is still needed.
region_model_quota_data = {}

# List of models to exclude
# NOTE(review): many entries appear twice (e.g. "Code-Cushman-001", "Babbage"),
# and the visible cells never read this list - confirm where it is used
# before deduplicating or removing it.
exclude_models = ["Code-Cushman-001", "code-cushman-fine-tune-002", "Code-Search-Ada-Code-001", "Code-Search-Ada-Text-001", "Text-Ada-001", "Text-Search-Ada-Doc-001", "Text-Search-Ada-Query-001", "Text-Similarity-Ada-001", "Babbage", "Code-Search-Babbage-Code-001", "Code-Search-Babbage-Text-001", "Text-Babbage-001", "Text-Search-Babbage-Doc-001", "Text-Search-Babbage-Query-001", "Text-Similarity-Babbage-001", "Curie", "Text-Curie-001", "Text-Search-Curie-Doc-001", "Text-Search-Curie-Query-001", "Text-Similarity-Curie-001", "Code-Davinci-002", "Code-Davinci-Fine-Tune-002", "Davinci", "Text-Davinci-001", "Text-Davinci-002", "Text-Davinci-003", "Text-Davinci-Fine-Tune-002", "Text-Search-Davinci-Doc-001", "Text-Search-Davinci-Query-001", "Text-Similarity-Davinci-001", "Code-Cushman-001", "code-cushman-fine-tune-002",  "Ada", "Code-Search-Ada-Code-001", "Code-Search-Ada-Text-001", "Text-Ada-001", "Text-Search-Ada-Doc-001", "Text-Search-Ada-Query-001", "Text-Similarity-Ada-001", "Babbage", "Code-Search-Babbage-Code-001", "Code-Search-Babbage-Text-001", "Text-Babbage-001", "Text-Search-Babbage-Doc-001", "Text-Search-Babbage-Query-001", "Text-Similarity-Babbage-001", "Curie", "Text-Curie-001", "Text-Search-Curie-Doc-001", "Text-Search-Curie-Query-001", "Text-Similarity-Curie-001", "Code-Davinci-Fine-Tune-002", "Davinci", "Text-Davinci-001", "Text-Davinci-002", "Text-Davinci-003", "Text-Davinci-Fine-Tune-002", "Text-Search-Davinci-Doc-001", "Text-Search-Davinci-Query-001", "Text-Similarity-Davinci-001"]

def get_usages(region):
    """Return the Cognitive Services usage records for ``region``.

    Args:
        region: Region name placed in the usages URL.

    Returns:
        The ``value`` array of the usages response payload.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status,
            instead of failing with a ``KeyError`` on the error payload.
        requests.Timeout: if the service does not answer within 30 seconds.
    """
    url = f"https://management.azure.com/subscriptions/{subscriptionId}/providers/Microsoft.CognitiveServices/locations/{region}/usages?api-version=2023-05-01"
    # A timeout keeps a stalled connection from hanging the notebook cell.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()['value']

# Pull the usage records for one sample region and flatten each into a row.
region = 'eastus2'
usages = get_usages(region)
token_limit_details = [extract_usage_details(region, entry) for entry in usages]

# Show every row and widen the columns so long localized names stay readable.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 100)

## Convert the list to a DataFrame
df = pd.DataFrame(token_limit_details)

# Display only the rows whose usage name could be parsed into a model.
# To look at a single model instead: df[df['modelName'] == 'gpt-4o']
df[df['modelName'].notna()]

In [None]:
# Preview the first two raw usage records.
usages[:2]

In [None]:
from functools import reduce
def usage_name_reducer(j, u):
    """Reduce step: store usage ``u`` in dict ``j`` under its name value.

    Mutates ``j`` in place and returns that same dict so it can be used as
    the accumulator of ``functools.reduce``.
    """
    key = u['name']['value']
    j[key] = u
    return j
# Index the usage records by their name value; when two records share a name
# the later one wins.
usages_by_name = {u['name']['value']: u for u in usages}

In [None]:
# Fetch the model records for a single region (hard-coded to 'eastus2').
models = get_models('eastus2')

In [None]:
def flatten_models_sku(models):
    """Yield one ``(model, sku)`` pair for every SKU of every model.

    A model whose SKU list is empty or None still yields a single pair,
    with None in the SKU position.
    """
    for entry in models:
        sku_list = entry['model']['skus']
        if not sku_list:
            yield (entry, None)
            continue
        for sku in sku_list:
            yield (entry, sku)
# Materialize the generator into a list, then preview the first two (model, sku) pairs.
models_flattened = list(flatten_models_sku(models))
models_flattened[:2]

In [None]:
def join_models_usages(models_flattened, usages_by_name):
    """Attach the matching usage record to every ``(model, sku)`` pair.

    Args:
        models_flattened: Iterable of ``(model, sku)`` pairs; ``sku`` may be
            None.
        usages_by_name: Mapping from usage name to usage record.

    Yields:
        ``(model, sku, usage)`` triples. ``usage`` is None when the SKU is
        None, when the SKU has no ``usageName``, or when no usage record
        exists under that name, so one unmatched SKU no longer aborts the
        whole join with a ``KeyError``.
    """
    for (model, sku) in models_flattened:
        usage = None
        if sku:
            usage = usages_by_name.get(sku.get('usageName'))
        yield (model, sku, usage)

# Materialize the joined (model, sku, usage) triples into a list, then preview the first two.
joined_model_sku_usages = list(join_models_usages(models_flattened, usages_by_name))
joined_model_sku_usages[:2]

In [None]:
def criteria(model, sku, usage, *, model_name='gpt-35-turbo', min_remaining=10,
             sku_names=('Standard', 'GlobalStandard')):
    """Tell whether a ``(model, sku, usage)`` triple is a usable deployment target.

    Args:
        model: Model record; its name is read from ``model['model']['name']``.
        sku: SKU record; its name is read from ``sku['name']``.
        usage: Usage record with ``limit`` and ``currentValue``, or None.
        model_name: Model name that must match exactly.
        min_remaining: The remaining quota (``limit - currentValue``) must be
            strictly greater than this value.
        sku_names: SKU names that are acceptable.

    Returns:
        True when the model name matches, the remaining quota exceeds
        ``min_remaining`` and the SKU name is one of ``sku_names``; False
        otherwise, including when ``usage`` is missing.
    """
    if not usage:
        # Without a usage record the remaining quota cannot be computed.
        return False
    remaining = usage['limit'] - usage['currentValue']
    return (
        model['model']['name'] == model_name
        and remaining > min_remaining
        and sku['name'] in sku_names
    )

# Keep only the (model, sku, usage) triples that satisfy criteria().
[triple for triple in joined_model_sku_usages if criteria(*triple)]