In [0]:
!pip install azure-storage-blob
!pip install adal

In [0]:
import json
import time
from urllib.parse import quote

import adal
import requests
from azure.storage.blob import BlobServiceClient

In [0]:
# --- Non-secret configuration for this notebook ---

# Region segment used in the Video Indexer API URLs.
location = "westeurope"

# Azure Resource Manager coordinates of the Video Indexer account.
resource_group_name = "rg_uc_00000_dev"
ai_video_indexer_account_name = "resuc00000vi1dev"

# Blob storage: one container holds the source videos, the other receives
# the generated transcripts.
storage_account_name = "resuc00000sa2dev"
video_container_name = "sharepoint-videos"
transcript_container_name = "sharepoint-video-transcripts"

In [0]:
def _read_secret(key):
    """Return the secret stored under `key` in the "key-vault-secrets" scope."""
    return dbutils.secrets.get(scope="key-vault-secrets", key=key)


# Service principal credentials and the tenant/subscription they belong to.
# NOTE(review): the two key prefixes differ ("tf-aa1-" vs "tf-aap1-") — confirm
# that neither one is a typo.
client_id = _read_secret("tf-aa1-application-clientid")
client_secret = _read_secret("tf-aap1-application-password")
tenant_id = _read_secret("tenant-id")
subscription_id = _read_secret("subscription-id")

# Video Indexer account id used in the api.videoindexer.ai URLs.
ai_video_indexer_account_id = _read_secret("resuc00000vi1dev-account-id")

# Storage credentials: the account key for the blob client, and the SAS token
# appended to video URLs handed to Video Indexer.
storage_account_key = _read_secret("resuc00000sa2dev-access-key")
video_container_sas_token = _read_secret("resuc00000sa2dev-sharepoint-videos-sas-token")

In [0]:
# Obtain a Video Indexer account access token in two steps:
#   1. authenticate the service principal against Azure AD to get an Azure
#      Resource Manager (ARM) token;
#   2. call the account's generateAccessToken endpoint with that ARM token.
# On success `access_token` holds the Video Indexer token used by later cells.
# Any failure raises, so "Run All" stops here instead of continuing with a
# missing token or with the ARM token bound to `access_token`.

# Upper bound (seconds) for the HTTP call so a hung connection cannot stall the notebook.
TOKEN_REQUEST_TIMEOUT_SECONDS = 60

# Request URL
url = f"https://management.azure.com/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.VideoIndexer/accounts/{ai_video_indexer_account_name}/generateAccessToken?api-version=2024-01-01"

# Request body
data = {
    "permissionType": "Contributor",
    "scope": "Account",
}

# Authentication
context = adal.AuthenticationContext(f"https://login.microsoftonline.com/{tenant_id}")
token_response = context.acquire_token_with_client_credentials(resource="https://management.azure.com/", client_id=client_id, client_secret=client_secret)

# The ARM token gets its own name: it must never be mistaken for the
# Video Indexer token that later cells send to api.videoindexer.ai.
arm_access_token = token_response.get('accessToken')
if not arm_access_token:
    raise RuntimeError(f"Failed to authenticate with Azure AD. Error: {token_response.get('error_description')}")

headers = {
    'Authorization': f'Bearer {arm_access_token}',
    'Content-Type': 'application/json'
}

# Make the POST request
response = requests.post(url, json=data, headers=headers, timeout=TOKEN_REQUEST_TIMEOUT_SECONDS)

# Check the response
if response.status_code != 200:
    raise RuntimeError(f"Failed to generate access token. Status code: {response.status_code}, Error: {response.text}")

access_token = response.json().get('accessToken')
if not access_token:
    raise RuntimeError("generateAccessToken succeeded but the response contained no 'accessToken'.")

# Deliberately do not print the token: notebook output is stored and shared.
print("Generated Video Indexer access token.")

In [0]:
# Submit every blob in the video container to Video Indexer by URL.
# Each blob is referenced through a SAS-authorised URL, so no video bytes pass
# through this notebook. Failures are reported per video and do not stop the loop.

# Upper bound (seconds) for each upload request.
UPLOAD_REQUEST_TIMEOUT_SECONDS = 60

blob_service_client = BlobServiceClient(account_url=f"https://{storage_account_name}.blob.core.windows.net", credential=storage_account_key)
container_client = blob_service_client.get_container_client(video_container_name)
blob_list = container_client.list_blobs()

# Keep the raw blob name next to its URL: the name is what we show in Video
# Indexer, while the URL needs the name percent-encoded (spaces, '#', '?', '%'
# would otherwise produce an invalid or different URL). '/' stays literal so
# virtual folder paths keep working.
blob_names = [blob.name for blob in blob_list]
video_urls = [
    f"https://{storage_account_name}.blob.core.windows.net/{video_container_name}/{quote(blob_name, safe='/')}"
    for blob_name in blob_names
]

# Accept a SAS token stored with or without its leading '?'.
sas_query = video_container_sas_token.lstrip('?')

# Loop-invariant request pieces.
upload_endpoint = f'https://api.videoindexer.ai/{location}/Accounts/{ai_video_indexer_account_id}/Videos'
headers = {
    'Content-Type': 'multipart/form-data'
}

for blob_name, video_url in zip(blob_names, video_urls):
    # Use the last path segment of the blob name as the display name.
    blob_file_identifier = blob_name.split('/')[-1]
    params = {
        'accessToken': access_token,
        'name': blob_file_identifier,
        'privacy': 'Private',
        'videoUrl': f"{video_url}?{sas_query}",
        'language': 'multi'
    }
    response = requests.post(upload_endpoint, params=params, headers=headers, timeout=UPLOAD_REQUEST_TIMEOUT_SECONDS)
    if response.status_code == 200:
        video_id = response.json().get('id')
        print(f"Video {video_url} uploaded and indexed. Video ID: {video_id}")
    else:
        print(f"Failed to upload and index video {video_url}. Status code: {response.status_code}, Error: {response.text}")

In [0]:
# Wait until no video in the account is still being indexed, then download each
# video's captions as plain text and store them in the transcript container.
# Note: the status query lists every video in the Video Indexer account, not
# only the ones submitted by this notebook run.

# Seconds to wait between status polls (also used as back-off after a failed poll).
POLL_INTERVAL_SECONDS = 30
# Give up after this many failed polls in a row (e.g. an expired access token)
# instead of retrying forever.
MAX_CONSECUTIVE_POLL_FAILURES = 5
# Videos requested per page when listing the account's videos.
VIDEO_LIST_PAGE_SIZE = 100
# Upper bound (seconds) for each HTTP request in this cell.
STATUS_REQUEST_TIMEOUT_SECONDS = 60
# States in which a video has not finished indexing yet. A freshly submitted
# video is 'Uploaded' before it becomes 'Processing'.
PENDING_STATES = {'Uploaded', 'Processing'}

videos_endpoint = f'https://api.videoindexer.ai/{location}/Accounts/{ai_video_indexer_account_id}/Videos'


def _list_all_videos():
    """List every video in the account, following result pages.

    Returns:
        (videos, None) on success, where `videos` is the concatenated
        'results' of all pages; (None, response) if any page request did
        not return HTTP 200, where `response` is the failing response.
    """
    collected = []
    skip = 0
    while True:
        page_response = requests.get(
            videos_endpoint,
            params={'accessToken': access_token, 'pageSize': VIDEO_LIST_PAGE_SIZE, 'skip': skip},
            timeout=STATUS_REQUEST_TIMEOUT_SECONDS,
        )
        if page_response.status_code != 200:
            return None, page_response
        payload = page_response.json()
        page = payload.get('results') or []
        collected.extend(page)
        # Stop when the service reports the last page. A missing 'nextPage'
        # or an empty page is treated as "done" so the loop always terminates.
        next_page = payload.get('nextPage') or {}
        if not page or next_page.get('done', True):
            return collected, None
        skip += len(page)


container_client = blob_service_client.get_container_client(transcript_container_name)

# Poll until nothing is pending.
consecutive_failures = 0
while True:
    videos, failed_response = _list_all_videos()
    if videos is None:
        consecutive_failures += 1
        print(f"Failed to get video status. Status code: {failed_response.status_code}, Error: {failed_response.text}")
        if consecutive_failures >= MAX_CONSECUTIVE_POLL_FAILURES:
            raise RuntimeError(f"Giving up after {consecutive_failures} consecutive failed status requests.")
        # Back off before retrying rather than hammering the API in a tight loop.
        time.sleep(POLL_INTERVAL_SECONDS)
        continue

    consecutive_failures = 0
    videos_in_progress = [video for video in videos if video.get('state') in PENDING_STATES]
    if not videos_in_progress:
        break
    print(f"{len(videos_in_progress)} videos still in progress. Waiting for {POLL_INTERVAL_SECONDS} seconds...")
    time.sleep(POLL_INTERVAL_SECONDS)

# Get the captions of the indexed videos in the format of txt
for video in videos:
    video_id = video['id']
    video_name = video['name']
    response = requests.get(
        f'{videos_endpoint}/{video_id}/Captions',
        params={'format': 'txt', 'accessToken': access_token},
        timeout=STATUS_REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code == 200:
        # Store the raw bytes exactly as returned. Going through response.text
        # would decode with whatever encoding requests infers from the headers
        # and re-encode on upload, which can corrupt non-ASCII captions.
        captions = response.content

        # Save the captions to the container
        blob_name = f"{video_name}.txt"
        blob_client = container_client.get_blob_client(blob_name)
        blob_client.upload_blob(captions, overwrite=True)

        print(f"Transcript of {video_id} saved as {blob_name}")
    else:
        print(f"Failed to get transcript of {video_id}. Status code: {response.status_code}, Error: {response.text}")