In [44]:
import requests
import urllib.parse
from datetime import datetime, timedelta
from azure.storage.blob import generate_container_sas
from azure.identity import ManagedIdentityCredential, AzureCliCredential, ChainedTokenCredential
from Utilities.envVars import *
from Utilities.cogSearch import performCogSearch
import time

In [23]:
# Import Python libraries
from Utilities.envVars import *
from openai import OpenAI, AzureOpenAI
from langchain.prompts import PromptTemplate

In [24]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import AzureChatOpenAI
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain

# Embedding backend selector handed to the embedding and indexing helpers.
embeddingModelType = "azureopenai"

# Chat model shared by the summarization and question-answering chains.
llm = AzureChatOpenAI(
    api_key=OpenAiKey,
    azure_endpoint=OpenAiEndPoint,
    azure_deployment=OpenAiChat16k,
    api_version=OpenAiVersion,
    max_tokens=1000,
    temperature=0.1,
)

##### Helper function to create the SAS token. The SAS token is required to access the video file uploaded to the storage account, so that the file can be sent to Video Indexer for processing.

In [25]:
# Build a SAS-signed URL for a blob that lives in the given container
def getSasUrlForContainer(blobAccount, blobKey, blobContainer, uri: str):
    """Return `uri` with a container-level SAS token appended as its query string.

    The token is generated for `blobContainer` in storage account `blobAccount`,
    signed with `blobKey`, carries the "rw" permission string and expires
    31 days after the current UTC time.
    """
    # NOTE(review): "rw" grants write as well as read - confirm write access is really needed.
    expiresOn = datetime.utcnow() + timedelta(days=31)
    sasToken = generate_container_sas(blobAccount, blobContainer, blobKey, permission="rw", expiry=expiresOn)
    return '?'.join((uri, sasToken))

In [26]:
# Build a SAS-signed URL using the Video Indexer storage settings
def getSasUrl(uri: str):
    """Return `uri` with a container-level SAS token appended as its query string.

    Uses the module-level `VideoIndexerBlobAccount`, `VideoIndexerBlobKey` and
    `VideoIndexerBlobContainer` settings. The token carries the "rw" permission
    string and expires 31 days after the current UTC time.
    """
    # NOTE(review): "rw" grants write as well as read - confirm write access is really needed.
    expiresOn = datetime.utcnow() + timedelta(days=31)
    sasToken = generate_container_sas(
        VideoIndexerBlobAccount,
        VideoIndexerBlobContainer,
        VideoIndexerBlobKey,
        permission="rw",
        expiry=expiresOn,
    )
    return '?'.join((uri, sasToken))

##### Helper functions to index the video. This is a two-step process: first authenticate against the management plane to obtain an access token, then pass that access token in the upload URL call, which invokes the video upload functionality of Video Indexer.

In [27]:
def getAccessToken():
    """Obtain a Video Indexer access token through Azure Resource Manager.

    An ARM bearer token is acquired with a `ChainedTokenCredential` (managed
    identity first, then Azure CLI) and used to call the `generateAccessToken`
    operation on the resource identified by `VideoIndexerResourceId`, asking
    for "Contributor" permission at "Account" scope.

    Returns:
        The value of `accessToken` from the JSON response.

    Raises:
        requests.HTTPError: if the management endpoint answers with an error
            status (instead of failing later with a KeyError on 'accessToken').
    """
    resource_id = VideoIndexerResourceId

    # Get access token to AVAM using ManagedIdentity from Azure Function or AzureCLI when executing locally
    credential = ChainedTokenCredential(ManagedIdentityCredential(), AzureCliCredential())
    access_token = credential.get_token("https://management.azure.com")

    url = f"https://management.azure.com{resource_id}/generateAccessToken?api-version=2021-10-18-preview"
    headers = {
        "Authorization": f"Bearer {access_token.token}"
    }
    body = {
        "permissionType": "Contributor",
        "scope": "Account"
    }
    response = requests.post(url, headers=headers, json=body)
    # Surface authentication/authorization failures with their real HTTP status.
    response.raise_for_status()

    return response.json()['accessToken']

In [28]:
# Call Video Indexer to perform video processing
def startVideoIndexing(video_name: str, video_url: str, access_token: str):
    """Submit a video to Video Indexer for indexing and return the decoded JSON reply.

    Args:
        video_name: Display name for the video; any leading container/folder
            path is dropped (only the text after the last '/' is used).
        video_url: URL Video Indexer should download the video from.
        access_token: Video Indexer access token, sent as a query parameter.

    Returns:
        The JSON body of the upload response, as returned by `response.json()`.
    """
    # Local import: no cell in this notebook imports `logging` explicitly.
    import logging

    # Get Video Indexer configuration
    endpoint        = VideoIndexerEndPoint
    account_id      = VideoIndexerAccountId
    location        = VideoIndexerLocation

    # Both values are embedded in the query string, so percent-encode them.
    video_url = urllib.parse.quote(video_url)
    video_name = urllib.parse.quote(video_name.split('/')[-1])  # keep just the video name
    privacy = "Private" # Set visibility for the video [Private, Public]

    # A callbackUrl parameter can be appended to the query when a callback function is available.
    videos_endpoint = f"{endpoint}/{location}/Accounts/{account_id}/Videos"
    upload_video_url = f"{videos_endpoint}?accessToken={access_token}&name={video_name}&videoUrl={video_url}&privacy={privacy}&language=auto"

    # Never log the full URL: it carries the access token and the signed video URL.
    logging.info("Uploading video '%s' to %s", video_name, videos_endpoint)
    upload_result = requests.post(upload_video_url)

    return upload_result.json()

##### Video Indexer takes time to process a video, so we wait for processing to finish and then get the insights from Video Indexer. The key attribute required for the next step of the process is the video ID, which we use to retrieve the insights from Video Indexer.

#### Step 1 - The user uploaded the video and we will process that video and perform indexing

In [13]:
url = "https://dataaiopenaistor.blob.core.windows.net/videos/BryanMsft.mp4"
sasUrl = getSasUrlForContainer(OpenAiDocStorName, OpenAiDocStorKey, OpenAiVideoContainer, url)
# Show the signed URL with its signature masked: a saved notebook output must not
# contain a usable SAS credential.
print(sasUrl.split('sig=')[0] + 'sig=<redacted>')
videoName = "BryanMsft"
accessToken = getAccessToken()
# Call Video Indexer service with Shared Access Signature to index the video
videoResult = startVideoIndexing(videoName, sasUrl, accessToken)
videoResult

https://dataaiopenaistor.blob.core.windows.net/videos/BryanMsft.mp4?se=2024-02-22T03%3A00%3A49Z&sp=rw&sv=2022-11-02&sr=c&sig=<redacted>


{'accountId': 'dc8a63da-2623-4881-85de-15016f35fa99',
 'id': 'b25e60b852',
 'partition': None,
 'externalId': None,
 'metadata': None,
 'name': 'BryanMsft',
 'description': None,
 'created': '2024-01-22T03:00:56.7933333+00:00',
 'lastModified': '2024-01-22T03:00:56.7933333+00:00',
 'lastIndexed': '2024-01-22T03:00:56.7933333+00:00',
 'privacyMode': 'Private',
 'userName': 'Ashish Talati',
 'isOwned': True,
 'isBase': True,
 'hasSourceVideoFile': True,
 'state': 'Uploaded',
 'moderationState': 'OK',
 'reviewState': 'None',
 'processingProgress': '1%',
 'durationInSeconds': 0,
 'thumbnailVideoId': 'b25e60b852',
 'thumbnailId': '00000000-0000-0000-0000-000000000000',
 'searchMatches': [],
 'indexingPreset': 'Default',
 'streamingPreset': 'Default',
 'sourceLanguage': None,
 'personModelId': '00000000-0000-0000-0000-000000000000'}

In [15]:
# videoResult = {'accountId': 'dc8a63da-2623-4881-85de-15016f35fa99',
#  'id': '681f932fe2',
#  'partition': None,
#  'externalId': None,
#  'metadata': None,
#  'name': 'dataaimediasa',
#  'description': None,
#  'created': '2024-01-20T17:21:23.6733333+00:00',
#  'lastModified': '2024-01-20T17:21:23.6733333+00:00',
#  'lastIndexed': '2024-01-20T17:21:23.6733333+00:00',
#  'privacyMode': 'Private',
#  'userName': 'Ashish Talati',
#  'isOwned': True,
#  'isBase': True,
#  'hasSourceVideoFile': True,
#  'state': 'Uploaded',
#  'moderationState': 'OK',
#  'reviewState': 'None',
#  'processingProgress': '1%',
#  'durationInSeconds': 0,
#  'thumbnailVideoId': '681f932fe2',
#  'thumbnailId': '00000000-0000-0000-0000-000000000000',
#  'searchMatches': [],
#  'indexingPreset': 'Default',
#  'streamingPreset': 'Default',
#  'sourceLanguage': None,
#  'personModelId': '00000000-0000-0000-0000-000000000000'}

In [29]:
# Fetch the current index document of a video from Video Indexer
def getIndexStatus(video_id: str, video_access_token: str):
    """Return the decoded JSON body of the video's `Index` endpoint.

    The request URL is built from the module-level Video Indexer endpoint,
    location and account id, with the access token passed as a query parameter.
    """
    index_url = (
        f"{VideoIndexerEndPoint}/{VideoIndexerLocation}/Accounts/{VideoIndexerAccountId}"
        f"/Videos/{video_id}/Index?accessToken={video_access_token}"
    )
    return requests.get(index_url).json()

In [36]:
import time

#videoId = videoResult['id']
# NOTE(review): the id is pinned to a previously indexed video rather than taken from
# videoResult - confirm this is intended before a clean Restart & Run All.
videoId = 'bedc54f751'
accessToken = getAccessToken()
while True:
    indexStatus = getIndexStatus(videoId, accessToken)
    state = indexStatus.get('state')
    if state in ('Uploaded', 'Processing'):
        # Still being indexed: wait and poll again.
        time.sleep(20)
        print('Processing...')
        continue
    if state != 'Processed':
        # Anything else (a failed state, or an error payload without 'state') must not be
        # reported as a successful completion.
        raise RuntimeError(
            f"Indexing of video {videoId} did not complete "
            f"(state={state!r}, ErrorType={indexStatus.get('ErrorType')!r})"
        )
    print('Processing Completed')
    break

Processing Completed


In [30]:
# Ask Video Indexer to generate the prompt content for a video
def createPromptContent(video_id: str, video_access_token: str):
    """POST to the video's `PromptContent` endpoint and return the raw response object.

    The request URL is built from the module-level Video Indexer endpoint,
    location and account id, with the access token passed as a query parameter.
    The response is returned as-is; its status is not checked here.
    """
    prompt_content_url = (
        f"{VideoIndexerEndPoint}/{VideoIndexerLocation}/Accounts/{VideoIndexerAccountId}"
        f"/Videos/{video_id}/PromptContent?accessToken={video_access_token}"
    )
    return requests.post(prompt_content_url)

In [31]:
# Retrieve the generated prompt content of a video from Video Indexer
def getPromptContent(video_id: str, video_access_token: str):
    """GET the video's `PromptContent` endpoint and return the decoded JSON body.

    The request URL is built from the module-level Video Indexer endpoint,
    location and account id, with the access token passed as a query parameter.
    The body is returned whatever the HTTP status; callers in this notebook
    look for an "ErrorType" key to decide whether to keep polling.
    """
    prompt_content_url = (
        f"{VideoIndexerEndPoint}/{VideoIndexerLocation}/Accounts/{VideoIndexerAccountId}"
        f"/Videos/{video_id}/PromptContent?accessToken={video_access_token}"
    )
    return requests.get(prompt_content_url).json()

#### STEP 2 - Create Prompt Content so that we can use that for our RAG pattern

In [39]:
# Request generation of the prompt content for the video identified by videoId.
# The response object is kept in promptContentResp but is not inspected in this cell.
accessToken = getAccessToken()
promptContentResp = createPromptContent(videoId, accessToken)

In [40]:
# Poll every 20 seconds until the prompt content no longer reports an error payload.
accessToken = getAccessToken()
promptContent = getPromptContent(videoId, accessToken)
while "ErrorType" in promptContent:
    time.sleep(20)
    print('Processing...')
    promptContent = getPromptContent(videoId, accessToken)
print(promptContent)
print('Processing Completed')


{'partition': None, 'name': 'ChatGPT OpenAI powering your apps OpenAI Studio in Microsoft Azure', 'sections': [{'id': 0, 'start': '0:00:01.3013', 'end': '0:00:19.052367', 'content': "[Video title] ChatGPT OpenAI powering your apps OpenAI Studio in Microsoft Azure\n[Tags] Beginning\n[Detected objects] book, chair, dining table, laptop\n[Visual labels] human face, person, clothing, laptop, computer, display, software\n[OCR] OpenAI, GPT-3, Codex, DALL-E 2, Cognitive Services, Azure OpenAl Studio - Preview, Models, Try it out, Create customized model, Management, Training data, Validation data, Advanced options, Review and train, Cancel, Base model, Base model type, Next, O Advanced options, File Management, ada, babbage, curie, Nex, Let's start with your content, Back, Done, grass field with trees, Transformer Decoders, // . a. function. that . computes . the . sum . of . squares. of . numbers . in . an . array, Advanced code generation, mfu tvn > 1<, uhwxuq vxp, Playground, Deployments, 

#### STEP 3 - Summarize each section of the video iteratively and create the final summary

In [41]:
sections = promptContent['sections']

In [32]:
def getSummary(sections):
    """Summarize a video section by section, feeding each summary into the next prompt.

    For every section a prompt is built from a fixed preamble, the summary of
    the previous section (when there is one), a "first/next/last part" marker,
    the `{text}` placeholder and the final instruction. The prompt is used as
    both the map and the combine prompt of a "map_reduce" summarize chain that
    is run on the section's content.

    Args:
        sections: Sequence of dicts, each with a "content" entry.

    Returns:
        A tuple `(summary, combineSummary)`: the summary of the last section and
        the concatenation of all section summaries. Both are '' for no sections.
    """
    preText = (
        'You are given a video including its visual, audio and text insights: '
        '[Transcript] is the text that is spoken in the video '
        '[OCR] is the visual text in the video, '
        '[Known People] are the people that appear in the video, '
        '[Audio effects] are the sounds in the video, '
        '[Detected objects] are the objects that appear in the video, '
        '[Visual labels] are the objects that appear in the video. '
        'Use these insights as part of the video\'s content, but don\'t use their initials (written in []) as-is'
    )
    postText = "Generate informative and detailed summary:"
    chainType = "map_reduce"
    lastIndex = len(sections) - 1

    summary = ''
    combineSummary = ''
    for i, section in enumerate(sections):
        if len(summary) > 0:
            # The previous answer is model output spliced into a format-style template:
            # escape its braces so it cannot introduce or break template variables.
            previousIdeas = summary.replace('{', '{{').replace('}', '}}')
            preTextExtra = f'In the previous parts of the video the following ideas were discussed : \n {previousIdeas}\n'
        else:
            preTextExtra = ''

        # "last" wins over "first" when there is a single section.
        if i == lastIndex:
            partOfVideo = 'Given the last part of the video:'
        elif i == 0:
            partOfVideo = 'Given the first part of the video:'
        else:
            partOfVideo = 'Given the next part of the video:'

        summaryText = (
            f'{preText} \n {preTextExtra}'
            f'\n {partOfVideo}'
            '\n        {text}\n        '
            f'\n {postText}'
        )
        customPrompt = PromptTemplate(template=summaryText, input_variables=["text"])
        docs = [Document(page_content=section["content"], metadata={"id": '', "source": ''})]
        summaryChain = load_summarize_chain(llm, chain_type=chainType, map_prompt=customPrompt, combine_prompt=customPrompt)
        sectionSummary = summaryChain({"input_documents": docs}, return_only_outputs=True)
        sectionAnswer = sectionSummary['output_text']
        combineSummary += sectionAnswer
        summary = sectionAnswer
    return summary, combineSummary

#### STEP 4 - For each section create indexed document in cognitive search with embedded vectors

In [67]:
# One record per section; every record carries the combined summary of the whole video.
videoInsights = []
summary, combineSummary = getSummary(sections)
for section in sections:
    videoInsights.append(dict(
        sectionId=section['id'],
        sectionStart=section['start'],
        sectionEnd=section['end'],
        sectionContent=section['content'],
        sectionVector='',
        videoId=videoId,
        videoName=promptContent['name'],
        summary=combineSummary,
        # "<start> <end>" doubles as the source reference of the section.
        sourcefile=f"{section['start']} {section['end']}",
    ))

In [68]:
summary

"In this part of the video, the speaker focuses on the usage of the ChatGPT OpenAI model in Azure OpenAI Studio for cross-document summarization. They explain that the model can read a set of statements and answer questions based on them. The speaker provides an example prompt in Visual Studio and demonstrates how the code replaces placeholders at runtime. They ask the model a question about the percentage of homes with solar, and the model provides a detailed answer with specific regions mentioned. However, the speaker desires a shorter answer and provides an example answer to refine the results. They input a different question, and the model generates a concise answer, learning from the example. The speaker highlights the playground in Azure Open AI Studio, which allows users to experiment, iterate, test prompts, refine them, and integrate them into custom applications. The video also mentions the support for fine-tuning in Azure Open AI, where users can specialize one of the base mo

In [33]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import *
from azure.core.credentials import AzureKeyCredential

def createVideoSearchIndex(SearchService, SearchKey, indexName):
    """Create the search index `indexName` on `SearchService` if it is not listed yet.

    The index holds one document per video section: string key/identifier
    fields, searchable `videoName`, `content` and `summary` fields, a
    1536-dimension `contentVector` field bound to the "vectorConfig" HNSW
    configuration, and a semantic configuration named "semanticConfig".

    Errors raised by index creation are printed, not re-raised. When the
    index name is already listed, only a message is printed.
    """
    indexClient = SearchIndexClient(endpoint=f"https://{SearchService}.search.windows.net/",
            credential=AzureKeyCredential(SearchKey))
    # Only build and submit the definition when the index name is not already listed.
    if indexName not in indexClient.list_index_names():
        index = SearchIndex(
            name=indexName,
            fields=[
                        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
                        SimpleField(name="sectionId", type=SearchFieldDataType.String),
                        SimpleField(name="sectionStart", type=SearchFieldDataType.String),
                        SimpleField(name="sectionEnd", type=SearchFieldDataType.String),
                        SimpleField(name="videoId", type=SearchFieldDataType.String),
                        SearchableField(name="videoName", type=SearchFieldDataType.String,
                                        searchable=True, retrievable=True, analyzer_name="en.microsoft"),
                        SearchableField(name="content", type=SearchFieldDataType.String,
                                        searchable=True, retrievable=True, analyzer_name="en.microsoft"),
                        # Vector field; refers to the "vectorConfig" algorithm configuration defined below.
                        SearchField(name="contentVector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                                    searchable=True, vector_search_dimensions=1536, vector_search_configuration="vectorConfig"),
                        SearchableField(name="summary", type=SearchFieldDataType.String,
                                        searchable=True, retrievable=True, analyzer_name="en.microsoft"),
                        SimpleField(name="sourcefile", type="Edm.String", filterable=True, retrievable=True),
            ],
            vector_search = VectorSearch(
                algorithm_configurations=[
                    HnswVectorSearchAlgorithmConfiguration(
                        name="vectorConfig",
                        kind="hnsw",
                        parameters={
                            "m": 4,
                            "efConstruction": 400,
                            "efSearch": 500,
                            "metric": "cosine"
                        }
                    )
                ]
            ),
            # NOTE(review): as parenthesized, prioritized_keywords_fields is an argument of
            # SemanticSettings, not of PrioritizedFields - confirm whether it has any effect.
            semantic_settings=SemanticSettings(
                configurations=[SemanticConfiguration(
                    name='semanticConfig',
                    prioritized_fields=PrioritizedFields(
                        title_field=SemanticField(field_name="content"), prioritized_content_fields=[SemanticField(field_name='content')]))],
                        prioritized_keywords_fields=[SemanticField(field_name='sourcefile')])
        )

        try:
            print(f"Creating {indexName} search index")
            indexClient.create_index(index)
        except Exception as e:
            # Creation failures are reported on stdout only; the caller is not notified.
            print(e)
    else:
        print(f"Search index {indexName} already exists")

In [51]:
# NOTE(review): this ensures the "msmechanics" index exists, while the indexing and
# question-answering cells of this notebook use the "videos" index - confirm the intended name.
createVideoSearchIndex(SearchService, SearchKey, "msmechanics")

Search index msmechanics already exists


In [34]:
from tenacity import retry, wait_random_exponential, stop_after_attempt  

@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
# Function to generate embeddings for title and content fields, also used for query embeddings
def generateKbEmbeddings(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, OpenAiEmbedding, embeddingModelType, text):
    """Return the embedding vector of `text`.

    Args:
        OpenAiEndPoint, OpenAiKey, OpenAiVersion: Azure OpenAI endpoint, key and
            API version, used when `embeddingModelType` is 'azureopenai'.
        OpenAiApiKey: OpenAI API key, used when `embeddingModelType` is 'openai'.
        OpenAiEmbedding: Model/deployment name passed as `model` on the Azure path
            (the 'openai' path always uses "text-embedding-ada-002").
        embeddingModelType: 'azureopenai' or 'openai'.
        text: Input passed to the embeddings endpoint.

    Returns:
        `response.data[0].embedding` of the embeddings response.

    Raises:
        ValueError: for an unknown `embeddingModelType`.
        Exception: whatever the client raises. Failures are logged and re-raised
            so the retry decorator sees the real error instead of an
            UnboundLocalError from a missing result. Every exception (including
            the ValueError) goes through the decorator's retry policy of up to
            6 attempts.
    """
    # Local import: no cell in this notebook imports `logging` explicitly.
    import logging

    try:
        if embeddingModelType == 'azureopenai':
            client = AzureOpenAI(
                api_key=OpenAiKey,
                api_version=OpenAiVersion,
                azure_endpoint=OpenAiEndPoint,
            )
            response = client.embeddings.create(input=text, model=OpenAiEmbedding)
        elif embeddingModelType == "openai":
            # The key belongs to the client; embeddings.create() takes no api_key argument.
            client = OpenAI(api_key=OpenAiApiKey)
            response = client.embeddings.create(input=text, model="text-embedding-ada-002")
        else:
            raise ValueError(f"Unsupported embeddingModelType: {embeddingModelType!r}")
        return response.data[0].embedding
    except Exception as e:
        logging.warning("Embedding generation failed: %s", e)
        raise

In [35]:
import uuid
def createSections(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, embeddingModelType, OpenAiEmbedding, docs):
    """Lazily turn section insight dicts into search-index documents.

    For each entry of `docs` one dict is yielded with a fresh UUID as "id",
    the section fields copied over (`sectionId` stringified, `sectionContent`
    stored as "content") and "contentVector" computed by `generateKbEmbeddings`
    from the section content.
    """
    for insight in docs:
        sectionText = insight.get('sectionContent')
        yield {
            "id": str(uuid.uuid4()),
            "sectionId": str(insight.get('sectionId')),
            "sectionStart": insight.get('sectionStart'),
            "sectionEnd": insight.get('sectionEnd'),
            "content": sectionText,
            "contentVector": generateKbEmbeddings(
                OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey,
                OpenAiEmbedding, embeddingModelType, sectionText),
            "videoId": insight.get('videoId'),
            "videoName": insight.get('videoName'),
            "summary": insight.get('summary'),
            "sourcefile": insight.get('sourcefile'),
        }

In [36]:
from azure.search.documents import SearchClient
def indexSections(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, embeddingModelType, OpenAiEmbedding, SearchService, SearchKey, indexName, docs):
    """Embed the section insights in `docs` and upload them to the search index.

    Documents are produced lazily by `createSections` and uploaded to
    `indexName` on `SearchService` in batches of up to 1000. A progress line
    with the number of indexed and succeeded documents is printed per batch.

    Args:
        OpenAiEndPoint ... OpenAiEmbedding: Forwarded unchanged to `createSections`.
        SearchService, SearchKey: Search service name and API key.
        indexName: Target index.
        docs: Sized collection of section insight dicts.
    """
    batchSize = 1000

    print("Total docs: " + str(len(docs)))
    sections = createSections(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, embeddingModelType, OpenAiEmbedding, docs)
    searchClient = SearchClient(endpoint=f"https://{SearchService}.search.windows.net/",
                                    index_name=indexName,
                                    credential=AzureKeyCredential(SearchKey))

    def uploadBatch(batch):
        # Full and trailing batches take the same path: upload_documents accepts a plain
        # list of dicts, whereas index_documents expects an IndexDocumentsBatch object.
        results = searchClient.upload_documents(documents=batch)
        succeeded = sum(1 for r in results if r.succeeded)
        print(f"\tIndexed {len(results)} sections, {succeeded} succeeded")

    batch = []
    for s in sections:
        batch.append(s)
        if len(batch) >= batchSize:
            uploadBatch(batch)
            batch = []

    if batch:
        uploadBatch(batch)

In [114]:
indexSections(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, embeddingModelType, OpenAiEmbedding, SearchService, SearchKey, "videos", videoInsights)

Total docs: 4
	Indexed 4 sections, 4 succeeded


#### STEP 5 - Run RAG Pattern on indexed video Repository

In [37]:
from azure.search.documents.models import Vector
def performVideoSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType, OpenAiEmbedding, question, indexName, k, returnFields=["sectionId", "sectionContent", "sectionStart", "sectionEnd"] ):
    """Run a hybrid (text + vector) semantic search for `question` against `indexName`.

    The question is embedded with `generateKbEmbeddings` and searched against
    the "contentVector" field, combined with a semantic text query that uses
    the "semanticConfig" configuration.

    Args:
        OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey: Embedding credentials.
        SearchService, SearchKey: Search service name and API key.
        embeddingModelType: Embedding backend selector ('azureopenai' or 'openai').
        OpenAiEmbedding: Embedding model/deployment name.
        question: Query text.
        indexName: Index to search.
        k: Number of nearest neighbours and of results to return.
        returnFields: Fields to select.
            NOTE(review): the default selects "sectionContent", but the index built by
            createVideoSearchIndex names that field "content" - confirm the default.

    Returns:
        The search result iterator, or None when the search raised (the error is printed).
    """
    searchClient = SearchClient(endpoint=f"https://{SearchService}.search.windows.net",
        index_name=indexName,
        credential=AzureKeyCredential(SearchKey))
    try:
        # Pass the two embedding settings by keyword: generateKbEmbeddings takes them in the
        # opposite order to this function, so a positional call silently swaps them.
        questionVector = generateKbEmbeddings(
            OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey,
            OpenAiEmbedding=OpenAiEmbedding,
            embeddingModelType=embeddingModelType,
            text=question)
        r = searchClient.search(
            search_text=question,
            vectors=[Vector(value=questionVector, k=k, fields="contentVector")],
            select=returnFields,
            query_type="semantic",
            query_language="en-us",
            semantic_configuration_name='semanticConfig',
            query_caption="extractive",
            query_answer="extractive",
            include_total_count=True,
            top=k
        )
        return r
    except Exception as e:
        print(e)

    return None

In [38]:
def askQuestion(query):
    """Answer `query` from the indexed video sections and return the answer text.

    The top 3 matching sections are retrieved from the "videos" index with
    `performCogSearch`, wrapped as documents whose source is the section's
    `sourcefile`, and passed to a "stuff" question-answering-with-sources chain.
    When the search returns None, a single placeholder document is used.

    Returns:
        The chain's `output_text`.
    """
    chainType = "stuff"
    topK = 3

    # Since we already index our document, we can perform the search on the query to retrieve "TopK" documents
    r = performCogSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType, OpenAiEmbedding,
                        query, "videos", topK, returnFields=["id", "sectionId", "content", "sectionStart", "sectionEnd", "summary", "sourcefile"])

    if r is None:
        # The with-sources chain formats every document with its "source" metadata,
        # so the placeholder needs that key too.
        docs = [Document(page_content="No results found", metadata={"id": "", "source": ""})]
    else:
        docs = [
            Document(page_content=doc['content'], metadata={"id": doc['id'], "source": doc['sourcefile']})
            for doc in r
        ]

    qaChain = load_qa_with_sources_chain(llm, chain_type=chainType)
    answer = qaChain({"input_documents": docs, "question": query}, return_only_outputs=True)
    return answer['output_text']

In [28]:
query = "When is Apple's event scheduled and what is it called?. What are the products planned to be announced at the event?"
askQuestion(query)

'Apple\'s event is scheduled for October 30th and it is called the "Scary Fast" event. The products planned to be announced at the event include new MacBook Pros with 14-inch and 16-inch models featuring M3 Pro and M3 Max chips, as well as an iMac update with the standard M3 chip. The M3 chip is significant as it will be based on a three nanometer production process, offering faster performance, improved power efficiency, and better battery life. The event will be online-only with no in-person component.\nSOURCES: 0:00:46.279567 0:01:16.3763, 0:01:16.3763 0:02:30.984167, 0:02:30.984167 0:02:38.9588'

In [29]:
query = "Who are the speakers?"
askQuestion(query)

'The speakers are Mark Gurman and another unidentified person.\nSOURCES: 0:01:16.3763 0:02:30.984167, 0:00:46.279567 0:01:16.3763, 0:02:30.984167 0:02:38.9588'

In [30]:
query = "Can you summarize the discussion in the video?"
askQuestion(query)

'The video discusses Apple\'s upcoming product launch event on October 30th, which is called "Scaryfast." It is expected that new Mac chips and new Macs will be announced. The event will be online-only, with no in-person component. Possible products to be introduced include new MacBook Pros with M3 Pro and M3 Max chips, as well as an iMac with the standard M3 chip. The M3 chip is significant as it will be based on a three nanometer production process, offering faster performance, better power efficiency, and improved battery life. The video also mentions the logo morphing into the Finder icon, indicating that the event is Mac-centric. The timing of the event at 5:00 PM and the dark theme of the invitation suggest a Halloween theme. The discussion highlights the anticipation for these product launches and the potential for significant advancements in technology. \nSOURCES: 0:00:00 0:00:46.279567, 0:01:16.3763 0:02:30.984167'

In [31]:
query = "Is there a price mentioned of the new products?"
askQuestion(query)

'There is no mention of the price of the new products.\nSOURCES:'

In [32]:
query = "Who is the CEO?"
askQuestion(query)

'The CEO is not mentioned in the provided content.\nSOURCES:'

#### One Time Ingestion

In [45]:
# Videos to ingest: (display name, Video Indexer video id) pairs expanded into dicts.
mechanicsVideosList = [
    {"name": interviewName, "id": indexerId}
    for interviewName, indexerId in (
        ("Interview_Farooq", "48ca2e67e3"),
        ("Interview_JensLottner", "de586609ec"),
        ("Interview_BusinessWeek", "12e65e4bdb"),
        ("Interview_BeyondTheBell", "17561131a2"),
        ("Interview_JacquelineRong", "ae98aab761"),
    )
]

In [56]:
# Collect one record per section for every video in mechanicsVideosList.
mechanicsVideoInsights = []
for video in mechanicsVideosList:
    videoId = video['id']
    videoName = video['name']
    print("Processing video : " + videoId)

    # Ask Video Indexer to build the prompt content, then poll until it is available.
    accessToken = getAccessToken()
    promptContentResp = createPromptContent(videoId, accessToken)
    accessToken = getAccessToken()
    promptContent = getPromptContent(videoId, accessToken)
    while "ErrorType" in promptContent:
        time.sleep(20)
        print('Processing Content...')
        promptContent = getPromptContent(videoId, accessToken)
    print(promptContent)
    print('Processing Completed')

    # Summarize all sections of this video.
    sections = promptContent['sections']
    summary, combineSummary = getSummary(sections)
    print("Got Summary for videoId : " + videoId)

    # Every section record carries the combined summary of its video.
    for section in sections:
        mechanicsVideoInsights.append(dict(
            sectionId=section['id'],
            sectionStart=section['start'],
            sectionEnd=section['end'],
            sectionContent=section['content'],
            sectionVector='',
            videoId=videoId,
            videoName=promptContent['name'],
            summary=combineSummary,
            sourcefile=f"{section['start']} {section['end']}",
        ))

# Make sure the target index exists, then embed and upload all collected sections.
createVideoSearchIndex(SearchService, SearchKey, "bloombergint")
indexSections(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, embeddingModelType, OpenAiEmbedding, SearchService, SearchKey, "bloombergint", mechanicsVideoInsights)

In [57]:
from azure.storage.blob import BlobClient
from Utilities.azureBlob import upsertMetadata

# Tag each ingested video's blob with the metadata describing how it was indexed.
for video in mechanicsVideosList:
    videoName = video['name']
    videoId = video['id']
    blobName = videoName + '.mp4'
    blob = BlobClient.from_connection_string(
        conn_str=OpenAiDocConnStr,
        container_name=OpenAiVideoContainer,
        blob_name=blobName,
    )
    if not blob.exists():
        print(blobName + " not found")
        continue

    metadata = {
        'embedded': 'true',
        'namespace': "bloombergint",
        'indexType': "cogsearchvs",
        "indexName": "Bloomberg Interview".replace("-", "_"),
        "textSplitterType": "recursive",
        "chunkSize": "8000",
        "chunkOverlap": "1000",
        "promptType": "generic",
        "videoId": videoId,
        "videoName": videoName,
    }
    upsertMetadata(OpenAiDocConnStr, OpenAiVideoContainer, blobName, metadata)