In [None]:
import os
import pickle
import json
import subprocess
import nltk
import gc
import random
import torch
import asyncio
import hashlib

from deepgram_transcribe import transcribe_file
from span_marker import SpanMarkerModel
import jsonschema
from dotenv import load_dotenv
import httpx
from deepgram import (
    DeepgramClient,
    DeepgramClientOptions,
    PrerecordedOptions,
    FileSource,
)
import logging
from IPython.display import display, Markdown


from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex, get_response_synthesizer, PromptTemplate, StorageContext, load_index_from_storage
from llama_index.core.schema import MetadataMode, TextNode, NodeRelationship, RelatedNodeInfo, ImageNode
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter, SimpleNodeParser
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import FunctionTool
from llama_index.agent.openai import OpenAIAgent
from openai import OpenAI as OpenAI4DALLE
# Load .env *before* building the client: the OpenAI client resolves
# OPENAI_API_KEY from the environment when it is constructed, so a key that
# lives only in .env would not be visible yet. Calling load_dotenv() again
# later in this cell is harmless.
load_dotenv()
# Dedicated OpenAI SDK client used for DALL-E image generation.
openai_client4dalle = OpenAI4DALLE()

from llama_index.core.extractors import (
    TitleExtractor,
    BaseExtractor,
)
from llama_index.core.vector_stores.types import ExactMatchFilter, MetadataFilters, MetadataFilter, FilterOperator
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.core.query_engine import CitationQueryEngine
from llama_index.multi_modal_llms.anthropic.base import AnthropicMultiModal

load_dotenv()

In [None]:

# Prompt text for rewriting one video's transcript as a step-by-step recipe.
# Placeholders: {recipe_title} and {context_str}. The doubled braces ({{ }})
# in the example are brace escapes, presumably so they come out as literal JSON
# braces once the template is formatted. The requested output is a JSON list of
# objects with "heading" and "instructions" keys, which is the shape
# KNOWN_RECIPE_RESPONSE_SCHEMA describes.
KNOWN_RECIPE_QA_PROMPT_TMPL_STR = """
A man is preparing {recipe_title} dish the audio transcript is below.
_________________________________________________________
{context_str}
_________________________________________________________

Given the context information only and not prior knowledge, 'Write a detailed Food Recipe' in stepwise manner, each step should consist of heading and deatiled single line instructions.
The recipe should contain as much details as possible including the ingredients, ingredients quatity, style of cooking, utensils and instrument usage. 
For first step of recipe list out all the ingredients

About the output format:
The receipe output should be a json format consisting of sequential steps as a list and each step is a dictornary with keys heading and instructions
For example/reference I am sharing expected output for the Recipe of Coconut panna cotta

[
    {{
        "heading": "Ingredients",
        "instructions": "We will need: 2 cups coconut milk, 1/2 cup granulated sugar, 2 teaspoons powdered gelatin, 2 tablespoons cold water, 1 teaspoon vanilla extract, Coconut cream for topping (optional)"
    }},
    {{
        "heading": "Preparing the Coconut Milk Mixture",
        "instructions": "In a medium-sized saucepan, pour 2 cups of coconut milk. Place the saucepan over medium heat and warm the coconut milk, stirring occasionally. Be careful not to let it boil. Once the coconut milk is warm, add 1/2 cup of granulated sugar to it. Stir continuously until the sugar is completely dissolved. This usually takes about 2-3 minutes."
    }},
    {{
        "heading": "Blooming the Gelatin",
        "instructions": "In a small bowl, sprinkle 2 teaspoons of powdered gelatin over 2 tablespoons of cold water. Make sure the water is cold to properly activate the gelatin. Allow the gelatin to sit undisturbed for about 5 minutes. During this time, the gelatin will absorb the water and become soft, or 'bloom'."
    }},
    {{
        "heading": "Incorporating Gelatin into Coconut Milk Mixture",
        "instructions": "Once the gelatin has bloomed, it will have a spongy texture. Add the bloomed gelatin to the warm coconut milk mixture in the saucepan. Stir the mixture continuously until the gelatin is completely dissolved into the coconut milk. This usually takes 2-3 minutes of stirring over medium heat."
    }},
    {{
        "heading": "Adding Vanilla Extract",
        "instructions": "After the gelatin is fully dissolved, add 1 teaspoon of vanilla extract to the coconut milk mixture in the saucepan. Stir well to evenly distribute the vanilla extract throughout the mixture. This adds a delightful aroma and flavor to the panna cotta."
    }},
    {{
        "heading": "Pouring into Glasses and Chilling",
        "instructions": "Once the coconut milk mixture is ready, remove the saucepan from the heat. Carefully pour the mixture into individual serving glasses or ramekins. Fill each glass leaving about 1/4 inch of space at the top. Place the glasses on a tray and transfer them to the refrigerator. Allow the panna cotta to chill and set in the refrigerator for at least 4 hours. For best results, refrigerate overnight."
    }},
    {{
        "heading": "Adding Coconut Cream Topping (Optional)",
        "instructions": "If desired, before serving, spoon a layer of coconut cream over the chilled panna cotta in each glass. This adds an extra layer of coconut flavor and enhances the presentation of the dessert."
    }},
    {{
        "heading": "Serving",
        "instructions": "Once set, remove the Coconut Panna Cotta from the refrigerator. Serve chilled, optionally garnished with shredded coconut or fresh fruit, and enjoy!"
    }}
]
"""

# Prompt text for rewriting a transcript as a recipe while applying a
# user-requested modification. Placeholders: {recipe_title}, {context_str} and
# {modification_request} (the last one is substituted in two places).
# Doubled braces ({{ }}) in the example are brace escapes.
# NOTE(review): the format paragraph asks for "steps as a list", but the
# example is an object with "recipe_title" and "steps" keys (the shape of
# UNKNOWN_RECIPE_RESPONSE_SCHEMA) -- confirm which one the caller validates.
MODIFY_RECIPE_QA_PROMPT_TMPL_STR = """
A man is preparing {recipe_title} dish the audio transcript is below.
_________________________________________________________
{context_str}
_________________________________________________________

Using the above context information and recipe modification request belowt: 
_______________________________________________
{modification_request}
______________________________________________
'Write a detailed Food Recipe' in stepwise manner, each step should consist of heading and deatiled single line instructions.
The recipe should contain as much details as possible including the ingredients, ingredients quatity, style of cooking, utensils and instrument usage. 
For first step of recipe list out all the ingredients

At the same time modify whereever required to incorporate the modification_request: {modification_request}
Please be careful that the modifications should be such that the sequential steps of food-recipe are coherent, 
logical and does not completely remove the essence of orignal recipe

According to user request we can change ingridents, spiciness, add additional content for improving nutritional-content etc, 
but we want to keep the core values and style of original author 
You can use knowledge other than the context for performing modifications

Also give title to this new recipe after according to modifications done and output it in 'recipe_title' field of output json


About the output format:
The receipe output should be a json format consisting of sequential steps as a list and each step is a dictornary with keys heading and instructions
For example/reference I am sharing expected


{{
    "recipe_title": "Recipe of Coconut panna cotta",
    "steps": [
        {{
            "heading": "Ingredients",
            "instructions": "We will need: 2 cups coconut milk, 1/2 cup granulated sugar, 2 teaspoons powdered gelatin, 2 tablespoons cold water, 1 teaspoon vanilla extract, Coconut cream for topping (optional)"
        }},
        {{
            "heading": "Preparing the Coconut Milk Mixture",
            "instructions": "In a medium-sized saucepan, pour 2 cups of coconut milk. Place the saucepan over medium heat and warm the coconut milk, stirring occasionally. Be careful not to let it boil. Once the coconut milk is warm, add 1/2 cup of granulated sugar to it. Stir continuously until the sugar is completely dissolved. This usually takes about 2-3 minutes."
        }},
        {{
            "heading": "Blooming the Gelatin",
            "instructions": "In a small bowl, sprinkle 2 teaspoons of powdered gelatin over 2 tablespoons of cold water. Make sure the water is cold to properly activate the gelatin. Allow the gelatin to sit undisturbed for about 5 minutes. During this time, the gelatin will absorb the water and become soft, or 'bloom'."
        }},
        {{
            "heading": "Incorporating Gelatin into Coconut Milk Mixture",
            "instructions": "Once the gelatin has bloomed, it will have a spongy texture. Add the bloomed gelatin to the warm coconut milk mixture in the saucepan. Stir the mixture continuously until the gelatin is completely dissolved into the coconut milk. This usually takes 2-3 minutes of stirring over medium heat."
        }},
        {{
            "heading": "Adding Vanilla Extract",
            "instructions": "After the gelatin is fully dissolved, add 1 teaspoon of vanilla extract to the coconut milk mixture in the saucepan. Stir well to evenly distribute the vanilla extract throughout the mixture. This adds a delightful aroma and flavor to the panna cotta."
        }},
        {{
            "heading": "Pouring into Glasses and Chilling",
            "instructions": "Once the coconut milk mixture is ready, remove the saucepan from the heat. Carefully pour the mixture into individual serving glasses or ramekins. Fill each glass leaving about 1/4 inch of space at the top. Place the glasses on a tray and transfer them to the refrigerator. Allow the panna cotta to chill and set in the refrigerator for at least 4 hours. For best results, refrigerate overnight."
        }},
        {{
            "heading": "Adding Coconut Cream Topping (Optional)",
            "instructions": "If desired, before serving, spoon a layer of coconut cream over the chilled panna cotta in each glass. This adds an extra layer of coconut flavor and enhances the presentation of the dessert."
        }},
        {{
            "heading": "Serving",
            "instructions": "Once set, remove the Coconut Panna Cotta from the refrigerator. Serve chilled, optionally garnished with shredded coconut or fresh fruit, and enjoy!"
        }}
    ]
}}


""" 



# Prompt text for composing a new recipe out of steps collected from several
# of the same author's recipes. The only placeholder is {context_str}; doubled
# braces ({{ }}) in the example are brace escapes. The example output is an
# object with "recipe_title" and "steps" keys, the shape
# UNKNOWN_RECIPE_RESPONSE_SCHEMA describes.
# NOTE(review): the format paragraph still says "steps as a list" although the
# example is an object -- confirm the wording is intentional.
UNKNOWN_RECIPE_QA_PROMPT_TMPL_STR = """
From multiple food recipes prepared by a specifc man we have listed some his techniques/style to mix ingredients and process food etc
From those randomly collected-recipe-steps, you have to compose a detailed recipe by assembling/modifying the provided 
collected-recipe-steps into a coherent, step-by-step cooking guide. 

Your task is to ensure that the resultant recipe flows naturally, adheres to a logical progression, and maintains the 
structure of a typical food recipe, while ensuring the preservation of the author's unique style and choice of utilizing the ingredients.

Begin by organizing the steps in a sequential structure, ensuring that each follows the previous one chronologically. 

Incorporate ingredients incrementally as they are required in the cooking process. 
Describe when each ingredient should be added, ensuring that the introduction of each aligns with its 
appropriate step similar to how done in collected-recipe-steps. 

Use imperative verbs to denote actions, quantities, in style similar to authors.

Conclude the recipe with instructions on serving the finished dish, including any garnishes or 
accompaniments in style similar to authors..

For all the instruction keep the cooking style similar to that seen in collected-recipe-steps

the context from collected-recipe-steps is below
_____________________________________
{context_str}
_____________________________________

About the output format:
The receipe output should be a json format consisting of sequential steps as a list and each step is a dictornary with keys heading and instructions
For example/reference I am sharing expected


{{
    "recipe_title": "Recipe of Coconut panna cotta",
    "steps": [
        {{
            "heading": "Ingredients",
            "instructions": "We will need: 2 cups coconut milk, 1/2 cup granulated sugar, 2 teaspoons powdered gelatin, 2 tablespoons cold water, 1 teaspoon vanilla extract, Coconut cream for topping (optional)"
        }},
        {{
            "heading": "Preparing the Coconut Milk Mixture",
            "instructions": "In a medium-sized saucepan, pour 2 cups of coconut milk. Place the saucepan over medium heat and warm the coconut milk, stirring occasionally. Be careful not to let it boil. Once the coconut milk is warm, add 1/2 cup of granulated sugar to it. Stir continuously until the sugar is completely dissolved. This usually takes about 2-3 minutes."
        }},
        {{
            "heading": "Blooming the Gelatin",
            "instructions": "In a small bowl, sprinkle 2 teaspoons of powdered gelatin over 2 tablespoons of cold water. Make sure the water is cold to properly activate the gelatin. Allow the gelatin to sit undisturbed for about 5 minutes. During this time, the gelatin will absorb the water and become soft, or 'bloom'."
        }},
        {{
            "heading": "Incorporating Gelatin into Coconut Milk Mixture",
            "instructions": "Once the gelatin has bloomed, it will have a spongy texture. Add the bloomed gelatin to the warm coconut milk mixture in the saucepan. Stir the mixture continuously until the gelatin is completely dissolved into the coconut milk. This usually takes 2-3 minutes of stirring over medium heat."
        }},
        {{
            "heading": "Adding Vanilla Extract",
            "instructions": "After the gelatin is fully dissolved, add 1 teaspoon of vanilla extract to the coconut milk mixture in the saucepan. Stir well to evenly distribute the vanilla extract throughout the mixture. This adds a delightful aroma and flavor to the panna cotta."
        }},
        {{
            "heading": "Pouring into Glasses and Chilling",
            "instructions": "Once the coconut milk mixture is ready, remove the saucepan from the heat. Carefully pour the mixture into individual serving glasses or ramekins. Fill each glass leaving about 1/4 inch of space at the top. Place the glasses on a tray and transfer them to the refrigerator. Allow the panna cotta to chill and set in the refrigerator for at least 4 hours. For best results, refrigerate overnight."
        }},
        {{
            "heading": "Adding Coconut Cream Topping (Optional)",
            "instructions": "If desired, before serving, spoon a layer of coconut cream over the chilled panna cotta in each glass. This adds an extra layer of coconut flavor and enhances the presentation of the dessert."
        }},
        {{
            "heading": "Serving",
            "instructions": "Once set, remove the Coconut Panna Cotta from the refrigerator. Serve chilled, optionally garnished with shredded coconut or fresh fruit, and enjoy!"
        }}
    ]
}}


""" 



# Wrap each raw prompt string in a llama-index PromptTemplate, in the same
# order as before: known, unknown, modify.
(
    KNOWN_RECIPE_QA_PROMPT_TMPL,
    UNKNOWN_RECIPE_QA_PROMPT_TMPL,
    MODIFY_RECIPE_QA_PROMPT_TMPL,
) = (
    PromptTemplate(template_str)
    for template_str in (
        KNOWN_RECIPE_QA_PROMPT_TMPL_STR,
        UNKNOWN_RECIPE_QA_PROMPT_TMPL_STR,
        MODIFY_RECIPE_QA_PROMPT_TMPL_STR,
    )
)


# Field names every recipe step object must carry; both are JSON strings.
_RECIPE_STEP_FIELDS = ("heading", "instructions")

# JSON Schema for the known-recipe answer: a bare array of step objects.
# Extra keys on a step are tolerated here (no "additionalProperties").
KNOWN_RECIPE_RESPONSE_SCHEMA = dict(
    type="array",
    items=dict(
        type="object",
        properties={field: {"type": "string"} for field in _RECIPE_STEP_FIELDS},
        required=list(_RECIPE_STEP_FIELDS),
    ),
)

# JSON Schema for answers that also carry a title: an object with a
# "recipe_title" string and a "steps" array. Unknown keys are rejected at
# both the top level and the step level.
UNKNOWN_RECIPE_RESPONSE_SCHEMA = dict(
    type="object",
    properties=dict(
        recipe_title={"type": "string"},
        steps=dict(
            type="array",
            items=dict(
                type="object",
                properties={field: {"type": "string"} for field in _RECIPE_STEP_FIELDS},
                required=list(_RECIPE_STEP_FIELDS),
                additionalProperties=False,
            ),
        ),
    ),
    required=["recipe_title", "steps"],
    additionalProperties=False,
)


# Prompt sent together with step images to the multi-modal LLM
# (see refine_with_mmlm). The single placeholder {step} is filled with the
# step's current instructions via str.format; the model is asked to return one
# enriched instruction line using only what it can see in the images.
MMLM_REFINE_PROMPT = """ 
I have a food-recipe-step and corresponding images, 
From the images I want you to add new information to the food-recipe-step 
given food-recipe-step and images, output should be single line

find additional details or clarifications from images only, do not use any prior knowledge
then combine the extracted information from images with existing information 
(only use information from images which you are confident of) ensuring that the instructions remain clear, coherent.  
The final output should have tone like single line instructions which allow other users to cook recipe in accurately
Do not add any other statement like "Based on the image" etc only output the response directly


the recipe-step is below
_____________________
{step}
_____________________

"""


# Prompt text for generating an illustration of one recipe step.
# Placeholders: {recipe_title}, {steps} (described in the text as the
# JSON-formatted steps), {step_index} and {step_instruction}.
# NOTE(review): no use of this template is visible in this part of the
# notebook -- presumably it feeds get_generated_image_url; confirm.
IMG_GENERATION_PROMPT_TMPL = """
We want to help user by explaining the Food Recipe steps in visual manner, 
I'll ask to generate image for one of the steps but do incorporate knowledge from steps before than candidate step
Below are the json formatted steps for Food Recipe named {recipe_title}, 
____________
{steps}
___________

generate image which depicts the scenario for the instruction in step {step_index} which is 
__________________
{step_instruction}
_________________
( please incorporate the previous steps, recipe title and ingridents aswell for better coherency between steps)
"""

In [None]:
# Constants
# Directory that video2audio() scans for the downloaded videos.
VIDEO_DIR = 'yt_data'
# Directory where video2audio() writes the .wav files and audio2text_pickle() reads them.
AUDIO_DIR = 'audio_data'
# Pickle cache of transcripts: written by audio2text_pickle(), read by get_transcript_dict().
TRANSCRIPT_PICKLE_PATH = "audio_file_transcripts.pkl"
# When True, get_pnodes() adds extracted food entities to each paragraph node's metadata.
CHUNK_ENTITY_EXTRACT = True
# Playlist URL handed to yt-dlp by get_youtube_videos().
RECIPE_YOUTUBE_PLAYLIST = "https://youtube.com/playlist?list=PL_PgxS3FkP7ATPveBQ1yah7LDqysyzDCG&si=GUM8RS-uuvVfV8GH"
# Divisor used to turn an extracted frame's file number into seconds.
# NOTE(review): assumes every video is 24 fps -- confirm against the sources.
VIDEO_FRAME_RATE = 24

In [None]:
def get_youtube_videos():
    """Download every video of RECIPE_YOUTUBE_PLAYLIST into VIDEO_DIR.

    yt-dlp writes to the current working directory unless told otherwise, but
    the rest of the pipeline (the ``os.path.exists(VIDEO_DIR)`` guard and
    ``video2audio``) expects the videos inside VIDEO_DIR, so the download
    location is passed explicitly with ``-P``.
    """
    # '-i' keeps going when individual playlist entries fail to download.
    subprocess.call(['yt-dlp', '-i', '-P', VIDEO_DIR, RECIPE_YOUTUBE_PLAYLIST])

def transcribe_file(AUDIO_FILE):
    """Transcribe one audio file with Deepgram's prerecorded API.

    Note: this definition shadows the ``transcribe_file`` imported from
    ``deepgram_transcribe`` in the imports cell.

    Args:
        AUDIO_FILE: path of the audio file to upload.

    Returns:
        The Deepgram response object, or ``None`` when anything fails (the
        exception is printed, not raised).
    """
    try:
        # STEP 1: create a Deepgram client using the API key in the environment variables
        client_options = DeepgramClientOptions(verbose=logging.SPAM)
        client = DeepgramClient(config=client_options)

        # STEP 2: read the whole file into memory and send it as the request buffer
        with open(AUDIO_FILE, "rb") as audio_handle:
            audio_bytes = audio_handle.read()

        request_options = PrerecordedOptions(
            model="whisper-large",
            smart_format=True,
            punctuate=True,
        )

        # Generous overall timeout (300 s), short connect timeout (10 s).
        return client.listen.prerecorded.v("1").transcribe_file(
            {"buffer": audio_bytes},
            request_options,
            timeout=httpx.Timeout(300.0, connect=10.0),
        )

    except Exception as e:
        print(f"Exception: {e}")
        return None


def video2audio():
    """Extract a .wav audio track for every file in VIDEO_DIR into AUDIO_DIR.

    Each output is named ``<video file name>.wav``. ffmpeg's ``-n`` flag makes
    it skip outputs that already exist.
    """
    # ffmpeg does not create missing output directories, and this function is
    # only invoked when AUDIO_DIR does not exist yet, so create it up front.
    os.makedirs(AUDIO_DIR, exist_ok=True)

    for video_file in os.listdir(VIDEO_DIR):
        video_path = os.path.join(VIDEO_DIR, video_file)
        audio_filename = f'{os.path.join(AUDIO_DIR, os.path.basename(video_path))}.wav'
        cmd = ['ffmpeg', '-loglevel', 'warning', '-n', '-i', f'{video_path}', f'{audio_filename}']
        subprocess.call(cmd)

def audio2text_pickle():
    """Transcribe every file in AUDIO_DIR and cache the result as a pickle.

    Returns:
        dict mapping audio file name -> the paragraph list taken from the
        first alternative of the first channel of the Deepgram response.

    Raises:
        RuntimeError: if transcription of any file fails. Nothing is written
            in that case, so a partial cache can never mask the failure.
    """
    audio_file_transcripts = {}
    for audio_file in os.listdir(AUDIO_DIR):
        audio_file_path = os.path.join(AUDIO_DIR, audio_file)
        response = transcribe_file(audio_file_path)
        # transcribe_file reports failures by returning None; fail loudly with
        # the file name instead of an opaque "'NoneType' is not subscriptable".
        if response is None:
            raise RuntimeError(f"transcription failed for {audio_file_path}")
        audio_file_transcripts[audio_file] = response['results']['channels'][0]['alternatives'][0]['paragraphs']['paragraphs']

    with open(TRANSCRIPT_PICKLE_PATH, "wb") as file:
        pickle.dump(audio_file_transcripts, file)

    return audio_file_transcripts


def get_transcript_dict():
    """Load the cached transcripts written by ``audio2text_pickle``.

    Returns:
        The unpickled object stored at TRANSCRIPT_PICKLE_PATH.

    Raises:
        FileNotFoundError: if the cache file does not exist. (Raising a bare
            string, as before, is itself a TypeError in Python 3.)
    """
    if not os.path.exists(TRANSCRIPT_PICKLE_PATH):
        raise FileNotFoundError(f"transcript file not found: {TRANSCRIPT_PICKLE_PATH}")

    # SECURITY: pickle.load can execute arbitrary code; only load a cache file
    # that this notebook produced itself.
    with open(TRANSCRIPT_PICKLE_PATH, 'rb') as file:
        return pickle.load(file)


def get_file_transcript_list():
    """Return the cached transcripts as a list of records.

    Each record is ``{"filename": <base name of the audio file>,
    "transcript": <its transcript>}``, in the cache's iteration order.
    """
    return [
        {"filename": os.path.basename(audio_file), "transcript": transcript}
        for audio_file, transcript in get_transcript_dict().items()
    ]

def get_file_hash(filename):
    """Return the SHA-1 hex digest of the UTF-8 encoded file *name*.

    Only the name string is hashed, not the file's contents.
    """
    return hashlib.sha1(filename.encode('utf-8')).hexdigest()

# Materialise each pipeline stage only when its output is missing:
# playlist download -> audio extraction -> transcription cache.
if not os.path.exists(VIDEO_DIR):
    get_youtube_videos()

if not os.path.exists(AUDIO_DIR):
    video2audio()

if not os.path.exists(TRANSCRIPT_PICKLE_PATH):
    audio2text_pickle()

# Built only after the stages above have run: listing AUDIO_DIR first would
# raise FileNotFoundError on a fresh checkout, before the audio stage had a
# chance to create the directory.
# Maps the SHA-1 of an audio file name back to that file name.
hash2file_map = {get_file_hash(filename): filename for filename in os.listdir(AUDIO_DIR)}

## Load text data as TextNode objects

In [None]:

def get_paragraph_info(paragraph):
    """Flatten one transcript paragraph into ``(text, start, end)``.

    Args:
        paragraph: mapping with a ``'sentences'`` iterable (each item exposing
            a ``.text`` attribute) plus ``'start'`` and ``'end'`` values.

    Returns:
        Tuple of the paragraph text and the paragraph's start and end values.
        Sentences are joined with a single space; gluing them together with
        no separator ("one.two.") defeats later sentence tokenization.
    """
    text = " ".join(sentence.text for sentence in paragraph['sentences'])
    return text, paragraph['start'], paragraph['end']

def concatenated_transcript(transcript):
    """Join all paragraphs of a transcript into one string.

    Every paragraph's text is followed by a newline, including the last one.
    """
    return "".join(
        f"{get_paragraph_info(paragraph)[0]}\n" for paragraph in transcript
    )

def extract_recipe_title(text):
    """Ask the OpenAI chat model for the recipe title contained in ``text``.

    The system prompt tells the model to answer with the title only, or
    ``None`` when it cannot find one; the model's reply content is returned
    as is.
    """
    system_message = ChatMessage(
        role="system",
        content="You are a food receipe title extractor which only output title as a nouns, if cannot find return None",
    )
    user_message = ChatMessage(role="user", content=f"{text}")

    chat_response = OpenAI().chat([system_message, user_message])
    return chat_response.message.content

class RecipeSearchQueryExtractor(BaseExtractor):
    """Metadata extractor that attaches typical search queries for a recipe.

    For every node it produces a ``recipe_queries`` metadata entry holding
    four newline-separated user-style questions about the node's recipe.
    """

    # Annotated on purpose: Pydantic v2 rejects overriding an inherited field
    # with a non-annotated class attribute; the annotation is also valid on v1.
    metadata_mode: MetadataMode = MetadataMode.EMBED

    async def aextract(self, nodes):
        """Build one ``{"recipe_queries": ...}`` dict per node, in node order.

        The title comes from ``node.metadata['recipe_title']``; when that is
        the literal string ``'None'``, ``node.metadata['document_title']`` is
        used instead.
        """
        metadata_list = []
        for node in nodes:
            recipe_title = node.metadata['recipe_title']
            if recipe_title == 'None':
                recipe_title = node.metadata["document_title"]

            questions = (
                f"How to make {recipe_title}?\n"
                f"Share the recipe for {recipe_title}\n"
                f"I want to make {recipe_title}, how should I do?\n"
                f"Write steps for preparing {recipe_title}"
            )

            metadata_list.append({"recipe_queries": questions})

        return metadata_list

async def get_file_nodes(filename, timestamped_transcript, filehash):
    """Create file-level nodes describing one whole video transcript.

    The full transcript becomes a single Document (tagged with the file hash
    and a recipe title extracted from the file name). It is then run through
    an ingestion pipeline: token splitting, title extraction, and
    recipe-query generation.

    Returns:
        The nodes produced by the ingestion pipeline.
    """
    transcript_document = Document(
        text=concatenated_transcript(timestamped_transcript),
        metadata={
            'filehash': filehash,
            'recipe_title': extract_recipe_title(filename),
        },
        # Keep the generated search queries out of what the LLM sees.
        excluded_llm_metadata_keys=["recipe_queries"],
    )

    transformations = [
        TokenTextSplitter(separator=" ", chunk_size=512),
        TitleExtractor(
            nodes=5,
            node_template="Context: {context_str}. Fetch all the major food entities or recipe title found in the context. Title: ",
            combine_template="{context_str}. Using ONLY above titles and food entities, create a food recipe title which is being prepared, the recipe title should not be more than 5 words? Title: ",
        ),
        RecipeSearchQueryExtractor(),
    ]

    ingestion = IngestionPipeline(transformations=transformations)
    return await ingestion.arun(documents=[transcript_document])

def generate_node_hash(node_data):
    """Return a SHA-256 hex digest of ``node_data``, usable as a stable node id.

    The data is serialised as JSON with sorted keys, so the digest does not
    depend on dictionary insertion order.
    """
    canonical_json = json.dumps(node_data, sort_keys=True)
    return hashlib.sha256(canonical_json.encode()).hexdigest()

class NodeEntityExtractor():
    """Callable that pulls FOOD entities out of a text with a SpanMarker model."""

    def __init__(self) -> None:
        # Minimum model score an entity needs to be kept.
        self.ENTITY_SCORE_THRESHOLD = 0.8
        self.entity_model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base-multinerd")
        if torch.cuda.is_available():
            self.entity_model.cuda()

    def __call__(self, text):
        """Return ``{'food_in_context': [...]}`` with the FOOD spans found in ``text``.

        The text is split into sentences before prediction; only entities
        labelled ``FOOD`` whose score exceeds the threshold are kept.
        """
        sentences = nltk.sent_tokenize(text)

        food_entities = []
        for prediction in self.entity_model.predict(sentences):
            # A prediction that is not already a list is wrapped so both
            # shapes can be iterated the same way.
            entities = prediction if isinstance(prediction, list) else [prediction]
            food_entities.extend(
                entity['span']
                for entity in entities
                if entity['label'] == 'FOOD' and entity['score'] > self.ENTITY_SCORE_THRESHOLD
            )

        return {'food_in_context': food_entities}

    def unload_from_cuda(self):
        """Drop the model and clear the CUDA cache; does nothing without CUDA."""
        if not torch.cuda.is_available():
            return
        del self.entity_model
        gc.collect()
        torch.cuda.empty_cache()

def get_pnodes(timestamped_transcript, filehash):
    """Create one TextNode per transcript paragraph.

    Each node carries ``start_time``, ``end_time`` and ``filehash`` metadata
    and gets a deterministic id hashed from that metadata plus its text. Every
    node is linked to the following one through a CHILD relationship. When
    CHUNK_ENTITY_EXTRACT is set, the result of the module-level
    ``entity_extractor`` is merged into each node's metadata afterwards (so it
    does not influence the node id).

    Returns:
        List of the created nodes in transcript order.
    """
    paragraph_nodes = []
    previous_node = None

    for paragraph in timestamped_transcript:
        paragraph_text, start_time, end_time = get_paragraph_info(paragraph)
        metadata = dict(start_time=start_time, end_time=end_time, filehash=filehash)

        current_node = TextNode(
            id_=generate_node_hash({**metadata, 'text': paragraph_text}),
            text=paragraph_text,
            metadata=metadata,
        )

        if previous_node:
            previous_node.relationships[NodeRelationship.CHILD] = RelatedNodeInfo(
                node_id=current_node.node_id
            )

        paragraph_nodes.append(current_node)
        previous_node = current_node

    if not CHUNK_ENTITY_EXTRACT:
        return paragraph_nodes

    for paragraph_node in paragraph_nodes:
        paragraph_node.metadata = {**paragraph_node.metadata, **entity_extractor(paragraph_node.text)}
    return paragraph_nodes
    


pnodes = []
file_nodes = []

file_transcript_list = get_file_transcript_list()
entity_extractor = NodeEntityExtractor()

for file_transcript in file_transcript_list:
    filename = file_transcript['filename']
    timestamped_transcript = file_transcript['transcript']
    
    print(f'processing file {filename}')

    filehash = get_file_hash(filename)

    file_nodes.extend(await get_file_nodes(file_transcript['filename'], timestamped_transcript, filehash))
    pnodes.extend(get_pnodes(timestamped_transcript, filehash))

entity_extractor.unload_from_cuda()
print(f"len of transcripts nodes {len(pnodes)}")
print(f"total no. of file nodes: containing description of each video file -> {len(file_nodes)}")

#### Extract frames from video corresponding to each text-chunk (TextNode). Save them in ./extracted_frames

In [None]:

async def run_frame_extract_command(node, semaphore):
    """Extract the I-frames of the video segment a paragraph node covers.

    Frames are written to ``./extracted_frames/<filehash>/<node id>/`` as
    ``%05d.jpg``. ``semaphore`` bounds how many ffmpeg processes run at once.

    Args:
        node: node whose metadata holds ``start_time``, ``end_time`` and
            ``filehash``.
        semaphore: asyncio semaphore shared by all extraction tasks.
    """
    async with semaphore:
        start_time = node.metadata['start_time']
        end_time = node.metadata['end_time']
        filehash = node.metadata['filehash']
        # NOTE(review): truncation to whole seconds means a segment shorter
        # than one second is passed to ffmpeg as '-t 0' -- confirm intended.
        time_duration = int(end_time - start_time)

        # The audio file is named '<video file>.wav'; dropping the 4-character
        # '.wav' suffix recovers the video file name inside VIDEO_DIR.
        video_path = os.path.join(VIDEO_DIR, hash2file_map[filehash][:-4])

        save_dir = f'./extracted_frames/{filehash}/{node.node_id}/'
        # exist_ok avoids a check-then-create race between concurrent tasks.
        os.makedirs(save_dir, exist_ok=True)

        command = [
            'ffmpeg', '-n', '-hide_banner', '-loglevel', 'error', '-ss', f'{start_time}',
            # Raw string: ffmpeg needs the literal backslash before the comma,
            # and '\,' in a normal string is an invalid escape sequence.
            '-i', f'{video_path}', '-vf', r"select='eq(pict_type\,I)'", '-t', f'{time_duration}',
            '-vsync', '0', '-frame_pts', 'true', f'{save_dir}%05d.jpg'
            ]

        process = await asyncio.create_subprocess_exec(*command)
        await process.wait()

semaphore = asyncio.Semaphore(10)
frame_extract_tasks = []
for node in pnodes:
    frame_extract_tasks.append(run_frame_extract_command(node, semaphore))

await asyncio.gather(*frame_extract_tasks)


In [None]:
def get_file_paths(directory):
    """Return the paths of all entries in ``directory``, sorted by name.

    Sorting makes the order deterministic; os.listdir order is arbitrary.
    """
    return [os.path.join(directory, entry) for entry in sorted(os.listdir(directory))]


image_nodes = []
# Maps an extracted frame's path to its absolute time (seconds) in the video.
framepath_absolute_timestamp_maping = {}

for node in pnodes:
    relevant_frames_dir = os.path.join('extracted_frames', node.metadata['filehash'], node.node_id)
    for frame_path in get_file_paths(relevant_frames_dir):
        # Each image node inherits the paragraph node's metadata plus its id,
        # so images can later be filtered by the text node they belong to.
        image_nodes.append(ImageNode(image_path=frame_path, metadata={**node.metadata, 'node_id': node.node_id}))
        # The frame file name (without extension) is the frame number relative
        # to the segment start; dividing by the frame rate gives seconds.
        framepath_absolute_timestamp_maping[frame_path] = float(node.metadata['start_time']) + float(os.path.basename(frame_path).split('.')[0])/VIDEO_FRAME_RATE

# Report a count instead of dumping every ImageNode into the cell output.
print(f"created {len(image_nodes)} image nodes from {len(pnodes)} text nodes")

In [None]:
from llama_index.vector_stores.postgres import PGVectorStore

# Create PGVectorStore instance.
# Connection settings are read from the environment (load_dotenv() has already
# run in the imports cell) so credentials need not live in the notebook. The
# fallbacks reproduce the previous hard-coded values, so an unchanged local
# setup keeps working.
vector_store = PGVectorStore.from_params(
    database=os.environ.get("PGVECTOR_DATABASE", "vector_db"),
    host=os.environ.get("PGVECTOR_HOST", "0.0.0.0"),
    password=os.environ.get("PGVECTOR_PASSWORD", "password"),
    port=int(os.environ.get("PGVECTOR_PORT", "5432")),
    user=os.environ.get("PGVECTOR_USER", "postgres"),
    table_name="image_collection",
    # NOTE(review): must match the image embedding size used by
    # MultiModalVectorStoreIndex -- confirm 512 for the configured embedder.
    embed_dim=512
)

# Image embeddings are persisted in Postgres through the vector store above.
image_storage_context = StorageContext.from_defaults(image_store=vector_store)
image_index = MultiModalVectorStoreIndex(
    image_nodes,
    storage_context=image_storage_context,
    show_progress=True
    )

# Text indexes are built without an explicit storage context.
file_index = VectorStoreIndex(file_nodes)
pindex = VectorStoreIndex(pnodes)

file_retriever = file_index.as_retriever()

In [None]:
# print([(node.metadata['document_title'], node.metadata['recipe_title']) for node in file_nodes])
# print([ node.text for node in file_nodes[0:1]])
# print([ node.text for node in file_nodes[1:2]])
# print([ node.text for node in file_nodes[2:3]])
# print([node.metadata['recipe_title'] for node in file_nodes])
# print(file_nodes)
# print(pnodes)
# print([node.node_id for node in pnodes])
# print(image_index)

In [None]:
def generate_markdown_template(json_response, steps_images, recipe_query, source=None):
    """Render a recipe as an HTML snippet for display in the notebook.

    Args:
        json_response: iterable of step dicts with ``"heading"`` and
            ``"instructions"`` keys.
        steps_images: mapping from step heading to a list of image paths;
            headings without an entry get no image row.
        recipe_query: the user's query, shown as the top heading.
        source: optional source label. When truthy it is shown under the
            heading and every image gets its timestamp looked up in
            ``framepath_absolute_timestamp_maping``.

    Returns:
        The HTML string. The query, headings, instruction sentences and image
        paths are HTML-escaped, since they come from the user or the LLM.
    """
    import html  # function-scope import: not provided by the notebook's import cell

    parts = [f"<h2>User query: {html.escape(str(recipe_query), quote=False)}</h2>"]
    # NOTE(review): the label reads "course" -- possibly a typo for "source";
    # left unchanged (and unescaped, in case callers pass markup). Confirm.
    if source:
        parts.append(f"<h4>course {source}</h4>")

    for step in json_response:
        heading = step["heading"]
        instructions = step["instructions"]

        parts.append(f"<h3>{html.escape(str(heading), quote=False)}</h3>")

        if heading in steps_images:
            images_html = ""
            for image_path in steps_images[heading]:
                if source:
                    frame_timestamp = framepath_absolute_timestamp_maping[image_path]
                    timestamp_element = f"<p>time: {round(frame_timestamp, 2)}sec</p>"
                else:
                    timestamp_element = ""

                # quote=True: the path sits inside a single-quoted attribute.
                safe_src = html.escape(str(image_path), quote=True)
                images_html += f"<div style='display: flex; flex-direction: column; align-items: flex-start;'><img src='{safe_src}' style='width:240px;'>{timestamp_element}</div>"
            parts.append(f"<div style='display: flex;'>{images_html}</div>\n")

        # One numbered paragraph per sentence of the step's instructions.
        for ind, sentence in enumerate(nltk.sent_tokenize(instructions)):
            parts.append(f"<p>{ind+1}. {html.escape(sentence, quote=False)}</p>\n")

    return "".join(parts)

def get_generated_image_url(prompt):
    """Generate one 1024x1024 standard-quality DALL-E 3 image and return its URL."""
    generation_request = dict(
        model="dall-e-3",
        prompt=prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )
    generated = openai_client4dalle.images.generate(**generation_request)
    return generated.data[0].url

def refine_with_mmlm(response, images):
    """Enrich each step's instructions using its images and a multi-modal LLM.

    Args:
        response: list of step dicts with ``'heading'`` and ``'instructions'``;
            it is modified in place.
        images: mapping from step heading to a list of image file paths.

    Returns:
        The same ``response`` list. Steps with no images -- an empty list or
        no entry at all -- are left untouched.
    """
    anthropic_mm_llm = AnthropicMultiModal(max_tokens=300)

    for step in response:
        # .get(): a heading missing from `images` means "no images", not a
        # KeyError (generate_markdown_template treats missing headings the
        # same way).
        step_image_paths = images.get(step['heading'])
        if not step_image_paths:
            continue

        image_documents = SimpleDirectoryReader(
            input_files=step_image_paths
        ).load_data()
        step['instructions'] = anthropic_mm_llm.complete(
            prompt=MMLM_REFINE_PROMPT.format(step=step['instructions']),
            image_documents=image_documents,
            ).text
    return response
    

def get_images4step(response, split_step=False):
    """Pick the frames that illustrate each recipe step of a query response.

    For every step (except 'Ingredients') the step text is matched against the
    response's own source nodes to find the text nodes it cites; the
    module-level `image_index` is then searched for images whose "node_id"
    metadata is one of those cited nodes.

    Args:
        response: query-engine response whose `.response` is a JSON list of
            step dicts ("heading", "instructions") and whose `.source_nodes`
            are the retrieved text nodes.
        split_step: when True, cited nodes are looked up sentence by sentence
            instead of once for the whole step text. Defaults to False.

    Returns:
        dict mapping step heading to a list of image paths ordered by
        (start_time metadata, image file name). 'Ingredients' always maps to
        an empty list.
    """
    response_json = json.loads(response.response)
    referenced_nodes = [sn.node for sn in response.source_nodes]
    referenced_node_index = VectorStoreIndex(referenced_nodes)
    # The query engine does not depend on the step, so build it once instead
    # of once per sentence.
    citation_engine = referenced_node_index.as_query_engine()

    all_step_image_paths = {'Ingredients': []}
    for step in response_json:

        if step['heading'] == 'Ingredients':
            continue

        instructions = step['instructions']
        sentences = nltk.sent_tokenize(instructions)
        citation_queries = sentences if split_step else [instructions]

        # IDs of the text nodes this step is grounded in (top 3 per query).
        step_citation_node_ids = set()
        for citation_query in citation_queries:
            for scored_node in citation_engine.retrieve(citation_query)[:3]:
                step_citation_node_ids.add(scored_node.node.node_id)

        # Passed as a sorted list so the filter value is deterministic.
        image_node_filters = MetadataFilters(
            filters=[
                MetadataFilter(key="node_id", value=sorted(step_citation_node_ids), operator=FilterOperator.IN),
            ],
        )

        image_retriever = image_index.as_retriever(
            similarity_top_k=20,
            filters=image_node_filters
        )

        # Image retrieval is always done per sentence, regardless of split_step.
        retrieved_image_nodes = []
        for sentence in sentences:
            for scored_image_node in image_retriever.retrieve(sentence):
                # The same frame can match several sentences; keep it once.
                if scored_image_node.node not in retrieved_image_nodes:
                    retrieved_image_nodes.append(scored_image_node.node)

        retrieved_image_nodes.sort(
            key=lambda x: (x.metadata['start_time'], os.path.basename(x.image_path))
        )
        all_step_image_paths[step['heading']] = [r.image_path for r in retrieved_image_nodes]

    return all_step_image_paths

def _query_until_valid(query_engine, query, schema, max_tries=10):
    """Query `query_engine` until the answer is JSON that validates against `schema`.

    Returns:
        `(response, parsed_json)` for the first valid answer, or `None` when
        `max_tries` answers in a row were malformed.
    """
    for _ in range(max_tries):
        response = query_engine.query(query)
        try:
            parsed = json.loads(response.response)
            jsonschema.validate(instance=parsed, schema=schema)
        except (TypeError, ValueError, jsonschema.ValidationError) as e:
            # JSONDecodeError is a ValueError; TypeError covers a non-string answer.
            print(f"Response does not follow the specified format:{e}")
            continue
        return response, parsed
    return None


def _display_recipe_with_generated_images(recipe_json, recipe_query):
    """Generate one image per non-'Ingredients' step and display the recipe."""
    recipe_title = recipe_json['recipe_title']
    steps = recipe_json['steps']
    step_images = {}
    for i, step in enumerate(steps):
        if step['heading'] == 'Ingredients':
            continue
        prompt = IMG_GENERATION_PROMPT_TMPL.format(recipe_title = recipe_title, steps=steps, step_index=i+1, step_instruction=step['instructions'])
        step_images[step['heading']] = [get_generated_image_url(prompt)]

    markdown = generate_markdown_template(steps, step_images, recipe_query)
    display(Markdown(markdown))


def generate_recipe(recipe_query: str) -> None:
    """Answer a recipe request by letting an OpenAI agent pick one of three tools.

    The three nested functions are registered as agent tools:
    `generate_known_dish_recipe`, `generate_modified_recipe` and
    `generate_custom_dish_recipe`. Each displays its result as Markdown and
    returns None. Relies on module-level `file_retriever`, `pindex`, `pnodes`,
    `hash2file_map`, the prompt templates and the response schemas.

    NOTE: the first string literal of each nested function is its docstring,
    which `FunctionTool.from_defaults` presumably exposes to the agent as the
    tool description (confirm against the LlamaIndex docs); it is therefore
    kept verbatim.
    """
    unsupported_query_message = "## Jaques can only help with french food.. Please submit another query"
    invalid_response_message = "## Could not generate a well-formed recipe. Please try again"

    def generate_modified_recipe(recipe_query: str, modification_request: str) -> None:
        """If user request to do some sort of modification over some recipe, this recive:1) recipe which user mentioned 2) the modification request for that recipe"""

        if modification_request is None:
            # Nothing to modify: serve the plain recipe and stop, instead of
            # falling through and prompting with modification_request=None.
            generate_known_dish_recipe(recipe_query)
            return

        file_nodes = file_retriever.retrieve(recipe_query)
        if not file_nodes:
            display(Markdown(unsupported_query_message))
            return

        retrieved_file_hash = file_nodes[0].metadata["filehash"]
        recipe_title = file_nodes[0].metadata['recipe_title']

        # Restrict retrieval to chunks of the single best-matching file.
        filters = MetadataFilters(filters=[ExactMatchFilter(key="filehash", value=retrieved_file_hash)])
        retriever = VectorIndexRetriever(
            index=pindex,
            similarity_top_k=20,
            filters=filters
        )

        query_engine = RetrieverQueryEngine.from_args(retriever=retriever, text_qa_template=MODIFY_RECIPE_QA_PROMPT_TMPL.partial_format(recipe_title=recipe_title, modification_request=modification_request))

        result = _query_until_valid(query_engine, 'A step of food recipe', UNKNOWN_RECIPE_RESPONSE_SCHEMA)
        if result is None:
            display(Markdown(invalid_response_message))
            return

        _, recipe_json = result
        _display_recipe_with_generated_images(recipe_json, recipe_query)

    def generate_custom_dish_recipe(recipe_query: str) -> None:
        """if query does not specify any Food or cuisine, or if user query want to generate new/novel recipe"""
        # displays custom food recipe in markdown format return nothing

        node_ids = [n.node_id for n in pnodes]
        # Sample at most 10 nodes; random.sample raises if asked for more
        # items than the population holds.
        picked_node_ids = random.sample(node_ids, min(10, len(node_ids)))

        retriever = VectorIndexRetriever(
            index=pindex,
            similarity_top_k=50,
            node_ids=picked_node_ids
        )
        query_engine = RetrieverQueryEngine.from_args(retriever=retriever, text_qa_template=UNKNOWN_RECIPE_QA_PROMPT_TMPL)

        result = _query_until_valid(query_engine, 'A step of food recipe', UNKNOWN_RECIPE_RESPONSE_SCHEMA)
        if result is None:
            display(Markdown(invalid_response_message))
            return

        _, recipe_json = result
        _display_recipe_with_generated_images(recipe_json, recipe_query)

    def generate_known_dish_recipe(recipe_query: str) -> None:
        """In case any Food/Ingrident/cuisine is specified in query"""
        # displays food recipe and instructions for food or dish requested by user clearly
        file_nodes = file_retriever.retrieve(recipe_query)

        # Reject queries with no match or a best match below the 0.80 score threshold.
        if not file_nodes or file_nodes[0].score < 0.80:
            display(Markdown(unsupported_query_message))
            return

        retrieved_file_hash = file_nodes[0].metadata["filehash"]
        recipe_title = file_nodes[0].metadata['recipe_title']

        # Restrict retrieval to chunks of the single best-matching file.
        filters = MetadataFilters(filters=[ExactMatchFilter(key="filehash", value=retrieved_file_hash)])
        retriever = VectorIndexRetriever(
            index=pindex,
            similarity_top_k=20,
            filters=filters
        )

        query_engine = RetrieverQueryEngine.from_args(retriever=retriever, text_qa_template=KNOWN_RECIPE_QA_PROMPT_TMPL.partial_format(recipe_title=recipe_title))

        # Bounded retries: an endlessly malformed answer must not loop forever.
        result = _query_until_valid(
            query_engine,
            'text containing instructions for food preparation',
            KNOWN_RECIPE_RESPONSE_SCHEMA,
        )
        if result is None:
            display(Markdown(invalid_response_message))
            return

        response, recipe_json = result
        steps_images = get_images4step(response)

        refined_response = refine_with_mmlm(recipe_json, steps_images)
        # [:-4] drops the last four characters of the file name (presumably a
        # 3-letter extension such as ".mp4" - confirm against hash2file_map).
        markdown = generate_markdown_template(refined_response, steps_images, recipe_query, hash2file_map[retrieved_file_hash][:-4])

        display(Markdown(markdown))

    Custom_FoodRecipe_tool = FunctionTool.from_defaults(fn=generate_custom_dish_recipe)
    Known_FoodRecipe_tool = FunctionTool.from_defaults(fn=generate_known_dish_recipe)
    Modify_FoodRecipe_tool = FunctionTool.from_defaults(fn=generate_modified_recipe)

    llm = OpenAI(model="gpt-3.5-turbo-0613")
    agent = OpenAIAgent.from_tools([Known_FoodRecipe_tool, Modify_FoodRecipe_tool, Custom_FoodRecipe_tool], llm=llm, verbose=True)

    # The tools display their own output; the agent's chat reply is not used.
    agent.chat(
        recipe_query, tool_choice="auto"
    )

    return



In [None]:
# Run the agent end to end with a sample query that names a specific dish;
# the selected tool displays the resulting recipe as Markdown.
generate_recipe("Could you share recipe for Roasted Chicken")

In [None]:
# from typing import Any, List
# from InstructorEmbedding import INSTRUCTOR

# from llama_index.core.bridge.pydantic import PrivateAttr
# from llama_index.core.embeddings import BaseEmbedding


# class InstructorEmbeddings(BaseEmbedding):
#     _model: INSTRUCTOR = PrivateAttr()
#     _instruction: str = PrivateAttr()

#     def __init__(
#         self,
#         instructor_model_name: str = "hkunlp/instructor-base",
#         instruction: str = "Represent a food receipe step for semantic search:",
#         **kwargs: Any,
#     ) -> None:
#         self._model = INSTRUCTOR(instructor_model_name, device='cuda')
#         self._instruction = instruction
#         super().__init__(**kwargs)

#     @classmethod
#     def class_name(cls) -> str:
#         return "instructor"

#     async def _aget_query_embedding(self, query: str) -> List[float]:
#         return self._get_query_embedding(query)

#     async def _aget_text_embedding(self, text: str) -> List[float]:
#         return self._get_text_embedding(text)

#     def _get_query_embedding(self, query: str) -> List[float]:
#         embeddings = self._model.encode([[self._instruction, query]])
#         return embeddings[0]

#     def _get_text_embedding(self, text: str) -> List[float]:
#         embeddings = self._model.encode([[self._instruction, text]])
#         return embeddings[0]

#     def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
#         embeddings = self._model.encode(
#             [[self._instruction, text] for text in texts]
#         )
#         print(embeddings)
#         return embeddings