In [None]:
from typing import Annotated, List, Tuple, Union
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
import threading
import base64
import os

@tool
def get_video_description(
    question: str, path: str
    ):
    """Useful for descripting scenes and answer questions from a sequence of frames. Need a question and the path to the frames."""
    # NOTE: the docstring above is used by @tool as the tool description, so
    # it is kept verbatim; implementation notes live in comments instead.
    #
    # question: the question to ask about the frames.
    # path:     directory containing the frame image files.
    # Returns whatever ``llm.invoke`` returns for the assembled prompt.

    def _frame_order(filename):
        # split_video in this notebook writes frames as "<index>.jpg".
        # os.listdir() gives no ordering guarantee and a plain string sort
        # would put "10.jpg" before "2.jpg", so order numerically. Names
        # without a numeric stem are kept, sorted alphabetically at the end.
        stem = os.path.splitext(filename)[0]
        if stem.isdecimal():
            return (0, int(stem), filename)
        return (1, 0, filename)

    frame_files = sorted(
        (
            name
            for name in os.listdir(path)
            # Skip sub-directories: open() on them would raise.
            if os.path.isfile(os.path.join(path, name))
        ),
        key=_frame_order,
    )

    # Only every 50th frame is sent to the model, so sample the file list
    # first and read/encode just those files instead of the whole folder.
    base64Frames = []
    for filename in frame_files[0::50]:
        with open(os.path.join(path, filename), "rb") as image_file:
            base64Frames.append(base64.b64encode(image_file.read()).decode("utf-8"))

    llm = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=1028)
    PROMPT_MESSAGES = [
        {
            "role": "system",
            "content": "You are provided with a sequence of frames from a video in base64 format. Your task is to analyze that sequence and answer questions related to them.",
        },
        {
            "role": "user",
            "content": [
                question,
                *({"image": frame, "resize": 768} for frame in base64Frames),
            ],
        },
    ]
    response = llm.invoke(PROMPT_MESSAGES)
    return response

In [None]:
import cv2
import os

@tool
def split_video(
    video_path: str
    ):
    """Split a video into 2 sets of frames, save them to analysts' folders and return the names of the folders."""
    # NOTE: the docstring above is used by @tool as the tool description, so
    # it is kept verbatim; implementation notes live in comments instead.
    #
    # video_path: path of the video file to decode.
    # Returns ["analyst1_frames", "analyst2_frames"]; the first folder holds
    # the first half of the frames, the second folder the remainder, each
    # written as "<index>.jpg" with the index restarting at 0 per folder.
    # Raises ValueError when the video cannot be opened.
    folders = ["analyst1_frames", "analyst2_frames"]

    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            # Fail loudly instead of handing the analysts empty folders.
            raise ValueError(f"Could not open video: {video_path}")
        frames = []
        success, image = vidcap.read()
        while success:
            frames.append(image)
            success, image = vidcap.read()
    finally:
        # Always free the decoder/file handle, even if reading fails.
        vidcap.release()

    for folder in folders:
        os.makedirs(folder, exist_ok=True)
        # Drop frames left by an earlier run; otherwise a shorter video would
        # leave stale higher-numbered frames mixed in with the new ones.
        for name in os.listdir(folder):
            stem, ext = os.path.splitext(name)
            if ext == ".jpg" and stem.isdecimal():
                os.remove(os.path.join(folder, name))

    # Divide the frames into two contiguous halves and write each to its folder.
    midpoint = len(frames) // 2
    halves = (frames[:midpoint], frames[midpoint:])
    for folder, half in zip(folders, halves):
        for i, frame in enumerate(half):
            cv2.imwrite(f"{folder}/{i}.jpg", frame)
    return folders

In [None]:
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

def create_agent(
    llm: ChatOpenAI, tools: list, system_prompt: str
):
    """Build an ``AgentExecutor`` for one worker of the graph.

    Args:
        llm: Chat model that drives the agent.
        tools: Tools the agent may call; the same list is given to the
            executor so it can run the calls the agent requests.
        system_prompt: Text of the leading system message. It is passed to
            ``ChatPromptTemplate`` as a template string, so literal curly
            braces in it would presumably be read as template variables
            (TODO confirm before putting braces in a prompt).

    Returns:
        An ``AgentExecutor`` whose prompt consists of the system message
        followed by the ``messages`` and ``agent_scratchpad`` placeholders.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            # Conversation so far, taken from the "messages" input key.
            MessagesPlaceholder(variable_name="messages"),
            # Slot for the agent's intermediate tool calls and results.
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )
    agent = create_openai_tools_agent(llm, tools, prompt)
    return AgentExecutor(agent=agent, tools=tools)

In [None]:
def agent_node(state, agent, name):
    """Invoke ``agent`` on ``state`` and wrap its output as a named message.

    Args:
        state: Input handed unchanged to ``agent.invoke``.
        agent: Object with an ``invoke`` method whose result has an
            ``"output"`` entry.
        name: Value stored in the ``name`` field of the produced message.

    Returns:
        ``{"messages": [message]}`` where ``message`` is a ``HumanMessage``
        carrying the agent's output and tagged with ``name``.
    """
    agent_output = agent.invoke(state)["output"]
    reply = HumanMessage(content=agent_output, name=name)
    return {"messages": [reply]}

In [None]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_openai_functions_agent

members = ["analyst_1", "analyst_2", "supervisor"]

# System prompt for the supervisor agent. The lines are joined with explicit
# newlines: adjacent string literals concatenate with nothing in between, which
# would otherwise fuse headings and bullets together
# ("...as follows:1. Video Division:- You are provided...").
system_prompt = "\n".join(
    [
        "Your role is to oversee a task involving the analysis of a video. Your responsibilities are as follows:",
        "",
        "1. Video Division:",
        "- You are provided with a video file. Your first task is to break this video into two equal parts. You are provided split_video tool to help you with this task.",
        "- Ensure that the division is done in such a way that the two parts are contiguous and cover the entire video without overlapping.",
        "- The next step begins once the video has been successfully divided.",
        "",
        "2. Assigning Video Parts to Agents:",
        "- You have two agents under your supervision, analyst_1 and analyst_2.",
        "- Assign the first half of the video to analyst_1 and the second half to analyst_2.",
        "- Provide each agent with their respective video part and instruct them to analyze their assigned section.",
        "",
        "3. Gathering and Summarizing Responses:",
        "- Once both agents have completed their analysis, they will provide you with their findings.",
        "- Your task is to compile these findings into a comprehensive summary.",
        "- The summary should include key points from each agent's analysis, highlighting any significant observations or conclusions.",
        "",
        "4. Reporting:",
        "- Prepare a final report that includes the following:",
        "    a. A brief description of each video part.",
        "    b. The individual summaries from analyst_1 and analyst_2.",
        "    c. Your comprehensive summary combining insights from both agents.",
        "- Ensure that the report is clear, concise, and provides a complete understanding of the video's content as analyzed by the agents.",
        "",
        "Remember, your role is crucial in ensuring that the video is thoroughly analyzed and the findings are accurately reported. Maintain clear communication with the agents and ensure that the workflow is followed as described.",
        "When finished, respond with FINISH.",
    ]
)
# Routing choices: the terminal "FINISH" marker followed by every member name.
options = ["FINISH", *members]
# OpenAI function-calling schema named "route" with a single required field,
# "next", whose value must be one of ``options``. In the code visible here it
# is only referenced from commented-out lines.
function_def = dict(
    name="route",
    description="Select the next role.",
    parameters=dict(
        title="routeSchema",
        type="object",
        properties=dict(
            next=dict(
                title="Next",
                anyOf=[dict(enum=options)],
            ),
        ),
        required=["next"],
    ),
)
# split_video_def = {
#     "name": "split_video",
#     "description": "Split a video into 2 sets of frames, save them to analysts' folders, and return the names of the folders.",
#     "parameters": {
#         "title": "video_path",
#         "type": "object",
#         "description": "The file path of the video to be split.",
#         "properties": {
#             "video_path": {
#                 "type": "string",
#                 "description": "The file path of the video to be split."
#             }
#         },
#     },
#     "return": {
#         "type": "list",
#         "description": "A list containing the names of the two folders where the frames are saved."
#     },
#     "required": ["video_path"]
# }

# def next_agent(state):
#     llm = ChatOpenAI(model="gpt-4-1106-preview")
#     options = ["FINISH", "analyst_1", "analyst_2", "supervisor"]
#     prompt = ChatPromptTemplate.from_messages(
#         [
#             (
#                 "system",
#                 "Given the conversation above, who should act next?"
#                 " Or should we FINISH? Select one of: {options}",
#             ),
#             ("user", "{input}"),
#             MessagesPlaceholder(variable_name="messages"),
#             MessagesPlaceholder(variable_name="agent_scratchpad"),
#         ]
#     ).partial(options=str(options))
#     result = llm.invoke(state, prompt=prompt)
#     return {"messages": [HumanMessage(content=result["output"], name="supervisor")]}
    

# Routing prompt: supervisor instructions, the conversation so far, a question
# asking who should act next, the user input and the agent scratchpad.
# NOTE(review): the active code in this cell builds the supervisor with
# create_agent(llm, tools, system_prompt) and does not reference this template.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        # The conversation is inserted once, before the question that refers
        # to "the conversation above"; a second "messages" placeholder after
        # the user input would render the whole history twice.
        MessagesPlaceholder(variable_name="messages"),
        (
            "system",
            "Given the conversation above, who should act next?"
            " Or should we FINISH? Select one of: {options}",
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
    # Only {options} is pre-filled: the template has no {members} variable.
).partial(options=str(options))

# Chat model used for the supervisor; the same ``llm`` object is also handed
# to the analyst agents created further down.
llm = ChatOpenAI(model="gpt-4-1106-preview")
# The supervisor's only tool is the video splitter.
tools = [split_video]
# supervisor = create_openai_functions_agent(llm, tools=[function_def, split_video_def], prompt=prompt)
# supervisor = AgentExecutor(agent=supervisor, tools=tools)
supervisor = create_agent(llm=llm, tools=tools, system_prompt=system_prompt)

# supervisor_chain = (
#     prompt
#     | llm.bind_tools(tools=[split_video_def, function_def])
# )

In [None]:
import operator
from typing import Annotated, Any, Dict, List, Optional, Sequence, TypedDict
import functools

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.graph import StateGraph, END


# The agent state is the input to each node in the graph
class AgentState(TypedDict):
    """State schema passed to ``StateGraph``; every node receives it as input."""

    # Conversation history. The ``operator.add`` annotation tells the graph
    # that messages returned by a node are appended to the current list
    # rather than replacing it.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Path of the video under analysis; supplied in the initial graph input.
    # NOTE(review): no code visible here reads this key directly — confirm it
    # is still needed.
    video_path: str
    # tool_call: Optional[Dict[str, Any]]
    # NOTE(review): no 'next' field is declared on this state; routing between
    # nodes is decided by the router() function passed to the conditional edges.

# Instructions shared by both analyst agents; each agent appends a sentence
# naming the folder that holds its half of the frames.
prompt_analysts = (
    "You are a helpful AI assistant that answers questions about a video. You are provided with a list of frames and a question. Your task is to analyze the video and provide a response to the question."
    " Use the provided tool to see the video and progress towards answering the question."
)

# The appended sentence starts with a space so it does not fuse with the end
# of the shared prompt ("...the question.You have access...").
analyst_1 = create_agent(
    llm,
    [get_video_description],
    prompt_analysts + " You have access to the first part of the movie stored as frames. It is stored in the analyst1_frames folder.",
)
analyst_1_node = functools.partial(agent_node, agent=analyst_1, name="analyst_1")

analyst_2 = create_agent(
    llm,
    [get_video_description],
    prompt_analysts + " You have access to the second part of the movie stored as frames. It is stored in the analyst2_frames folder.",
)
analyst_2_node = functools.partial(agent_node, agent=analyst_2, name="analyst_2")

supervisor_node = functools.partial(agent_node, agent=supervisor, name="supervisor")

# Register one graph node per agent; edges are added in a later cell.
workflow = StateGraph(AgentState)
workflow.add_node("analyst_1", analyst_1_node)
workflow.add_node("analyst_2", analyst_2_node)
workflow.add_node("supervisor", supervisor_node)

In [None]:
def router(state):
    """Choose the next graph node from the names of the latest messages.

    Args:
        state: Mapping with a ``"messages"`` list; each message exposes
            ``name`` and ``content`` attributes.

    Returns:
        One of ``"supervisor"``, ``"analyst_1"``, ``"analyst_2"``,
        ``"FINISH"`` or ``"error"``:

        * an analyst's message always goes back to ``"supervisor"``;
        * a supervisor message containing ``"FINAL ANSWER"`` or ``"FINISH"``
          yields ``"FINISH"``;
        * any other supervisor message is routed by who spoke before it:
          user -> supervisor, analyst_1 -> analyst_2, analyst_2 -> analyst_1,
          supervisor -> analyst_1;
        * anything else yields ``"error"``.
    """
    messages = state["messages"]
    last_message = messages[-1]
    sender = last_message.name

    if sender in ("analyst_1", "analyst_2"):
        return "supervisor"
    if sender != "supervisor":
        return "error"

    # The supervisor's system prompt asks it to respond with FINISH, so accept
    # that marker as well as FINAL ANSWER. The route is reported as "FINISH"
    # because that is the key the conditional-edge maps associate with END.
    content = last_message.content
    if isinstance(content, str) and ("FINAL ANSWER" in content or "FINISH" in content):
        return "FINISH"

    # Guard against a state holding a single message before looking back.
    previous_sender = messages[-2].name if len(messages) > 1 else None
    next_after_supervisor = {
        "user": "supervisor",
        "analyst_1": "analyst_2",
        "analyst_2": "analyst_1",
        "supervisor": "analyst_1",
    }
    return next_after_supervisor.get(previous_sender, "error")

In [None]:
# All routing goes through router(). No unconditional edges are added: an
# unconditional member -> "supervisor" edge for every entry of ``members``
# would include supervisor -> supervisor, which re-triggers the supervisor
# after every step so the graph could never stop. The analysts' return path
# to the supervisor is already covered by their conditional edges.
member_routes = {k: k for k in members}

# The supervisor may additionally end the run. Both spellings of the terminal
# route are mapped to END so termination does not hinge on which one the
# routing function emits.
conditional_map = {**member_routes, "FINISH": END, "end": END}

workflow.add_conditional_edges("supervisor", router, conditional_map)
for analyst in ("analyst_1", "analyst_2"):
    workflow.add_conditional_edges(analyst, router, member_routes)

workflow.set_entry_point("supervisor")

graph = workflow.compile()

In [None]:
# Initial graph input: the user's request plus the path of the video.
initial_state = {
    "messages": [
        HumanMessage(content="Write a brief report for the video. It's path is pizza.mp4.", name="user")
    ],
    "video_path": "pizza.mp4",
}
# Cap the number of graph steps for this run.
run_config = {"recursion_limit": 100}

# Print every streamed update except the final "__end__" one.
for s in graph.stream(initial_state, run_config):
    if "__end__" in s:
        continue
    print(s)
    print("----")