# Video analysis


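## Setup
 - Enable auto-reload so that edits to imported modules are picked up without restarting the kernel
 - Add the parent directory (`../..`) to the import path so that `utils` can be imported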

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys, os
# Append the directory two levels above the current working directory to the
# import path; the `utils` package imported in later cells is resolved from it.
module_path = "../.."
sys.path.append(os.path.abspath(module_path))

## 1. Create Bedrock client

In [None]:
from pprint import pprint
from termcolor import colored
from utils import bedrock
from utils.bedrock import bedrock_info

### ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----
- os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
- os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
- os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
- os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."

In [None]:
# Create the Bedrock client from optional environment overrides; a variable
# that is not set is forwarded as None.
boto3_bedrock = bedrock.get_bedrock_client(
    region=os.environ.get("AWS_DEFAULT_REGION"),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL"),
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE"),
)

# Print whatever bedrock_info.get_list_fm_models returns under a green header.
print(colored("\n== FM lists ==", "green"))
pprint(bedrock_info.get_list_fm_models(verbose=False))

## 2. Define the LLM

In [None]:
from utils.bedrock import bedrock_model
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [None]:
# Model handle used by every later cell. Streaming is enabled and the stdout
# callback handler is attached to it.
llm = bedrock_model(
    # Alternative model:
    # model_id=bedrock_info.get_model_id(model_name="Claude-V3-5-Sonnet"),
    model_id=bedrock_info.get_model_id(model_name="Nova-Pro-CRI"),
    bedrock_client=boto3_bedrock,
    stream=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    inference_config=dict(
        maxTokens=1024,
        stopSequences=["\n\nHuman"],
        temperature=0.01,
        # topP=...,
    ),
    # additional_model_request_fields={"top_k": 200},
)

## 3. Analysis

### 3.1 Define the LLM caller

In [None]:
from textwrap import dedent
from utils.bedrock import bedrock_utils, bedrock_chain

class llm_call():
    """Send a conversation through the Bedrock call chain.

    The chain pipes ``bedrock_utils.converse_api`` into
    ``bedrock_utils.outputparser``; both are project helpers defined outside
    this notebook.
    """

    # Roles accepted by ``_message_format``.
    _SUPPORTED_ROLES = ("user", "assistant")

    def __init__(self, **kwargs):
        """Store the model handle and build the call chain.

        Keyword Args:
            llm: Model object forwarded to the chain on every call (required).
            verbose: Flag forwarded to the chain on every call. Defaults to
                False.

        Raises:
            KeyError: If ``llm`` is not supplied.
        """
        self.llm = kwargs["llm"]
        self.verbose = kwargs.get("verbose", False)
        self.chain = bedrock_chain(bedrock_utils.converse_api) | bedrock_chain(bedrock_utils.outputparser)

    def _message_format(self, role, message):
        """Build a single-text message dict for ``role``.

        Args:
            role: Either "user" or "assistant".
            message: Message text; common leading indentation is removed
                with ``textwrap.dedent``.

        Returns:
            ``{"role": role, "content": [{"text": <dedented message>}]}``.

        Raises:
            ValueError: If ``role`` is not one of the supported roles.
        """
        # Fail loudly instead of falling through to an unbound local variable.
        if role not in self._SUPPORTED_ROLES:
            raise ValueError(
                f"Unsupported role {role!r}; expected one of {self._SUPPORTED_ROLES}"
            )

        return {
            "role": role,
            "content": [{"text": dedent(message)}],
        }

    def invoke(self, **kwargs):
        """Run the chain once and append the reply to the conversation.

        Keyword Args:
            messages: List of message dicts (required). It is mutated in
                place: the assistant reply is appended to it.
            system_prompts: Optional system prompts forwarded to the chain.
                Defaults to None.

        Returns:
            A ``(response, messages)`` tuple, where ``response`` is the chain
            output (its ``"text"`` entry is read here) and ``messages`` is
            the same list object that was passed in.

        Raises:
            KeyError: If ``messages`` is not supplied.
        """
        system_prompts = kwargs.get("system_prompts", None)
        messages = kwargs["messages"]

        # The chain is called with the arguments of its first stage, which is
        # converse_api here.
        response = self.chain(
            llm=self.llm,
            system_prompts=system_prompts,
            messages=messages,
            verbose=self.verbose
        )

        ai_message = self._message_format(role="assistant", message=response["text"])
        messages.append(ai_message)
        return response, messages

In [None]:
# verbose=True is forwarded to the chain to show token usage; video_analyzer
# also checks this flag before printing its cost summary.
llm_caller = llm_call(llm=llm, verbose=True)

### 3.2 Define the video analyzer

In [None]:
def _get_message_from_string(role, string, video_format="mp4", imgs=None, videos=None, img_format="png"):
    """Build one conversation message holding optional media plus text.

    Content entries are added in a fixed order: all images, then all videos,
    then the text entry.

    Args:
        role: Value stored under the message's "role" key.
        string: Prompt text; common leading indentation is removed with
            ``textwrap.dedent``.
        video_format: Format label attached to every video entry.
        imgs: Optional iterable of image payloads (stored under "bytes").
        videos: Optional iterable of video payloads (stored under "bytes").
        img_format: Format label attached to every image entry. Defaults to
            "png", which was previously the hard-coded value.

    Returns:
        A dict with the keys "role" and "content".
    """
    content = []

    if imgs is not None:
        content.extend(
            {"image": {"format": img_format, "source": {"bytes": img}}}
            for img in imgs
        )

    if videos is not None:
        content.extend(
            {"video": {"format": video_format, "source": {"bytes": video}}}
            for video in videos
        )

    # The text entry always comes last, after any media entries.
    content.append({"text": dedent(string)})

    return {"role": role, "content": content}

def _get_price(tokens, input_price_per_1k=0.0008, output_price_per_1k=0.0032):
    """Print a cost summary for one model call.

    Args:
        tokens: Mapping with integer counts under "input" and "output".
        input_price_per_1k: USD price per 1,000 input tokens.
        output_price_per_1k: USD price per 1,000 output tokens.

    The default rates are the values that were previously hard-coded here.
    NOTE(review): they presumably match the model configured earlier in the
    notebook; pass explicit rates when using a different model and verify
    them against current pricing.
    """
    input_price = tokens["input"] * input_price_per_1k / 1000
    output_price = tokens["output"] * output_price_per_1k / 1000
    total = input_price + output_price

    print ("======= Cost Calculator =======")
    print (f'Token Usage, input: {tokens["input"]}, Output: {tokens["output"]}')
    print (f'Price: {total} USD')
    print ("===============================")

In [None]:
%%time

def video_analyzer(**kwargs):
    """Send one video file to the model together with an analysis prompt.

    The file is read fully into memory and attached to a single user message;
    the call goes through the module-level ``llm_caller``. When
    ``llm_caller.verbose`` is true, the token usage reported in the response
    is passed to ``_get_price`` to print a cost summary.

    Keyword Args:
        video_path: Path of the video file (required). Its extension decides
            the format label sent with the video.

    Returns:
        None.

    Raises:
        KeyError: If ``video_path`` is not supplied.
        OSError: If the file cannot be opened or read.
    """
    video_path = kwargs["video_path"]

    # Derive the format label from the file extension. Lower-case it so that
    # "clip.MP4" yields "mp4", and map ".3gp" to the "three_gp" label used by
    # the Bedrock Converse API for that container.
    extension = os.path.splitext(video_path)[1][1:].lower()
    video_format = {"3gp": "three_gp"}.get(extension, extension)
    with open(video_path, 'rb') as file:
        video_bytes = file.read()

    system_prompts = dedent(
        '''

        You are a professional video analyst with expertise in behavioral analysis, security assessment, and pattern recognition.
        Your role is to provide detailed, structured analysis of video content while maintaining objectivity and attention to security concerns.

        Model Instructions:
        - Watch the entire video sequence to understand the complete context
        - Focus on significant events and behavioral patterns throughout the video
        - Pay special attention to security risks and anomalous behaviors
        - Maintain objectivity and privacy considerations
        - DO NOT make assumptions about unclear events or behaviors
        - DO NOT include personally identifiable information
        - Provide output in Korean

        Output Schema:
        {
            "summary": "Comprehensive explanation of the entire sequence",
            "key_events": [
                {
                    "description": "Description of significant events",
                    "significance": "Event importance level (HIGH/MEDIUM/LOW)"
                }
            ],
            "objects_involved": {
                "people": ["Number of individuals identified and their roles"],
                "items": ["Key objects involved in the scene"]
            },
            "analysis": {
                "pattern": "Observed behavioral patterns",
                "anomalies": ["Unusual activities or notable irregularities"],
                "risk_assessment": "Evaluation of potential risks"
            }
        }

        Analysis Guidelines:
        - Review the video chronologically to understand the complete context
        - When evaluating event significance, consider:
           - Security risks
           - Abnormal behavior patterns
           - Potential threats to facilities or property
        - Track consistent patterns of behavior throughout the video
        - Identify and assess any security concerns
        - Monitor interactions between people and objects
        - Note temporal patterns and their variations
        - Provide clear and objective descriptions

        Important Considerations:
        - Exclude routine movements and normal activities from key events
        - Identify and classify recurring events as patterns
        - Compare activities against typical behavioral patterns
        - Maintain privacy by excluding personally identifiable information
        - Report only observed facts, avoiding speculation about unclear situations
        - Highlight any detected potential risks
        - Focus on contextually significant details
        - Track individual continuity through consistent behavior patterns and appearance
        - Consider the temporal flow of events rather than isolated moments
        - Assess the overall security implications of observed behaviors

        Remember to evaluate:
        - Patterns of movement and behavior
        - Interactions between individuals
        - Use or manipulation of objects
        - Temporal sequence of events
        - Potential security risks
        - Anomalous behaviors
        - Environmental factors

        This analysis should provide a comprehensive assessment of the video content while maintaining focus on security considerations and behavioral patterns.

        '''
    )
    user_prompts = dedent(
        '''
        Describe given video in Korean.

        '''
    )
    # Convert the plain-text system prompt with the project helper before it
    # is handed to the caller.
    system_prompts = bedrock_utils.get_system_prompt(system_prompts=system_prompts)

    messages = [
        _get_message_from_string(
            role="user",
            string=user_prompts,
            video_format=video_format,
            videos=[video_bytes]
        )
    ]

    # Call LLM. The updated conversation is not needed for a one-shot request.
    resp, _ = llm_caller.invoke(messages=messages, system_prompts=system_prompts)

    # Token usage is only read when the caller was created with verbose=True.
    if llm_caller.verbose:
        usage = resp["token_usage"]
        tokens = {
            "input": usage["inputTokens"],
            "output": usage["outputTokens"],
            "total": usage["totalTokens"],
        }
        _get_price(tokens)
            

In [None]:
%%time
# Analyze the bundled sample clip.
video_analyzer(video_path="./video/video_sample_4.mp4")