In [None]:
from langchain.llms import VertexAI as langchain_vertexai

In [None]:
import tiktoken
from langchain_google_community import BigQueryLoader
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
#from langchain.llms import VertexAI as langchain_vertexai
from langchain_google_vertexai import VertexAI as langchain_vertexai
from langchain_core.prompts import PromptTemplate
from pathlib import Path as p
import pandas as pd
from vertexai.preview.generative_models import (
    Content,
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    Image,
    Part,
    HarmBlockThreshold,
    HarmCategory,
)


# LangChain LLM wrapper handed to the summarize chains in summarize_docs().
vertex_llm_text = langchain_vertexai(
    model_name="gemini-1.5-pro-002",
)
# Vertex AI SDK model object called directly by get_translation().
generative_multimodal_model = GenerativeModel(
    "gemini-1.5-pro-002",
)


def estimate_token_length(text, model="gpt2"):
    """Estimates the token length of a given text using a tiktoken encoding.

      Args:
        text: The input text. ``None`` or an empty string counts as 0 tokens.
        model: Name of the tiktoken encoding handed to
          ``tiktoken.get_encoding`` (default: "gpt2").

      Returns:
        The estimated number of tokens.
      """
    # Nothing to tokenize; this also avoids handing None to the encoder.
    if not text:
        return 0

    enc = tiktoken.get_encoding(model)

    # disallowed_special=() makes special-token markers such as
    # "<|endoftext|>" encode as ordinary text. With tiktoken's default
    # setting the call raises ValueError when the input contains one.
    return len(enc.encode(text, disallowed_special=()))

def get_data(source_query_str: str=None,metadata_columns: list[str]=None,page_content_columns: list[str]=None, project_id: str=None , return_text: bool=True):
    """Load data from BigQuery as LangChain documents.

      Args:
        str source_query_str:  The query string to fetch the data from bigquery
        list[str] metadata_columns:  list of metadata column names
        list[str] page_content_columns:  list of content column names
        str project_id: project id
        bool return_text: when True, also return the page content of all
            documents joined with newlines; when False that text is ""
      Returns:
          tuple: (documents, text) where documents is the list returned by
          ``BigQueryLoader.load()`` and text is the joined page content.

      """
    loader = BigQueryLoader(
        query=source_query_str,
        project=project_id,
        metadata_columns=metadata_columns,
        page_content_columns=page_content_columns,
    )
    documents = list(loader.load())

    if not return_text:
        return documents, ""

    # The page content is expected to carry a "<column>:" label. Strip the
    # label of the column that was actually requested; the previously
    # hard-coded 'description:' left a stray "full_" behind when the content
    # column is "full_description". Falls back to the old label when no
    # content column is given.
    if isinstance(page_content_columns, str):
        first_column = page_content_columns
    elif page_content_columns:
        first_column = page_content_columns[0]
    else:
        first_column = "description"
    label = f"{first_column}:"

    all_texts = [doc.page_content.replace(label, "", 1) for doc in documents]
    return documents, '\n'.join(all_texts)
    
 
def summarize_docs(
    documents: list[object],
    question_prompt_template: str = "",
    refine_prompt_template: str = "",
    is_token_limit_exceeded: bool = False,
):
    """Summarize LangChain documents with the module-level ``vertex_llm_text``.

      Args:
        list[object] documents:  list of langchain documents
        str question_prompt_template:  template of the main prompt; it is
            built with the single input variable ``text``
        str refine_prompt_template:  template of the refine prompt (input
            variables ``existing_answer`` and ``text``); only used when
            is_token_limit_exceeded is True
        bool is_token_limit_exceeded:  True selects the "refine" chain,
            False selects the "stuff" chain
      Returns:
         dict : whatever ``chain.invoke(documents)`` returns

      """
    initial_prompt = PromptTemplate(template=question_prompt_template, input_variables=["text"])

    if is_token_limit_exceeded:
        # Too much text for one call: summarize incrementally with "refine".
        followup_prompt = PromptTemplate(
            input_variables=["existing_answer", "text"],
            template=refine_prompt_template,
        )
        chain_options = dict(
            chain_type="refine",
            question_prompt=initial_prompt,
            refine_prompt=followup_prompt,
            return_intermediate_steps=True,
        )
    else:
        # Everything fits: put all documents into a single "stuff" prompt.
        chain_options = dict(chain_type="stuff", prompt=initial_prompt)

    summarize_chain = load_summarize_chain(vertex_llm_text, **chain_options)
    return summarize_chain.invoke(documents)

def  get_query_string (assets: str=""):
    """Build the BigQuery SQL that returns one aggregated text row per asset.

      The inner query prefixes the headline to chunk 0. The outer query builds
      a running STRING_AGG per asset and keeps only the row numbered IDX=1.
      That row holds the complete text only when the ROW_NUMBER ordering is the
      exact reverse of the STRING_AGG ordering, so both use the same keys
      (IFNULL(startOffset_seconds,0), chunk).

      Args:
        str assets:  comma separated string of all requested assets, already
            SQL-quoted (e.g. ``'a','b'``)
      Returns:
         str source_query_str : string query for loading data from bigquery

      """
    # NOTE(review): `assets` is interpolated into the SQL text verbatim; only
    # pass values that were quoted/validated by the caller.
    source_query_str= f"""SELECT          asset_id,                  
                    STRING_AGG(description, '\\n' ) 
                    OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) ASC , chunk ASC) AS full_description,
                    IDX
              FROM (
                    SELECT  asset_id,startOffset_seconds, CHUNK, 
                    
                    CASE WHEN chunk=0 
                         THEN TRIM(CONCAT(IFNULL(headline,''), CHR(10),  description))  
                         ELSE description 
                    END AS description,
                    ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) desc, chunk desc) AS IDX,
                    FROM `vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in ({assets})
             )
           WHERE IDX=1
        """
    return source_query_str


In [None]:
def  get_query_string (assets: str=""):
    """Build the BigQuery SQL that returns one aggregated text row per asset.

      The inner query prefixes the headline to chunk 0. The outer query builds
      a running STRING_AGG per asset and keeps only the row numbered IDX=1.
      That row holds the complete text only when the ROW_NUMBER ordering is the
      exact reverse of the STRING_AGG ordering, so both use the same keys
      (IFNULL(startOffset_seconds,0), chunk). Without the chunk tie-breaker,
      rows sharing a startOffset_seconds value were numbered arbitrarily.

      Args:
        str assets:  comma separated string of all requested assets, already
            SQL-quoted (e.g. ``'a','b'``)
      Returns:
         str source_query_str : string query for loading data from bigquery

      """
    # NOTE(review): `assets` is interpolated into the SQL text verbatim; only
    # pass values that were quoted/validated by the caller.
    source_query_str= f"""SELECT          asset_id,                  
                    STRING_AGG(description, '\\n' ) 
                    OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) ASC , chunk ASC) AS full_description,
                    IDX
              FROM (
                    SELECT  asset_id,startOffset_seconds, CHUNK, 
                    CASE WHEN chunk=0 
                         THEN TRIM(CONCAT(IFNULL(headline,''), CHR(10),  description))  
                         ELSE description 
                    END AS description,
                    ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) desc, chunk desc) AS IDX,
                    FROM `vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in ({assets})
             )
           WHERE IDX=1
        """
    return source_query_str
    
def _build_refine_template(goal_sentence: str, noun: str) -> str:
    """Return the refine-step template for ``noun`` (summary, headline, ...)."""
    return (
        f"{goal_sentence}\n"
        f"We have provided an existing {noun} up to a certain point: {{existing_answer}}\n"
        f"We have the opportunity to refine the existing {noun} "
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        f"Given the new context, refine the original {noun}. "
        f"If the context isn't useful, return the original {noun}."
    )


def get_prompt(action_type: str="",platform: str="",persona_text: str="", input_text:str="", Language:str=""):

    """set prompt according to the requested action
      Args:
        str action_type:  the type of action needs to be done: "Summary",
            "Summary_Persona", "HeadLine", "OffPlatformPost" or "Translation"
        str platform: platform name for off platform posts ("Twitter" or
            "Instagram"); only read when action_type is "OffPlatformPost"
        str persona_text: for persona based summaries; appended to the
            summary instruction
        str input_text: text to translate; only read for "Translation"
        str Language: destination language; only read for "Translation"
      Returns:
         str question_prompt_template : the main prompt for the given action.
             For "Translation" this is the finished prompt (input_text is
             already embedded), for the other actions it is a template with
             a {text} placeholder.
         str refine_prompt_template:  the second level prompt for refinement
             (placeholders {existing_answer} and {text}), in the case that
             the context is too long, we have to use refinement method.
             Empty for "Translation".
         Both values are "" when action_type/platform match no branch.

      """

    question_prompt_template=""
    refine_prompt_template=""

    if action_type=="Summary" or action_type=="Summary_Persona":
        # persona_text is spliced into a template that is later formatted
        # with {text}; double any braces so they stay literal text.
        safe_persona_text = persona_text.replace("{", "{{").replace("}", "}}")

        #this is the main prompt for summary
        question_prompt_template = """
                You will be given different parts of texts. Provide a summary of the following text"""+safe_persona_text+""". Your result must be detailed and at least 2 paragraphs. 
                When summarizing, directly dive into the narrative or descriptions from the text without using introductory phrases like 'In this passage'. 
                Directly address the main events, characters, and themes, encapsulating the essence and significant details from the text in a flowing narrative. 
                The goal is to present a unified view of the content, continuing the story seamlessly as if the passage naturally progresses into the summary.

                TEXT: {text}
                SUMMARY:
            """

        refine_prompt_template = _build_refine_template(
            "Your job is to produce a final summary. Your task is to combine and refine these summaries into a final, comprehensive summary that covers all key events, characters, themes, and details.",
            "summary",
        )
    elif action_type=="HeadLine":
        #this is the main prompt for headline
        question_prompt_template = """
                You will be given different parts of texts. Provide a one line headline of the following text. 

                TEXT: {text}
                HEADLINE:
            """

        refine_prompt_template = _build_refine_template(
            "Your job is to produce a final headline. Your task is to combine and refine these headlines into a final, comprehensive headline that covers all details.",
            "headline",
        )
    elif action_type=="OffPlatformPost"  and platform=='Twitter':
        #this is the main prompt for the Twitter post
        question_prompt_template = """
                You will be given different parts of texts. Provide a tweet that’s catchy, concise, and fits within 280 characters. Make sure to highlight the key message, and encourage engagement with a question or call to action.

                TEXT: {text}
                Tweet: 
            """

        refine_prompt_template = _build_refine_template(
            "Your job is to produce a final tweet. Your task is to combine and refine these tweets into a final, comprehensive tweet that covers all details, is catchy, concise, fits within 280 characters, and encourage engagement with a question or call to action.",
            "tweet",
        )
    elif action_type=="OffPlatformPost" and platform=='Instagram':
        #this is the main prompt for the Instagram post
        question_prompt_template = """
                You will be given different parts of texts. Turn the following text into an engaging Instagram post. Craft a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.

                TEXT: {text}
                Instagram Post: 
            """

        # The goal sentence used to say "final tweet" (copy-paste slip).
        refine_prompt_template = _build_refine_template(
            "Your job is to produce a final Instagram post. Your task is to combine and refine these Instagram posts into a final, comprehensive post that covers all details, crafts a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.",
            "post",
        )
    elif action_type=="Translation":
        #this is the complete prompt for translation (no refine step)
        question_prompt_template = f"""Translate the following text into {Language}.  Make sure to preserve the meaning, tone, and style of the original text, while ensuring it is natural and fluent in {Language}.
            Text:\n 
              {input_text}\n
            Only provide a single response.
            """

    return question_prompt_template,refine_prompt_template

def get_summary(assets:str="",action_type:str="",platform:str="",persona_text:str="",project_id:str="",context_window_limit: int=2000000):

    """get summary according to the action type requested
      Args:
        str assets:  comma separate string including all assets
        str action_type: requested action
        str persona_text: for persona based summaries
        str platform: for off platform based posts
        str project_id: project id
        int context_window_limit: context window limit for the llm model;
            when the estimated token count of the loaded text exceeds it,
            the refine chain is used instead of the stuff chain
      Returns:
         str :output summary ("" when the query returned no documents)
      """

    #set query string
    source_query_str = get_query_string(assets)

    #set prompts
    question_prompt_template, refine_prompt_template = get_prompt(
        action_type=action_type, platform=platform, persona_text=persona_text
    )

    #load data from bigquery
    documents, all_texts = get_data(
        source_query_str=source_query_str,
        metadata_columns=["asset_id"],
        page_content_columns=["full_description"],
        project_id=project_id,
        return_text=True,
    )

    # Nothing was found for the requested assets: do not ask the model to
    # summarize an empty input.
    if not documents:
        return ""

    # Estimate the token length and pick the summarization strategy.
    estimated_token_length = estimate_token_length(all_texts, 'cl100k_base')
    is_token_limit_exceeded = estimated_token_length > context_window_limit

    # The refine template is ignored by summarize_docs on the "stuff" path,
    # so a single call covers both cases.
    summary = summarize_docs(
        documents=documents,
        question_prompt_template=question_prompt_template,
        refine_prompt_template=refine_prompt_template,
        is_token_limit_exceeded=is_token_limit_exceeded,
    )

    return summary["output_text"]

def get_translation(action_type: str="",input_text: str="",Language:str=""):

    """ get translation according to the requested language
      Args:
        str action_type: forwarded to get_prompt; get_prompt only builds a
            translation prompt when this is "Translation"
        str input_text:  text to be translated
        str Language: destination language

      Returns:
         str : translated document, or "No translation can be provided."
             when the response text cannot be read
      """

    #set prompts
    question_prompt_template, _ = get_prompt(action_type=action_type, input_text=input_text, Language=Language)

    generation_config = GenerationConfig(temperature=1, max_output_tokens=8192)
    # Every harm category is set to BLOCK_NONE.
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    }

    response = generative_multimodal_model.generate_content(
        [question_prompt_template],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )

    try:
        return response.text
    except Exception:
        # Best-effort fallback when the response text cannot be read. Unlike
        # the former bare `except:`, this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return "No translation can be provided."
 
        
def func_generate_content(request):
    """Run one content-generation action with the settings hard-coded below.

      Args:
        request: currently unused; every input is set inside the function
      Returns:
         str : the generated text, or a string starting with "Error-" when a
             required setting is empty
      """

    # Set the Gemini 1.5 Pro context window limit
    context_window_limit = 2000000
    project_id = "nine-quality-test"  # @param {type:"string"}
    REGION = "us-central1"  # @param {type:"string"}  (not used below)
    assets="p5d2tw,p5e9zq,p5e49l" #comma separated asset_ids

    # Wrap every id in single quotes for the SQL IN (...) list.
    assets = ','.join("'" + asset_id.strip() + "'" for asset_id in assets.split(','))
    action_type="Translation" # could be Summary, Summary_Persona, HeadLine, OffPlatformPost, Translation
    persona="10-year-old"
    text="""
    Disney+ has lots of cool shows!  There are funny shows like *Abbott Elementary*, which is about teachers at a school in Philadelphia.  There's also *The Americans*, a show about Russian spies pretending to be a regular family.  *Andor* is like *Star Wars* but for grown-ups.  *Arrested Development* is a hilarious show about a crazy family. *Atlanta* is about two cousins, one a rapper and the other his manager, and what it's like to be Black in America. *The Bear* is about a fancy chef who takes over his family's sandwich shop.  *Bob's Burgers* is a cartoon about a family who runs a burger restaurant. And for something really fun, there's *Buffy the Vampire Slayer*, about a girl who fights vampires! *Desperate Housewives* is about a group of women and all the secrets they keep. *Homeland* is about a CIA agent with mental health challenges.

There are also shows with lots of episodes like *How I Met Your Mother*, which is about a group of friends. *Loki* from the Marvel movies has his own show. *Lost* is about people trapped on a strange island.  *Mrs. America* tells the true story of the fight for women's rights. *The Muppet Show* is a classic with puppets! *NYPD Blue* is about police officers in New York City.  *One Mississippi* is a sad but funny show about a woman who goes home to take care of her sick mom.  *Only Murders in the Building* is about three friends who investigate a murder. *The People v. O.J. Simpson* tells the story of a famous trial. *Pose* shows the lives of drag queens in the 1980s.  There are so many shows to watch on Disney+!


Curtis Sittenfeld's new book, *Romantic Comedy*, is about Sally, a writer for a comedy show like *Saturday Night Live*.  Sally makes fun of how average-looking guys often date beautiful women. She writes a joke skit about it called "The Danny Horst Rule". Then Sally meets a handsome pop star named Noah, and she starts to like him.  But Sally doesn't think Noah could ever like her back.  The book is about whether they'll end up together. Sally is a feminist who wants to write romantic comedies that are smart and funny. She and Noah bond over their work, but Sally hides her true feelings.  She's afraid to be vulnerable because a coworker once hurt her feelings. The book also shows what it's like to work at a comedy show.
    """
    Language="Persian"
    #text='How are you?'

    # Only persona summaries get the persona clause. Previously the clause
    # was added for every other action type too, so a plain "Summary" was
    # also written for the persona.
    persona_text=""
    if action_type=="Summary_Persona":
        if persona=="":
            return "Error- Please set the persona"
        persona_text=f" so that a {persona} can understand it. Use simple words and short sentences"

    platform="Twitter" # could be Twitter, Instagram or "" if OffPlatformPost is not selected
    if action_type=="OffPlatformPost" and platform=="":
        return "Error- Please set the platform"

    if action_type=="Translation" and (text=="" or Language==""):
        return "Error- Please set the input text to translate and destination language"

    if action_type=="Translation":
        return get_translation(action_type=action_type,input_text=text,Language=Language )

    #get summary
    if action_type=="OffPlatformPost":
        # Both platform posts are produced, Instagram first.
        result="Instagram Post:\n"+get_summary(assets =assets,action_type=action_type,platform='Instagram',
                       persona_text=persona_text,project_id=project_id,context_window_limit=context_window_limit)

        result=result+"\nTwitter Post:\n"+get_summary(assets =assets,action_type=action_type,platform='Twitter',
                       persona_text=persona_text,project_id=project_id,context_window_limit=context_window_limit)
        return result

    return get_summary(assets =assets,action_type=action_type,platform="",
                       persona_text=persona_text,project_id=project_id,context_window_limit=context_window_limit)

In [None]:
# Run the hard-coded action once and show its result; the request argument
# is not read by func_generate_content.
summary = func_generate_content(request='')
print(summary)

In [None]:
import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting
 
 
def generate():
    """Stream a Gemini answer for the module-level ``text1`` prompt.

    Reads the module-level ``text1``, ``generation_config`` and
    ``safety_settings``.

    Returns:
        str: the text of all streamed response chunks, concatenated in order.
    """
    vertexai.init(project="nine-quality-test", location="us-central1")
    gemini = GenerativeModel("gemini-1.5-pro-002")
    stream = gemini.generate_content(
        [text1],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )
    # Each streamed chunk carries a piece of the answer in `.text`.
    return "".join(chunk.text for chunk in stream)
 
# Prompt sent by generate(): asks for a Persian translation of "how are you".
text1 = """Translate the following text into Persian. Make sure to preserve the meaning, tone, and style of the original text, while ensuring it is natural and fluent in Persian.
Text:how are you
 
Only provide a single response"""
 
# Generation parameters passed to generate_content() by generate().
generation_config = dict(
    max_output_tokens=8192,
    temperature=1,
    top_p=0.95,
)
 
# One SafetySetting per harm category, each with its threshold set to OFF.
safety_settings = [
    SafetySetting(
        category=harm_category,
        threshold=SafetySetting.HarmBlockThreshold.OFF,
    )
    for harm_category in (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
]
 
# Call generate() and leave the result as the cell's last expression so the
# notebook displays it.
r = generate()
r

In [None]:
# Translate a short sample sentence into Persian via get_translation().
result = get_translation(
    action_type='Translation',
    input_text="how are you",
    Language='Persian',
)

In [114]:
import functions_framework
import tiktoken
from langchain_google_community import BigQueryLoader
from langchain_core.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
import pandas as pd
from vertexai.preview.generative_models import (
    Content,
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    Image,
    Part,
    HarmBlockThreshold,
    HarmCategory,
)
from langchain_google_vertexai import VertexAI as langchain_vertexai
from vertexai.generative_models._generative_models import SafetySettingsType
import vertexai
import json
from datetime import datetime
import time
from google.cloud import bigquery


def estimate_token_length(text, model="gpt2"):
    """Estimates the token length of a given text using a tiktoken encoding.

      Args:
        text: The input text. ``None`` or an empty string counts as 0 tokens.
        model: Name of the tiktoken encoding handed to
          ``tiktoken.get_encoding`` (default: "gpt2").

      Returns:
        The estimated number of tokens.
      """
    # Nothing to tokenize; this also avoids handing None to the encoder.
    if not text:
        return 0

    enc = tiktoken.get_encoding(model)

    # disallowed_special=() makes special-token markers such as
    # "<|endoftext|>" encode as ordinary text. With tiktoken's default
    # setting the call raises ValueError when the input contains one.
    return len(enc.encode(text, disallowed_special=()))

def get_data(source_query_str: str=None,metadata_columns: list[str]=None,page_content_columns: list[str]=None, project_id: str=None , return_text: bool=True):
    """Load data from BigQuery as LangChain documents.

      Args:
        str source_query_str:  The query string to fetch the data from bigquery
        list[str] metadata_columns:  list of metadata column names
        list[str] page_content_columns:  list of content column names
        str project_id: project id
        bool return_text: when True, also return the page content of all
            documents joined with newlines; when False that text is ""
      Returns:
          tuple: (documents, text) where documents is the list returned by
          ``BigQueryLoader.load()`` and text is the joined page content.

      """
    loader = BigQueryLoader(
        query=source_query_str,
        project=project_id,
        metadata_columns=metadata_columns,
        page_content_columns=page_content_columns,
    )
    documents = list(loader.load())

    if not return_text:
        return documents, ""

    # The page content is expected to carry a "<column>:" label. Strip the
    # label of the column that was actually requested instead of a
    # hard-coded name; falls back to the former 'full_description:' label
    # when no content column is given.
    if isinstance(page_content_columns, str):
        first_column = page_content_columns
    elif page_content_columns:
        first_column = page_content_columns[0]
    else:
        first_column = "full_description"
    label = f"{first_column}:"

    all_texts = [doc.page_content.replace(label, "", 1) for doc in documents]
    return documents, '\n'.join(all_texts)
    
 
def summarize_docs(
    documents: list[object],
    question_prompt_template: str = "",
    refine_prompt_template: str = "",
    is_token_limit_exceeded: bool = False,
    model: object = None,
):
    """Summarize LangChain documents with the given model.

      Args:
        list[object] documents:  list of langchain documents
        str question_prompt_template:  template of the main prompt; it is
            built with the single input variable ``text``
        str refine_prompt_template:  template of the refine prompt (input
            variables ``existing_answer`` and ``text``); only used when
            is_token_limit_exceeded is True
        bool is_token_limit_exceeded:  True selects the "refine" chain,
            False selects the "stuff" chain
        object model:  the LLM object passed to ``load_summarize_chain``
      Returns:
         dict : whatever ``chain.invoke(documents)`` returns

      """
    initial_prompt = PromptTemplate(template=question_prompt_template, input_variables=["text"])

    if not is_token_limit_exceeded:
        # Everything fits: put all documents into a single "stuff" prompt.
        stuff_chain = load_summarize_chain(model, chain_type="stuff", prompt=initial_prompt)
        return stuff_chain.invoke(documents)

    # Too much text for one call: summarize incrementally with "refine".
    followup_prompt = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_prompt_template,
    )
    refine_chain = load_summarize_chain(
        model,
        chain_type="refine",
        question_prompt=initial_prompt,
        refine_prompt=followup_prompt,
        return_intermediate_steps=True,
    )
    return refine_chain.invoke(documents)

def  get_query_string (assets: str=""):
    """Build the BigQuery SQL that returns one aggregated text row per asset.

      The innermost query keeps one row per (asset_id, startOffset_seconds),
      the one numbered IDX=1, and prefixes the headline to the description.
      The outer query builds a running STRING_AGG per asset and keeps only the
      row numbered IDX=1. That row holds the complete text only when the
      ROW_NUMBER ordering is the exact reverse of the STRING_AGG ordering, so
      both use the same IFNULL(...,0) keys.

      Args:
        str assets:  comma separated string of all requested assets, already
            SQL-quoted (e.g. ``'a','b'``)
      Returns:
         str source_query_str : string query for loading data from bigquery

      """
    # NOTE(review): `assets` is interpolated into the SQL text verbatim; only
    # pass values that were quoted/validated by the caller.
    source_query_str= f"""
     SELECT * FROM (
        SELECT          asset_id,                  
                    STRING_AGG(description, '\\n' ) 
                     OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) ASC , IFNULL(endOffset_seconds,0) ASC ) AS full_description,
                    ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) desc, IFNULL(endOffset_seconds,0) desc) AS IDX,
              FROM (
                    SELECT * FROM 
                    (
                        SELECT  distinct asset_id,startOffset_seconds,endOffset_seconds,
                        TRIM(CONCAT(IFNULL(headline,''), CHR(10),  description))  description,
                        ROW_NUMBER() OVER (PARTITION BY asset_id,startOffset_seconds ORDER BY startOffset_seconds desc, chunk desc) AS IDX,
                        FROM `vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in ({assets})
                      )
                      WHERE IDX=1
             )
          )
           WHERE IDX=1
        """
    return source_query_str

def _build_refine_prompt(opening: str, noun: str, fallback: str) -> str:
    """Assemble the second-pass (refine) prompt shared by the chunked actions.

      Args:
        str opening: first sentence(s) describing the refinement task
        str noun: what is being refined ("summary", "headline", "tweet", "post")
        str fallback: instruction for the case where the new context is not useful
      Returns:
         str : template containing the {existing_answer} and {text} placeholders
      """
    return (
        f"{opening}\n"
        f"We have provided an existing {noun} up to a certain point: {{existing_answer}}\n"
        # Each fragment ends with a space or period so adjacent sentences do not run together.
        f"We have the opportunity to refine the existing {noun} "
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        f"Given the new context, refine the original {noun}. "
        f"{fallback}"
    )


def get_prompt(action_type: str="",platform: str="",persona_text: str="", input_text:str="", Language:str=""):

    """Build the prompt templates for the requested action.

      Args:
        str action_type:  the requested action: "Summary", "Summary_Persona", "HeadLine",
                          "OffPlatformPost", "Translation", "TrailerScript" or "KeyClips"
        str platform: platform name for off platform posts ("Twitter" or "Instagram")
        str persona_text: text appended to the summary instruction for persona based summaries
        str input_text: text to translate (only used by "Translation")
        str Language: destination language (only used by "Translation")
      Returns:
         str question_prompt_template : the main prompt for the given action; it keeps a {text}
                                        placeholder except for "Translation", which is fully rendered
         str refine_prompt_template:  the second level prompt used when the context is too long and
                                      has to be processed with the refinement method; "" when the
                                      action has no refine step

         Both values are "" when the action/platform combination is not recognised.
      """

    question_prompt_template=""
    refine_prompt_template=""

    if action_type in ("Summary", "Summary_Persona"):
        #main prompt for summary; persona_text is spliced right after the instruction
        question_prompt_template = ("""
                You will be given different parts of texts. Provide a summary of the following text"""
            + persona_text
            + """. Your result must be detailed and at least 2 paragraphs. 
                When summarizing, directly dive into the narrative or descriptions from the text without using introductory phrases like 'In this passage'. 
                Directly address the main events, characters, and themes, encapsulating the essence and significant details from the text in a flowing narrative. 
                The goal is to present a unified view of the content, continuing the story seamlessly as if the passage naturally progresses into the summary.
                If different parts of texts look unrelated, give a summary of each text in 1 paragraph separately.

                TEXT: {text}
                SUMMARY:
            """)

        refine_prompt_template = _build_refine_prompt(
            opening="Your job is to produce a final summary. Your task is to combine and refine these summaries into a final, comprehensive summary that covers all key events, characters, themes, and details.",
            noun="summary",
            fallback="If the context isn't useful, return the original summary and add the summary of the new context in a separate paragraph.",
        )

    elif action_type=="HeadLine":
        #main prompt for headline
        question_prompt_template = """
                You will be given different parts of texts. Provide a one line headline of the following text. 

                TEXT: {text}
                HEADLINE:
            """

        refine_prompt_template = _build_refine_prompt(
            opening="Your job is to produce a final headline. Your task is to combine and refine these headlines into a final, comprehensive headline that covers all details.",
            noun="headline",
            fallback="If the context isn't useful, return the original headline and add the headline of the new context in a separate line.",
        )

    elif action_type=="OffPlatformPost" and platform=='Twitter':
        #main prompt for a Twitter post
        question_prompt_template = """
                You will be given different parts of texts. Provide a tweet that’s catchy, concise, and fits within 280 characters. Make sure to highlight the key message, and encourage engagement with a question or call to action.

                TEXT: {text}
                Tweet: 
            """

        refine_prompt_template = _build_refine_prompt(
            opening="Your job is to produce a final tweet. Your task is to combine and refine these tweets into a final, comprehensive tweet that covers all details, is catchy, concise, fits within 280 characters, and encourages engagement with a question or call to action.",
            noun="tweet",
            fallback="If the context isn't useful, return the original tweet.",
        )

    elif action_type=="OffPlatformPost" and platform=='Instagram':
        #main prompt for an Instagram post
        question_prompt_template = """
                You will be given different parts of texts. Turn them into an engaging Instagram post. Craft a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.

                TEXT: {text}
                Instagram Post: 
            """

        refine_prompt_template = _build_refine_prompt(
            opening="Your job is to produce a final Instagram post. Your task is to combine and refine these Instagram posts into a final, comprehensive post that covers all details, crafts a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.",
            noun="post",
            fallback="If the context isn't useful, return the original post.",
        )

    elif action_type=="Translation":
        #main prompt for translation; rendered immediately, so there is no {text} placeholder
        question_prompt_template = f"""Translate the following text into {Language}.  Make sure to preserve the meaning, tone, and style of the original text, while ensuring it is natural and fluent in {Language}.
            Text:\n 
              {input_text}\n
            Only provide a single response.
            """

    elif action_type=="TrailerScript":
        #main prompt for trailer script; no refine step
        question_prompt_template = """
                You will be given a summary of episode. Provide a trailer script that will run for between 2 and 3 minutes.

                TEXT: {text}
                Trailer Script:
            """

    elif action_type=="KeyClips":
        #main prompt for key clips; no refine step
        question_prompt_template = """
                You will be given a summary of episode. Identify the key clips in this episode and provide a title and summary of each clip along with the time line.

                TEXT: {text}
                Key Clips:
            """

    return question_prompt_template,refine_prompt_template

def get_summary(assets:str="",action_type:str="",platform:str="",persona_text:str="",project_id:str="",context_window_limit: int=2000000, model: object=None):

    """Generate the content for the requested action from the given assets.

      Args:
        str assets:  comma separated string including all assets
        str action_type: requested action
        str platform: for off platform based posts
        str persona_text: for persona based summaries
        str project_id: project id
        int context_window_limit: context window limit for the llm model
        object model: llm handed through to summarize_docs
      Returns:
         str : the "output_text" entry of the summarize_docs result
      """

    #build the query and the prompts for this action
    query = get_query_string(assets)
    main_prompt, refine_prompt = get_prompt(action_type=action_type, platform=platform, persona_text=persona_text)

    #load data from bigquery: asset_id as metadata, full_description as content
    documents, combined_text = get_data(
        source_query_str=query,
        metadata_columns=["asset_id"],
        page_content_columns=["full_description"],
        project_id=project_id,
        return_text=True,
    )

    #compare the estimated token length with the context window limit
    over_limit = estimate_token_length(combined_text, 'cl100k_base') > context_window_limit

    chain_args = {
        "documents": documents,
        "question_prompt_template": main_prompt,
        "is_token_limit_exceeded": over_limit,
        "model": model,
    }
    if over_limit:
        #the refine prompt is only handed over when the text does not fit the context window
        chain_args["refine_prompt_template"] = refine_prompt

    return summarize_docs(**chain_args)["output_text"]

def get_translation(action_type: str="",input_text: str="",Language:str="", model: object=None):

    """Translate the input text into the requested language.

      Args:
        str action_type: requested action, forwarded to get_prompt
        str input_text:  text to be translated
        str Language: destination language
        object model: object whose generate_content method is called with the prompt

      Returns:
         str : the text of the model response, or the exception message when
               reading the response text raises
      """

    #only the main prompt is needed, translation has no refine step
    prompt, _ = get_prompt(action_type=action_type, input_text=input_text, Language=Language)

    config = GenerationConfig(temperature=0.2, max_output_tokens=8192)

    #every listed harm category is set to BLOCK_NONE
    categories = (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    thresholds = {category: HarmBlockThreshold.BLOCK_NONE for category in categories}

    response = model.generate_content(
        [prompt],
        generation_config=config,
        safety_settings=thresholds,
    )

    try:
        return response.text
    except Exception as e:
        return str(e)
 
    


def log_data(result,error,request,elapsed_time,project_id):
    """
      Log one content generation call into bigquery
    Args:
       result: the generated content (or error text) returned to the caller
       str error: the error message, empty when the call succeeded
       dict request: the request sent
       float elapsed_time: the time taken for the result to be generated
       str project_id: project id used to create the bigquery client
    """
    row = {
        "search_date": datetime.now().isoformat(),
        "request": request,
        "response": result,
        "error": error,
        "elapsed_time": elapsed_time,
        "API": "content_generation",
    }

    # Load configuration from config.json (log dataset and table names)
    with open('config.json') as config_file:
        config = json.load(config_file)

    client = bigquery.Client(project_id)

    # Address the table by its full id instead of the deprecated client.dataset() helper.
    # client.project is used so the id stays valid when project_id is None and the
    # client falls back to its default project.
    table_id = f"{client.project}.{config['log_dataset']}.{config['log_table']}"

    #submit a load job for the row - the job result is deliberately not awaited
    client.load_table_from_json([row], table_id)

def _quote_asset_ids(assets):
    """Turn a comma separated id string into a quoted SQL list, escaping backslashes and quotes."""
    # Backslashes are escaped first so the backslash added for a quote is not doubled.
    return ','.join(
        "'" + asset_id.strip().replace("\\", "\\\\").replace("'", "\\'") + "'"
        for asset_id in assets.split(',')
    )


def func_generate_content(request):

    """
    Cloud Function entry point. This function handles the incoming request,
    performs content generation according to the given action and logs the call.

    Fields read from the JSON body:
       project, region: used to initialise Vertex AI and for data access/logging
       action_type: could be Summary, Summary_Persona, HeadLine, OffPlatformPost,
                    Translation, TrailerScript, KeyClips
       asset_ids: comma separated assets (optional)
       persona: persona for persona based summaries (required for Summary_Persona)
       input_text: text to translate (required for Translation)
       language: destination language for Translation (defaults to Chinese when absent)

    Returns:
       str : the generated content, or a message starting with "Error-"
    """

    # get_json(silent=True) yields None when the body cannot be parsed as JSON
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict):
        return "Error- Request body must be a JSON object"

    project_id = request_json.get('project')
    location = request_json.get('region')
    action_type = request_json.get('action_type')

    # Missing keys and explicit nulls are both treated as empty
    assets = request_json.get('asset_ids') or ""      #comma separated assets
    persona = request_json.get('persona') or ""       #persona filter
    input_text = request_json.get('input_text') or "" #input text for translation

    # The default only applies when the key is absent; an explicit empty value is rejected below
    Language = request_json.get('language', "Chinese")
    if Language is None:
        Language = ""

    if action_type=="Summary_Persona" and persona=="":
        return "Error- Persona must be set"

    if action_type=="Translation" and input_text=="":
        return "Error- Provide input text for translation"

    if action_type=="Translation" and Language=="":
        return "Error- Provide destination language for translation"

    # Load configuration from config.json
    with open('config.json') as config_file:
        config = json.load(config_file)

    # Set the context window limit and model name
    context_window_limit=int(config['context_window_limit'])
    model_name=config['model_name']

    #Init vertex ai
    vertexai.init(project=project_id, location=location )

    #set assets for search; ids come from the request, so they are escaped before
    #being spliced into the query text
    if assets.strip() !="":
        assets = _quote_asset_ids(assets)
    else:
        assets=""

    #Set persona text only when a persona was given, otherwise the summary prompt
    #would read "so that a  can understand it"
    persona_text=""
    if persona:
        persona_text=f" so that a {persona} can understand it. Use simple words and short sentences"

    error=""
    start_time = time.time()
    #generate content according to the requested action
    try:
        if action_type!="Translation":
            llm = langchain_vertexai(model_name=model_name)
            shared_args = dict(assets=assets, action_type=action_type, persona_text=persona_text,
                               project_id=project_id, context_window_limit=context_window_limit, model=llm)

            if action_type=="OffPlatformPost": #for OffPlatformPost, create both Twitter and Instagram posts
                result="Instagram Post:\n"+get_summary(platform='Instagram', **shared_args)
                result=result+"\nTwitter Post:\n"+get_summary(platform='Twitter', **shared_args)
            else:
                result=get_summary(platform="", **shared_args)
        else:
            #generate translation
            translator = GenerativeModel(model_name)
            result=get_translation(action_type=action_type,input_text=input_text,Language=Language, model=translator)

    except Exception as e:
        # top-level boundary: report the failure to the caller and record it in the log
        result="Error- Service is not available or content generation has faced an issue:\n"+str(e)
        error=result

    # Calculate the elapsed time
    elapsed_time = time.time() - start_time
    #record the call in the log
    log_data(result,error,request_json,elapsed_time,project_id)

    return result

In [118]:
from unittest.mock import Mock
import json


# Alternative asset id for manual runs: vlt_video_extract_SIXTY_MINUTES_60MI23_14_A_HBB.mp4
data = dict(
    asset_ids="vlt_video_extract_NINE_NEWS_SYD-NINE_NNNT23_101_A.mp4",
    project="nine-quality-test",
    region="us-central1",
    action_type="TrailerScript",
)

# Stand-in for the HTTP request: get_json() hands back the payload above
mock_request = Mock(**{"get_json.return_value": data})


In [None]:
# Manual check: load the descriptions of a fixed set of assets and print the combined text
asset_ids = [
    "p5d1x4",
    "p5dx0h",
    "p5duot",
    "a4e57c915b48502be148d6fcb08944efa22d2107.jpeg",
    "3466295218ab26efc1739b775d21453a5f1a819b.jpeg",
    "03245688ac9a7dd651797170992af7d8421ae2c2.jpeg",
    "vlt_video_extract_SIXTY_MINUTES_60MI23_10_A_HBB.mp4",
    "vlt_video_extract_NINE_NEWS_SYD-NINE_NNNT23_101_A.mp4",
]
# the query builder expects the ids already single-quoted and comma separated
assets = ",".join(f"'{asset_id}'" for asset_id in asset_ids)
source_query_str = get_query_string(assets)

# headline prompts
question_prompt_template, refine_prompt_template = get_prompt(action_type='HeadLine', platform='', persona_text='')

# asset_id is kept as metadata, full_description is the document content
metadata_columns = ["asset_id"]
page_content_columns = ["full_description"]

# load data from bigquery
documents, all_texts = get_data(
    source_query_str=source_query_str,
    metadata_columns=metadata_columns,
    page_content_columns=page_content_columns,
    project_id='nine-quality-test',
    return_text=True,
)
print(all_texts)

In [None]:
# Estimate the token count of the loaded text with the 'cl100k_base' encoding and display it
estimated_token_length = estimate_token_length(all_texts,'cl100k_base') #cl100k_base
estimated_token_length

In [None]:
# Display the current main prompt template
question_prompt_template

In [None]:
# Ad-hoc experiment: send the summary instructions plus the loaded texts straight to Gemini.
# NOTE: this lookup only refreshes the notebook-level question_prompt_template; the request
# below is built from summary_instructions, not from this template.
question_prompt_template = get_prompt(action_type="OffPlatformPost",platform="Instagram",input_text=all_texts,Language='')[0]
generation_config= GenerationConfig(temperature=0.2, max_output_tokens=8192)
safety_settings=  {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    }

# Named summary_instructions rather than p so the `Path as p` import alias is not overwritten
summary_instructions=""" You will be given different parts of texts. Provide a summary of the following text. Your result must be detailed and at least 2 paragraphs. 
                When summarizing, directly dive into the narrative or descriptions from the text without using introductory phrases like 'In this passage'. 
                Directly address the main events, characters, and themes, encapsulating the essence and significant details from the text in a flowing narrative. 
                The goal is to present a unified view of the content, continuing the story seamlessly as if the passage naturally progresses into the summary.
                If different parts of texts look unrelated, give a summary of each text in 1 paragraph separately."""

model_input=[summary_instructions+" Texts are separated by ------------------------------\n"+all_texts]

model= GenerativeModel("gemini-1.5-pro-002")

response = model.generate_content(
    model_input,
    generation_config=generation_config,
    safety_settings=safety_settings, )

# Reading response.text can raise; in that case the exception message is kept as the result
try:
    result=response.text
except Exception as e:
    result=str(e)

In [None]:
# Display the raw combined text returned by get_data
all_texts

In [None]:
# Print the model output (or the exception message) captured in result
print(result)

In [119]:
# Run the Cloud Function entry point against the mocked request and print its response
x=func_generate_content(mock_request)
print(x)

**(Scene: Opens with rapid cuts of flashing police lights, crime scene tape, a burnt-out car, a tense close-up of a detective's face, and a grieving family member.)**

**(Voiceover, urgent and dramatic):** In a city gripped by fear, a wave of violence explodes onto the streets...

**(Scene: Aerial shot of Liverpool, then cuts to Bianca Balzer reporting from the scene of the shooting.)**

**(Balzer):** A man, shot dead in broad daylight.  Multiple gunshot wounds...including to the head.

**(Voiceover):** A shocking execution…two burnt-out cars…are they connected?

**(Scene: Quick cuts of police swarming a crime scene, investigators examining evidence.)**

**(Voiceover):**  Detectives race against time to untangle a web of deadly secrets...

**(Scene: Karen Webb, Police Commissioner, in a press conference, looking stressed.)**

**(Reporter's voice, sharp and accusatory):** How can you justify not watching the bodycam footage?

**(Scene: Flashing image of a taser, then a hospital bed with