In [107]:
import tiktoken
from langchain_google_community import BigQueryLoader
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
#from langchain.llms import VertexAI as langchain_vertexai
from langchain_google_vertexai import VertexAI as langchain_vertexai
from langchain import PromptTemplate
from pathlib import Path as p
import pandas as pd
from vertexai.preview.generative_models import (
    Content,
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    Image,
    Part,
    HarmBlockThreshold,
    HarmCategory,
)


vertex_llm_text = langchain_vertexai(model_name="gemini-1.5-pro-002")
generative_multimodal_model= GenerativeModel("gemini-1.5-pro-002")


def estimate_token_length(text, model="gpt2"):
    """Estimates the token length of a given text using a specified model.

      Args:
        text: The input text.
        model: The model to use for tokenization (default: "gpt2").

      Returns:
        The estimated number of tokens.
      """

  
    enc = tiktoken.get_encoding(model)  

    # Tokenize the text and count tokens
    tokens = enc.encode(text)
    token_count = len(tokens)
    return token_count

def get_data(source_query_str: str=None,metadata_columns: str=None,page_content_columns: str=None, project_id: str=None , return_text: bool=True):
    
    """Load data from big query

      Args:
        str source_query_str:  The query string to fetch the data from bigquery
        list[str] metadata_columns:  list of metadata column names
        list[str] page_content_columns:  list of content column names  
        str project_id: project id
        bool return_text: returns the content columns description
      Returns:
          list[langchain_core.documents.base.Document] documents: langchain documents
          
      """
    
    loader = BigQueryLoader(
            query=source_query_str, project=project_id, metadata_columns=metadata_columns, page_content_columns=page_content_columns
        )
    documents = []
    all_texts=[]
    documents.extend(loader.load())
    if return_text:  
         all_texts=[doc.page_content.replace('description:',"",1) for doc in documents]
        
    return documents, '\n'.join(all_texts)
    
 
def summarize_docs(documents: list[object],question_prompt_template: str="", refine_prompt_template: str="" ,is_token_limit_exceeded: bool=False ):
    
    """summarizes the input documents

      Args:
        list[object] documents:  list of langchain documents
        str question_prompt_template:  string question prompt template. 
        str refine_prompt_template:  string refine prompt template in the case that we need to use refine method
        bool is_token_limit_exceeded:  boolean indicating wheather or not the token limit is exceeded.
      Returns:
         dict : summary result
         
      """
       
    question_prompt = PromptTemplate(template=question_prompt_template, input_variables=["text"]) 
    
    if not is_token_limit_exceeded:        
        #if the token limit is in the context window range, use a stuffing method for summary
        chain = load_summarize_chain(vertex_llm_text, chain_type="stuff", 
                                     prompt=question_prompt)
        
    else:     
        #otherwise use a refine summarization method
        refine_prompt = PromptTemplate(input_variables=["existing_answer", "text"], template=refine_prompt_template)
              
        chain = load_summarize_chain(
            vertex_llm_text,
            chain_type="refine",
            question_prompt=question_prompt,
            refine_prompt=refine_prompt,
            return_intermediate_steps=True,
          )
        
    return chain.invoke(documents)


In [109]:
def  get_query_string (assets: str=""):
    """set query string 
      Args:         
        str assets:  comma separated string of all requested assets     
      Returns:
         str source_query_str : string query for loading data from biquery
         
      """
     #source_query_str=f"select distinct combined_id,unique_id,content, chunk, trim(concat(ifnull(headline,''), CHR(10),  description)) as description from `nine-quality-test.vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in {assets} order by unique_id, chunk asc "
    source_query_str= f"""SELECT          asset_id,                  
                    STRING_AGG(description, '\\n' ) 
                    OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) ASC , chunk ASC) AS full_description,
                    IDX
              FROM (
                    SELECT  asset_id,startOffset_seconds, CHUNK, 
                    CASE WHEN chunk=0 
                         THEN TRIM(CONCAT(IFNULL(headline,''), CHR(10),  description))  
                         ELSE description 
                    END AS description,
                    ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY startOffset_seconds desc) AS IDX,
                    FROM `vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in ({assets})
             )
           WHERE IDX=1
        """
    return source_query_str
    
def get_prompt(action_type: str="",platform: str="",persona_text: str="", input_text:str="", Language:str=""):
    
    """set prompt according to the requested action
      Args:         
        str action_type:  the type of action needs to be done
        str platform: platform name for off platform posts 
        str persona_text: for persona based summaries 
      Returns:
         str question_prompt_template : the main prompt for the given action 
         str refine_prompt_template:  the second level prompt for refinement, in the case that the context is too long, we have to use refinement method.
         
      """
   
    question_prompt_template=""
    refine_prompt_template=""
    
    if action_type=="Summary" or action_type=="Summary_Persona":
            #this is the main prompt for summary
            question_prompt_template = """
                You will be given different parts of texts. Provide a summary of the following text"""+persona_text+""". Your result must be detailed and at least 2 paragraphs. 
                When summarizing, directly dive into the narrative or descriptions from the text without using introductory phrases like 'In this passage'. 
                Directly address the main events, characters, and themes, encapsulating the essence and significant details from the text in a flowing narrative. 
                The goal is to present a unified view of the content, continuing the story seamlessly as if the passage naturally progresses into the summary.

                TEXT: {text}
                SUMMARY:
            """

            refine_prompt_template = (
                "Your job is to produce a final summary. Your task is to combine and refine these summaries into a final, comprehensive summary that covers all key events, characters, themes, and details.\n"
                "We have provided an existing summary up to a certain point: {existing_answer}\n"
                "We have the opportunity to refine the existing summary"
                "(only if needed) with some more context below.\n"
                "------------\n"
                "{text}\n"
                "------------\n"
                "Given the new context, refine the original summary"
                "If the context isn't useful, return the original summary."
            )
    elif action_type=="HeadLine":  

        #this is the main prompt for headline
            question_prompt_template = """
                You will be given different parts of texts. Provide a one line headline of the following text. 

                TEXT: {text}
                HEADLINE:
            """

            refine_prompt_template = (
                "Your job is to produce a final headline. Your task is to combine and refine these headlines into a final, comprehensive headline that covers all details.\n"
                "We have provided an existing headline up to a certain point: {existing_answer}\n"
                "We have the opportunity to refine the existing headline"
                "(only if needed) with some more context below.\n"
                "------------\n"
                "{text}\n"
                "------------\n"
                "Given the new context, refine the original headline"
                "If the context isn't useful, return the original headline."
            )
    elif action_type=="OffPlatformPost"  and Platform=='Twitter':

            #this is the main prompt for social media post
            question_prompt_template = """
                You will be given different parts of texts. Provide a tweet that that’s catchy, concise, and fits within 280 characters. Make sure to highlight the key message, and encourage engagement with a question or call to action.

                TEXT: {text}
                Tweet: 
            """

            refine_prompt_template = (
                "Your job is to produce a final tweet. Your task is to combine and refine these tweets into a final, comprehensive tweet that covers all details, is catchy, concise, fits within 280 characters, and encourage engagement with a question or call to action.\n"
                "We have provided an existing tweet up to a certain point: {existing_answer}\n"
                "We have the opportunity to refine the existing tweet"
                "(only if needed) with some more context below.\n"
                "------------\n"
                "{text}\n"
                "------------\n"
                "Given the new context, refine the original tweet"
                "If the context isn't useful, return the original tweet."
            )
    elif action_type=="OffPlatformPost" and Platform=='Instagram':

            #this is the main prompt for social media post
            question_prompt_template = """
                You will be given different parts of texts. Provide  into an engaging Instagram post. Craft a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.

                TEXT: {text}
                Instagram Post: 
            """

            refine_prompt_template = (
                "Your job is to produce a final tweet. Your task is to combine and refine these Instagram posts into a final, comprehensive post that covers all details, crafts a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.\n"
                "We have provided an existing post up to a certain point: {existing_answer}\n"
                "We have the opportunity to refine the existing post"
                "(only if needed) with some more context below.\n"
                "------------\n"
                "{text}\n"
                "------------\n"
                "Given the new context, refine the original post"
                "If the context isn't useful, return the original post."
            )
    elif action_type=="Translation":
         #this is the main prompt for social media post
            question_prompt_template = f"""
                Translate the following text into {Language}.  Make sure to preserve the meaning, tone, and style of the original text, while ensuring it is natural and fluent in {Language}.

            """
      
            
    return question_prompt_template,refine_prompt_template

def get_summary(assets:str="",action_type:str="",platform:str="",persona_text:str="",project_id:str="",context_window_limit: int=2000000):
    
    """get summary according to the action type requested
      Args:         
        str assets:  comma separate string including all assets
        str action_type: requested action
        str persona_text: for persona based summaries 
        str platform: for off platform based posts
        str project_id: project id
        int context_window_limit: context window limit for the llm model
      Returns:
         str :output summary 
      """
    
    #set query string
    source_query_str= get_query_string(assets)
    
    #set prompts
    question_prompt_template, refine_prompt_template=get_prompt(action_type=action_type,platform=platform,persona_text=persona_text)
   
    #set metadata and content columns
    metadata_columns=["asset_id"]
    page_content_columns=["full_description"]
    
    #load data from biqquery
    documents,all_texts=get_data(source_query_str=source_query_str,metadata_columns=metadata_columns,page_content_columns=page_content_columns, project_id=project_id,return_text=True)

    # Estimate the token length
    estimated_token_length = estimate_token_length(all_texts,'cl100k_base') #cl100k_base
    
    message=""
    is_token_limit_exceeded=False
    if estimated_token_length > context_window_limit:
      message="Your text is too long for the Gemini 1.5 Pro context window. We are trying to chunk and return the result."
      is_token_limit_exceeded=True
      summary=summarize_docs(documents=documents,question_prompt_template=question_prompt_template,refine_prompt_template=refine_prompt_template,is_token_limit_exceeded=is_token_limit_exceeded )

    else:
      message="Your text fits within the Gemini 1.5 Pro context window."
      summary=summarize_docs(documents=documents,question_prompt_template=question_prompt_template,is_token_limit_exceeded=is_token_limit_exceeded )
        
    return summary["output_text"]

def get_translation(input_text: str="",Language:str=""):
    
    """ get translation according to the requested language
      Args:         
        str input_text:  text to be translated
        str Language: destination language
      
      Returns:
         str : translated document 
      """
 
    
    #set prompts
    question_prompt_template, _=get_prompt(action_type=action_type,input_text=input_text,Language=Language)
    generation_config= GenerationConfig(temperature=0.2, max_output_tokens=8192) 
    safety_settings=  {
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
        }

    model_input=[question_prompt_template, input_text]
        
    response = generative_multimodal_model.generate_content(
        model_input,
        generation_config=generation_config,
        safety_settings=safety_settings, )
     
    print(response)
 
    return response
 
        
def func_generate_content(request):
    
    # Set the Gemini 1.5 Pro context window limit
    context_window_limit = 2000000
    project_id = "nine-quality-test"  # @param {type:"string"}
    REGION = "us-central1"  # @param {type:"string"}
    assets="p5d2tw,p5e9zq,p5e49l" #comma separated asset_ids  
    
    assets= ','.join([ "'"+ id.strip()+"'" for id in assets.split(',')])
    action_type="Summary" # could be Summary, Summary_Persona, HeadLine, OffPlatformPost, Translation
    persona="10-year-old"
    text="text to translate to Chaineese"
    Language="Chineese"
    
    persona_text=""
    if action_type=="Summary_Persona" and persona=="":
        return "Error- Please set the persona"    
    else:
         persona_text=f" so that a {persona} can understand it. Use simple words and short sentences"
    
    platform="Twitter" # could be Twitter, Instagram or "" if OffPlatformPost is not selected
    if action_type=="OffPlatformPost" and platform=="":
         return "Error- Please set the platform"
        
    if action_type=="Translation" and (text=="" or Language==""):
        return "Error- Please set the input text to translate and destination language"
        
  
    #get summary
    if action_type!="Translation":
        
        result=get_summary(assets =assets,action_type=action_type,platform=platform,
                           persona_text=persona_text,project_id=project_id,context_window_limit=context_window_limit)
    else:
        result=get_translation(input_text=text,Language=Language )
        
      
    return result

In [103]:
summary=func_generate_content('')

In [None]:
doc1-->s1
doc2-->s2
doc3


In [110]:
result=get_translation(input_text="how are you",Language='Persian' )

candidates {
  content {
    role: "model"
    parts {
      text: "TEXT: {text} is missing.  I need the text to summarize it.  Please provide the text you would like me to summarize.\n\nSUMMARY:\nSince there is no text provided, I cannot offer a summary.  I am ready to create a detailed, flowing summary once you give me the text. Please provide the text so I can fulfill your request.\n"
    }
  }
  finish_reason: STOP
  safety_ratings {
    category: HARM_CATEGORY_HATE_SPEECH
    probability: NEGLIGIBLE
    probability_score: 0.06754685938358307
    severity: HARM_SEVERITY_NEGLIGIBLE
    severity_score: 0.13568978011608124
  }
  safety_ratings {
    category: HARM_CATEGORY_DANGEROUS_CONTENT
    probability: NEGLIGIBLE
    probability_score: 0.04603387415409088
    severity: HARM_SEVERITY_NEGLIGIBLE
    severity_score: 0.09534948319196701
  }
  safety_ratings {
    category: HARM_CATEGORY_HARASSMENT
    probability: NEGLIGIBLE
    probability_score: 0.20817901194095612
    severity: HA

In [104]:
print(summary)

Disney+ offers a diverse range of adult programming, from comedies like *Abbott Elementary*, *Arrested Development*, and *Bob's Burgers* to adventures like *How I Met Your Mother* and *The Muppet Show*.  For more intense viewing, the platform boasts thrillers such as *The Americans*, *Andor*, and *Buffy the Vampire Slayer*,  mysteries like *Lost* and *Only Murders in the Building*, and suspenseful dramas like *Homeland*.  Disney+ also delves into real-world issues with shows like *Mrs. America*, *The People v. O.J. Simpson*, and *Pose*. While the platform offers lighter fare like the novel-based show "Romantic Comedy," exploring relationships and the entertainment industry,  it's worth noting that the realities of that industry, particularly regarding toxic work environments and abusive power dynamics, are explored in Maureen Ryan's book "Burn It Down," which exposes systemic issues behind the scenes of shows like *Lost* and even *Saturday Night Live*, highlighting a stark contrast to 