In [17]:
import tiktoken
from langchain_google_community import BigQueryLoader
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
#from langchain.llms import VertexAI as langchain_vertexai
from langchain_google_vertexai import VertexAI as langchain_vertexai
from langchain import PromptTemplate
from pathlib import Path as p
import pandas as pd
 


vertex_llm_text = langchain_vertexai(model_name="gemini-1.5-pro-002")


def estimate_token_length(text, model="gpt2"):
    """Estimates the token length of a given text using a specified model.

      Args:
        text: The input text.
        model: The model to use for tokenization (default: "gpt2").

      Returns:
        The estimated number of tokens.
      """

  
    enc = tiktoken.get_encoding(model)  

    # Tokenize the text and count tokens
    tokens = enc.encode(text)
    token_count = len(tokens)
    return token_count

def get_data(source_query_str: str=None,metadata_columns: str=None,page_content_columns: str=None, project_id: str=None, return_only_text:bool=False):
    
    loader = BigQueryLoader(
            query=source_query_str, project=project_id, metadata_columns=metadata_columns, page_content_columns=page_content_columns
        )
    documents = []
    all_texts=[]
    documents.extend(loader.load())
    if return_only_text:  
        all_texts=[doc.page_content.replace('description:',"",1) for doc in documents]
        
    return documents, '\n'.join(all_texts)

 
def summarize_docs(documents: list[object],question_prompt_template: str="", refine_prompt_template: str="" ,is_token_limit_exceeded: bool=False ):
    
  
    question_prompt = PromptTemplate(template=question_prompt_template, input_variables=["text"])    
    if not is_token_limit_exceeded:        
        #if the token limit is in the context window range, use a stuffing method for summary
        chain = load_summarize_chain(vertex_llm_text, chain_type="stuff", 
                                     prompt=question_prompt)
    else:     
        refine_prompt = PromptTemplate(input_variables=["existing_answer", "text"], template=refine_prompt_template)
              
        chain = load_summarize_chain(
            vertex_llm_text,
            chain_type="refine",
            question_prompt=question_prompt,
            refine_prompt=refine_prompt,
            return_intermediate_steps=False,
          )
        
    return chain.invoke(documents)


In [45]:
# Set the Gemini 1.5 Pro context window limit
context_window_limit = 2000000
PROJECT_ID = "nine-quality-test"  # @param {type:"string"}
REGION = "us-central1"  # @param {type:"string"}
assets='p5d2tw','p5e9zq','p5e49l'
persona_text=" so that a 10-year-old can understand it. Use simple words and short sentences"
Summary='False'
Summary_Persona='False'
HeadLine='False'
OffPlatformPost='True'

source_query_str=f"select distinct combined_id,unique_id,content, chunk, trim(concat(ifnull(headline,''), CHR(10),  description)) as description from `nine-quality-test.vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in {assets} order by unique_id, chunk asc "
source_query_str= f"""SELECT          asset_id,                  
                STRING_AGG(description, '\\n' ) 
                OVER (PARTITION BY asset_id ORDER BY ifnull(startOffset_seconds,0) ASC , chunk ASC) AS full_description,
                IDX
          FROM (
                SELECT  asset_id,startOffset_seconds, CHUNK, 
                CASE WHEN chunk=0 
                     THEN TRIM(CONCAT(IFNULL(headline,''), CHR(10),  description))  
                     ELSE description 
                END AS description,
                ROW_NUMBER() OVER (PARTITION BY asset_id ORDER BY startOffset_seconds desc) AS IDX,
                FROM `nine-quality-test.vlt_media_embeddings_integration.vlt_all_media_content_text_embeddings` where asset_id in {assets}
         )
       WHERE IDX=1
    """

if Summary=='True' or Summary_Persona=='True':
        #this is the main prompt for summary
        question_prompt_template = """
            You will be given different parts of texts. Provide a summary of the following text"""+persona+""". Your result must be detailed and at least 2 paragraphs. 
            When summarizing, directly dive into the narrative or descriptions from the text without using introductory phrases like 'In this passage'. 
            Directly address the main events, characters, and themes, encapsulating the essence and significant details from the text in a flowing narrative. 
            The goal is to present a unified view of the content, continuing the story seamlessly as if the passage naturally progresses into the summary.

            TEXT: {text}
            SUMMARY:
        """

        refine_prompt_template = (
            "Your job is to produce a final summary. Your task is to combine and refine these summaries into a final, comprehensive summary that covers all key events, characters, themes, and details.\n"
            "We have provided an existing summary up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing summary"
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "Given the new context, refine the original summary"
            "If the context isn't useful, return the original summary."
        )
elif HeadLine=='True':  
    
    #this is the main prompt for headline
        question_prompt_template = """
            You will be given different parts of texts. Provide a one line headline of the following text. 

            TEXT: {text}
            HEADLINE:
        """

        refine_prompt_template = (
            "Your job is to produce a final headline. Your task is to combine and refine these headlines into a final, comprehensive headline that covers all details.\n"
            "We have provided an existing headline up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing headline"
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "Given the new context, refine the original headline"
            "If the context isn't useful, return the original headline."
        )
elif OffPlatformPost=='True' and Platform=='Twitter':

        #this is the main prompt for social media post
        question_prompt_template = """
            You will be given different parts of texts. Provide a tweet that that’s catchy, concise, and fits within 280 characters. Make sure to highlight the key message, and encourage engagement with a question or call to action.

            TEXT: {text}
            Tweet: 
        """

        refine_prompt_template = (
            "Your job is to produce a final tweet. Your task is to combine and refine these tweets into a final, comprehensive tweet that covers all details, is catchy, concise, fits within 280 characters, and encourage engagement with a question or call to action.\n"
            "We have provided an existing tweet up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing tweet"
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "Given the new context, refine the original tweet"
            "If the context isn't useful, return the original tweet."
        )
elif OffPlatformPost=='True' and Platform=='Instagram':

        #this is the main prompt for social media post
        question_prompt_template = """
            You will be given different parts of texts. Provide  into an engaging Instagram post. Craft a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.
           
            TEXT: {text}
            Instagram Post: 
        """

        refine_prompt_template = (
            "Your job is to produce a final tweet. Your task is to combine and refine these Instagram posts into a final, comprehensive post that covers all details, Craft a short, attention-grabbing caption that highlights the main point. Use emojis to make it lively, and end with a question or call to action to spark conversation in the comments.\n"
            "We have provided an existing tweet up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing tweet"
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "Given the new context, refine the original tweet"
            "If the context isn't useful, return the original tweet."
        )
metadata_columns=["asset_id"]
page_content_columns=["full_description"]
documents, all_texts=get_data(source_query_str=source_query_str,metadata_columns=metadata_columns,page_content_columns=page_content_columns, project_id=PROJECT_ID, return_only_text=True)
    
 


In [46]:
#documents,all_texts=get_data(source_query_str,metadata_columns,page_content_columns,PROJECT_ID,return_only_text=True)
# Estimate the token length
estimated_token_length = estimate_token_length(all_texts,'cl100k_base') #cl100k_base
print(estimated_token_length)
message=""
is_token_limit_exceeded=False
if estimated_token_length > context_window_limit:
  message="Your text is too long for the Gemini 1.5 Pro context window. We are trying to chunk and return the result."
  is_token_limit_exceeded=True
  summary=summarize_docs(documents=documents,question_prompt_template=question_prompt_template,refine_prompt_template=refine_prompt_template,is_token_limit_exceeded=is_token_limit_exceeded )
 
else:
  message="Your text fits within the Gemini 1.5 Pro context window."
  summary=summarize_docs(documents=documents,question_prompt_template=question_prompt_template,is_token_limit_exceeded=is_token_limit_exceeded )

 
    
 

3872


In [38]:
message

'Your text fits within the Gemini 1.5 Pro context window.'

In [47]:
summary

{'input_documents': [Document(metadata={'asset_id': 'p5e49l'}, page_content='full_description: Disney may be better known for its theme parks and blockbuster movies, but its streaming service successfully draws on a pair of historic America television networks, spin-offs from its fabled franchises, and a growing cohort of original commissions. Putting together the platform’s essential scripted shows required difficult choices, but the list is illustrious.\n Abbott Elementary\nThe great American sitcom is not dead, it’s just morphed into this delightfully witty mockumentary about the ever-burdened staff at a primary school in West Philadelphia. Creator and star Quinta Brunson delivers 22-minute episodes that sing with character humour and absurd machinations. Janella James, as the school’s questionable principal, is the best scene-stealer on television right now.\n The Americans\nWhat began as a sharp Cold War thriller, following a pair of Russian spies (Keri Russell and Matthew Rhys) l