In [None]:
!python -m pip install google-generativeai ipywidgets pandas openai streamlit

# Note:
- Frequent API errors
- Very slow: ~90 s per answer
- Major bug: sometimes no answer is returned at all (IOMGR endpoint shutdown)

# Far chat - RAG
- Take in pdf files
- Parse them
- Load into vector database
- Take user query
- Retrieve relevant info
- Pass to LLM
- LLM output to user
  

  



- Func1: take in a folder name, parse all PDF files in it, and return the combined parsed elements

In [None]:
import os
import glob
from dotenv import load_dotenv
from unstructured_client import UnstructuredClient
from unstructured_client.models import shared
from unstructured_client.models.errors import SDKError
from unstructured.staging.base import dict_to_elements
from unstructured.chunking.title import chunk_by_title
from langchain.schema import Document
from IPython.display import display, Markdown

def process_pdfs(input_folder, server_url='https://redhorse-d652ahtg.api.unstructuredapp.io'):
  """Partition every PDF directly inside ``input_folder`` through the Unstructured API.

  Args:
      input_folder: Directory searched (non-recursively) for ``*.pdf`` files.
      server_url: Base URL of the Unstructured API deployment to call.

  Returns:
      One list containing the elements returned for all files that were
      processed successfully, in sorted-path order. Files whose request
      raises ``SDKError`` are reported and skipped.

  Raises:
      ValueError: If ``UNSTRUCTURED_API_KEY`` is not set (a ``.env`` file is
          loaded first).
  """
  # Load environment variables from a .env file
  load_dotenv()

  unstructured_api_key = os.getenv("UNSTRUCTURED_API_KEY")
  if not unstructured_api_key:
      raise ValueError("UNSTRUCTURED_API_KEY environment variable not found")

  client = UnstructuredClient(api_key_auth=unstructured_api_key, server_url=server_url)

  combined_content = []

  # Sorted so the element order is reproducible; glob gives no ordering guarantee.
  for filename in sorted(glob.glob(os.path.join(input_folder, "*.pdf"))):
      print(f"Processing {filename}...")

      # Read the bytes first so the file handle is closed before the network call.
      with open(filename, "rb") as file:
          pdf_bytes = file.read()

      req = shared.PartitionParameters(
          files=shared.Files(
              content=pdf_bytes,
              file_name=filename,
          ),
          strategy="fast",
      )

      try:
          res = client.general.partition(req)
      except SDKError as e:
          print(f"Error processing {filename}: {e}")
          continue

      # A response without elements contributes nothing instead of raising
      # TypeError (assumes `elements` can be None — TODO confirm for the installed SDK).
      combined_content.extend(res.elements or [])

  print(f"Combined content length: {len(combined_content)}")
  return combined_content

def process_data(combined_content):
  """Convert raw partition dicts into chunked ``Document`` objects.

  The dicts are turned into elements with ``dict_to_elements`` and grouped
  with ``chunk_by_title`` (max 5000 characters, 700 overlap). For each chunk
  the metadata dict drops its "languages" key and gains a "source" key taken
  from "filename" ("Unknown" when absent).

  Returns:
      A list with one ``Document`` per chunk.
  """
  chunks = chunk_by_title(
      dict_to_elements(combined_content),
      max_characters=5000,
      overlap=700,
  )

  def to_document(chunk):
      # Build the Document for a single chunk.
      info = chunk.metadata.to_dict()
      info.pop("languages", None)
      info["source"] = info.get("filename", "Unknown")
      return Document(page_content=chunk.text, metadata=info)

  return [to_document(chunk) for chunk in chunks]

def print_chunk_info(documents):
  """Print the chunk count, then render the first ten chunks.

  For each previewed chunk the content is displayed as Markdown, followed by
  its character count and a 50-dash separator line.
  """
  print(f"Total number of chunks: {len(documents)}")

  separator = "-" * 50
  preview = documents[:10]  # only the first 10 chunks are shown
  for position, chunk in enumerate(preview, start=1):
      body = chunk.page_content
      print(f"\nChunk {position}:")
      display(Markdown(body))
      print(f"Content length: {len(body)} characters")
      print(separator)

# Main execution: parse the PDFs, chunk them, preview the chunks, report the total size.
if __name__ == "__main__":
  input_folder = "./test"

  combined_content = process_pdfs(input_folder)
  documents = process_data(combined_content)

  print("\nChunking Results:")
  print_chunk_info(documents)

  # Total number of characters across every chunk
  total_length = sum(map(lambda chunk: len(chunk.page_content), documents))
  print(f"\nTotal length of all chunks combined: {total_length} characters")





# Gemini only

In [1]:
import os
import google.generativeai as genai
from IPython.display import display, Markdown
import streamlit as st

def load_prompt(file_path):
    """Return the contents of a prompt file with surrounding whitespace removed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file's text, stripped of leading and trailing whitespace.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read().strip()

# Authenticate the SDK with the key kept in Streamlit secrets.
genai.configure(api_key=st.secrets["GOOGLE_API_KEY"])

# Load file: the whole document is kept in memory and injected into every prompt.
far_text = ""
with open('/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/far10.rtf', 'r') as file:
    far_text = file.read()

# Create the model
# (top_p = 0.95 and top_k = 64 were tried previously and are currently left unset.)
generation_config = dict(temperature=0, max_output_tokens=8192)

system_instruction = load_prompt("./prompts/system_instruction.txt")

# Every listed category uses the same BLOCK_NONE threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
    system_instruction=system_instruction,
    safety_settings=safety_settings,
)


# Initialize conversation history, seeded with the system prompt
conversation_history = [f"system prompt: {system_instruction}"]

def chat_with_far(query):
    """Send one user query to the model and stream the answer to stdout.

    The query is appended to the module-level ``conversation_history``, the
    prompt is built from the ``./prompts/chat_content.txt`` template (FAR text,
    history, query, empty user feedback), and the streamed reply is printed as
    it arrives. The reply is then appended to the history, which is trimmed to
    its last 10 entries.

    Args:
        query: The user's question.

    Returns:
        The concatenated text of all streamed chunks that carried text.
    """
    global conversation_history

    # Add the user's query to the conversation history
    conversation_history.append(f"Human: {query}")

    # Prepare the full context for the model
    full_context = load_prompt("./prompts/chat_content.txt").format(
        far_text=far_text,
        conversation_history=conversation_history,
        query=query,
        user_feedback="",
    )

    print(f"full context: {full_context}")

    # Generate response
    response = model.generate_content(full_context, stream=True)

    pieces = []
    print("Answer:\n")
    for chunk in response:
        try:
            piece = chunk.text
        except ValueError:
            # A chunk without a text part (presumably blocked or finish-only —
            # TODO confirm) must not abort the rest of the stream.
            continue
        print(piece, end="")
        pieces.append(piece)
    answer = "".join(pieces)

    # Add the model's response to the conversation history
    conversation_history.append(f"Assistant: {answer}")

    # Keep only the 10 most recent history entries
    if len(conversation_history) > 10:
        conversation_history = conversation_history[-10:]

    return answer

# Main loop for conversation: keep answering until the user types 'exit' (any letter case).
while True:
    user_input = input("\nAsk a question about FAR (or type 'exit' to quit): ")
    if user_input.lower() != 'exit':
        answer = chat_with_far(user_input)
        continue
    break

print("Thank you for using the FAR chat assistant!")

full context: History of conversation:
['system prompt: Role: You are an expert on the Federal Acquisition Regulation (FAR).\n\nTask: Your task is to answer user queries based on the FAR document content provided.\n\nAnswer instructions:\n- Carefully understand the user query. \n- Info accuracy is critical, so priority accuracy, but do not repeat the exact same words from the document.\n- Cite the relevant FAR sections concisely.\n- Stay focus on your task and avoid irrelevant topic/info.\n- Provide accurate, concise, and easy to understand answers. Can ask user if they want a more comprehensive answer.', 'Human: tell me about the section "1.102 Statement of guiding principles for the Federal Acquisition System" in exact word and format']
end of History of conversation.
_____________________________________________________



User query:
tell me about the section "1.102 Statement of guiding principles for the Federal Acquisition System" in exact word and format
end of User query.
_____

KeyboardInterrupt: 

In [None]:
# Single-question run at notebook scope; `response` is left in the namespace for later inspection.
query = "tell me about the 1.102-2 Performance standards section in exact wording"

# Record the question before building the prompt
conversation_history.append(f"Human: {query}")

# Fill the prompt template with the document, the history and the question
full_context = load_prompt("./prompts/chat_content.txt").format(
    far_text=far_text,
    conversation_history=conversation_history,
    query=query,
    user_feedback="",
)
print(f"full context: {full_context}")

# Stream the reply, echoing each chunk as it arrives
response = model.generate_content(full_context, stream=True)
print("Answer:\n")
answer = ""
for chunk in response:
    print(chunk.text, end="")
    answer += chunk.text

# Record the reply, then keep only the 10 most recent history entries
conversation_history.append(f"Assistant: {answer}")
if len(conversation_history) > 10:
    conversation_history = conversation_history[-10:]


In [5]:
# Display the token counts reported for the most recent `response`.
response.usage_metadata

prompt_token_count: 13724
candidates_token_count: 573
total_token_count: 14297

# Gemini with cache

In [76]:
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time

# Configure the Gemini API from the GOOGLE_API_KEY environment variable
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Global variables
# NOTE(review): initialize_cache() has its own default model name, so `model_name`
# below only takes effect if it is passed in explicitly — confirm which is intended.
model_name = 'models/gemini-1.5-pro-001'
cache_ttl = datetime.timedelta(hours=1)
far_doc_path = None
cache = None   # set by initialize_cache()
model = None   # set by initialize_cache()

# Every listed category uses the same BLOCK_NONE threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]


def initialize_cache(far_doc_path, model_name='models/gemini-1.5-flash-001', cache_ttl=datetime.timedelta(hours=1)):
    """Create a cached-content entry for the FAR document and bind a model to it.

    Reads ``far_doc_path`` as UTF-8, creates a ``CachedContent`` holding the
    document and the expert system instruction, then stores the cache and a
    model built from it in the module-level ``cache`` and ``model``.

    Args:
        far_doc_path: Path of the document to cache.
        model_name: Model the cache is created for.
        cache_ttl: Time-to-live passed to the cache.
    """
    global cache, model

    # Read the FAR document
    with open(far_doc_path, 'r', encoding='utf-8') as handle:
        far_content = handle.read()
    print(f"FAR document loaded successfully. Document length: {len(far_content)} characters.")

    expert_instruction = (
        "You are an expert on the Federal Acquisition Regulation (FAR). "
        "Your task is to answer user queries based on the FAR document content provided. "
        "Always provide accurate information and cite the relevant FAR sections when possible."
    )

    # Create a cache
    cache = caching.CachedContent.create(
        model=model_name,
        display_name='FAR Document Cache',
        system_instruction=expert_instruction,
        contents=[far_content],
        ttl=cache_ttl,
    )
    print("Cache created successfully.")

    # Construct a GenerativeModel which uses the created cache
    model = genai.GenerativeModel.from_cached_content(
        cached_content=cache,
        safety_settings=safety_settings,
    )
    print("Model with cache initialized successfully.")

def ask_question(question):
    """Ask the cache-backed model one question and print the reply with token usage.

    Args:
        question: Prompt text sent to the model.

    Returns:
        The response text.

    Raises:
        ValueError: If the module-level ``model`` has not been initialised.
    """
    if not model:
        raise ValueError("Cache not initialized. Call initialize_cache() first.")

    print(f"Getting answer for question: {question}")
    response = model.generate_content(question)
    print(f"Response: {response}")
    print(f"Response text: {response.text}")

    print("Token Usage:")
    usage_fields = (
        ("Prompt", "prompt_token_count"),
        ("Cached", "cached_content_token_count"),
        ("Response", "candidates_token_count"),
        ("Total", "total_token_count"),
    )
    for label, field in usage_fields:
        print(f"{label} tokens: {getattr(response.usage_metadata, field)}")
    return response.text

def update_cache_ttl(new_ttl):
    """Update the TTL of the module-level cache, or report that none exists."""
    if not cache:
        print("Cache not initialized yet.")
        return
    cache.update(ttl=new_ttl)
    print(f"Cache TTL updated to {new_ttl}")

def delete_cache():
    """Delete the module-level cache and reset ``cache`` and ``model`` to None."""
    global cache, model
    if not cache:
        print("No cache to delete.")
        return
    cache.delete()
    cache = model = None
    print("Cache deleted successfully.")

# Example usage
# Only the path is passed, so initialize_cache() runs with its default model name and TTL.
far_path = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/FAR.rtf"
initialize_cache(far_path)

FAR document loaded successfully. Document length: 7021793 characters.


I0000 00:00:1720723276.659246 3477388 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Cache created successfully.
Model with cache initialized successfully.


In [None]:
# Ask one question against the cache-backed model at notebook scope.
if not model:
    raise ValueError("Cache not initialized. Call initialize_cache() first.")

question = "Tell me about the 1.201-1 The two councils in verbatim:"

print(f"Getting answer for question: {question}")
response = model.generate_content(question)

# Raw response object first, then just its text
print(f"Response: {response}")
print(f"Response text: {response.text}")

# Token accounting reported with the response
print("Token Usage:")
usage = response.usage_metadata
print(f"Prompt tokens: {usage.prompt_token_count}")
print(f"Cached tokens: {usage.cached_content_token_count}")
print(f"Response tokens: {usage.candidates_token_count}")
print(f"Total tokens: {usage.total_token_count}")



In [None]:
# Query the model directly and render the reply text as Markdown.
response = model.generate_content("Tell me about the 1.201-1 The two councils section in verbatim")
display(Markdown(response.text))

In [None]:
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time

# Configure the Gemini API
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

class FARChatbot:
    """Question answering over a FAR document held in a Gemini content cache.

    ``cache`` and ``model`` are None until ``initialize_cache()`` has run.
    """

    def __init__(self, far_doc_path, model_name='models/gemini-1.5-flash-001', cache_ttl=datetime.timedelta(hours=1)):
        """Store the document path, model name and cache TTL; nothing is loaded yet."""
        self.cache = None
        self.model = None
        self.far_doc_path = far_doc_path
        self.model_name = model_name
        self.cache_ttl = cache_ttl

    def initialize_cache(self):
        """Read the document, create the cache, and build a model bound to it."""
        # Read the FAR document
        with open(self.far_doc_path, 'r', encoding='utf-8') as handle:
            far_content = handle.read()
        print(f"FAR document loaded successfully. Document length: {len(far_content)} characters.")

        expert_instruction = (
            "You are an expert on the Federal Acquisition Regulation (FAR). "
            "Your task is to answer user queries based on the FAR document content provided. "
            "Always provide accurate information and cite the relevant FAR sections when possible."
        )

        # Create a cache
        self.cache = caching.CachedContent.create(
            model=self.model_name,
            display_name='FAR Document Cache',
            system_instruction=expert_instruction,
            contents=[far_content],
            ttl=self.cache_ttl,
        )
        print("Cache created successfully.")

        # Construct a GenerativeModel which uses the created cache
        self.model = genai.GenerativeModel.from_cached_content(cached_content=self.cache)
        print("Model with cache initialized successfully.")

    def ask_question(self, question):
        """Send ``question`` to the model, print the reply and token usage, return the text.

        Raises:
            ValueError: If ``initialize_cache()`` has not been called.
        """
        if not self.model:
            raise ValueError("Cache not initialized. Call initialize_cache() first.")

        print(f"Getting answer for question: {question}")
        response = self.model.generate_content(question)
        print(f"Response: {response}")
        print(f"Response text: {response.text}")

        print("Token Usage:")
        usage_fields = (
            ("Prompt", "prompt_token_count"),
            ("Cached", "cached_content_token_count"),
            ("Response", "candidates_token_count"),
            ("Total", "total_token_count"),
        )
        for label, field in usage_fields:
            print(f"{label} tokens: {getattr(response.usage_metadata, field)}")

        return response.text

    def update_cache_ttl(self, new_ttl):
        """Update the cache's TTL, or report that no cache exists."""
        if not self.cache:
            print("Cache not initialized yet.")
            return
        self.cache.update(ttl=new_ttl)
        print(f"Cache TTL updated to {new_ttl}")

    def delete_cache(self):
        """Delete the cache and reset ``cache`` and ``model`` to None."""
        if not self.cache:
            print("No cache to delete.")
            return
        self.cache.delete()
        self.cache = self.model = None
        print("Cache deleted successfully.")

# Example usage
# Only the path is passed, so the chatbot uses its default model name and TTL;
# initialize_cache() then reads the file and creates the cache.
far_path = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/FAR.rtf"
chatbot = FARChatbot(far_path)

chatbot.initialize_cache()


In [None]:
user_input = "Tell me about the 1.201-1 The two councils in verbatim:"

# Go through the chatbot instance: `self` does not exist at notebook scope, and
# ask_question() already prints the question, the raw response, its text and token usage.
answer = chatbot.ask_question(user_input)

print("Answer:", answer)
print()

# chatbot.delete_cache()

In [None]:
import json
def parse_json_object(lines):
    """Extract the "Q" and "A" values from the lines of one JSON-like object.

    Only lines that start (after stripping) with ``"Q":`` or ``"A":`` are
    used; everything else, including the braces, is ignored.

    Args:
        lines: Iterable of text lines belonging to a single object.

    Returns:
        A dict containing whichever of the keys ``'Q'`` / ``'A'`` were found,
        or None when neither was present.
    """
    def _value_after(line, marker):
        # Text following the key marker, without the comma that separates it
        # from the next key in a multi-line object.
        raw = line.split(marker, 1)[1].strip().rstrip(',').rstrip()
        try:
            decoded = json.loads(raw)
        except ValueError:
            decoded = None
        if isinstance(decoded, str):
            # Well-formed JSON string: escapes such as \" and \n are decoded.
            return decoded
        # Anything else falls back to plain quote stripping.
        return raw.strip('"')

    obj = {}
    for line in lines:
        line = line.strip()
        for key in ('Q', 'A'):
            marker = f'"{key}":'
            if line.startswith(marker):
                obj[key] = _value_after(line, marker)
                break
    return obj if obj else None

def process_testdata(chatbot, input_file, output_file):
    """Answer every question of a Q/A test file and write the results as JSON lines.

    Args:
        chatbot: Object whose ``ask_question(question)`` returns the answer text.
        input_file: Path of the UTF-8 test data, made of brace-delimited objects
            containing ``"Q":`` / ``"A":`` lines.
        output_file: Path that is overwritten with one JSON object per answered
            question (keys ``question``, ``test_answer``, ``model_answer``).
    """
    with open(input_file, 'r', encoding='utf-8') as infile, open(output_file, 'w', encoding='utf-8') as outfile:
        content = infile.read()

        # Split the content into individual JSON-like objects
        json_objects = content.split('}\n{')

        for i, obj in enumerate(json_objects):
            # Add back the curly braces that were removed in the split
            if i > 0:
                obj = '{' + obj
            if i < len(json_objects) - 1:
                obj += '}'

            parsed = parse_json_object(obj.split('\n'))
            if parsed:
                question = parsed.get('Q', '')
                print(f"\nParsed question: {question}...")
                # Objects without a question are skipped without writing a result.
                if question:
                    answer = chatbot.ask_question(question)
                    print(f"\nAnswer: {answer[:50]}...")
                    result = {
                        'question': question,
                        'test_answer': parsed.get('A', ''),
                        'model_answer': answer
                    }
                    json.dump(result, outfile, ensure_ascii=False)
                    outfile.write('\n')
                    print(f"Processed question: {question[:50]}...")
            else:
                print(f"Error parsing object {i+1}")

input_file = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/testData.jsonl"
output_file = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/testDataAnswers.jsonl"

process_testdata(chatbot, input_file, output_file)

In [None]:
# Ask a single question interactively and print the chatbot's answer.
user_input = input("Ask a question about FAR (or type 'exit' to quit): ")
# NOTE(review): the exit check below is disabled, so typing 'exit' is sent to the model
# like any other question. As written, `== 'exit' or 'q'` would always be truthy, and
# `break` is only valid inside a loop.
# if user_input.lower() == 'exit' or 'q':
#     break

answer = chatbot.ask_question(user_input)
print("Answer:", answer)
print()

Get the answers from the Gemini cache.
Save them into a JSONL file.

Create evaluation function
- Take in testDataset: with Q&A jsonl format
- Take in the model answers in jsonl format
- Create evaluation matrix
- Compare and evaluate the model answers with the testDataset answers, using openai api, get back a reasoning and score.
- Organize them into a df with test question, test answer, model answer, score, reasoning
- Export df into an excel file


# Test