In [None]:
!python -m pip install google-generativeai ipywidgets pandas openai

# Note:
- Many errors with api
- Super slow speed: ~90s for each answer
- Major bugs: sometimes doesn't return any answer. IOMGR endpoint shutdown

# Gemini only

In [None]:
get answer from the gemini cache
save into a jsonl file

Create evaluation function
- Take in testDataset: with Q&A jsonl format
- Take in the model answers in jsonl format
- Create evaluation matrix
- Compare and evaluate the model answers with the testDataset answers, using openai api, get back a reasoning and score.
- Organize them into a df with test question, test answer, model answer, score, reasoning
- Export df into an excel file


In [98]:

import os

import google.generativeai as genai

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])


#load file:
text = ""
with open('/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/FAR_28-39.rtf', 'r') as file:
    text = file.read()
    

# Create the model
# See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
generation_config = {
  "temperature": 0,
  "top_p": 0.95,
  "top_k": 64,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config, 
  # content=text,
  # safety_settings = Adjust safety settings
  # See https://ai.google.dev/gemini-api/docs/safety-settings
  system_instruction="You are an expert on the Federal Acquisition Regulation (FAR). \nYour task is to answer user queries based on the FAR document content provided. \nAlways provide accurate information and cite the relevant FAR sections when possible.",
)



response = model.generate_content(f"Based on this file:{text}.\n\n Tell me about the 47.403-3 Disallowance of expenditures, in verbatim:", stream=True)
for chunk in response:
  print(chunk.text)


I0000 00:00:1720724062.718382 3541301 tcp_posix.cc:809] IOMGR endpoint shutdown
I0000 00:00:1720724062.732190 3477388 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
I0000 00:00:1720724093.396321 3610849 tcp_posix.cc:809] IOMGR endpoint shutdown


The
 FAR 47.403-3 Disallowance of Expenditures states:


**(a)** Agencies shall disallow expenditures for U.S. Government-
financed commercial international air transportation on foreign-flag air carriers unless there is attached to the appropriate voucher a memorandum adequately explaining why service by U.S.-flag
 air carriers was not available, or why it was necessary to use foreign-flag air carriers.

**(b)** When the travel is by indirect route or the
 traveler otherwise fails to use available U.S.-flag air carrier service, the amount to be disallowed against the traveler is based on the loss of revenues suffered by U.S.-flag air carriers as determined under the following formula, which is
 prescribed and more fully explained in 56 Comp. Gen. 209 (1977):

**(c)** The justification requirement is satisfied by the contractor's use of a statement similar to the one contained in the
 clause at \cf3 52.247-63\cf2 , Preference for U.S.-Flag Air Carriers. (See \cf3 47.405\cf2 ). 



I0000 00:00:1720724407.147536 3541293 tcp_posix.cc:809] IOMGR endpoint shutdown


In [99]:
response

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "The FAR 47.403-3 Disallowance of Expenditures states:\n\n**(a)** Agencies shall disallow expenditures for U.S. Government-financed commercial international air transportation on foreign-flag air carriers unless there is attached to the appropriate voucher a memorandum adequately explaining why service by U.S.-flag air carriers was not available, or why it was necessary to use foreign-flag air carriers.\n\n**(b)** When the travel is by indirect route or the traveler otherwise fails to use available U.S.-flag air carrier service, the amount to be disallowed against the traveler is based on the loss of revenues suffered by U.S.-flag air carriers as determined under the following formula, which is prescribed and more fully explained in 56 Comp. Gen. 209 (1977):\

# Gemini with cache

In [76]:
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time

# Configure the Gemini API
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Global variables
far_doc_path = None
model_name = 'models/gemini-1.5-pro-001'
cache_ttl = datetime.timedelta(hours=1)
cache = None
model = None

safety_settings = [
    {
        "category": "HARM_CATEGORY_DANGEROUS",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]


def initialize_cache(far_doc_path, model_name='models/gemini-1.5-flash-001', cache_ttl=datetime.timedelta(hours=1)):
    global cache, model
    
    # Read the FAR document
    with open(far_doc_path, 'r', encoding='utf-8') as file:
        far_content = file.read()
    print(f"FAR document loaded successfully. Document length: {len(far_content)} characters.")

    # Create a cache
    cache = caching.CachedContent.create(
        model=model_name,
        display_name='FAR Document Cache',

        system_instruction=(
            "You are an expert on the Federal Acquisition Regulation (FAR). "
            "Your task is to answer user queries based on the FAR document content provided. "
            "Always provide accurate information and cite the relevant FAR sections when possible."
        ),
        contents=[far_content],
        ttl=cache_ttl,
    )
    print(f"Cache created successfully.")

    # Construct a GenerativeModel which uses the created cache
    model = genai.GenerativeModel.from_cached_content(cached_content=cache, safety_settings=safety_settings)
    print("Model with cache initialized successfully.")

def ask_question(question):
    if not model:
        raise ValueError("Cache not initialized. Call initialize_cache() first.")
    
    print(f"Getting answer for question: {question}")
    response = model.generate_content(question)
    print(f"Response: {response}")
    print(f"Response text: {response.text}")
    print("Token Usage:")
    print(f"Prompt tokens: {response.usage_metadata.prompt_token_count}")
    print(f"Cached tokens: {response.usage_metadata.cached_content_token_count}")
    print(f"Response tokens: {response.usage_metadata.candidates_token_count}")
    print(f"Total tokens: {response.usage_metadata.total_token_count}")
    return response.text

def update_cache_ttl(new_ttl):
    global cache
    if cache:
        cache.update(ttl=new_ttl)
        print(f"Cache TTL updated to {new_ttl}")
    else:
        print("Cache not initialized yet.")

def delete_cache():
    global cache, model
    if cache:
        cache.delete()
        cache = None
        model = None
        print("Cache deleted successfully.")
    else:
        print("No cache to delete.")

# Example usage
far_path = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/FAR.rtf"
initialize_cache(far_path)

FAR document loaded successfully. Document length: 7021793 characters.


I0000 00:00:1720723276.659246 3477388 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Cache created successfully.
Model with cache initialized successfully.


In [None]:
if not model:
    raise ValueError("Cache not initialized. Call initialize_cache() first.")

question = "Tell me about the 1.201-1 The two councils in verbatim:"


print(f"Getting answer for question: {question}")
response = model.generate_content(question)

print(f"Response: {response}")
print(f"Response text: {response.text}")
print("Token Usage:")
print(f"Prompt tokens: {response.usage_metadata.prompt_token_count}")
print(f"Cached tokens: {response.usage_metadata.cached_content_token_count}")
print(f"Response tokens: {response.usage_metadata.candidates_token_count}")
print(f"Total tokens: {response.usage_metadata.total_token_count}")



In [77]:
response = model.generate_content("Tell me about the 1.201-1 The two councils section in verbatim")
display(Markdown(response.text))

I0000 00:00:1720723303.286800 3477388 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
I0000 00:00:1720723351.391640 3541348 tcp_posix.cc:809] IOMGR endpoint shutdown


ValueError: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. Please check the `candidate.safety_ratings` to determine if the response was blocked.

In [None]:
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time

# Configure the Gemini API
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

class FARChatbot:
    def __init__(self, far_doc_path, model_name='models/gemini-1.5-flash-001', cache_ttl=datetime.timedelta(hours=1)):
        self.far_doc_path = far_doc_path
        self.model_name = model_name
        self.cache_ttl = cache_ttl
        self.cache = None
        self.model = None

    def initialize_cache(self):
        # Read the FAR document
        with open(self.far_doc_path, 'r', encoding='utf-8') as file:
            far_content = file.read()
            print(f"FAR document loaded successfully. Document length: {len(far_content)} characters.")

        # Create a cache
        self.cache = caching.CachedContent.create(
            model=self.model_name,
            display_name='FAR Document Cache',
            system_instruction=(
                "You are an expert on the Federal Acquisition Regulation (FAR). "
                "Your task is to answer user queries based on the FAR document content provided. "
                "Always provide accurate information and cite the relevant FAR sections when possible."
            ),
            contents=[far_content],
            ttl=self.cache_ttl,
        )
        print(f"Cache created successfully.")

        # Construct a GenerativeModel which uses the created cache
        self.model = genai.GenerativeModel.from_cached_content(cached_content=self.cache)
        print("Model with cache initialized successfully.")

    def ask_question(self, question):
        if not self.model:
            raise ValueError("Cache not initialized. Call initialize_cache() first.")
        print(f"Getting answer for question: {question}")
        response = self.model.generate_content(question)
        print(f"Response: {response}")
        print(f"Response text: {response.text}")
        
        print("Token Usage:")
        print(f"Prompt tokens: {response.usage_metadata.prompt_token_count}")
        print(f"Cached tokens: {response.usage_metadata.cached_content_token_count}")
        print(f"Response tokens: {response.usage_metadata.candidates_token_count}")
        print(f"Total tokens: {response.usage_metadata.total_token_count}")

        return response.text

    def update_cache_ttl(self, new_ttl):
        if self.cache:
            self.cache.update(ttl=new_ttl)
            print(f"Cache TTL updated to {new_ttl}")
        else:
            print("Cache not initialized yet.")

    def delete_cache(self):
        if self.cache:
            self.cache.delete()
            self.cache = None
            self.model = None
            print("Cache deleted successfully.")
        else:
            print("No cache to delete.")

# Example usage
far_path = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/FAR.rtf"
chatbot = FARChatbot(far_path)

chatbot.initialize_cache()


In [None]:
user_input = "Tell me about the 1.201-1 The two councils in verbatim:"


print(f"Getting answer for question: {question}")
response = self.model.generate_content(question)
print(f"Response: {response}")
print(f"Response text: {response.text}")


print("Answer:", answer)
print()

# chatbot.delete_cache()

In [None]:
import json
def parse_json_object(lines):
    obj = {}
    for line in lines:
        line = line.strip()
        if line.startswith('"Q":'):
            obj['Q'] = line.split('"Q":', 1)[1].strip().strip('"')
        elif line.startswith('"A":'):
            obj['A'] = line.split('"A":', 1)[1].strip().strip('"')
    return obj if obj else None

# def process_testdata(chatbot, input_file, output_file):
with open(input_file, 'r', encoding='utf-8') as infile, open(output_file, 'w', encoding='utf-8') as outfile:
    content = infile.read()
    
    # Split the content into individual JSON-like objects
    json_objects = content.split('}\n{')
    
    for i, obj in enumerate(json_objects):
        # Add back the curly braces that were removed in the split
        if i > 0:
            obj = '{' + obj
        if i < len(json_objects) - 1:
            obj += '}'
        
        parsed = parse_json_object(obj.split('\n'))
        if parsed:
            question = parsed.get('Q', '')
            print(f"\nParsed question: {question}...")
            if question:
                answer = chatbot.ask_question(question)
                print(f"\nAnswer: {answer[:50]}...")
                result = {
                    'question': question,
                    'test_answer': parsed.get('A', ''),
                    'model_answer': answer
                }
                json.dump(result, outfile, ensure_ascii=False)
                outfile.write('\n')
                print(f"Processed question: {question[:50]}...")
        else:
            print(f"Error parsing object {i+1}")

input_file = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/testData.jsonl"
output_file = "/Users/huyknguyen/Desktop/redhorse/code_projects/far_chat/docs/testDataAnswers.jsonl"

process_testdata(chatbot, input_file, output_file)

In [None]:
user_input = input("Ask a question about FAR (or type 'exit' to quit): ")
# if user_input.lower() == 'exit' or 'q':
#     break

answer = chatbot.ask_question(user_input)
print("Answer:", answer)
print()

get answer from the gemini cache
save into a jsonl file

Create evaluation function
- Take in testDataset: with Q&A jsonl format
- Take in the model answers in jsonl format
- Create evaluation matrix
- Compare and evaluate the model answers with the testDataset answers, using openai api, get back a reasoning and score.
- Organize them into a df with test question, test answer, model answer, score, reasoning
- Export df into an excel file


# Test