In [1]:
from dotenv import dotenv_values
import requests

from helpers.google_interactor import GoogleInteractor
from helpers.html_parser import HTMLParser 
from helpers.storage_manager import StorageManager
from helpers.refiner_model import RefinerModel
from helpers.improver_model import ImproverModel
from helpers.writer_citer_model import WriterCiterModel


"""
===
Run through a single trial of the experiment
===
"""

"""
Set up and configuration
"""

# Read the Google credentials and search-engine id from the project's .env file
config = dotenv_values("../.env")
google_api_key = config['GOOGLE_API_KEY']
search_engine_id = config['SEARCH_ENGINE_ID']

# Build the helper objects used throughout the trial
google_interactor = GoogleInteractor(
    api_key=google_api_key,
    search_engine_id=search_engine_id,
)
storage_manager = StorageManager("../data/")

# All four model helpers are constructed with the same OpenAI key
open_ai_api_key = config['OPEN_AI_API_KEY']
website_refiner = RefinerModel(api_key=open_ai_api_key)
corpus_refiner = RefinerModel(api_key=open_ai_api_key)
improver_model = ImproverModel(api_key=open_ai_api_key)
writer_citer = WriterCiterModel(api_key=open_ai_api_key)

# How many of the top search results are considered
K = 5

# Trial identifier; it names every JSON file saved or loaded below
trial_number = 2

# Read the whole clean corpus into memory
with open("../data/clean_content.txt") as corpus_file:
    corpus_text = corpus_file.read()


# The question investigated in this trial
query = "What products are offered by Southern Farm Bureau Life Insurance Company?"

"""
Submit query to google and store the response
"""

# Send the trial's query to Google, asking for ten results
search_results = google_interactor.search_google(num_results=10, query=query)

# Persist the search response under this trial's file name
initial_response_filename = f"{trial_number}.json"
storage_manager.save_to_folder(
    "InitialGoogleResponses",
    search_results,
    initial_response_filename,
)


"""
Get Raw Text from the HTML associated with the links from Google
"""

# Retrieve the stored response
search_results = storage_manager.load_from_folder("InitialGoogleResponses", f"{trial_number}.json")

# Seconds to wait on any single site before giving up; without a timeout
# requests.get can block indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30

raw_text_from_html = []

# NOTE(review): the search API appears to omit 'items' when a query has no
# results -- fall back to an empty list instead of raising KeyError.
items = search_results.get('items', [])

# For each of the top K search results ...
for item in items[:K]:
    link = item['link']

    # Fetch the HTML associated with the link. A page that cannot be retrieved
    # (connection error, timeout, HTTP error status) is reported and skipped,
    # so one bad link neither aborts the trial nor feeds an error page into
    # the stored raw text.
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f"Failed to fetch {link}: {error}")
        continue

    # Get the raw text associated with the HTML
    parsed_text = HTMLParser.parse_html(response.content)

    # Keep the raw text together with the link it came from
    raw_text_from_html.append({"link": link, "raw_text": parsed_text})

storage_manager.save_to_folder("RawTextFromHTML", raw_text_from_html, f"{trial_number}.json")


"""
Get the refined text from the raw text
"""

# Reload the raw page text saved for this trial
raw_text_from_html = storage_manager.load_from_folder("RawTextFromHTML", f"{trial_number}.json")

# Refine each page's raw text with respect to the query, keeping only the
# responses the refiner actually produced
website_refiner_responses = []
for page in raw_text_from_html:
    link, raw_text = page['link'], page['raw_text']

    refined = website_refiner.refine_text(raw_text=raw_text, query=query)

    # Guard clause: a falsy response is reported and the page is skipped
    if not refined:
        print("Failed to refine text.")
        continue

    # Tag the response with the link it was derived from
    refined['link'] = link
    website_refiner_responses.append(refined)

storage_manager.save_to_folder("WebsiteRefinerModelResponses", website_refiner_responses, f"{trial_number}.json")


"""
Filter the corpus to information that may be relevant to the query
"""

# Run the corpus refiner over the full corpus text for this query
response_from_corpus_refiner = corpus_refiner.refine_text(query=query, raw_text=corpus_text)

# Persist the refiner's response under this trial's file name
corpus_response_filename = f"{trial_number}.json"
storage_manager.save_to_folder(
    "CorpusRefinerModelResponses",
    response_from_corpus_refiner,
    corpus_response_filename,
)


"""
Improve the text from the SFBLIC site
"""

# Link whose refined text is selected for improvement
SFBLIC_URL = 'https://www.sfbli.com/'

# Get the refined text from the website
website_refiner_responses = storage_manager.load_from_folder("WebsiteRefinerModelResponses", f"{trial_number}.json")

# Pick the first refined response that came from the SFBLIC link
sfblic_response = next(
    (response for response in website_refiner_responses if response['link'] == SFBLIC_URL),
    None,
)
if sfblic_response is None:
    # Same exception type as indexing an empty match list, but with a message
    # that says what is actually missing.
    raise IndexError(
        f"No refined response for {SFBLIC_URL} in trial {trial_number}; "
        "the link was not among the stored website refiner responses."
    )
refined_text_from_website = sfblic_response['choices'][0]['message']['content']

# Get the refined text from the corpus
response_from_corpus_refiner = storage_manager.load_from_folder("CorpusRefinerModelResponses", f"{trial_number}.json")
refined_text_from_corpus = response_from_corpus_refiner['choices'][0]['message']['content']


# Generate improved text from the website text and the corpus text
improved_text = improver_model.improve_text(
    query=query,
    refined_text_from_website=refined_text_from_website,
    refined_text_from_corpus=refined_text_from_corpus
)

if improved_text:
    print("Improved Text generated...")

else:
    print("Failed to improve text.")


# Save to disk. This runs whether or not the improvement succeeded, so the
# file for this trial always reflects the latest attempt.
storage_manager.save_to_folder("ImproverModelResponses", improved_text, f"{trial_number}.json")


"""
Create multiple sets of reference material
"""

# Create a set of reference material that has the original (unimproved) content from the website

website_refiner_model_responses = storage_manager.load_from_folder("WebsiteRefinerModelResponses", f"{trial_number}.json")

unmodified_set_of_reference_material = [
    {
        "link": item['link'],
        "information": item['choices'][0]['message']['content'],
    }
    for item in website_refiner_model_responses
]

# Load the improved content produced for the SFBLIC page
improved_text = storage_manager.load_from_folder('ImproverModelResponses', f"{trial_number}.json")
improved_version = improved_text['choices'][0]['message']['content']


# Create a new set of reference material: identical to the unmodified set,
# except that the SFBLIC entry carries the improved text.
#
# The replacement entry takes its link from the entry being replaced
# (item['link']). It previously used a `link` variable left over from an
# earlier loop, which attributed the improved text to whichever page that loop
# processed last.
new_set_of_reference_material = [
    {"link": item['link'], "information": improved_version}
    if item['link'] == 'https://www.sfbli.com/'
    else item
    for item in unmodified_set_of_reference_material
]


"""
Send sets of reference material to the writer-citer
"""

# Ask the writer-citer for one cited response per set of reference material:
# first the unmodified set, then the set containing the improved text.
for reference_material in (
    unmodified_set_of_reference_material,
    new_set_of_reference_material,
):
    cited_response = writer_citer.generate_cited_response(
        rephrased_query=query,
        set_of_reference_query=reference_material,
    )

    # A falsy response is reported and the next set is tried
    if not cited_response:
        print("Failed to generate cited response.")
        continue

    print("Cited Response:")
    print(cited_response)




Data saved to ../data/InitialGoogleResponses/2.json
Data loaded from ../data/InitialGoogleResponses/2.json
Data saved to ../data/RawTextFromHTML/2.json
Data loaded from ../data/RawTextFromHTML/2.json
Data saved to ../data/WebsiteRefinerModelResponses/2.json
Data saved to ../data/CorpusRefinerModelResponses/2.json
Data loaded from ../data/WebsiteRefinerModelResponses/2.json
Data loaded from ../data/CorpusRefinerModelResponses/2.json
Improved Text generated...
Data saved to ../data/ImproverModelResponses/2.json
Data loaded from ../data/WebsiteRefinerModelResponses/2.json


NameError: name 'ImproverModelResponses' is not defined

Data loaded from ../data/ImproverModelResponses/2.json
Cited Response:
{'id': 'chatcmpl-AWsdUC1h9QwvsBUijsO7IvD6xSJuN', 'choices': [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': "Southern Farm Bureau Life Insurance Company offers a variety of products designed to ensure financial security for individuals and families. Their offerings include several types of life insurance policies such as whole life, term life, and universal life insurance [3][5]. Additionally, they provide annuities, with options including fixed and indexed annuities [5]. The company also offers retirement plans, which include Individual Retirement Accounts (IRAs) [5]. Other services provided by the company encompass health insurance, farm insurance, business insurance, umbrella insurance, financial services, retirement planning, and estate planning [4]. These diverse products aim to offer comprehensive financial protection tailored to their policyholders' needs [3][4][5].", 'refusal'