In [1]:
from dotenv import dotenv_values
import requests

from helpers.google_interactor import GoogleInteractor
from helpers.html_parser import HTMLParser 
from helpers.storage_manager import StorageManager
from helpers.refiner_model import RefinerModel
from helpers.improver_model import ImproverModel
from helpers.writer_citer_model import WriterCiterModel


# ============================================
# Run through a single trial of the experiment
# ============================================

# --------------------------------------------
# Set up: the knobs that vary between trials
# --------------------------------------------

# How many of the top Google links are considered
K = 5

# Identifier of this trial; used as the file name for every stored artifact
trial_number = 10

# The query being studied in this trial
query = "How do I decide which type of life insurance is right for me?"

# --------------------------------------------
# Configuration: fixed for the whole experiment
# --------------------------------------------

# Link that must be present among the considered links
sfblic_link = "https://www.sfbli.com/"

# Read API credentials from the .env file one directory up
config = dotenv_values("../.env")
google_api_key = config['GOOGLE_API_KEY']
search_engine_id = config['SEARCH_ENGINE_ID']

# Helpers for searching and for persisting intermediate results
google_interactor = GoogleInteractor(
    api_key=google_api_key,
    search_engine_id=search_engine_id,
)
storage_manager = StorageManager("../data/")

# All four model helpers are constructed with the same OpenAI key
openai_api_key = config['OPEN_AI_API_KEY']
website_refiner = RefinerModel(api_key=openai_api_key)
corpus_refiner = RefinerModel(api_key=openai_api_key)
improver_model = ImproverModel(api_key=openai_api_key)
writer_citer = WriterCiterModel(api_key=openai_api_key)

# Read the clean corpus text into memory
with open("../data/clean_content.txt") as corpus_file:
    corpus_text = corpus_file.read()

In [2]:
"""
Query Google and persist the response for this trial
"""

# Ask Google for the results of the trial's query
search_results = google_interactor.search_google(num_results=10, query=query)

# Keep a copy of the response on disk, keyed by trial number
initial_response_filename = f"{trial_number}.json"
storage_manager.save_to_folder("InitialGoogleResponses", search_results, initial_response_filename)

Data saved to ../data/InitialGoogleResponses/10.json


In [3]:
"""
Show the first K links returned by the search
"""

# Pull the URL out of every search hit, then keep the first K of them
all_result_links = [result['link'] for result in search_results['items']]
top_k_links = all_result_links[:K]
top_k_links

['https://content.naic.org/article/consumer-insight-what-type-life-insurance-right-you',
 'https://www.iii.org/article/how-choose-right-type-life-insurance',
 'https://www.investopedia.com/types-of-life-insurance-plans-and-how-to-decide-which-one-is-right-for-you-7482251',
 'https://www.nerdwallet.com/article/insurance/types-of-life-insurance',
 'https://www.progressive.com/answers/best-life-insurance-for-me/']

In [4]:
"""
Check to see if SFBLIC is in the top K links.
If not, put it in the last (K-th) slot.
"""

# Build a new list instead of assigning into top_k_links:
#   * top_k_links stays exactly as it was displayed in the previous cell, and
#   * there is no IndexError when Google returned fewer than K results
#     (in that case the SFBLIC link is simply appended).
# NOTE: this is an exact string match against the home page URL; a deeper
# SFBLIC page in the results does not count as "present".
if sfblic_link in top_k_links:
    updated_links = list(top_k_links)
else:
    updated_links = top_k_links[:K - 1] + [sfblic_link]

updated_links

['https://content.naic.org/article/consumer-insight-what-type-life-insurance-right-you',
 'https://www.iii.org/article/how-choose-right-type-life-insurance',
 'https://www.investopedia.com/types-of-life-insurance-plans-and-how-to-decide-which-one-is-right-for-you-7482251',
 'https://www.nerdwallet.com/article/insurance/types-of-life-insurance',
 'https://www.sfbli.com/']

In [5]:
# Persist the final list of links for this trial
updated_links_filename = f"{trial_number}.json"
storage_manager.save_to_folder("UpdatedGoogleResponses", updated_links, updated_links_filename)

Data saved to ../data/UpdatedGoogleResponses/10.json


In [6]:
"""
Get Raw Text from the HTML associated with the links from Google
"""

# Seconds to wait for each page. requests applies no timeout by default,
# so without this a single unresponsive site would hang the notebook.
REQUEST_TIMEOUT_SECONDS = 30

# Retrieve the stored links
updated_links = storage_manager.load_from_folder("UpdatedGoogleResponses", f"{trial_number}.json")

raw_text_from_html = []

# For each link that was stored ...
for link in updated_links:

    # Fetch the HTML associated with the link
    response = requests.get(link, timeout=REQUEST_TIMEOUT_SECONDS)

    # An error page would otherwise be parsed as if it were the site's content;
    # surface it so a misleading "raw text" can be spotted for this trial.
    if not response.ok:
        print(f"Warning: {link} returned HTTP {response.status_code}")

    # Get the raw text associated with the HTML
    parsed_text = HTMLParser.parse_html(response.content)

    # Store the raw text together with the link it came from
    raw_text_from_html.append({"link" : link, "raw_text" : parsed_text})

storage_manager.save_to_folder("RawTextFromHTML", raw_text_from_html, f"{trial_number}.json")

Data loaded from ../data/UpdatedGoogleResponses/10.json
Data saved to ../data/RawTextFromHTML/10.json


In [7]:
"""
Run the website refiner over the raw text of every page
"""

raw_text_from_html = storage_manager.load_from_folder("RawTextFromHTML", f"{trial_number}.json")

website_refiner_responses = []

for page in raw_text_from_html:
    page_link = page['link']
    page_raw_text = page['raw_text']

    refined_page = website_refiner.refine_text(raw_text=page_raw_text, query=query)

    # A falsy response means the refiner produced nothing usable for this page
    if not refined_page:
        print("Failed to refine text.")
        continue

    # Tag the response with its source link so it can be matched up later
    refined_page['link'] = page_link
    website_refiner_responses.append(refined_page)

storage_manager.save_to_folder("WebsiteRefinerModelResponses", website_refiner_responses, f"{trial_number}.json")

Data loaded from ../data/RawTextFromHTML/10.json
Data saved to ../data/WebsiteRefinerModelResponses/10.json


In [8]:
# Show, per source link, the text the website refiner kept
for refined_response in website_refiner_responses:
    print(refined_response['link'])
    refined_message = refined_response['choices'][0]['message']
    print(refined_message['content'])
    print()

https://content.naic.org/article/consumer-insight-what-type-life-insurance-right-you
To decide which type of life insurance is right for you, consider the following:

1. **Types of Policies**: There are two main types of life insurance products:
   - **Term life insurance**: This policy is purchased for a specific period (term) and pays benefits if the insured dies during that term. It generally provides lower-cost coverage for specific timeframes, such as 10 or 20 years. It may allow renewal, but premiums could increase.
   - **Cash value life insurance**: This type includes policies like whole life, universal life, and variable life. These can be kept for as long as needed and have savings or investment features, allowing policy owners to access money while still alive.

2. **Questions to Consider**:
   - How much of the family income do I provide?
   - Will these financial obligations change over time?
   - In how many years do you anticipate needing death benefits, knowing that the

In [9]:
"""
Run the corpus refiner over the corpus text for this query
"""

# Refine the full corpus text against the trial's query
response_from_corpus_refiner = corpus_refiner.refine_text(query=query, raw_text=corpus_text)

# Persist the refiner's response, keyed by trial number
corpus_refiner_filename = f"{trial_number}.json"
storage_manager.save_to_folder("CorpusRefinerModelResponses", response_from_corpus_refiner, corpus_refiner_filename)

Data saved to ../data/CorpusRefinerModelResponses/10.json


In [10]:
# Show the text the corpus refiner produced (first choice of the response)
corpus_refiner_message = response_from_corpus_refiner['choices'][0]['message']
print(corpus_refiner_message['content'])

To decide what type of life insurance is right for you, assess your financial needs, goals, and budget.

### **Key Considerations:**
1. **Types of Policies:**
   - **Term Life Insurance:** Provides affordable coverage for a specific period (e.g., 10-30 years). Ideal for short-term needs like covering dependents, a mortgage, or college expenses. Look for renewable or convertible options if your needs may evolve.
   - **Permanent Life Insurance:** Includes whole life, universal life, and variable life policies. These offer lifelong coverage, build cash value, and suit long-term goals like estate planning or creating a financial safety net for heirs. They require higher premiums but provide added financial benefits.

2. **Financial Goals:**
   - For income replacement or temporary obligations, term insurance is often best.
   - For lifelong protection and potential savings growth, permanent policies are more appropriate.

3. **Budget and Premiums:**
   - Term policies typically have lower

In [11]:
"""
Improve the text from the SFBLIC site
"""

# Get the refined text from the website.
# Match on sfblic_link (the same value that was injected into the link list)
# rather than repeating the URL literal, so the two can never drift apart.
website_refiner_responses = storage_manager.load_from_folder("WebsiteRefinerModelResponses", f"{trial_number}.json")
sfblic_response = next(
    (response for response in website_refiner_responses if response['link'] == sfblic_link),
    None,
)
if sfblic_response is None:
    # Happens when the website refiner failed for the SFBLIC page and it was
    # therefore never stored; fail with a message that says so.
    raise LookupError(
        f"No website refiner response stored for {sfblic_link} in trial {trial_number}"
    )
refined_text_from_website = sfblic_response['choices'][0]['message']['content']

# Get the refined text from the corpus
response_from_corpus_refiner = storage_manager.load_from_folder("CorpusRefinerModelResponses", f"{trial_number}.json")
refined_text_from_corpus = response_from_corpus_refiner['choices'][0]['message']['content']


# Generate improved text
improved_text = improver_model.improve_text(
    query=query,
    refined_text_from_website=refined_text_from_website,
    refined_text_from_corpus=refined_text_from_corpus
)

if improved_text:
    print("Improved Text generated...")

else:
    print("Failed to improve text.")


# Save to disk (also on failure, so a stale file from an earlier run of this
# trial number is never mistaken for the current result)
storage_manager.save_to_folder("ImproverModelResponses", improved_text, f"{trial_number}.json")


Data loaded from ../data/WebsiteRefinerModelResponses/10.json
Data loaded from ../data/CorpusRefinerModelResponses/10.json
Improved Text generated...
Data saved to ../data/ImproverModelResponses/10.json


In [12]:
# Inspect the refined SFBLIC website text that was passed to the improver model
print(refined_text_from_website)

THERE IS NO RELEVANT INFORMATION IN THE RAW TEXT


In [13]:
# Show the improved text (first choice of the improver's response)
improved_message = improved_text['choices'][0]['message']
print(improved_message['content'])

To decide on the right type of life insurance for you, you should evaluate your financial needs, goals, and budget. Here are some key considerations:

1. **Types of Policies:**
   - **Term Life Insurance:** Offers affordable coverage for a specified timeframe (e.g., 10-30 years) and is best suited for short-term needs such as supporting dependents, paying off a mortgage, or covering college expenses. Consider renewable or convertible options if your requirements might change.
   - **Permanent Life Insurance:** Includes whole life, universal life, and variable life policies. These provide lifelong coverage, build cash value, and are ideal for long-term goals like estate planning or establishing a financial safety net for heirs. They usually demand higher premiums but offer additional financial benefits.

2. **Financial Goals:**
   - For income replacement or managing temporary obligations, term insurance is often the better choice.
   - If you need lifelong protection and a potential fo

In [14]:
"""
Create multiple sets of reference material
"""

# Create a set of reference material that has the original (unimproved) content from the website

website_refiner_model_responses = storage_manager.load_from_folder("WebsiteRefinerModelResponses", f"{trial_number}.json")

unmodified_set_of_reference_material =  [
    {
        "link" : item['link'],
        "information" : item['choices'][0]['message']['content']
    }
    for item in website_refiner_model_responses
]

# Load the improved content produced by the improver model
improved_text = storage_manager.load_from_folder('ImproverModelResponses', f"{trial_number}.json")
improved_version = improved_text['choices'][0]['message']['content']


# Create a new set of reference material: identical to the unmodified set,
# except that the SFBLIC entry carries the improved text.

new_set_of_reference_material = []

for item in unmodified_set_of_reference_material:
    if item['link'] == sfblic_link:
        # Take the link from the item itself. The previous code read a `link`
        # variable left over from an earlier cell's loop, which only held the
        # SFBLIC URL when SFBLIC happened to be the last link processed.
        new_set_of_reference_material.append({"link" : item['link'], "information" : improved_version})
    else:
        new_set_of_reference_material.append(item)

Data loaded from ../data/WebsiteRefinerModelResponses/10.json
Data loaded from ../data/ImproverModelResponses/10.json


In [15]:
"""
Bundle the sets of reference material and store them
"""

# improvement_method is the position in this list:
#   0 -> unmodified reference material
#   1 -> reference material containing the improved text
sets_of_reference_material = [
    {
        "improvement_method" : method_id,
        "set_of_reference_material" : reference_set,
    }
    for method_id, reference_set in enumerate(
        [unmodified_set_of_reference_material, new_set_of_reference_material]
    )
]

storage_manager.save_to_folder("SetsOfReferenceMaterial", sets_of_reference_material, f"{trial_number}.json")

Data saved to ../data/SetsOfReferenceMaterial/10.json


In [16]:
"""
Send sets of reference material to the writer-citer
"""

writer_citer_model_responses = []

sets_of_reference_material = storage_manager.load_from_folder("SetsOfReferenceMaterial", f"{trial_number}.json")

# One pass per stored set (improvement_method 0 = unimproved, 1 = improved),
# in stored order, replacing two copy-pasted stanzas that differed only in
# the list index.
for set_of_reference_material in sets_of_reference_material:
    improvement_method = set_of_reference_material['improvement_method']
    reference_material = set_of_reference_material['set_of_reference_material']

    # Generate a cited response
    cited_response = writer_citer.generate_cited_response(
        rephrased_query=query,
        set_of_reference_query=reference_material
    )

    # Same guard the refiner and improver cells use for their models.
    # NOTE(review): assumes the writer-citer also returns a falsy value on
    # failure -- confirm against WriterCiterModel.
    if not cited_response:
        print(f"Failed to generate cited response for improvement method {improvement_method}.")
        continue

    # Record which set of reference material produced this response
    cited_response['improvement_method'] = improvement_method

    writer_citer_model_responses.append(cited_response)


Data loaded from ../data/SetsOfReferenceMaterial/10.json


In [18]:
# Show each cited response, followed by a separator line
for cited_response_item in writer_citer_model_responses:
    cited_text = cited_response_item['choices'][0]['message']['content']
    print(cited_text, "---", sep="\n")

Deciding which type of life insurance is right for you requires consideration of several key factors [1][2][3][4]. 

Start by evaluating the **types of life insurance** available. The two main types are **term life insurance** and **permanent life insurance** (which includes whole life, universal life, and variable life policies). Term life insurance covers a specific period, making it a good option if you need a large amount of coverage for a limited time and have a constrained budget, while permanent policies offer lifelong coverage and accumulate cash value, which can be accessed during your lifetime [1][2].

Determine the **purpose of the policy**. Consider whether it’s to cover specific needs like children’s college expenses (which may suit a term policy) or lifelong financial security with savings elements (typical with permanent policies) [3]. Understand how much of the family income you supply and any financial obligations' changes over time, as these factors can significantly 

In [19]:
# Persist the writer-citer responses for this trial
writer_citer_filename = f"{trial_number}.json"
storage_manager.save_to_folder("WriterCiterModelResponses", writer_citer_model_responses, writer_citer_filename)

Data saved to ../data/WriterCiterModelResponses/10.json
