In [1]:
from dotenv import dotenv_values
import requests

from helpers.google_interactor import GoogleInteractor
from helpers.html_parser import HTMLParser 
from helpers.storage_manager import StorageManager
from helpers.refiner_model import RefinerModel
from helpers.improver_model import ImproverModel
from helpers.writer_citer_model import WriterCiterModel


"""
============================================
Run through a single trial of the experiment
============================================
"""

"""
Set up
"""

# How many of the top search results are kept for the trial
K = 5

# Identifier of this trial; every file saved or loaded below is named after it
trial_number = 6

# --- Configuration ---

# Link that has to be present among the links used for the trial
sfblic_link = "https://www.sfbli.com/"

# Read the settings stored in the .env file one directory up
config = dotenv_values("../.env")
google_api_key = config['GOOGLE_API_KEY']
search_engine_id = config['SEARCH_ENGINE_ID']

# Build the helpers; all four model helpers are given the same API key
google_interactor = GoogleInteractor(api_key=google_api_key, search_engine_id=search_engine_id)
storage_manager = StorageManager("../data/")
openai_api_key = config['OPEN_AI_API_KEY']
website_refiner = RefinerModel(api_key=openai_api_key)
corpus_refiner = RefinerModel(api_key=openai_api_key)
improver_model = ImproverModel(api_key=openai_api_key)
writer_citer = WriterCiterModel(api_key=openai_api_key)


# Read the clean corpus text into memory
with open("../data/clean_content.txt") as corpus_file:
    corpus_text = corpus_file.read()


# Query used for the current trial
query = "Why do I need life insurance?"

In [2]:
"""
Submit the query to Google and store the results
"""

# Ask Google for results matching the trial query
search_results = google_interactor.search_google(num_results=10, query=query)

# Persist the response under this trial's file name
initial_response_filename = f"{trial_number}.json"
storage_manager.save_to_folder("InitialGoogleResponses", search_results, initial_response_filename)

Data saved to ../data/InitialGoogleResponses/6.json


In [3]:
"""
View links
"""

# Collect the link of every returned item, then keep only the first K
all_result_links = []
for search_item in search_results['items']:
    all_result_links.append(search_item['link'])

top_k_links = all_result_links[:K]
top_k_links

['https://www.reddit.com/r/FinancialPlanning/comments/12vb4s6/do_any_of_you_have_life_insurance_and_is_there/',
 'https://www.tdi.texas.gov/tips/life-insurance.html',
 'https://www.newyorklife.com/articles/six-reasons-to-buy-life-insurance',
 'https://www.reddit.com/r/LifeInsurance/comments/171nrj4/what_are_the_benefits_of_life_insurance_and_when/',
 'https://www.securian.com/insights-tools/articles/why-do-i-need-life-insurance.html']

In [4]:
"""
Check to see if SFBLIC is in the top K links. 
If not, add it
"""

# Build updated_links as a NEW list so that top_k_links keeps showing the
# links Google actually returned (the original code overwrote its last entry
# through an alias).
if sfblic_link in top_k_links:
    updated_links = list(top_k_links)
else:
    # Keep at most the first K-1 links and put the SFBLIC link in the last
    # slot. Slicing (rather than indexing position K-1) also works when Google
    # returned fewer than K results, where indexing raised IndexError.
    updated_links = top_k_links[:K - 1] + [sfblic_link]

updated_links

['https://www.reddit.com/r/FinancialPlanning/comments/12vb4s6/do_any_of_you_have_life_insurance_and_is_there/',
 'https://www.tdi.texas.gov/tips/life-insurance.html',
 'https://www.newyorklife.com/articles/six-reasons-to-buy-life-insurance',
 'https://www.reddit.com/r/LifeInsurance/comments/171nrj4/what_are_the_benefits_of_life_insurance_and_when/',
 'https://www.sfbli.com/']

In [5]:
# Persist the final list of links under this trial's file name
updated_links_filename = f"{trial_number}.json"
storage_manager.save_to_folder("UpdatedGoogleResponses", updated_links, updated_links_filename)

Data saved to ../data/UpdatedGoogleResponses/6.json


In [6]:
"""
Get Raw Text from the HTML associated with the links from Google
"""

# Retrieve the stored links
updated_links = storage_manager.load_from_folder("UpdatedGoogleResponses", f"{trial_number}.json")

# Upper bound, in seconds, on each HTTP request. requests applies no timeout
# by default, so a single unresponsive site could otherwise hang the cell.
REQUEST_TIMEOUT_SECONDS = 30

raw_text_from_html = []

# For each stored link ...
for link in updated_links:

    # Fetch the HTML associated with the link
    response = requests.get(link, timeout=REQUEST_TIMEOUT_SECONDS)

    # Get the raw text associated with the HTML
    parsed_text = HTMLParser.parse_html(response.content)

    # Store the raw text from the HTML together with the link it came from
    raw_text_from_html.append({"link" : link, "raw_text" : parsed_text})

storage_manager.save_to_folder("RawTextFromHTML", raw_text_from_html, f"{trial_number}.json")

Data loaded from ../data/UpdatedGoogleResponses/6.json
Data saved to ../data/RawTextFromHTML/6.json


In [7]:
"""
Get the refined text from the raw text
"""

# Reload the raw page text saved for this trial
raw_text_from_html = storage_manager.load_from_folder("RawTextFromHTML", f"{trial_number}.json")

website_refiner_responses = []

# Refine every stored page against the trial query
for page in raw_text_from_html:
    link = page['link']
    refined_response = website_refiner.refine_text(raw_text=page['raw_text'], query=query)

    # Falsy responses are reported and left out of the collected results
    if not refined_response:
        print("Failed to refine text.")
        continue

    # Remember which link the refined response belongs to
    refined_response['link'] = link
    website_refiner_responses.append(refined_response)

storage_manager.save_to_folder("WebsiteRefinerModelResponses", website_refiner_responses, f"{trial_number}.json")

Data loaded from ../data/RawTextFromHTML/6.json
Data saved to ../data/WebsiteRefinerModelResponses/6.json


In [8]:
# Print each link followed by the refined text and a blank separator line
for refined in website_refiner_responses:
    print(refined['link'])
    refined_message = refined['choices'][0]['message']
    print(refined_message['content'], end="\n\n")

https://www.reddit.com/r/FinancialPlanning/comments/12vb4s6/do_any_of_you_have_life_insurance_and_is_there/
THERE IS NO RELEVANT INFORMATION IN THE RAW TEXT

https://www.tdi.texas.gov/tips/life-insurance.html
Life insurance is important to help your loved ones with financial needs if you aren’t there anymore. It considers factors such as your mortgage, other debts, income replacement, funeral costs, and education for your children. If you have a family that depends on your financial support or if you have a mortgage, term life insurance could be a sensible option. Permanent life insurance provides coverage for your entire life and may have investment features, but it typically costs more than term insurance.

https://www.newyorklife.com/articles/six-reasons-to-buy-life-insurance
Life insurance is important because it protects your spouse and children from potential financial losses if something were to happen to you. It provides financial security, helps cover debts, living expenses, a

In [9]:
"""
Filter the corpus to information that may be relevant to the query
"""

# Run the corpus text through its refiner for the trial query, then persist the response
response_from_corpus_refiner = corpus_refiner.refine_text(query=query, raw_text=corpus_text)
corpus_response_filename = f"{trial_number}.json"
storage_manager.save_to_folder("CorpusRefinerModelResponses", response_from_corpus_refiner, corpus_response_filename)

Data saved to ../data/CorpusRefinerModelResponses/6.json


In [10]:
# Show the text content of the first choice in the corpus refiner's response
corpus_refiner_message = response_from_corpus_refiner['choices'][0]['message']
print(corpus_refiner_message['content'])

Life insurance is necessary to help your loved ones with financial needs if you aren’t there anymore. It considers factors such as your age, financial situation, and if you have people who depend on your income. Important considerations include covering debts like a mortgage, replacing your income, covering funeral costs, and funding college for children.

Term life insurance is suitable for those with dependents or a mortgage and is generally the least expensive option. It provides coverage for a specified period. Permanent life insurance offers coverage for your entire life but is more costly and complex.

Having life insurance is a way to ensure that your beneficiaries—such as a spouse, children, or other family members—are financially supported after your death.

Life insurance is important because it protects your spouse and children from potentially devastating financial losses if something happens to you. It provides financial security, helps pay off debts, living expenses, and 

In [11]:
"""
Improve the text from the SFBLIC site
"""

# Get the refined text from the website
website_refiner_responses = storage_manager.load_from_folder("WebsiteRefinerModelResponses", f"{trial_number}.json")

# Look the SFBLIC entry up through the shared sfblic_link constant instead of
# repeating the URL. The entry can be missing (the refiner step leaves out
# pages it failed on), so fail with an explicit message rather than an
# opaque IndexError.
sfblic_response = next(
    (response for response in website_refiner_responses if response['link'] == sfblic_link),
    None,
)
if sfblic_response is None:
    raise ValueError(f"No refined website response found for {sfblic_link}")
refined_text_from_website = sfblic_response['choices'][0]['message']['content']

# Get the refined text from the corpus
response_from_corpus_refiner = storage_manager.load_from_folder("CorpusRefinerModelResponses", f"{trial_number}.json")
refined_text_from_corpus = response_from_corpus_refiner['choices'][0]['message']['content']


# Generate improved text from the website text and the corpus text
improved_text = improver_model.improve_text(
    query=query,
    refined_text_from_website=refined_text_from_website,
    refined_text_from_corpus=refined_text_from_corpus
)

if improved_text:
    print("Improved Text generated...")

else:
    print("Failed to improve text.")


# Save to disk (the response is stored whether or not the check above passed)
storage_manager.save_to_folder("ImproverModelResponses", improved_text, f"{trial_number}.json")


Data loaded from ../data/WebsiteRefinerModelResponses/6.json
Data loaded from ../data/CorpusRefinerModelResponses/6.json
Improved Text generated...
Data saved to ../data/ImproverModelResponses/6.json


In [12]:
# Show the refined SFBLIC website text that was handed to the improver model
print(refined_text_from_website)

THERE IS NO RELEVANT INFORMATION IN THE RAW TEXT


In [13]:
# Show the text content of the first choice in the improver model's response
improved_message = improved_text['choices'][0]['message']
print(improved_message['content'])

Life insurance is essential to ensure your loved ones are financially protected if you are no longer around. It helps cover debts like a mortgage, replaces your income, covers funeral expenses, and can even fund college for your children. Life insurance provides a financial safety net, ensuring that your family maintains their living standard and is not burdened by economic hardships. The payout your beneficiaries receive is generally tax-free, providing full benefits without additional financial strain. Life insurance gives peace of mind by securing a financial future for your loved ones, acting as a crucial part of financial planning.


In [14]:
"""
Create multiple sets of reference material
"""

# Create a set of reference material that has the original (unimproved) content from the website

website_refiner_model_responses = storage_manager.load_from_folder("WebsiteRefinerModelResponses", f"{trial_number}.json")

unmodified_set_of_reference_material =  [
    {
        "link" : item['link'],
        "information" : item['choices'][0]['message']['content']
    }
    for item in website_refiner_model_responses
]

# Load the improved content produced for the SFBLIC page
improved_text = storage_manager.load_from_folder('ImproverModelResponses', f"{trial_number}.json")
improved_version = improved_text['choices'][0]['message']['content']


# Create a new set of reference material: identical to the unmodified set,
# except that the SFBLIC entry carries the improved content

new_set_of_reference_material = []

for item in unmodified_set_of_reference_material:
    if item['link'] == sfblic_link:
        # Use the entry's own link; the cell must not depend on a loop
        # variable left behind by an earlier cell.
        new_set_of_reference_material.append({"link" : item['link'], "information" : improved_version})
    else:
        new_set_of_reference_material.append(item)



Data loaded from ../data/WebsiteRefinerModelResponses/6.json
Data loaded from ../data/ImproverModelResponses/6.json


In [15]:
"""
Store Sets of Reference Material
"""

# Pair each set of reference material with the number of its improvement method:
# 0 goes with the unmodified set, 1 with the set holding the improved content
sets_of_reference_material = [
    {
        "improvement_method" : 0,
        "set_of_reference_material" : unmodified_set_of_reference_material,
    },
    {
        "improvement_method" : 1,
        "set_of_reference_material" : new_set_of_reference_material,
    },
]

# Persist both sets under this trial's file name
storage_manager.save_to_folder("SetsOfReferenceMaterial", sets_of_reference_material, f"{trial_number}.json")



Data saved to ../data/SetsOfReferenceMaterial/6.json


In [17]:

"""
Send sets of reference material to the writer-citer
"""

# Reload the stored sets of reference material for this trial
reference_sets_filename = f"{trial_number}.json"
sets_of_reference_material = storage_manager.load_from_folder("SetsOfReferenceMaterial", reference_sets_filename)



Data loaded from ../data/SetsOfReferenceMaterial/6.json


In [22]:
# Start with an empty list; the cells below append one cited response per set of reference material
writer_citer_model_responses = [] 

In [20]:
# Take the first stored set and note which improvement method it represents
first_reference_entry = sets_of_reference_material[0]
improvement_method = first_reference_entry['improvement_method']

# Ask the writer-citer for a cited response built on that set's reference material
cited_response = writer_citer.generate_cited_response(
    set_of_reference_query=first_reference_entry['set_of_reference_material'],
    rephrased_query=query,
)



In [23]:
# Tag the response with its improvement method, collect it, and display the list so far
cited_response['improvement_method'] = improvement_method
writer_citer_model_responses += [cited_response]
writer_citer_model_responses

[{'id': 'chatcmpl-AX9KvNCXvMd7h0egjcuc0iQuaMgzz',
  'choices': [{'finish_reason': 'stop',
    'index': 0,
    'logprobs': None,
    'message': {'content': 'Life insurance is a crucial financial tool that provides peace of mind by ensuring your loved ones are protected financially in your absence. Primarily, it offers a lump-sum payment to dependents, safeguarding them from potential financial losses and aiding in covering essential expenses such as debts, mortgage payments, childcare, education, and other living costs[2]. Additionally, it helps address final expenses, including medical bills and funeral costs[3].\n\nOne of the key benefits of life insurance is income replacement, which maintains the financial stability of your family, allowing them to sustain their lifestyle even if you are no longer around[3]. Moreover, the death benefit generally passes to beneficiaries without being subjected to federal income tax, adding another layer of financial relief[2].\n\nLife insurance can a

In [24]:
# Take the second stored set and unpack its improvement method and reference material
second_reference_entry = sets_of_reference_material[1]
improvement_method = second_reference_entry['improvement_method']
reference_material = second_reference_entry['set_of_reference_material']

# Ask the writer-citer for a cited response built on that reference material
cited_response = writer_citer.generate_cited_response(
    set_of_reference_query=reference_material,
    rephrased_query=query,
)

# Tag the response with its improvement method before collecting it
cited_response['improvement_method'] = improvement_method
writer_citer_model_responses.append(cited_response)

writer_citer_model_responses

[{'id': 'chatcmpl-AX9KvNCXvMd7h0egjcuc0iQuaMgzz',
  'choices': [{'finish_reason': 'stop',
    'index': 0,
    'logprobs': None,
    'message': {'content': 'Life insurance is a crucial financial tool that provides peace of mind by ensuring your loved ones are protected financially in your absence. Primarily, it offers a lump-sum payment to dependents, safeguarding them from potential financial losses and aiding in covering essential expenses such as debts, mortgage payments, childcare, education, and other living costs[2]. Additionally, it helps address final expenses, including medical bills and funeral costs[3].\n\nOne of the key benefits of life insurance is income replacement, which maintains the financial stability of your family, allowing them to sustain their lifestyle even if you are no longer around[3]. Moreover, the death benefit generally passes to beneficiaries without being subjected to federal income tax, adding another layer of financial relief[2].\n\nLife insurance can a

In [25]:
# Print the text of every collected response, each followed by a "---" separator line
for cited in writer_citer_model_responses:
    cited_message = cited['choices'][0]['message']
    print(cited_message['content'], "---", sep="\n")

Life insurance is a crucial financial tool that provides peace of mind by ensuring your loved ones are protected financially in your absence. Primarily, it offers a lump-sum payment to dependents, safeguarding them from potential financial losses and aiding in covering essential expenses such as debts, mortgage payments, childcare, education, and other living costs[2]. Additionally, it helps address final expenses, including medical bills and funeral costs[3].

One of the key benefits of life insurance is income replacement, which maintains the financial stability of your family, allowing them to sustain their lifestyle even if you are no longer around[3]. Moreover, the death benefit generally passes to beneficiaries without being subjected to federal income tax, adding another layer of financial relief[2].

Life insurance can also serve as an investment tool, particularly with whole life policies, which build cash value that grows tax-deferred and can be accessed for various financial

In [26]:
# Persist every collected writer-citer response under this trial's file name
writer_citer_filename = f"{trial_number}.json"
storage_manager.save_to_folder("WriterCiterModelResponses", writer_citer_model_responses, writer_citer_filename)

Data saved to ../data/WriterCiterModelResponses/6.json
