In [1]:
from dotenv import dotenv_values
import requests

from helpers.google_interactor import GoogleInteractor
from helpers.html_parser import HTMLParser 
from helpers.storage_manager import StorageManager
from helpers.refiner_model import RefinerModel

# --- Configuration ----------------------------------------------------------
# Read the key/value pairs from the .env file one directory above the notebook.
config = dotenv_values("../.env")
google_api_key = config['GOOGLE_API_KEY']
search_engine_id = config['SEARCH_ENGINE_ID']

# --- Helper objects ---------------------------------------------------------
# Search client built from the Google credentials read above.
google_interactor = GoogleInteractor(
    api_key=google_api_key,
    search_engine_id=search_engine_id,
)

# Storage helper pointed at the shared data directory.
storage_manager = StorageManager("../data/")

# Two refiner instances created with the same key: one is applied to text
# fetched from websites, the other to the local corpus text.
website_refiner = RefinerModel(api_key=config['OPEN_AI_API_KEY'])
corpus_refiner = RefinerModel(api_key=config['OPEN_AI_API_KEY'])

# --- Corpus -----------------------------------------------------------------
# Read the whole cleaned corpus into memory as a single string.
with open("../data/clean_content.txt") as corpus_file:
    corpus_text = corpus_file.read()

# --- Trial parameters -------------------------------------------------------
K = 5             # how many of the returned links are processed further
trial_number = 1  # identifies this trial; also the base name of the stored files
query = "Where to buy life insurance in Mississippi?"  # query for this trial

# --- Google search ----------------------------------------------------------
# Send the query to Google, requesting 10 results.
search_results = google_interactor.search_google(
    query=query,
    num_results=10,
)

# Write the response to disk, then read it back so that the rest of the
# notebook works from the stored copy.
storage_manager.save_to_folder(
    "InitialGoogleResponses",
    search_results,
    f"{trial_number}.json",
)
search_results = storage_manager.load_from_folder(
    "InitialGoogleResponses",
    f"{trial_number}.json",
)


# Seconds to wait on a single site before giving up on it. Without a timeout,
# requests waits indefinitely and one unresponsive host stalls the notebook.
REQUEST_TIMEOUT_SECONDS = 10

raw_text_from_html = []
items = search_results['items']

# For each of the first K search results ...
for item in items[:K]:
    link = item['link']

    # Fetch the HTML associated with the link. A page that cannot be fetched
    # (connection error, timeout, HTTP error status) is reported and skipped so
    # that one bad link does not discard the pages already collected, and so
    # that error pages are not stored as if they were real content.
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f"Skipping {link}: {error}")
        continue

    # Get the raw text associated with the HTML
    parsed_text = HTMLParser.parse_html(response.content)

    # Store the raw text from the HTML together with its source link
    raw_text_from_html.append({"link" : link, "raw_text" : parsed_text})

# Persist the collected page text for this trial
storage_manager.save_to_folder("RawTextFromHTML", raw_text_from_html, f"{trial_number}.json")

Data saved to ../data/InitialGoogleResponses/1.json
Data loaded from ../data/InitialGoogleResponses/1.json
Data saved to ../data/RawTextFromHTML/1.json


In [2]:
# Reload the page text stored for this trial so this cell works from the saved copy.
raw_text_from_html = storage_manager.load_from_folder("RawTextFromHTML", f"{trial_number}.json")

website_refiner_responses = []
# For each element of raw text that was stored ...
for current_item in raw_text_from_html:

    link = current_item['link']
    raw_text = current_item['raw_text']

    # Ask the website refiner to refine this page's text with respect to the query
    response_from_website_refiner = website_refiner.refine_text(raw_text=raw_text, query=query)

    if response_from_website_refiner:
        website_refiner_responses.append(response_from_website_refiner)
    else:
        # Name the page so a failure can be traced back to its source link
        print(f"Failed to refine text for {link}.")

# Display the collected responses
website_refiner_responses

Data loaded from ../data/RawTextFromHTML/1.json


[{'id': 'chatcmpl-AWp8QS26E5CnZLDtuCLzkaIsfbXML',
  'choices': [{'finish_reason': 'stop',
    'index': 0,
    'logprobs': None,
    'message': {'content': "To buy life insurance in Mississippi, you can consider the following steps:\n\n1. **Visit the Mississippi Insurance Department's Official Website**: They provide valuable resources and information about life insurance, including purchasing tips and coverage options.\n   \n2. **Search for Licensed Insurance Agents**: Use the licensing search feature on the Mississippi Insurance Department's website to find licensed insurance agents in your area who can assist you in purchasing life insurance.\n\n3. **Shop Around**: It's recommended to compare different insurance policies and talk to various agents to find the best coverage and rates tailored to your needs.\n\n4. **Contact Consumer Assistance**: For further guidance, you can call the toll-free Mississippi Insurance Department Consumer Help Line at 800-562-2957 or 601-359-2453.\n\nThes

In [3]:
# Persist the website refiner responses collected for this trial.
storage_manager.save_to_folder(
    "WebsiteRefinerModelResponses",
    website_refiner_responses,
    f"{trial_number}.json",
)

Data saved to ../data/WebsiteRefinerModelResponses/1.json


In [4]:
# Run the corpus refiner on the clean corpus text, using the same query as the
# website refinement.
response_from_corpus_refiner = corpus_refiner.refine_text(
    raw_text=corpus_text,
    query=query,
)

# Display the response
response_from_corpus_refiner

{'id': 'chatcmpl-AWp8fdXGZ6G3Tvcs04GNCXeJfeKuC',
 'choices': [{'finish_reason': 'stop',
   'index': 0,
   'logprobs': None,
   'message': {'content': 'You can buy life insurance in Mississippi from Southern Farm Bureau Life Insurance Company. They offer a variety of life insurance products, including:\n\n1. **Permanent Life Insurance**: Whole life, single premium, modified premium, and adjustable premium options.\n2. **Term Life Insurance**: Available in 10, 20, and 30-year intervals.\n\n### Contact Information:\n- **Address**: 1401 Livingston Lane, Jackson, MS 39213\n- **Phone**: \n  - Customer Service: 601-981-7422\n  - Death Claims: 1-800-562-0919\n  - Policy Service: 1-800-457-9611\n- **Website**: [sfbli.com](https://www.sfbli.com) \n\nSouthern Farm Bureau Life Insurance Company is known for competitive products and superior customer support.',
    'refusal': None,
    'role': 'assistant',
    'audio': None,
    'function_call': None,
    'tool_calls': None}}],
 'created': 17323864

In [5]:
# Persist the corpus refiner response for this trial.
storage_manager.save_to_folder(
    "CorpusRefinerModelResponses",
    response_from_corpus_refiner,
    f"{trial_number}.json",
)

Data saved to ../data/CorpusRefinerModelResponses/1.json
