In [1]:
from dotenv import dotenv_values
import requests

from helpers.google_interactor import GoogleInteractor
from helpers.html_parser import HTMLParser 
from helpers.storage_manager import StorageManager
from helpers.website_refiner_model import WebsiteRefinerModel

# Load environment variables
config = dotenv_values("../.env")
google_api_key = config['GOOGLE_API_KEY']
search_engine_id = config['SEARCH_ENGINE_ID']

# Declare trial number
trial_number = 1
 
google_interactor = GoogleInteractor(api_key=google_api_key, search_engine_id=search_engine_id)
storage_manager = StorageManager("../data/")    
website_refiner = WebsiteRefinerModel(api_key=config['OPEN_AI_API_KEY'])

query = "Where to buy life insurance in Mississippi?"
search_results = google_interactor.search_google(query=query, num_results=10)
    

storage_manager.save_to_folder("InitialGoogleResponses", search_results, f"{trial_number}.json")
search_results = storage_manager.load_from_folder("InitialGoogleResponses", f"{trial_number}.json")


items = search_results['items']

raw_text_from_html = []

for item in search_results['items']:
    link = item['link']
    response = requests.get(link)
    parsed_text = HTMLParser.parse_html(response.content)
    raw_text_from_html.append({"link" : link, "raw_text" : parsed_text})
    
storage_manager.save_to_folder("RawTextFromHTML", raw_text_from_html, f"{trial_number}.json")

Data saved to ../data/InitialGoogleResponses/1.json
Data loaded from ../data/InitialGoogleResponses/1.json


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Data saved to ../data/RawTextFromHTML/1.json


In [2]:
raw_text_from_html = storage_manager.load_from_folder("RawTextFromHTML", f"{trial_number}.json")
current_item = raw_text_from_html[0]
raw_text_for_current_item = current_item['raw_text']
raw_text_for_current_item

Data loaded from ../data/RawTextFromHTML/1.json


"\n\n\n\n\n\n\n\n\n\n\n\nMississippi Insurance Department - Official Website of the Mississippi Insurance Department\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip to content\n\n\r\n\t\tMain Navigation\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch for:\n\n\n\n\n\n\nConsumers\n\nUnderstanding Premiums and Rates in MississippiQuestions and answers about premiums and rates in Mississippi.\nAuto InsuranceState law requirements, shopping tips, lowering costs, filing a claim, and more.\nCompany InformationRating services, market share reports, and examination reports.\nConsumer AlertsIssued by the Mississippi Insurance Department to inform consumers of dangers, threats, and problems.\nConsumer ResourcesHelpful insurance-related resources for consumers.\nFight Insurance FraudHow fraud affects you, agent fraud schemes, fraudulent claims, fraud against seniors, business insurance fraud, reporting insurance fraud, and more.\nFile a ComplaintFile online complaints, downlo

In [3]:
# Use the first raw text for refinement
refined_text = website_refiner.refine_text(raw_text=raw_text_for_current_item, query=query)

if refined_text:
    print("Refined Text:")
    print(refined_text)
else:
    print("Failed to refine text.")


Refined Text:
{'id': 'chatcmpl-AWnTNlk0FpUfzyfLYlkNYIs8WTHCw', 'choices': [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'To buy life insurance in Mississippi, you can visit the official website of the Mississippi Insurance Department (MID) at MID.MS.GOV for resources and information. Additionally, you can find licensed insurance agents in your area by contacting the Mississippi Insurance Department Consumer Help Line at 800-562-2957 or 601-359-2453. They provide assistance and guidance on finding suitable insurance coverage, including life insurance options.', 'refusal': None, 'role': 'assistant', 'audio': None, 'function_call': None, 'tool_calls': None}}], 'created': 1732380033, 'model': 'gpt-4o-mini-2024-07-18', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint': 'fp_0705bf87c0', 'usage': {'completion_tokens': 85, 'prompt_tokens': 2761, 'total_tokens': 2846, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_to

In [4]:
storage_manager.save_to_folder("WebsiteRefinerModelResponses", refined_text, f"{trial_number}.json")

Data saved to ../data/WebsiteRefinerModelResponses/1.json
