# Week 1 Day 5 - An AI assistant that generates insurance marketing emails for prospective customers

This notebook implements an AI assistant that generates insurance marketing emails for prospective customers.

### Features
- Receives a website URL, a company name, and a customer name (defaulting to "Customer")
- Scrapes the given website URL and generates a marketing email with all the insurance information that a user may need.

In [4]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import sys

# Resolve the directory two levels above the current working directory and append it
# to sys.path. Presumably this is what lets the `scraper` import below resolve — confirm.
base_path = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(base_path)

# Print the resolved path so it is visible in the cell output.
print(base_path)

from scraper import fetch_website_links

import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from website_scraper import fetch_website_contents
from openai import OpenAI



/Users/johnmboga/Documents/Applications/Andela AI Bootcamp/llm_engineering/week1


In [31]:
# Load credentials from the environment and build the OpenRouter-backed client

load_dotenv(override=True)
api_key = os.getenv('OPENROUTER_API_KEY')

# A usable key is present, carries the 'sk-or-' prefix and is longer than 10 characters.
print(
    "API key looks good so far"
    if api_key and api_key.startswith('sk-or-') and len(api_key) > 10
    else "There might be a problem with your API key? Please visit the troubleshooting notebook!"
)

MODEL = 'gpt-5-nano'
openai = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)

API key looks good so far


In [32]:
# Target site to scrape and the company name used when addressing the email prompt
website_url = "https://ke.cicinsurancegroup.com/"
company_name = "CIC Insurance Group"

In [33]:
# System prompt for the link-selection call.
# The JSON example uses the "type" key for each entry because the code that consumes
# the model's answer reads link['type'] and link['url']; advertising a different key
# here would let the model return entries the consumer cannot read.
link_system_prompt = """
You are provided with a list of links found on an insurance company’s website.

Your task is to identify links that are most likely to contain information about the insurance products offered by the company.

Prioritize links such as:
- Product overview pages
- Insurance category pages (e.g., auto, health, life, travel, commercial)
- Coverage or policy description pages
- Solutions or offerings pages

Ignore links such as:
- Careers / Jobs
- Blog / News / Press releases
- Investor relations
- Privacy policy / Terms
- Login / Signup
- Contact pages

Only include links that are highly likely to describe actual insurance products or coverage offerings.

Respond in JSON format:

{
    "product_pages": [
        {"type": "auto insurance", "url": "https://example.com/auto"},
        {"type": "health insurance", "url": "https://example.com/health"}
    ]
}

Only return valid JSON. Do not include commentary.
"""


In [48]:
# Build the user prompt for the link-selection call
def get_links_user_prompt(url):
    """Return the user prompt asking the model to pick product-related links.

    The links found on ``url`` are fetched with ``fetch_website_links`` and
    appended, one per line, after the fixed instructions.
    """
    found_links = fetch_website_links(url)
    link_listing = "\n".join(found_links)

    instructions = f"""
You are analyzing links from the insurance company website: {url}

Your goal is to identify links that are most likely to contain information about
the insurance products or coverage offerings provided by the company.

Only choose from the links listed below.
Do NOT invent URLs.
Ignore:
- Careers / Jobs
- Blog / News / Press
- Investor relations
- Terms / Privacy
- Contact / Login pages

Limit the number of links to 10.

Prefer:
- Product pages
- Insurance category pages (auto, health, life, travel, commercial, etc.)
- Coverage description pages
- Solutions / offerings pages

Return ONLY valid JSON in this format:

{{
    "product_pages": [
        {{"type": "product 1", "url": "FULL_HTTPS_URL"}},
        {{"type": "product 2", "url": "FULL_HTTPS_URL"}}
    ]
}}

Links:
"""

    return instructions + link_listing


In [49]:
# Assemble the chat messages for the link-selection request
def messages_for(url):
    """Return the system and user messages used to select links for ``url``."""
    system_message = {"role": "system", "content": link_system_prompt}
    user_message = {"role": "user", "content": get_links_user_prompt(url)}
    return [system_message, user_message]

In [50]:
# Ask the model which links on the website describe insurance products
def select_relevant_links(url):
    """Return the model's selection of product links for ``url``.

    The result is always a dict with a ``"product_pages"`` list, so callers can
    iterate over it without checking. If the model returns no content, invalid
    JSON, or JSON of an unexpected shape, the list is empty instead of the call
    raising.
    """
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=messages_for(url),
        response_format={"type": "json_object"},
    )
    raw = response.choices[0].message.content

    try:
        # The message content may be None; treat that as an empty JSON object.
        links = json.loads(raw or "{}")
    except json.JSONDecodeError:
        print(f"Could not parse the model response as JSON: {raw!r}")
        links = {}

    # Normalise anything that is not {"product_pages": [...]} to an empty selection.
    if not isinstance(links, dict):
        links = {}
    if not isinstance(links.get("product_pages"), list):
        links["product_pages"] = []

    print(links)
    print(f"Found {len(links['product_pages'])} relevant links")
    return links

In [51]:
# Collect the landing page text plus the text of every selected product page
def fetch_page_and_all_relevant_links(url):
    """Return one markdown-style string with the landing page and product pages.

    The landing page contents come first, followed by one ``### Link:`` section
    per entry selected by ``select_relevant_links``. Entries without a usable
    ``url`` are skipped. The section label is taken from ``type``, falling back
    to ``category_hint`` (the key shown in the link system prompt) and finally
    to the URL itself, so a differently-keyed model answer does not raise.
    """
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)

    sections = [f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"]
    for link in relevant_links.get("product_pages", []):
        if not isinstance(link, dict):
            continue
        link_url = link.get("url")
        if not link_url:
            continue
        label = link.get("type") or link.get("category_hint") or link_url
        sections.append(f"\n\n### Link: {label}\n")
        sections.append(fetch_website_contents(link_url))

    # Join once instead of growing a string inside the loop.
    return "".join(sections)

In [52]:
# System prompt for the email-generation call: asks for a concise, plain-markdown
# marketing email built only from the supplied page content.
# NOTE(review): the prompt mentions investors, recruits and career opportunities, while
# the notebook introduction targets prospective customers — confirm this wording is intended.
email_system_prompt = """
You are an assistant that creates a concise marketing email for an insurance company
based on the content of the company’s product pages.

Your email should:
- Highlight the insurance products and coverage offered (auto, health, life, travel, commercial, etc.)
- Emphasize key benefits and features of each product
- Include information about the company’s values or culture if available
- Optionally mention target customers or market segments
- Include career opportunities only if explicitly mentioned on the product pages

Requirements:
- Use persuasive, professional, and approachable language suitable for prospective customers, investors, and recruits
- Keep the email concise (1–3 short paragraphs)
- Use plain markdown; do not include code blocks
- Only include information found on the provided pages; do not invent facts

Respond with a ready-to-send marketing email.
"""

In [15]:
# Build the user prompt for the email-generation call
def get_email_user_prompt(customer_name, company_name, url, max_chars=5_000):
    """Return the user prompt containing instructions plus scraped page content.

    Args:
        customer_name: Name the email is addressed to. An empty value or None
            falls back to "Customer".
        company_name: Name of the insurance company, quoted in the prompt.
        url: Landing page whose contents and product pages are scraped.
        max_chars: Maximum length of the returned prompt. The instructions come
            first, so truncation cuts scraped content rather than the task.

    Returns:
        The prompt string, truncated to ``max_chars`` characters.
    """
    recipient = customer_name or "Customer"

    user_prompt = f"""
You are analyzing content from the insurance company "{company_name}".
Below are the contents of its landing page and all relevant product pages identified for the company.
Use this information to create a concise, persuasive marketing email in markdown (no code blocks).

The email which should be addressed to {recipient} should:
- Highlight the insurance products offered (auto, health, life, travel, commercial, etc.)
- Emphasize key benefits, coverage options, and features
- Include company values or culture if mentioned on the pages
- Optionally mention target customers or market segments
- Include career opportunities only if explicitly present on the product pages

Do NOT:
- Invent any information
- Include unrelated pages such as blogs, press releases, terms, privacy, or contact info

Respond with a ready-to-send marketing email suitable for prospective customers, investors, and recruits.
"""

    user_prompt += fetch_page_and_all_relevant_links(url)
    return user_prompt[:max_chars]  # Truncate if longer than max_chars


In [53]:
# Assemble the chat messages for the email-generation request
def email_messages_for(customer_name, company_name, url):
    """Return the system and user messages used to generate the email."""
    system_message = {"role": "system", "content": email_system_prompt}
    user_message = {
        "role": "user",
        "content": get_email_user_prompt(customer_name, company_name, url),
    }
    return [system_message, user_message]


In [54]:
# Generate the email and render it incrementally as the model streams it
def stream_email(customer_name, company_name, url, model="gpt-4.1-mini"):
    """Stream a marketing email for ``customer_name`` into the notebook output.

    Args:
        customer_name: Name the email is addressed to. An empty value or None
            falls back to "Customer".
        company_name: Name of the insurance company.
        url: Landing page of the company website to scrape.
        model: Model identifier passed to the chat completions call.

    The accumulated text is re-rendered as Markdown after every received chunk.
    """
    customer_name = customer_name or "Customer"
    print(f"Generating email for {customer_name} from {company_name} at {url}")
    messages = email_messages_for(customer_name, company_name, url)
    stream = openai.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    )
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some stream chunks (e.g. usage-only chunks) carry no choices; skip them
        # instead of failing on choices[0].
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [None]:
# Example run: greet the customer, then stream the email for the configured
# company_name and website_url.
customer_name = "John Doe"
print(f"Hello, {customer_name}!")
stream_email(customer_name,company_name, website_url)