In [7]:
# Install necessary packages
!pip install langchain openai faiss-cpu pypdf langchain-openai



In [8]:
# Setup environment and API key
import os
from google.colab import userdata

os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [9]:
# Imports
import glob
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


In [10]:
# Load PDF documents
pdf_files = glob.glob("*.pdf")
print(f"Found {len(pdf_files)} PDF files: {pdf_files}")

# Load and combine documents
documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    docs = loader.load()
    documents.extend(docs)
    print(f"Loaded {len(docs)} pages from {pdf_file}")

print(f"Total documents loaded: {len(documents)}")

Found 1 PDF files: ['Businessowners-Policy-Cov-Robert-Richardson-2.pdf']
Loaded 414 pages from Businessowners-Policy-Cov-Robert-Richardson-2.pdf
Total documents loaded: 414


In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
doc_chunks = text_splitter.split_documents(documents)
print(f"Split documents into {len(doc_chunks)} chunks.")


Split documents into 1002 chunks.


In [12]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

embeddings_openai = OpenAIEmbeddings()

vectorstore_openai = FAISS.from_documents(doc_chunks, embeddings_openai)
vectorstore_openai.save_local("faiss_bop_index")

In [13]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
retriever = vectorstore_openai.as_retriever(search_kwargs={"k": 8})

# Initialize LLM
llm = ChatOpenAI(model_name="gpt-4", temperature=0.3)

# Prompt Templates
prompt_template_appetite = PromptTemplate(
    input_variables=["business_type", "retrieved_docs"],
    template="""
    Based on the following underwriting guidelines, determine the appetite guidelines
    for the business type: {business_type}.

    Consider different qualifications or variations such as:
    - Office only vs. Office and Manufacturing
    - Retail excluding products vs. including products
    - Businesses with or without alcohol sales

    Examples:
    - Preferred: Retail excluding high-risk products
    - Acceptable: Restaurants selling alcohol below 25%
    - Declined: Nightclubs, heavy alcohol consumption businesses

    Underwriting Guidelines:
    {retrieved_docs}

    Clearly list out preferred, acceptable, and declined categories with brief reasoning.
    """
)

prompt_template_restrictions = PromptTemplate(
    input_variables=["business_type", "retrieved_docs"],
    template="""
    Based on the following underwriting guidelines, detail the specific restrictions
    that apply to insuring businesses of the type: {business_type}.

    Consider:
    - Size of buildings or companies
    - Mandatory practices or policies
    - Preferences that aren't mandatory but strongly recommended

    Examples:
    - Must have fire suppression system for restaurants
    - Manufacturing buildings must be under 10,000 sq ft
    - Companies must not handle hazardous materials without proper certification

    Underwriting Guidelines:
    {retrieved_docs}

    Clearly categorize restrictions as mandatory (deal-breakers) or preferences (strongly recommended).
    """
)

In [14]:
def agentic_pipeline(business_type):
    retrieved_docs = retriever.get_relevant_documents(business_type)
    doc_text = "\n".join([doc.page_content for doc in retrieved_docs])

    # Generate first draft responses using RunnableSequence
    appetite_chain = prompt_template_appetite | llm
    restrictions_chain = prompt_template_restrictions | llm

    # Initial Response
    response_appetite = appetite_chain.invoke({"business_type": business_type, "retrieved_docs": doc_text}).content
    response_restrictions = restrictions_chain.invoke({"business_type": business_type, "retrieved_docs": doc_text}).content

    # Reflection prompt
    reflection_prompt = PromptTemplate(
        input_variables=["response", "retrieved_docs"],
        template="""
        Critically evaluate the following response for accuracy, clarity, and completeness based on the provided underwriting guidelines.
        If improvements or corrections are needed, rewrite the response.
        If the response is satisfactory, repeat it unchanged.

        Response:
        {response}

        Guidelines:
        {retrieved_docs}

        Evaluated and improved response in Markdown format:
        """
    )

    reflection_chain = reflection_prompt | llm

    # Reflection and improvement steps
    improved_appetite = reflection_chain.invoke({"response": response_appetite, "retrieved_docs": doc_text}).content
    improved_restrictions = reflection_chain.invoke({"response": response_restrictions, "retrieved_docs": doc_text}).content

    # Combine responses in markdown format
    markdown_output = f"## {business_type}\n\n### Appetite Guidelines\n\n{improved_appetite}\n\n### Specific Restrictions\n\n{improved_restrictions}\n"

    return markdown_output



In [15]:
# Example usage
business_type = "Restaurants"
markdown_result = agentic_pipeline(business_type)

print(markdown_result)

  retrieved_docs = retriever.get_relevant_documents(business_type)


## Restaurants

### Appetite Guidelines

Preferred:
1. Limited Cooking Restaurants: These restaurants are preferred due to their lower risk profile. They are characterized by a maximum floor area of 7,500 square feet, a seating capacity no greater than 75, and sales of beer or wine only, which should not exceed 25% of total sales. They do not have a bar or cocktail lounge and catering service must not exceed 10% of total sales. This helps to reduce the risk of alcohol-related incidents and the smaller size and seating capacity limit potential liabilities.

Acceptable:
1. Fine Dining Restaurants: These are acceptable due to their moderate risk profile. They have a maximum floor area of 7,500 square feet, seating capacity no greater than 150, and sales of beer, wine or liquor no greater than 75% of total sales. Catering must not exceed 15% of total sales and there are no bar operations during hours when full table service is not available. This indicates a controlled environment with a f

In [16]:


# Initialize markdown file with YAML front matter
markdown_filename = "bop_guidelines.md"

front_matter = """---
title: "Business Owners Policy Underwriting Guidelines"
author: "Automated Agentic Underwriting Assistant"
date: "2024-04-28"
output: pdf_document
---\n\n"""

with open(markdown_filename, "w") as md_file:
    md_file.write(front_matter)

# Read business types from file and apply agentic pipeline
with open("bop_categories.txt", "r") as categories_file:
    business_types = [line.strip() for line in categories_file if line.strip()]



In [17]:
# Loop through each business type and append to markdown file
for idx, business_type in enumerate(business_types):
    print(f"Processing {idx+1}/{len(business_types)}: {business_type}")

    markdown_section = agentic_pipeline(business_type)

    with open(markdown_filename, "a") as md_file:
        md_file.write(markdown_section)
        md_file.write("\n\n---\n\n")

print(f"Markdown guidelines successfully created in '{markdown_filename}'.")

Processing 1/142: Accounting & Financial Services
Processing 2/142: Actuarial & Appraisal Services
Processing 3/142: Adult Day Care
Processing 4/142: Advertising Agencies
Processing 5/142: Air Conditioning & Heating (Sales/Service/Manufacturing)
Processing 6/142: Alarm System Installation
Processing 7/142: Ambulance & Emergency Services
Processing 8/142: Animal & Veterinary Services
Processing 9/142: Answering & Telemarketing Services
Processing 10/142: Antique & Collectible Stores
Processing 11/142: Appliance Sales & Repair
Processing 12/142: Architectural Services
Processing 13/142: Army/Navy/Military Surplus Stores
Processing 14/142: Art Galleries & Art Supply Stores
Processing 15/142: Artificial Flowers & Floral Supplies
Processing 16/142: Artists & Craft Studios
Processing 17/142: Asphalt & Paving Services
Processing 18/142: Assembly & Packaging Services
Processing 19/142: Associations (Business, Professional, Civic, Alumni, Trade)
Processing 20/142: Audio & Visual Equipment Sales

In [20]:
# Additional code to generate global requirements in an agentic manner

# Define global requirement generation prompt
global_requirements_prompt = PromptTemplate(
    input_variables=["retrieved_docs"],
    template="""
    Based on the following underwriting guidelines, identify and list global underwriting requirements
    that could apply to all business types universally. These should be broad standards or rules applicable
    across various businesses, irrespective of their specific category.

    Guidelines:
    {retrieved_docs}

    Respond with a concise numbered list.
    """
)

# Run initial global requirements generation
initial_requirements_chain = {"retrieved_docs": lambda _: "\n".join([doc.page_content for doc in retriever.get_relevant_documents("global underwriting requirements")])} | global_requirements_prompt | llm | StrOutputParser()

initial_global_requirements = initial_requirements_chain.invoke("")
print("Initial Global Requirements:")
print(initial_global_requirements)

Initial Global Requirements:
1. All businesses must adhere to the policy’s applicable perils, limits of insurance, and other applicable policy terms and provisions.
2. Businesses must comply with the dollar limitations on annual gross sales and contractor payroll as per the policy.
3. Liability coverage applies to businesses operating in multiple countries, based on where the liability is determined.
4. The insurer has the right to inspect the business owner’s premises or operations at any time and provide the business owner with reports on conditions at the premises.
5. The insurer can recommend changes based on inspections, surveys, reports, or recommendations, which relate only to insurability and the premiums to be charged.
6. The insurer is not responsible for injury, damages, or costs incurred because of a violation of a safety code not disclosed by the insurer’s inspector.
7. Businesses must adhere to the square footage limitations and gross sales limitations for their specific 

In [28]:
# Parse requirements into list
requirements_list = [line.strip() for line in initial_global_requirements.split('\n') if line.strip()]

# Define detailed refinement prompt
detailed_requirement_prompt = PromptTemplate(
    input_variables=["requirement", "retrieved_docs"],
    template="""
    You are writing details in a business's underwriting policy guide for Business Owner Policies. Provide a detailed explanation and categorization (mandatory or recommended) of the following global underwriting requirement:

    Requirement:
    {requirement}

    Guidelines:
    {retrieved_docs}

    Respond clearly in Markdown format.
    """
)

# Append to markdown file
with open(markdown_filename, "a") as md_file:
    md_file.write("## Global Underwriting Requirements\n\n")

    for idx, requirement in enumerate(requirements_list):
        print(f"Refining global requirement {idx+1}/{len(requirements_list)}")
        retrieved_docs = "\n".join([doc.page_content for doc in retriever.get_relevant_documents(requirement)])

        detailed_chain = (
            {"requirement": lambda _: requirement, "retrieved_docs": lambda _: retrieved_docs}
            | detailed_requirement_prompt
            | llm
            | StrOutputParser()
        )

        detailed_response = detailed_chain.invoke({})

        md_file.write(detailed_response)
        md_file.write("\n\n---\n\n")

print(f"Global underwriting requirements successfully appended to '{markdown_filename}'.")


Refining global requirement 1/11
Refining global requirement 2/11
Refining global requirement 3/11
Refining global requirement 4/11
Refining global requirement 5/11
Refining global requirement 6/11
Refining global requirement 7/11
Refining global requirement 8/11
Refining global requirement 9/11
Refining global requirement 10/11
Refining global requirement 11/11
Global underwriting requirements successfully appended to 'bop_guidelines.md'.


In [24]:
# Additional code to label each business and append a summary table
import re
import random

# Read the existing markdown file to extract business names
with open("bop_guidelines.md", "r") as md_file:
    markdown_content = md_file.read()

# Extract business names from markdown headers
business_names = re.findall(r'^##\s+(.*)$', markdown_content, re.MULTILINE)


In [25]:

# Define prompt to categorize each business
status_prompt = PromptTemplate(
    input_variables=["business_name", "business_details"],
    template="""
    Review the underwriting details for the following business:

    Business Name:
    {business_name}

    Details:
    {business_details}

    Categorize the business clearly into one of the following statuses based on the ease or difficulty of meeting the listed requirements:
    - targeted (highly desirable)
    - acceptable (requirements easily met)
    - limited (some challenges, but feasible)
    - not acceptable (requirements too difficult to meet)

    Respond with just the status.
    """
)

In [26]:


# Initialize a dictionary to hold business statuses
business_statuses = []

# Process each business to get status
for idx, business_name in enumerate(business_names):
    print(f"Evaluating status for {business_name} ({idx+1}/{len(business_names)})")

    # Extract detailed section for the business
    pattern = rf"## {re.escape(business_name)}(.*?)---"
    match = re.search(pattern, markdown_content, re.DOTALL)
    business_details = match.group(1).strip() if match else "No details found."

    status_chain = (
        {"business_name": lambda _: business_name, "business_details": lambda _: business_details}
        | status_prompt
        | llm
        | StrOutputParser()
    )

    status = status_chain.invoke({}).strip()
    business_code = random.randint(10000, 99999)

    business_statuses.append({"name": business_name, "code": business_code, "status": status})

# Append the summary table to markdown file
with open("bop_guidelines.md", "a") as md_file:
    md_file.write("\n## Business Categorization Summary\n\n")
    md_file.write("| Name | Code | Status |\n")
    md_file.write("|------|------|--------|\n")

    for entry in business_statuses:
        md_file.write(f"| {entry['name']} | {entry['code']} | {entry['status']} |\n")

print("Business categorization summary successfully appended to 'bop_guidelines.md'.")


Evaluating status for Accounting & Financial Services (1/144)
Evaluating status for Actuarial & Appraisal Services (2/144)
Evaluating status for Adult Day Care (3/144)
Evaluating status for Advertising Agencies (4/144)
Evaluating status for Air Conditioning & Heating (Sales/Service/Manufacturing) (5/144)
Evaluating status for Alarm System Installation (6/144)
Evaluating status for Ambulance & Emergency Services (7/144)
Evaluating status for Animal & Veterinary Services (8/144)
Evaluating status for Answering & Telemarketing Services (9/144)
Evaluating status for Antique & Collectible Stores (10/144)
Evaluating status for Appliance Sales & Repair (11/144)
Evaluating status for Architectural Services (12/144)
Evaluating status for Army/Navy/Military Surplus Stores (13/144)
Evaluating status for Art Galleries & Art Supply Stores (14/144)
Evaluating status for Artificial Flowers & Floral Supplies (15/144)
Evaluating status for Artists & Craft Studios (16/144)
Evaluating status for Asphalt 

In [32]:
!apt-get install pandoc texlive-xetex -y
!pandoc bop_guidelines_draft.md -o bop_guidelines_draft.pdf --pdf-engine=xelatex --toc
