In [None]:
# Install necessary packages
!pip install langchain openai faiss-cpu pypdf langchain-openai langchain-community

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting pypdf
  Downloading pypdf-5.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.15-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (f

In [None]:
# Setup environment and API key
import os
from google.colab import userdata

# Copy the secret stored under this name in Colab's user data into the process
# environment under the same name.
_SECRET_NAME = 'OPENAI_API_KEY'
os.environ[_SECRET_NAME] = userdata.get(_SECRET_NAME)

In [None]:
# Imports
import glob
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


In [None]:
# Load PDF documents
# glob returns matches in arbitrary order; sorting keeps the order of `documents`
# (and of everything derived from it) the same on every run.
pdf_files = sorted(glob.glob("*.pdf"))
print(f"Found {len(pdf_files)} PDF files: {pdf_files}")

# Stop here when there is nothing to load instead of failing later in a less obvious place.
if not pdf_files:
    raise FileNotFoundError(
        "No PDF files found in the working directory; add the source PDFs before running this cell."
    )

# Load and combine documents
documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    docs = loader.load()
    documents.extend(docs)
    print(f"Loaded {len(docs)} pages from {pdf_file}")

print(f"Total documents loaded: {len(documents)}")

Found 5 PDF files: ['AFS_BOP_General_Guidelines_InfoSheet_MKT5674.pdf', 'ARIC Underwriting Guide 7-2012.pdf', 'Attune-Appetite-Guide.pdf', 'Businessowners-Policy-Cov-Robert-Richardson-2.pdf', 'UnderwritingGuidelines-BusinessOwners-2.pdf']
Loaded 1 pages from AFS_BOP_General_Guidelines_InfoSheet_MKT5674.pdf
Loaded 43 pages from ARIC Underwriting Guide 7-2012.pdf
Loaded 3 pages from Attune-Appetite-Guide.pdf
Loaded 414 pages from Businessowners-Policy-Cov-Robert-Richardson-2.pdf
Loaded 9 pages from UnderwritingGuidelines-BusinessOwners-2.pdf
Total documents loaded: 470


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=100)
# and report how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)
doc_chunks = text_splitter.split_documents(documents)
chunk_count = len(doc_chunks)
print(f"Split documents into {chunk_count} chunks.")


Split documents into 1157 chunks.


In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Build a FAISS vector store from the chunks using OpenAI embeddings,
# then save it locally under "faiss_bop_index".
embeddings_openai = OpenAIEmbeddings()
vectorstore_openai = FAISS.from_documents(
    documents=doc_chunks,
    embedding=embeddings_openai,
)
vectorstore_openai.save_local(folder_path="faiss_bop_index")

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

# Both retrievers are created from the same vector store; they differ only in the
# "k" search argument (5 for coverage lookups, 8 for underwriting lookups).
coverage_search_kwargs = {"k": 5}
underwriting_search_kwargs = {"k": 8}
retriever_coverage = vectorstore_openai.as_retriever(search_kwargs=coverage_search_kwargs)
retriever_underwriting = vectorstore_openai.as_retriever(search_kwargs=underwriting_search_kwargs)

# Chat model shared by every prompt in the notebook
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.3,
)

def agentic_pipeline(business_type):
    """Build the Markdown underwriting-guideline section for one business class.

    Retrieves reference text for ``business_type`` from the module-level
    ``retriever_coverage`` and ``retriever_underwriting`` retrievers, then runs
    nine prompts through the module-level ``llm``, feeding earlier replies into
    later prompts:

    1. describe the business; 2-3. property risks and coverages;
    4-5. liability risks and coverages; 6. underwriting appetite guidelines;
    7. coverage restrictions; 8. unintuitive risks; 9. final review of 6-8.

    Args:
        business_type: Name of the business class. Used as the retrieval query,
            substituted into the first prompt, and used as the section heading.

    Returns:
        str: A level-2 Markdown heading containing ``business_type``, a blank
        line, the reply to the final review prompt, and a trailing newline.
    """
    # Retriever.invoke() is the supported entry point; get_relevant_documents() is deprecated.
    docs_coverage = retriever_coverage.invoke(business_type)
    docs_underwriting = retriever_underwriting.invoke(business_type)

    coverage_text = "\n".join(doc.page_content for doc in docs_coverage)
    underwriting_text = "\n".join(doc.page_content for doc in docs_underwriting)

    def run_step(prompt, variables):
        """Fill ``prompt`` with ``variables``, send it to the LLM, and return the reply text."""
        return (prompt | llm).invoke(variables).content

    # Reply text of each step, keyed "step1" .. "step8"
    steps = {}

    # Step 1: Thoroughly describe business
    prompt_step1 = PromptTemplate(
        input_variables=["business_type"],
        template="""
        Thoroughly describe the type of work done by small businesses classified as {business_type}. Include where the work is performed, typical customers, and a typical workday.
        """
    )
    steps["step1"] = run_step(prompt_step1, {"business_type": business_type})

    # Step 2: Property risks
    prompt_step2 = PromptTemplate(
        input_variables=["business_description", "coverage_text"],
        template="""
        Using the description of the business:
        {business_description}

        Identify property insurance risks for this class, referencing this coverage guide:
        {coverage_text}
        """
    )
    steps["step2"] = run_step(
        prompt_step2,
        {"business_description": steps["step1"], "coverage_text": coverage_text},
    )

    # Step 3: Property coverage recommendations
    prompt_step3 = PromptTemplate(
        input_variables=["property_risks", "coverage_text"],
        template="""
        Based on the identified property risks:
        {property_risks}

        Recommend essential property coverages, including optional coverages, referring to this coverage guide:
        {coverage_text}
        """
    )
    steps["step3"] = run_step(
        prompt_step3,
        {"property_risks": steps["step2"], "coverage_text": coverage_text},
    )

    # Step 4: Liability risks
    prompt_step4 = PromptTemplate(
        input_variables=["business_description", "coverage_text"],
        template="""
        Using the description of the business:
        {business_description}

        Identify general liability insurance risks for this class, referencing this coverage guide:
        {coverage_text}
        """
    )
    steps["step4"] = run_step(
        prompt_step4,
        {"business_description": steps["step1"], "coverage_text": coverage_text},
    )

    # Step 5: Liability coverage recommendations
    prompt_step5 = PromptTemplate(
        input_variables=["liability_risks", "coverage_text"],
        template="""
        Based on the identified liability risks:
        {liability_risks}

        Recommend essential liability coverages, including optional coverages, referring to this coverage guide:
        {coverage_text}
        """
    )
    steps["step5"] = run_step(
        prompt_step5,
        {"liability_risks": steps["step4"], "coverage_text": coverage_text},
    )

    # Steps 6-8 all receive the replies of steps 1-5, separated by blank lines.
    combined_steps_1_to_5 = '\n\n'.join(steps[f"step{i}"] for i in range(1, 6))

    # Step 6: Underwriting appetite guidelines
    prompt_step6 = PromptTemplate(
        input_variables=["previous_steps", "underwriting_text"],
        template="""
        Using information:
        {previous_steps}

        Develop concise underwriting appetite guidelines (preferred, acceptable, not acceptable categories), requirements (mandatory or preferred), and out-of-appetite optional coverages, referencing this underwriting guide:
        {underwriting_text}
        """
    )
    steps["step6"] = run_step(
        prompt_step6,
        {"previous_steps": combined_steps_1_to_5, "underwriting_text": underwriting_text},
    )

    # Step 7: Coverage restrictions with detailed reasoning
    prompt_step7 = PromptTemplate(
        input_variables=["previous_steps"],
        template="""
        Based on the following information:
        {previous_steps}

        Determine if any coverage restrictions are necessary. List each restriction clearly, providing detailed reasoning on why each restriction is necessary.
        """
    )
    steps["step7"] = run_step(prompt_step7, {"previous_steps": combined_steps_1_to_5})

    # Step 8: Summary of unintuitive risks
    prompt_step8 = PromptTemplate(
        input_variables=["previous_steps"],
        template="""
        Summarize any unintuitive risks associated with insuring this class of business:
        {previous_steps}

        Limit your response to 1-3 sentences.
        """
    )
    steps["step8"] = run_step(prompt_step8, {"previous_steps": combined_steps_1_to_5})

    # Step 9: Final concise review (sees only the replies of steps 6-8)
    prompt_step9 = PromptTemplate(
        input_variables=["steps_6_8"],
        template="""
        Review and refine the following underwriting guidelines for consistency, relevance, and conciseness. Ensure all restrictions clearly include detailed reasoning:
        {steps_6_8}

        Provide the final, concise underwriting appetite guideline clearly in Markdown format.
        """
    )
    combined_steps_6_to_8 = '\n\n'.join(steps[f"step{i}"] for i in range(6, 9))
    final_guidelines = run_step(prompt_step9, {"steps_6_8": combined_steps_6_to_8})

    markdown_output = f"## {business_type}\n\n{final_guidelines}\n"

    return markdown_output





In [None]:


# Initialize markdown file with YAML front matter
markdown_filename = "bop_guidelines.md"

# Front matter assembled one line per string literal
front_matter = (
    "---\n"
    'title: "Business Owners Policy Underwriting Guidelines"\n'
    'author: "Automated Agentic Underwriting Assistant"\n'
    'date: "2024-04-28"\n'
    "output: pdf_document\n"
    "---\n\n"
)

# Mode "w" starts the file over, so any previous content is discarded.
with open(markdown_filename, "w") as md_file:
    md_file.write(front_matter)

# Collect the non-blank lines of the categories file, stripped, as the business types
business_types = []
with open("bop_categories.txt", "r") as categories_file:
    for raw_line in categories_file:
        category = raw_line.strip()
        if category:
            business_types.append(category)



<enumerate at 0x78db6791e430>

In [None]:
# Run the pipeline for every business type, appending each section (followed by a
# "---" separator) to the markdown file as soon as it is produced.
total_business_types = len(business_types)
for idx, business_type in enumerate(business_types, start=1):
    print(f"Processing {idx}/{total_business_types}: {business_type}")

    markdown_section = agentic_pipeline(business_type)

    with open(markdown_filename, "a") as md_file:
        md_file.write(markdown_section + "\n\n---\n\n")

print(f"Markdown guidelines successfully created in '{markdown_filename}'.")

Processing 1/127: Accounting & Financial Services
Processing 2/127: Actuarial & Appraisal Services
Processing 3/127: Advertising Agencies
Processing 4/127: Air Conditioning & Heating (Sales/Service/Manufacturing)
Processing 5/127: Alarm System Installation
Processing 6/127: Ambulance & Emergency Services
Processing 7/127: Animal & Veterinary Services
Processing 8/127: Answering & Telemarketing Services
Processing 9/127: Antique & Collectible Stores
Processing 10/127: Appliance Sales & Repair
Processing 11/127: Army/Navy/Military Surplus Stores
Processing 12/127: Art Galleries & Art Supply Stores
Processing 13/127: Artificial Flowers & Floral Supplies
Processing 14/127: Artists & Craft Studios
Processing 15/127: Asphalt & Paving Services
Processing 16/127: Assembly & Packaging Services
Processing 17/127: Audio & Visual Equipment Sales/Service
Processing 18/127: Auto Parts & Accessories
Processing 19/127: Auto Services & Repair (including body shops)
Processing 20/127: Bakeries & Bagel S

In [None]:
# Resume generation part-way through the list, appending to the existing markdown file.
start_idx = 37  # zero-indexed, so 37 means starting from the 38th business
total_business_types = len(business_types)
for position in range(start_idx, total_business_types):
    business_type = business_types[position]
    # The progress message is 1-based, hence position + 1.
    print(f"Processing {position + 1}/{total_business_types}: {business_type}")

    markdown_section = agentic_pipeline(business_type)

    with open(markdown_filename, "a") as md_file:
        md_file.write(markdown_section + "\n\n---\n\n")

print(f"Markdown guidelines successfully appended from business #{start_idx + 1} onward in '{markdown_filename}'.")


Processing 38/127: Cemeteries & Funeral Services
Processing 39/127: Churches & Religious Organizations
Processing 40/127: Clothing & Apparel (Retail & Wholesale)
Processing 41/127: Clubs & Recreation Facilities
Processing 42/127: Coffee Shops & Tea Houses
Processing 43/127: Computer & Technology Services
Processing 44/127: Concrete & Masonry Services
Processing 45/127: Convenience Stores
Processing 46/127: Cosmetics & Toiletries
Processing 47/127: Countertop & Surface Installation
Processing 48/127: Craft & Hobby Stores
Processing 49/127: Dairy & Ice Cream Shops
Processing 50/127: Dance, Drama, & Music Schools
Processing 51/127: Day Care & Child Care Centers
Processing 52/127: Delicatessens & Sandwich Shops
Processing 53/127: Department & Discount Stores
Processing 54/127: Detective & Security Services
Processing 55/127: Diaper & Linen Services
Processing 56/127: Door & Window Installation/Sales
Processing 57/127: Dry Cleaning & Laundry Services
Processing 58/127: Educational & School 

In [None]:
# Enhanced global requirements generation pipeline

# Retriever used for every lookup in this cell.
# NOTE(review): this cell used a name `retriever` that no cell in the notebook defines,
# which raises NameError on a fresh kernel. `retriever_underwriting` is assumed to be
# the intended retriever - confirm.
global_retriever = retriever_underwriting

# Step 1: Initial identification of global underwriting requirements
initial_prompt = PromptTemplate(
    input_variables=["retrieved_docs"],
    template="""
    Based on the following underwriting guidelines, list concise and universally applicable underwriting requirements that apply broadly to all business types.
    Clearly number your response.

    Guidelines:
    {retrieved_docs}
    """
)

initial_chain = initial_prompt | llm | StrOutputParser()

initial_docs = global_retriever.invoke("global underwriting requirements")
initial_requirements_text = initial_chain.invoke(
    {"retrieved_docs": "\n".join(doc.page_content for doc in initial_docs)}
)

# Clearly print initial list for verification
print("Initial Global Requirements Identified:")
print(initial_requirements_text)

# Parse requirements list: every non-blank line of the reply is treated as one requirement
requirements_list = [line.strip() for line in initial_requirements_text.split('\n') if line.strip()]

# Step 2: Detailed Explanation with consistent categorization and reasoning
detail_prompt = PromptTemplate(
    input_variables=["requirement", "retrieved_docs"],
    template="""
    Clearly define the following underwriting requirement for a Business Owner Policy Guide. Include:

    1. Detailed description of the requirement.
    2. Categorization (mandatory or recommended).
    3. Explicit and consistent reasoning explaining why this requirement is necessary from an underwriting perspective.
    4. Any specific underwriting outcomes or risks addressed by this requirement.

    Requirement:
    {requirement}

    Guidelines:
    {retrieved_docs}

    Respond in clear and structured Markdown format.
    """
)

# The chain does not depend on the requirement, so it is built once outside the loop.
detailed_chain = detail_prompt | llm | StrOutputParser()

detailed_responses = []

for idx, requirement in enumerate(requirements_list):
    print(f"Refining global requirement {idx+1}/{len(requirements_list)}")

    # Look up passages relevant to this specific requirement
    retrieved_docs = "\n".join(doc.page_content for doc in global_retriever.invoke(requirement))

    detailed_response = detailed_chain.invoke(
        {"requirement": requirement, "retrieved_docs": retrieved_docs}
    )
    detailed_responses.append(detailed_response)

# Step 3: Validation and Consistency Check
final_review_prompt = PromptTemplate(
    input_variables=["detailed_requirements"],
    template="""
    Review the following detailed underwriting guidelines:

    {detailed_requirements}

    Validate each guideline ensuring:
    - Consistency in structure and categorization.
    - Explicit reasoning clearly tied to underwriting relevance.
    - Removal of any redundant or irrelevant content.

    Provide a final, refined, and concise list of global underwriting guidelines in Markdown format.
    """
)

final_review_chain = final_review_prompt | llm | StrOutputParser()

final_refined_guidelines = final_review_chain.invoke(
    {"detailed_requirements": "\n\n".join(detailed_responses)}
)

# Append refined global guidelines to markdown
with open(markdown_filename, "a") as md_file:
    md_file.write("## Global Underwriting Requirements\n\n")
    md_file.write(final_refined_guidelines)
    md_file.write("\n\n---\n\n")

print(f"Refined global underwriting requirements successfully appended to '{markdown_filename}'.")


In [None]:
# NOTE(review): this cell repeats the parse / Step 2 / Step 3 portion of the global
# requirements pipeline in the cell above. Every run appends another
# "## Global Underwriting Requirements" section to the markdown file, so run only one
# of the two cells (or remove this one).

# Retriever used for the per-requirement lookups.
# NOTE(review): this cell used a name `retriever` that no cell in the notebook defines,
# which raises NameError on a fresh kernel. `retriever_underwriting` is assumed to be
# the intended retriever - confirm.
global_retriever = retriever_underwriting

# Parse requirements list: every non-blank line of the reply is treated as one requirement
requirements_list = [line.strip() for line in initial_requirements_text.split('\n') if line.strip()]

# Step 2: Detailed Explanation with consistent categorization and reasoning
detail_prompt = PromptTemplate(
    input_variables=["requirement", "retrieved_docs"],
    template="""
    Clearly define the following underwriting requirement for a Business Owner Policy Guide. Include:

    1. Detailed description of the requirement.
    2. Categorization (mandatory or recommended).
    3. Explicit and consistent reasoning explaining why this requirement is necessary from an underwriting perspective.
    4. Any specific underwriting outcomes or risks addressed by this requirement.

    Requirement:
    {requirement}

    Guidelines:
    {retrieved_docs}

    Respond in clear and structured Markdown format.
    """
)

# The chain does not depend on the requirement, so it is built once outside the loop.
detailed_chain = detail_prompt | llm | StrOutputParser()

detailed_responses = []

for idx, requirement in enumerate(requirements_list):
    print(f"Refining global requirement {idx+1}/{len(requirements_list)}")

    # Look up passages relevant to this specific requirement
    retrieved_docs = "\n".join(doc.page_content for doc in global_retriever.invoke(requirement))

    detailed_response = detailed_chain.invoke(
        {"requirement": requirement, "retrieved_docs": retrieved_docs}
    )
    detailed_responses.append(detailed_response)

# Step 3: Validation and Consistency Check
final_review_prompt = PromptTemplate(
    input_variables=["detailed_requirements"],
    template="""
    Review the following detailed underwriting guidelines:

    {detailed_requirements}

    Validate each guideline ensuring:
    - Consistency in structure and categorization.
    - Explicit reasoning clearly tied to underwriting relevance.
    - Removal of any redundant or irrelevant content.

    Provide a final, refined, and concise list of global underwriting guidelines in Markdown format.
    """
)

final_review_chain = final_review_prompt | llm | StrOutputParser()

final_refined_guidelines = final_review_chain.invoke(
    {"detailed_requirements": "\n\n".join(detailed_responses)}
)

# Append refined global guidelines to markdown
with open(markdown_filename, "a") as md_file:
    md_file.write("## Global Underwriting Requirements\n\n")
    md_file.write(final_refined_guidelines)
    md_file.write("\n\n---\n\n")

print(f"Refined global underwriting requirements successfully appended to '{markdown_filename}'.")


In [None]:
# Additional code to label each business and append a summary table
import re
import random

# Read the existing markdown file to extract business names
with open("bop_guidelines.md", "r") as md_file:
    markdown_content = md_file.read()

# Level-2 headings that this notebook itself writes for non-business sections.
# Without this filter they would be evaluated as if they were businesses (and the
# summary heading would be picked up whenever these cells are run a second time).
NON_BUSINESS_HEADINGS = {
    "Global Underwriting Requirements",
    "Business Categorization Summary",
}

# Extract business names from markdown headers
# NOTE(review): any other "## " heading inside a generated section still matches here -
# check the generated file if unexpected names show up.
business_names = [
    heading
    for heading in re.findall(r'^##\s+(.*)$', markdown_content, re.MULTILINE)
    if heading.strip() not in NON_BUSINESS_HEADINGS
]


In [None]:

# Prompt text asking for exactly one of four statuses for a business,
# given its name and its underwriting details.
status_template_text = """
    Review the underwriting details for the following business:

    Business Name:
    {business_name}

    Details:
    {business_details}

    Categorize the business clearly into one of the following statuses based on the ease or difficulty of meeting the listed requirements:
    - targeted (highly desirable)
    - acceptable (requirements easily met)
    - limited (some challenges, but feasible)
    - not acceptable (requirements too difficult to meet)

    Respond with just the status.
    """

# Define prompt to categorize each business
status_prompt = PromptTemplate(
    template=status_template_text,
    input_variables=["business_name", "business_details"],
)

In [None]:


# Initialize a list to hold one {"name", "code", "status"} record per business
business_statuses = []

# Draw all 5-digit codes up front, without replacement, from a seeded generator:
# no two businesses share a code, and the codes are reproducible for a given
# number of businesses. (Independent randint() draws can collide.)
business_codes = random.Random(42).sample(range(10000, 100000), k=len(business_names))

# The chain is the same for every business, so build it once.
status_chain = status_prompt | llm | StrOutputParser()

# Process each business to get status
for idx, (business_name, business_code) in enumerate(zip(business_names, business_codes)):
    print(f"Evaluating status for {business_name} ({idx+1}/{len(business_names)})")

    # Extract detailed section for the business: from its own heading line up to the
    # next line consisting solely of "---" (the separator written after each section).
    # Anchoring both ends keeps a name that is a prefix of another heading from
    # matching, and keeps "---" inside table separator rows from ending the section early.
    pattern = rf"^## {re.escape(business_name)}[ \t]*\n(.*?)^---[ \t]*$"
    match = re.search(pattern, markdown_content, re.DOTALL | re.MULTILINE)
    business_details = match.group(1).strip() if match else "No details found."

    status = status_chain.invoke(
        {"business_name": business_name, "business_details": business_details}
    ).strip()

    business_statuses.append({"name": business_name, "code": business_code, "status": status})

# Append the summary table to markdown file
with open("bop_guidelines.md", "a") as md_file:
    md_file.write("\n## Business Categorization Summary\n\n")
    md_file.write("| Name | Code | Status |\n")
    md_file.write("|------|------|--------|\n")

    for entry in business_statuses:
        md_file.write(f"| {entry['name']} | {entry['code']} | {entry['status']} |\n")

print("Business categorization summary successfully appended to 'bop_guidelines.md'.")


In [None]:
!apt-get install pandoc texlive-xetex -y
!pandoc bop_guidelines-10.md -o bop_guidelines_draft.pdf \
  --pdf-engine=xelatex \
  -V geometry:margin=1cm \
  --toc



Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  dvisvgm fonts-droid-fallback fonts-lato fonts-lmodern fonts-noto-mono
  fonts-texgyre fonts-urw-base35 libapache-pom-java
  libcmark-gfm-extensions0.29.0.gfm.3 libcmark-gfm0.29.0.gfm.3
  libcommons-logging-java libcommons-parent-java libfontbox-java libgs9
  libgs9-common libidn12 libijs-0.35 libjbig2dec0 libkpathsea6 libpdfbox-java
  libptexenc1 libruby3.0 libsynctex2 libteckit0 libtexlua53 libtexluajit2
  libwoff1 libzzip-0-13 lmodern pandoc-data poppler-data preview-latex-style
  rake ruby ruby-net-telnet ruby-rubygems ruby-webrick ruby-xmlrpc ruby3.0
  rubygems-integration t1utils teckit tex-common tex-gyre texlive-base
  texlive-binaries texlive-fonts-recommended texlive-latex-base
  texlive-latex-extra texlive-latex-recommended texlive-pictures
  texlive-plain-generic tipa xfonts-encodings xfonts-utils
Suggested packages:
  fonts-

In [None]:
!pandoc bop_guidelines-10.md -o bop_guidelines.tex


