# GPT-4o mini with Retrieval Augmentation (Chartwell docs)

In [1]:
import os
from pinecone import Pinecone
import openai
from PyPDF2 import PdfFileReader
import io
from IPython.display import Markdown
import io
from dotenv import load_dotenv

# Populate the process environment (API keys) via python-dotenv.
load_dotenv()

# Pinecone client and a handle to the "insurancedoc" index used by the upload below.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index = pc.Index("insurancedoc")

# Key used by the openai.* calls in this notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def read_text_file(file_path, encoding='utf-8'):
    """Return the entire contents of a text file as one string.

    Args:
        file_path: Path of the file to read.
        encoding: Encoding tried first (UTF-8 by default).

    Returns:
        str: The decoded file contents.

    If decoding with ``encoding`` raises ``UnicodeDecodeError`` the file is
    read again as ``latin-1``.
    """
    try:
        with open(file_path, mode='r', encoding=encoding) as primary:
            contents = primary.read()
    except UnicodeDecodeError:
        # latin-1 assigns a character to every byte value, so this second
        # attempt cannot hit another decode error.
        with open(file_path, mode='r', encoding='latin-1') as fallback:
            contents = fallback.read()
    return contents

def read_pdf_file(file_path):
    """Extract and concatenate the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        str: The text of all pages, joined in page order with no separator.
    """
    # Imported here because the notebook's import cell only brings in the
    # legacy ``PdfFileReader`` name, which PyPDF2 3.x refuses to instantiate.
    # ``PdfReader`` / ``.pages`` belong to the same API generation as the
    # ``extract_text()`` call this function already relied on.
    from PyPDF2 import PdfReader

    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        # ``or ""`` guards against a page yielding no text object at all, which
        # would otherwise break the concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)

def get_embeddings(text):
    """Embed ``text`` with the ``text-embedding-ada-002`` model.

    Args:
        text: The input passed to ``openai.Embedding.create``.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``.
    """
    result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=text,
    )
    first_item = result['data'][0]
    return first_item['embedding']

def upload_documents_to_pinecone(file_paths):
    """Embed each supported document and upsert it into the Pinecone index.

    For every path: read the file (``.txt`` or ``.pdf``, matched
    case-insensitively), embed the full text with ``get_embeddings`` and upsert
    one vector whose id is the file's base name and whose metadata carries the
    text under the ``"text"`` key.

    Args:
        file_paths: Iterable of file path strings.

    Unsupported extensions and documents with no text are reported with a
    printed message and skipped; processing continues with the next path.
    """
    for file_path in file_paths:
        # Compare on a lowered copy so "REPORT.PDF" / "notes.TXT" are accepted.
        lowered_path = file_path.lower()
        if lowered_path.endswith('.txt'):
            document_text = read_text_file(file_path)
        elif lowered_path.endswith('.pdf'):
            document_text = read_pdf_file(file_path)
        else:
            print(f"Unsupported file format: {file_path}")
            continue

        # Nothing to embed or retrieve later: skip rather than send an empty
        # input to the embedding call and store a useless vector.
        if not document_text.strip():
            print(f"Skipping empty document: {file_path}")
            continue

        document_embedding = get_embeddings(document_text)
        # NOTE(review): ids are base names, so two files with the same name in
        # different directories overwrite each other in the index.
        document_id = os.path.basename(file_path)

        index.upsert([
            (document_id, document_embedding, {"text": document_text})
        ])
        print(f"Document '{document_id}' successfully added to Pinecone index.")

# Directory holding the source documents. Set CHARTWELL_DOCS_DIR (for example
# in .env) to run this on another machine; the default is the original location.
DOCS_DIR = os.getenv(
    "CHARTWELL_DOCS_DIR",
    "/Users/peter/Desktop/Chartwell-Insurance-AI/docs",
)

file_paths = [
    os.path.join(DOCS_DIR, "chart.txt"),
    os.path.join(DOCS_DIR, "reb.txt"),
]

upload_documents_to_pinecone(file_paths)

  from tqdm.autonotebook import tqdm


Document 'chart.txt' successfully added to Pinecone index.
Document 'reb.txt' successfully added to Pinecone index.


In [10]:
import openai
from pinecone import Pinecone
from IPython.display import Markdown
from dotenv import load_dotenv

# This cell's own import list does not include `os`; import it here so the
# cell also runs on a fresh kernel instead of relying on an earlier cell.
import os

# Load environment variables
load_dotenv()

# Pinecone client and a handle to the "insurancedoc" index queried below.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("insurancedoc")
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_embeddings(query, model="text-embedding-ada-002"):
    """Embed a single query string.

    Args:
        query: Text to embed; sent to the API as a one-element list.
        model: Embedding model name.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``.
    """
    response = openai.Embedding.create(model=model, input=[query])
    first_item = response['data'][0]
    return first_item['embedding']

# Look up stored texts for a query vector
def retrieve_contexts(index, vector, top_k=10):
    """Query ``index`` and return the ``text`` metadata of each match.

    Args:
        index: Object exposing ``query(vector=..., top_k=..., include_metadata=...)``.
        vector: Query embedding.
        top_k: Number of matches requested from the index.

    Returns:
        list: ``match['metadata']['text']`` for every entry of the response's
        ``matches``, in the order returned.
    """
    response = index.query(vector=vector, top_k=top_k, include_metadata=True)
    texts = []
    for match in response['matches']:
        texts.append(match['metadata']['text'])
    return texts

# Keyword filter over retrieved texts
def filter_contexts(contexts, keyword):
    """Return the contexts that contain ``keyword``, preserving order.

    The check is a plain, case-sensitive substring test (``keyword in context``).
    """
    kept = []
    for candidate in contexts:
        if keyword in candidate:
            kept.append(candidate)
    return kept

# The question to answer, and the literal term a retrieved text must contain to be kept
query = "Who is Rebecca?"
keyword = "Rebecca"

# Embed the question
xq = generate_embeddings(query)

# Fetch up to 10 matches, then keep only the ones mentioning the keyword
contexts = retrieve_contexts(index, vector=xq, top_k=10)
filtered_contexts = filter_contexts(contexts, keyword)

# Prompt layout: kept contexts separated by "---", then a "-----" rule, then the question
augmented_query = "\n\n-----\n\n".join(
    ["\n\n---\n\n".join(filtered_contexts), query]
)

# System message for the chat model
primer = """You are a Q&A bot for an insurance company - Chartwell Insurance. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information cannot be found in the information
provided by the user, you truthfully say 'I don't know'. When providing answers, your tone is like speaking for our company.
"""

# Ask the chat model, passing the primer as the system turn and the augmented query as the user turn
res = openai.ChatCompletion.create(
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query},
    ],
    model="gpt-4o-mini",
)

# Render the first choice's message as Markdown
display(Markdown(res['choices'][0]['message']['content']))

Rebecca Korach Woan is the principal and founder of Chartwell Insurance Services. She established the company to provide a customized approach to property and casualty insurance for high net worth individuals, who she believed were often inadequately served by traditional insurance intermediaries. Rebecca has a passion for art, which aligns with the interests of many of Chartwell's clients who are collectors. She enjoys exploring insurance contracts and terminology in depth. In addition to her role at Chartwell, she is involved in various organizations, including the Illinois Fair Plan and the Art Institute of Chicago. She holds a Bachelor of Arts degree from the University of Pennsylvania and an MBA in Finance from the University of Chicago Booth School. Rebecca resides in Chicago with her family.

In [2]:
import openai
from pinecone import Pinecone
from IPython.display import Markdown

from dotenv import load_dotenv

# This cell's own import list does not include `os`; import it here so the
# cell also runs on a fresh kernel instead of relying on an earlier cell.
import os

# Load environment variables
load_dotenv()

# Pinecone client and a handle to the "insurancedoc" index queried below.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("insurancedoc")
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_embeddings(query, model="text-embedding-ada-002"):
    """Embed a single query string.

    Args:
        query: Text to embed; sent to the API as a one-element list.
        model: Embedding model name.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``.
    """
    response = openai.Embedding.create(model=model, input=[query])
    first_item = response['data'][0]
    return first_item['embedding']

# Look up stored texts for a query vector
def retrieve_contexts(index, vector, top_k=10):
    """Query ``index`` and return the ``text`` metadata of each match.

    Args:
        index: Object exposing ``query(vector=..., top_k=..., include_metadata=...)``.
        vector: Query embedding.
        top_k: Number of matches requested from the index.

    Returns:
        list: ``match['metadata']['text']`` for every entry of the response's
        ``matches``, in the order returned.
    """
    response = index.query(vector=vector, top_k=top_k, include_metadata=True)
    texts = []
    for match in response['matches']:
        texts.append(match['metadata']['text'])
    return texts

# Keyword filter over retrieved texts
def filter_contexts(contexts, keyword):
    """Return the contexts that contain ``keyword``, preserving order.

    The check is a plain, case-sensitive substring test (``keyword in context``).
    """
    kept = []
    for candidate in contexts:
        if keyword in candidate:
            kept.append(candidate)
    return kept

# The question to answer, and the literal term a retrieved text must contain to be kept
query = "What services does Chartwell Insurance Service provide?"
keyword = "Chartwell Insurance Service"

# Embed the question
xq = generate_embeddings(query)

# Fetch up to 10 matches, then keep only the ones mentioning the keyword
contexts = retrieve_contexts(index, vector=xq, top_k=10)
filtered_contexts = filter_contexts(contexts, keyword)

# Prompt layout: kept contexts separated by "---", then a "-----" rule, then the question
augmented_query = "\n\n-----\n\n".join(
    ["\n\n---\n\n".join(filtered_contexts), query]
)

# System message for the chat model
primer = """You are a Q&A bot for an insurance company - Chartwell Insurance. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information cannot be found in the information
provided by the user, you truthfully say 'I don't know'. When providing answers, your tone is like speaking for our company.
"""

# Ask the chat model, passing the primer as the system turn and the augmented query as the user turn
res = openai.ChatCompletion.create(
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query},
    ],
    model="gpt-4o-mini",
)

# Render the first choice's message as Markdown
display(Markdown(res['choices'][0]['message']['content']))

Chartwell Insurance Services specializes in developing and maintaining comprehensive insurance strategies tailored for high net worth individuals and their families. We work with top insurance companies dedicated to this sector and offer a complete team of insurance specialists available to assist clients during emergencies both during and after business hours. Our focus is on protecting real and personal possessions while guarding against potential liability lawsuits. We aim to provide a customized approach to property and casualty insurance, ensuring that our clients receive the specialized attention they deserve.