# GPT-4 with Retrieval Augmentation (Chartwell docs)

In [16]:
import os
from pinecone import Pinecone
import openai
from PyPDF2 import PdfFileReader
import io
from IPython.display import Markdown
import io
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# Pinecone client plus a handle to the "insurancedoc" index; the key comes from
# the environment (None if the variable is not set).
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index = pc.Index("insurancedoc")

# The openai module picks its key up from this module-level attribute.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def read_text_file(file_path, encoding='utf-8'):
    """Return the full contents of a text file as a string.

    The file is first decoded with `encoding`. If that raises
    UnicodeDecodeError, the file is re-read as 'latin-1'.

    Args:
        file_path: Path of the file to read.
        encoding: Encoding tried first (default 'utf-8').

    Returns:
        The decoded file contents.
    """
    def _slurp(codec):
        # Open in text mode and read everything with the given codec.
        with open(file_path, 'r', encoding=codec) as handle:
            return handle.read()

    try:
        contents = _slurp(encoding)
    except UnicodeDecodeError:
        contents = _slurp('latin-1')
    return contents

def read_pdf_file(file_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The page texts concatenated in page order with no separator. A page
        whose extraction yields nothing contributes an empty string.
    """
    # PyPDF2 3.x removed the legacy PdfFileReader / numPages / getPage API.
    # Prefer PdfReader and fall back to the notebook-level PdfFileReader import
    # only when the installed PyPDF2 does not provide it.
    try:
        from PyPDF2 import PdfReader as reader_cls
    except ImportError:
        reader_cls = PdfFileReader

    with open(file_path, 'rb') as file:
        reader = reader_cls(file)
        # `or ""` keeps a page with no extractable text from raising TypeError;
        # join avoids building the result by repeated string concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)

def get_embeddings(text):
    """Return the embedding vector OpenAI produces for `text`.

    Calls `openai.Embedding.create` with the "text-embedding-ada-002" model and
    returns the `embedding` field of the first entry in the response's `data`.
    """
    embedding_request = {
        "input": text,
        "model": "text-embedding-ada-002",
    }
    first_result = openai.Embedding.create(**embedding_request)['data'][0]
    return first_result['embedding']

def upload_documents_to_pinecone(file_paths):
    """Embed each supported document and upsert it into the Pinecone index.

    For every path, the file is read with `read_text_file` (.txt) or
    `read_pdf_file` (.pdf), embedded with `get_embeddings`, and upserted into
    the module-level `index`. The file's base name is the vector id and the
    full text is stored under the "text" metadata key.

    Args:
        file_paths: Iterable of paths to .txt or .pdf files. The extension
            match is case-insensitive. Other extensions are reported and
            skipped.

    Returns:
        None. Progress is reported with print().
    """
    for file_path in file_paths:
        # Compare on a lower-cased copy so "REPORT.TXT" / "Policy.PDF" are accepted.
        lowered_path = file_path.lower()
        if lowered_path.endswith('.txt'):
            document_text = read_text_file(file_path)
        elif lowered_path.endswith('.pdf'):
            document_text = read_pdf_file(file_path)
        else:
            print(f"Unsupported file format: {file_path}")
            continue

        # An empty string cannot be embedded (e.g. a scanned PDF with no text
        # layer); skip it instead of failing the whole batch on the API call.
        if not document_text:
            print(f"Skipping empty document: {file_path}")
            continue

        document_embedding = get_embeddings(document_text)
        # NOTE(review): two files with the same base name in different
        # directories map to the same id, so the later one replaces the earlier.
        document_id = os.path.basename(file_path)

        index.upsert([
            (document_id, document_embedding, {"text": document_text})
        ])
        print(f"Document '{document_id}' successfully added to Pinecone index.")

# Directory holding the source documents. The default is the original local
# checkout; set CHARTWELL_DOCS_DIR to run this notebook on another machine.
docs_dir = os.getenv(
    "CHARTWELL_DOCS_DIR",
    "/Users/peter/Desktop/Chartwell-Insurance-AI/docs",
)

# Documents to embed and upsert, in upload order.
file_paths = [
    os.path.join(docs_dir, "chart.txt"),
    os.path.join(docs_dir, "reb.txt"),
]

upload_documents_to_pinecone(file_paths)

Document 'chart.txt' successfully added to Pinecone index.
Document 'reb.txt' successfully added to Pinecone index.


In [2]:
import openai
from pinecone import Pinecone
from IPython.display import Markdown

# SECURITY: the Pinecone and OpenAI keys that were hardcoded in this cell are
# saved in the notebook and must be treated as compromised -- revoke and rotate
# them. Credentials are now read from the environment (optionally populated
# from a .env file), the same way the first cell of this notebook does it.
import os
from dotenv import load_dotenv

load_dotenv()

pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("insurancedoc")
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_embeddings(query, model="text-embedding-ada-002"):
    """Embed a single query string and return its vector.

    The query is sent to `openai.Embedding.create` as a one-element list; the
    `embedding` of the first entry in the response's `data` is returned.

    Args:
        query: Text to embed.
        model: Embedding model name passed through to the API.
    """
    response = openai.Embedding.create(model=model, input=[query])
    first_item = response['data'][0]
    return first_item['embedding']

# Fetch the stored text of the closest matches from Pinecone.
def retrieve_contexts(index, vector, top_k=10):
    """Query `index` with `vector` and return the matched documents' text.

    Args:
        index: Object exposing `query(vector=..., top_k=..., include_metadata=...)`.
        vector: Query embedding.
        top_k: Number of matches requested (default 10).

    Returns:
        A list with the `metadata['text']` value of each match, in the order
        the matches appear in the response.
    """
    response = index.query(vector=vector, top_k=top_k, include_metadata=True)
    texts = []
    for match in response['matches']:
        texts.append(match['metadata']['text'])
    return texts

# Keep only the contexts that mention the keyword.
def filter_contexts(contexts, keyword):
    """Return the contexts that contain `keyword` as a substring.

    The match is case-sensitive and input order is preserved. A new list is
    returned; `contexts` is not modified.
    """
    kept = []
    for passage in contexts:
        if keyword in passage:
            kept.append(passage)
    return kept

# Question to answer, and the literal phrase a retrieved context must contain
# to be kept (the filter is a case-sensitive substring match).
query = "What services does Chartwell Insurance Service provide?"
keyword = "Chartwell Insurance Service"

# Embed the question so it can be compared against the stored document vectors.
xq = generate_embeddings(query)

# Retrieve the 10 nearest contexts, then drop those that do not mention the keyword.
contexts = retrieve_contexts(index, vector=xq, top_k=10)
filtered_contexts = filter_contexts(contexts, keyword)

# Build the prompt: contexts separated by "---", then a "-----" divider, then
# the question. If every context was filtered out, the prompt holds only the
# divider and the question.
augmented_query = "\n\n---\n\n".join(filtered_contexts) + "\n\n-----\n\n" + query

# System primer: restricts the model to the supplied context and sets the tone.
primer = """You are a Q&A bot for an insurance company - Chartwell Insurance. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information cannot be found in the information
provided by the user, you truthfully say 'I don't know'. When providing answers, your tone is like speaking for our company.
"""

# Generate the response with the primer as the system message and the
# context-augmented question as the user message.
res = openai.ChatCompletion.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query}
    ]
)

# Render the first choice's reply as Markdown.
display(Markdown(res['choices'][0]['message']['content']))

Chartwell Insurance Services specializes in developing and maintaining comprehensive insurance strategies tailored for high net worth individuals and their families. We work with top insurance companies dedicated to this sector and offer a complete team of insurance specialists available to assist clients during emergencies both during and after business hours. Our focus is on protecting real and personal possessions while guarding against potential liability lawsuits. We aim to provide a customized approach to property and casualty insurance, ensuring that our clients receive the specialized attention they deserve.

### Older version

In [1]:
from pinecone import Pinecone
import openai
# SECURITY: the Pinecone and OpenAI keys that were hardcoded in this cell are
# saved in the notebook and must be treated as compromised -- revoke and rotate
# them. Credentials are now read from the environment (optionally populated
# from a .env file) instead of being embedded in the source.
import os
from dotenv import load_dotenv

load_dotenv()

pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("insurancedoc")
openai.api_key = os.getenv("OPENAI_API_KEY")

  from tqdm.autonotebook import tqdm


In [3]:
def read_document(file_path, encoding='utf-8'):
    """Return the full contents of a text file as a string.

    The file is decoded with `encoding` instead of the platform default, so
    the result does not depend on the machine's locale. If decoding raises
    UnicodeDecodeError, the file is re-read as 'latin-1'.

    Args:
        file_path: Path of the file to read.
        encoding: Encoding tried first (default 'utf-8').

    Returns:
        The decoded file contents.
    """
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            return file.read()
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this read cannot fail on
        # decoding; non-latin-1 files will come back with wrong characters.
        with open(file_path, 'r', encoding='latin-1') as file:
            return file.read()

# Embed text with OpenAI.
def get_embeddings(text):
    """Return the embedding vector OpenAI produces for `text`.

    Calls `openai.Embedding.create` with the "text-embedding-ada-002" model and
    returns the `embedding` field of the first entry in the response's `data`.
    """
    embedding_request = {
        "input": text,
        "model": "text-embedding-ada-002",
    }
    first_result = openai.Embedding.create(**embedding_request)['data'][0]
    return first_result['embedding']

# Absolute path of the document to index (specific to the author's machine).
file_path = "/Users/peter/Desktop/Chartwell-Insurance-AI/docs/chart.txt"

# Read the whole file into one string.
document_text = read_document(file_path)

# Generate embeddings for the document (the entire text is embedded as one vector)
document_embedding = get_embeddings(document_text)

# Upsert the document into Pinecone index.
# NOTE(review): the id is the fixed literal "document_id"; any later upsert that
# uses the same id presumably replaces this record -- confirm against Pinecone docs.
index.upsert([
    ("document_id", document_embedding, {"text": document_text})
])

print("Document successfully added to Pinecone index.")

Document successfully added to Pinecone index.


In [101]:
query = "What is Chartwell Insurance Service?"
embed_model = "text-embedding-ada-002"

# Embed the question. The model is passed as `model=` (not the deprecated
# `engine=` alias), matching the other embedding calls in this notebook.
res = openai.Embedding.create(
    input=[query],
    model=embed_model
)

# Query vector: the embedding of the single input.
xq = res['data'][0]['embedding']

# Fetch the 5 nearest stored vectors with their metadata. `res` is re-bound
# from the embedding response to the Pinecone query response here.
res = index.query(vector=xq, top_k=5, include_metadata=True)

In [102]:
# Display the raw Pinecone query response (matches with their ids and metadata).
res

{'matches': [{'id': 'document_id',
              'metadata': {'text': 'The team at Chartwell Insurance Service '
                                   'are experts in developing and\n'
                                   'maintaining full-picture insurance '
                                   'strategies and work with the best\n'
                                   'insurance companies dedicated to this '
                                   'sector. Because disaster, all too\n'
                                   'often, happens outside of business hours '
                                   'we have a complete team of\n'
                                   'insurance specialists who are available to '
                                   'our clients who are experiencing\n'
                                   'an emergency both during and after-hours. '
                                   'If you see insurance as we do,\n'
                                   'not as an unavoidable and unwanted '
  

In [103]:
# Collect the stored text of every match in the Pinecone response.
contexts = [item['metadata']['text'] for item in res['matches']]

# Prompt layout: contexts separated by "---", then a "-----" divider, then the question.
augmented_query = "\n\n---\n\n".join(contexts)+"\n\n-----\n\n"+query

In [104]:
# Print the assembled prompt (retrieved contexts followed by the question).
print(augmented_query)

The team at Chartwell Insurance Service are experts in developing and
maintaining full-picture insurance strategies and work with the best
insurance companies dedicated to this sector. Because disaster, all too
often, happens outside of business hours we have a complete team of
insurance specialists who are available to our clients who are experiencing
an emergency both during and after-hours. If you see insurance as we do,
not as an unavoidable and unwanted expense, but as an important means
to protect your real and personal possessions and help guard against
assaults on your assets through liability lawsuits from aggressive third
parties you should contact our team at Chartwell Insurance Services.


-----

What is Chartwell Insurance Service?


In [105]:
# System primer restricting the model to the context supplied in the user
# message. A plain string is used: the text has no placeholders, so the
# f-prefix did nothing.
primer = """You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information can not be found in the information
provided by the user you truthfully say "I don't know".
"""

# Ask GPT-4 with the primer as the system message and the context-augmented
# question as the user message.
res = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query}
    ]
)

In [106]:
from IPython.display import Markdown

# Render the first choice's reply as Markdown.
display(Markdown(res['choices'][0]['message']['content']))

Chartwell Insurance Service is a team of experts who develop and maintain comprehensive insurance strategies. They collaborate with top insurance companies specializing in this sector. They provide services to their clients both during business hours and after-hours, especially in cases of emergencies. Their perspective on insurance is not of an unwanted expense, but as a crucial means of protecting both real and personal possessions and defending against liability lawsuits from aggressive third parties.

In [111]:
query = "Who is Rebecca?"
embed_model = "text-embedding-ada-002"

# Embed the question. The model is passed as `model=` (not the deprecated
# `engine=` alias), matching the other embedding calls in this notebook.
res = openai.Embedding.create(
    input=[query],
    model=embed_model
)

# Query vector: the embedding of the single input.
xq = res['data'][0]['embedding']

# Fetch the 5 nearest stored vectors and keep the text stored with each match.
# (A bare `res` expression that followed this call was removed: in the middle
# of a cell it displays nothing.)
res = index.query(vector=xq, top_k=5, include_metadata=True)
contexts = [item['metadata']['text'] for item in res['matches']]

# Prompt layout: contexts separated by "---", then a "-----" divider, then the question.
augmented_query = "\n\n---\n\n".join(contexts)+"\n\n-----\n\n"+query
print(augmented_query)

# Plain string: the primer has no placeholders, so no f-prefix is needed.
primer = """You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information can not be found in the information
provided by the user you truthfully say "I don't know".
"""

res = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query}
    ]
)
# Render the first choice's reply as Markdown.
display(Markdown(res['choices'][0]['message']['content']))

The team at Chartwell Insurance Service are experts in developing and
maintaining full-picture insurance strategies and work with the best
insurance companies dedicated to this sector. Because disaster, all too
often, happens outside of business hours we have a complete team of
insurance specialists who are available to our clients who are experiencing
an emergency both during and after-hours. If you see insurance as we do,
not as an unavoidable and unwanted expense, but as an important means
to protect your real and personal possessions and help guard against
assaults on your assets through liability lawsuits from aggressive third
parties you should contact our team at Chartwell Insurance Services.


-----

Who is Rebecca?


The information provided does not mention a person named Rebecca. Therefore, I cannot provide any details about who Rebecca might be.

### Try to upload info about Rebecca

In [117]:
def read_document(file_path, encoding='utf-8'):
    """Return the full contents of a text file as a string.

    Decodes with `encoding` first; if that raises UnicodeDecodeError the file
    is read again as 'latin-1'.

    Args:
        file_path: Path of the file to read.
        encoding: Encoding tried first (default 'utf-8').
    """
    def _load(codec):
        # Text-mode read of the whole file using the given codec.
        with open(file_path, 'r', encoding=codec) as stream:
            return stream.read()

    try:
        text = _load(encoding)
    except UnicodeDecodeError:
        text = _load('latin-1')
    return text

# Embed text with OpenAI.
def get_embeddings(text):
    """Return the embedding vector OpenAI produces for `text`.

    Calls `openai.Embedding.create` with the "text-embedding-ada-002" model and
    returns the `embedding` field of the first entry in the response's `data`.
    """
    embedding_request = {
        "input": text,
        "model": "text-embedding-ada-002",
    }
    first_result = openai.Embedding.create(**embedding_request)['data'][0]
    return first_result['embedding']

import os

# Absolute path of the document to index (specific to the author's machine).
file_path = "/Users/peter/Desktop/Chartwell/reb.txt"

# Read the whole file into one string.
document_text = read_document(file_path)

# Generate embeddings for the document (the entire text is embedded as one vector)
document_embedding = get_embeddings(document_text)

# Use the file name as the vector id. The fixed literal "document_id" used
# before is the same id the earlier upload cell writes, so this upsert replaced
# that record instead of adding a second document to the index.
document_id = os.path.basename(file_path)

# Upsert the document into Pinecone index
index.upsert([
    (document_id, document_embedding, {"text": document_text})
])

print("Document successfully added to Pinecone index.")

Document successfully added to Pinecone index.


In [118]:
query = "Who is Rebecca?"
embed_model = "text-embedding-ada-002"

# Embed the question. The model is passed as `model=` (not the deprecated
# `engine=` alias), matching the other embedding calls in this notebook.
res = openai.Embedding.create(
    input=[query],
    model=embed_model
)

# Query vector: the embedding of the single input.
xq = res['data'][0]['embedding']

# Fetch the 5 nearest stored vectors and keep the text stored with each match.
# (A bare `res` expression that followed this call was removed: in the middle
# of a cell it displays nothing.)
res = index.query(vector=xq, top_k=5, include_metadata=True)
contexts = [item['metadata']['text'] for item in res['matches']]

# Prompt layout: contexts separated by "---", then a "-----" divider, then the question.
augmented_query = "\n\n---\n\n".join(contexts)+"\n\n-----\n\n"+query
print(augmented_query)

# Plain string: the primer has no placeholders, so no f-prefix is needed.
primer = """You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information can not be found in the information
provided by the user you truthfully say "I don't know".
"""

res = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query}
    ]
)
# Render the first choice's reply as Markdown.
display(Markdown(res['choices'][0]['message']['content']))

Rebecca Korach Woan is a principal and founder of Chartwell Insurance Services. She started Chartwell because she felt that high net worth individuals were often inadequately served by insurance intermediaries, and that these individuals and their families required a customized approach to property and casualty insurance which would be best handled by a dedicated, specialized brokerage. Her own interest in the art world complements the passion of Chartwell clients who are collectors; she understands the challenges of acquiring, protecting and maintaining collections. Rebecca says, ÒI am a bit of an insurance geek, I actually like reading contracts and delving into arcane insurance language.Ó
She currently serves on the governing committee of the Illinois Fair Plan and is a member of the Independent Insurance Agents of Illinois (IIA) and the Chicago Finance Exchange. She also serves on the Auxiliary Board of the Art Institute of Chicago and the ARZU Advisory Board and is a member of the

Rebecca Korach Woan is a principal and founder of Chartwell Insurance Services. She started Chartwell because she perceived a gap in the market where high net worth individuals were not adequately served by insurance intermediaries. She believes that these individuals need a customized approach to property and casualty insurance, which can be provided by a dedicated, specialized brokerage. Rebecca is knowledgeable in arts, which aligns with her clients who are collectors. Apart from being involved in insurance, she serves on various committees and boards, including the Illinois Fair Plan and the Independent Insurance Agents of Illinois (IIA). She is also on the Auxiliary Board of the Art Institute of Chicago, ARZU Advisory Board and is a member of the Arts Club of Chicago and the Woman’s Athletic Club. She studied International Relations and French at the University of Pennsylvania, and completed her MBA in Finance from the University of Chicago Booth School. She provides regular commentary on personal property and casualty insurance for various publications. She resides in Chicago with her family.