In [106]:
import pdfplumber
from pathlib import Path
import pandas as pd
from operator import itemgetter
import json
import tiktoken
import openai
import chromadb
from oauthlib.uri_validate import query

In [107]:
path = "./Principal-Sample-Life-Insurance-Policy.pdf"

In [108]:
# Quick probe of one page (index 2, i.e. the third page) to see what pdfplumber
# returns for plain text and for table extraction before processing the whole file.
with pdfplumber.open(path) as pdf:
    page = pdf.pages[2]
    text, tables = page.extract_text(), page.extract_table()

In [109]:
print(text)

POLICY RIDER
GROUP INSURANCE
POLICY NO: S655
COVERAGE: Life
EMPLOYER: RHODE ISLAND JOHN DOE
Effective on the later of the Date of Issue of this Group Policy or March 1, 2005, the following
will apply to your Policy:
From time to time The Principal may offer or provide certain employer groups who apply
for coverage with The Principal a Financial Services Hotline and Grief Support Services or
any other value added service for the employees of that employer group. In addition, The
Principal may arrange for third party service providers (i.e., optometrists, health clubs), to
provide discounted goods and services to those employer groups who apply for coverage
with The Principal or who become insureds/enrollees of The Principal. While The
Principal has arranged these goods, services and/or third party provider discounts, the third
party service providers are liable to the applicants/insureds/enrollees for the provision of
such goods and/or services. The Principal is not responsible for the 

In [110]:
tables

In [111]:
def check_bboxes(word, table_bbox):
    """Return True when ``word`` lies strictly inside ``table_bbox``.

    ``word`` is a mapping with ``x0``, ``top``, ``x1`` and ``bottom`` keys;
    ``table_bbox`` is an indexable ``(x0, top, x1, bottom)`` box. A word that
    touches or crosses any edge of the box is reported as outside.
    """
    word_x0, word_top, word_x1, word_bottom = (
        word["x0"],
        word["top"],
        word["x1"],
        word["bottom"],
    )
    return (
        word_x0 > table_bbox[0]
        and word_top > table_bbox[1]
        and word_x1 < table_bbox[2]
        and word_bottom < table_bbox[3]
    )

In [112]:
# Extract the text of a PDF page by page, keeping tables at their original
# position in the reading order.


def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF, with tables kept in reading order.

    For each page the tables are located first. Words that fall inside a table
    bounding box (see ``check_bboxes``) are left out of the running text, and the
    table itself is emitted as a JSON-encoded list of rows. Words and tables are
    then clustered by their ``top`` coordinate so the output follows the same
    top-to-bottom order as the page.

    Args:
        pdf_path: Location of the PDF; passed straight to ``pdfplumber.open``.

    Returns:
        A list with one ``[page_label, page_text]`` entry per page, where
        ``page_label`` is ``"Page N"`` (1-based) and ``page_text`` is all lines
        of the page joined with single spaces.
    """
    full_text = []

    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            # Locate the tables and remember where they sit on the page.
            found_tables = page.find_tables()
            table_bboxes = [table.bbox for table in found_tables]
            # Give each table a "top" key so it can be clustered with the words.
            table_items = [
                {"table": table.extract(), "top": table.bbox[1]}
                for table in found_tables
            ]

            # Keep only the words that are not part of any table.
            non_table_words = [
                word
                for word in page.extract_words()
                if not any(check_bboxes(word, bbox) for bbox in table_bboxes)
            ]

            lines = []
            for cluster in pdfplumber.utils.cluster_objects(
                non_table_words + table_items, itemgetter("top"), tolerance=5
            ):
                # A cluster can mix words and tables when a table starts within
                # the tolerance of a text line, so handle every item by its own
                # kind instead of assuming the first item speaks for the cluster.
                words = [item["text"] for item in cluster if "text" in item]
                if words:
                    lines.append(" ".join(words))
                lines.extend(
                    json.dumps(item["table"]) for item in cluster if "table" in item
                )

            full_text.append([f"Page {page_number}", " ".join(lines)])

    return full_text

In [113]:
# Run the extraction over the whole policy PDF: one DataFrame row per page.
pdf_path = Path(path)
extracted_text = extract_text_from_pdf(pdf_path)
extracted_text_df = pd.DataFrame(extracted_text, columns=["page_no.", "text"])
extracted_text_df["Document_name"] = pdf_path.name
# The frame is built from a plain list, so it already carries a default 0..n-1
# index. Assigning an `ignore_index` attribute on the frame would not touch the
# index (it is a keyword of concat/sort helpers, not a DataFrame setting).

In [114]:
extracted_text_df.text[2]

'POLICY RIDER GROUP INSURANCE POLICY NO: S655 COVERAGE: Life EMPLOYER: RHODE ISLAND JOHN DOE Effective on the later of the Date of Issue of this Group Policy or March 1, 2005, the following will apply to your Policy: From time to time The Principal may offer or provide certain employer groups who apply for coverage with The Principal a Financial Services Hotline and Grief Support Services or any other value added service for the employees of that employer group. In addition, The Principal may arrange for third party service providers (i.e., optometrists, health clubs), to provide discounted goods and services to those employer groups who apply for coverage with The Principal or who become insureds/enrollees of The Principal. While The Principal has arranged these goods, services and/or third party provider discounts, the third party service providers are liable to the applicants/insureds/enrollees for the provision of such goods and/or services. The Principal is not responsible for the

In [115]:
# Number of whitespace-separated words on each page.
extracted_text_df["text_length"] = extracted_text_df["text"].str.split().str.len()

In [116]:
extracted_text_df["text_length"]

0      30
1       5
2     230
3       5
4     110
     ... 
59    285
60    418
61    322
62      5
63      8
Name: text_length, Length: 64, dtype: int64

In [117]:
# Longest and shortest page, in words.
print(extracted_text_df["text_length"].max())
print(extracted_text_df["text_length"].min())

462
5


In [118]:
# Drop pages with fewer than 10 words. `.copy()` makes the result an independent
# frame, so columns added to it later do not write into a slice of the unfiltered
# frame (which pandas reports as SettingWithCopyWarning).
extracted_text_df = extracted_text_df[extracted_text_df["text_length"] >= 10].copy()

In [119]:
extracted_text_df

Unnamed: 0,page_no.,text,Document_name,text_length
0,Page 1,DOROTHEA GLAUSE S655 RHODE ISLAND JOHN DOE 01/...,Principal-Sample-Life-Insurance-Policy.pdf,30
2,Page 3,POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,Principal-Sample-Life-Insurance-Policy.pdf,230
4,Page 5,PRINCIPAL LIFE INSURANCE COMPANY (called The P...,Principal-Sample-Life-Insurance-Policy.pdf,110
5,Page 6,TABLE OF CONTENTS PART I - DEFINITIONS PART II...,Principal-Sample-Life-Insurance-Policy.pdf,153
6,Page 7,Section A – Eligibility Member Life Insurance ...,Principal-Sample-Life-Insurance-Policy.pdf,176
7,Page 8,Section A - Member Life Insurance Schedule of ...,Principal-Sample-Life-Insurance-Policy.pdf,171
8,Page 9,P ART I - DEFINITIONS When used in this Group ...,Principal-Sample-Life-Insurance-Policy.pdf,387
9,Page 10,T he legally recognized union of two eligible ...,Principal-Sample-Life-Insurance-Policy.pdf,251
10,Page 11,(2) has been placed with the Member or spouse ...,Principal-Sample-Life-Insurance-Policy.pdf,299
11,Page 12,An institution that is licensed as a Hospital ...,Principal-Sample-Life-Insurance-Policy.pdf,352


In [120]:
min(extracted_text_df["text_length"])

30

In [121]:
# Per-page metadata stored next to each document in the vector store.
# Path(...).stem strips the extension whatever its length (the previous [:-4]
# slice only worked for three-letter extensions such as ".pdf").
# `assign` returns a new frame, so this never writes into a filtered slice.
extracted_text_df = extracted_text_df.assign(
    Metadata=[
        {"Document_Name": Path(document_name).stem, "page_no.": str(page_label)}
        for document_name, page_label in zip(
            extracted_text_df["Document_name"], extracted_text_df["page_no."]
        )
    ]
)

In [122]:
extracted_text_df

Unnamed: 0,page_no.,text,Document_name,text_length,Metadata
0,Page 1,DOROTHEA GLAUSE S655 RHODE ISLAND JOHN DOE 01/...,Principal-Sample-Life-Insurance-Policy.pdf,30,{'Document_Name': 'Principal-Sample-Life-Insur...
2,Page 3,POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,Principal-Sample-Life-Insurance-Policy.pdf,230,{'Document_Name': 'Principal-Sample-Life-Insur...
4,Page 5,PRINCIPAL LIFE INSURANCE COMPANY (called The P...,Principal-Sample-Life-Insurance-Policy.pdf,110,{'Document_Name': 'Principal-Sample-Life-Insur...
5,Page 6,TABLE OF CONTENTS PART I - DEFINITIONS PART II...,Principal-Sample-Life-Insurance-Policy.pdf,153,{'Document_Name': 'Principal-Sample-Life-Insur...
6,Page 7,Section A – Eligibility Member Life Insurance ...,Principal-Sample-Life-Insurance-Policy.pdf,176,{'Document_Name': 'Principal-Sample-Life-Insur...
7,Page 8,Section A - Member Life Insurance Schedule of ...,Principal-Sample-Life-Insurance-Policy.pdf,171,{'Document_Name': 'Principal-Sample-Life-Insur...
8,Page 9,P ART I - DEFINITIONS When used in this Group ...,Principal-Sample-Life-Insurance-Policy.pdf,387,{'Document_Name': 'Principal-Sample-Life-Insur...
9,Page 10,T he legally recognized union of two eligible ...,Principal-Sample-Life-Insurance-Policy.pdf,251,{'Document_Name': 'Principal-Sample-Life-Insur...
10,Page 11,(2) has been placed with the Member or spouse ...,Principal-Sample-Life-Insurance-Policy.pdf,299,{'Document_Name': 'Principal-Sample-Life-Insur...
11,Page 12,An institution that is licensed as a Hospital ...,Principal-Sample-Life-Insurance-Policy.pdf,352,{'Document_Name': 'Principal-Sample-Life-Insur...


In [123]:
extracted_text_df

Unnamed: 0,page_no.,text,Document_name,text_length,Metadata
0,Page 1,DOROTHEA GLAUSE S655 RHODE ISLAND JOHN DOE 01/...,Principal-Sample-Life-Insurance-Policy.pdf,30,{'Document_Name': 'Principal-Sample-Life-Insur...
2,Page 3,POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,Principal-Sample-Life-Insurance-Policy.pdf,230,{'Document_Name': 'Principal-Sample-Life-Insur...
4,Page 5,PRINCIPAL LIFE INSURANCE COMPANY (called The P...,Principal-Sample-Life-Insurance-Policy.pdf,110,{'Document_Name': 'Principal-Sample-Life-Insur...
5,Page 6,TABLE OF CONTENTS PART I - DEFINITIONS PART II...,Principal-Sample-Life-Insurance-Policy.pdf,153,{'Document_Name': 'Principal-Sample-Life-Insur...
6,Page 7,Section A – Eligibility Member Life Insurance ...,Principal-Sample-Life-Insurance-Policy.pdf,176,{'Document_Name': 'Principal-Sample-Life-Insur...
7,Page 8,Section A - Member Life Insurance Schedule of ...,Principal-Sample-Life-Insurance-Policy.pdf,171,{'Document_Name': 'Principal-Sample-Life-Insur...
8,Page 9,P ART I - DEFINITIONS When used in this Group ...,Principal-Sample-Life-Insurance-Policy.pdf,387,{'Document_Name': 'Principal-Sample-Life-Insur...
9,Page 10,T he legally recognized union of two eligible ...,Principal-Sample-Life-Insurance-Policy.pdf,251,{'Document_Name': 'Principal-Sample-Life-Insur...
10,Page 11,(2) has been placed with the Member or spouse ...,Principal-Sample-Life-Insurance-Policy.pdf,299,{'Document_Name': 'Principal-Sample-Life-Insur...
11,Page 12,An institution that is licensed as a Hospital ...,Principal-Sample-Life-Insurance-Policy.pdf,352,{'Document_Name': 'Principal-Sample-Life-Insur...


In [124]:
import os

# Read the OpenAI key from the environment so it is never hard-coded in the notebook.
# NOTE(review): os.getenv returns None when OPEN_AI_API_KEY is unset, and nothing
# here checks for that — confirm the variable is exported before running. Also note
# the name differs from the more common OPENAI_API_KEY; verify which one the
# `openai` calls further down rely on.
open_ai_apikey = os.getenv("OPEN_AI_API_KEY")

In [125]:
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

In [126]:
import chromadb

In [127]:
chroma_db_path = "./ChromaDB_Data"

In [128]:
client = chromadb.PersistentClient(chroma_db_path)

In [129]:
model = "text-embedding-ada-002"

In [130]:
embedding_function = OpenAIEmbeddingFunction(api_key=open_ai_apikey, model_name=model)

In [131]:
insurance_collection = client.get_or_create_collection(
    name="RAG_on_Insurance", embedding_function=embedding_function
)

In [132]:
# Plain Python lists of page texts and their metadata dicts, in row order.
documents_list = extracted_text_df["text"].tolist()
metadata_list = extracted_text_df["Metadata"].tolist()

In [133]:
# The collection is persistent and the ids are positional ("0", "1", ...), so a
# re-run of the notebook targets ids that already exist. `upsert` inserts new ids
# and overwrites existing ones, which keeps this cell idempotent and keeps the
# stored documents in sync with the current extraction.
insurance_collection.upsert(
    documents=documents_list,
    ids=[str(position) for position in range(len(documents_list))],
    metadatas=metadata_list,
)

In [134]:
insurance_collection.get(
    ids=["0", "1", "2"], include=["embeddings", "documents", "metadatas"]
)

{'ids': ['0', '1', '2'],
 'embeddings': array([[-2.24228799e-02,  1.87183432e-02, -2.72361692e-02, ...,
         -3.69149223e-02,  2.83710100e-03, -1.30930578e-03],
        [-1.32036693e-02,  8.89394712e-03, -4.63569583e-03, ...,
         -1.57016590e-02, -4.11756846e-05,  7.26064574e-03],
        [-1.21506359e-02,  1.41138397e-02, -3.34440940e-03, ...,
         -2.85195056e-02, -9.51092318e-03,  1.03134494e-02]],
       shape=(3, 1536)),
 'documents': ['DOROTHEA GLAUSE S655 RHODE ISLAND JOHN DOE 01/01/2014 711 HIGH STREET GEORGE RI 02903 GROUP POLICY FOR: RHODE ISLAND JOHN DOE ALL MEMBERS Group Member Life Insurance Print Date: 07/16/2014',
  'POLICY RIDER GROUP INSURANCE POLICY NO: S655 COVERAGE: Life EMPLOYER: RHODE ISLAND JOHN DOE Effective on the later of the Date of Issue of this Group Policy or March 1, 2005, the following will apply to your Policy: From time to time The Principal may offer or provide certain employer groups who apply for coverage with The Principal a Financial 

In [135]:
cache_collection = client.get_or_create_collection(
    name="insurance_cache", embedding_function=embedding_function
)

In [136]:
cache_collection.peek()

{'ids': ['what is the policy on accidental death coverage?',
  'What is the policy on fraud?'],
 'embeddings': array([[ 0.00575554,  0.0070032 , -0.00182219, ..., -0.01144989,
         -0.0139384 , -0.02806718],
        [ 0.00288425, -0.02002897, -0.00132154, ..., -0.00309917,
         -0.0103623 , -0.02798281]], shape=(2, 1536)),
 'documents': ['what is the policy on accidental death coverage?',
  'What is the policy on fraud?'],
 'uris': None,
 'included': ['metadatas', 'documents', 'embeddings'],
 'data': None,
 'metadatas': [{'documents2': 'Section A – Eligibility Member Life Insurance Article 1 Member Accidental Death and Dismemberment Insurance Article 2 Dependent Life Insurance Article 3 Section B - Effective Dates Member Life Insurance Article 1 Member Accidental Death and Dismemberment Insurance Article 2 Dependent Life Insurance Article 3 Section C - Individual Terminations Member Life Insurance Article 1 Member Accidental Death and Dismemberment Insurance Article 2 Dependent

In [137]:
# Read the user's question interactively.
# NOTE(review): this rebinds `query`, which the import cell also imports from
# `oauthlib.uri_validate`; from here on `query` is the question string.
# NOTE(review): input() blocks "Restart & Run All" until something is typed.
query = input()
# Ask the cache collection for the single closest previously stored question.
cache_results = cache_collection.query(query_texts=query, n_results=1)

In [138]:
cache_results

{'ids': [['What is the policy on fraud?']],
 'embeddings': None,
 'documents': [['What is the policy on fraud?']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[{'documents5': "T he Principal may terminate the Policyholder's coverage on any premium due date if the Policyholder relocates to a state where this Group Policy is not marketed, by giving the Policyholder 31 days advanced notice in Writing. Article 4 - Policyholder Responsibility to Members If this Group Policy terminates for any reason, the Policyholder must: a. notify each Member of the effective date of the termination; and b. refund or otherwise account to each Member all contributions received or withheld from Members for premiums not actually paid to The Principal. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 2",
    'ids1': '15',
    'documents0': "a. be actively engaged in busines

In [139]:
# Largest cache distance at which a previously asked question is treated as
# equivalent to the current query.
threshold = 0.1
# Fields of a Chroma query result that are cached for every hit.
result_fields = ("ids", "documents", "distances", "metadatas")

print(query)
ids = []
documents = []
distances = []
metadatas = []

cached_distances = cache_results["distances"][0]
if not cached_distances or cached_distances[0] > threshold:
    # Cache miss: search the main collection and remember the answer.
    results = insurance_collection.query(query_texts=query, n_results=10)

    # Flatten the hits into one flat metadata dict, "<field><rank>" -> str(value).
    # Iterate over the hits of each field (not over the keys of `results`) so
    # every hit is stored, and only the per-hit fields are stored.
    cache_metadata = {}
    for field in result_fields:
        hits = results.get(field)
        if not hits:
            continue
        for rank, value in enumerate(hits[0]):
            cache_metadata[f"{field}{rank}"] = str(value)

    # Nothing to remember when the main collection returned no hits.
    if cache_metadata:
        cache_collection.add(
            documents=[query], ids=[query], metadatas=[cache_metadata]
        )
    print("Not found in cache. Found in main collection.")

    result_dict = {
        "Metadatas": results["metadatas"][0],
        "Documents": results["documents"][0],
        "Distances": results["distances"][0],
        "IDs": results["ids"][0],
    }
    results_df = pd.DataFrame.from_dict(result_dict)
else:
    # Cache hit: rebuild the result rows from the flat "<field><rank>" entries.
    # The stored dict does not come back in insertion order, so group the values
    # by their rank suffix instead of appending them in iteration order; that
    # keeps id, document, distance and metadata of one hit on the same row.
    cached_fields = cache_results["metadatas"][0][0] or {}
    rows_by_rank = {}
    for key, value in cached_fields.items():
        for field in result_fields:
            rank = key[len(field):]
            if key.startswith(field) and rank.isdigit():
                rows_by_rank.setdefault(int(rank), {})[field] = value
                break

    for rank in sorted(rows_by_rank):
        row = rows_by_rank[rank]
        ids.append(row.get("ids"))
        documents.append(row.get("documents"))
        cached_distance = row.get("distances")
        # Distances are cached as text; restore them to numbers so they sort
        # numerically like the cache-miss results.
        distances.append(
            float(cached_distance) if cached_distance is not None else None
        )
        # Metadata is cached as the text form of the original dict and is
        # returned as that text.
        metadatas.append(row.get("metadatas"))

    print("Found in cache!")

    results_df = pd.DataFrame(
        {
            "Metadatas": metadatas,
            "Documents": documents,
            "Distances": distances,
            "IDs": ids,
        }
    )

What is the policy on eye issues?
Not found in cache. Found in main collection.


In [140]:
results_df

Unnamed: 0,Metadatas,Documents,Distances,IDs
0,"{'page_no.': 'Page 19', 'Document_Name': 'Prin...",T he Principal has complete discretion to cons...,0.238446,16
1,{'Document_Name': 'Principal-Sample-Life-Insur...,a . A licensed Doctor of Medicine (M.D.) or Os...,0.239111,10
2,"{'page_no.': 'Page 17', 'Document_Name': 'Prin...",a. be actively engaged in business for profit ...,0.239433,14
3,"{'page_no.': 'Page 3', 'Document_Name': 'Princ...",POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,0.239679,1
4,{'Document_Name': 'Principal-Sample-Life-Insur...,c . a copy of the form which contains the stat...,0.240216,15
5,{'Document_Name': 'Principal-Sample-Life-Insur...,Section D - Policy Renewal Article 1 - Renewal...,0.245646,22
6,{'Document_Name': 'Principal-Sample-Life-Insur...,"f . claim requirements listed in PART IV, Sect...",0.245844,51
7,"{'page_no.': 'Page 24', 'Document_Name': 'Prin...",T he Principal may terminate the Policyholder'...,0.247299,21
8,"{'page_no.': 'Page 62', 'Document_Name': 'Prin...",A claimant may request an appeal of a claim de...,0.247675,59
9,{'Document_Name': 'Principal-Sample-Life-Insur...,b. a business assignment; or c. full-time stud...,0.249203,34


In [141]:
from sentence_transformers import CrossEncoder, util

In [142]:
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

In [143]:
scores = cross_encoder.predict(
    [
        [
            "Does the insurance cover diabetic patients?",
            "The insurance policy covers some pre-existing conditions including diabetes, heart diseases, etc. The policy does not howev",
        ],
        [
            "Does the insurance cover diabetic patients?",
            "The premium rates for various age groups are given as follows. Age group (<18 years): Premium rate",
        ],
    ]
)

In [144]:
scores

array([  3.8467631, -11.252879 ], dtype=float32)

In [145]:
# Pair the query with every retrieved page and score each pair with the cross-encoder.
cross_inputs = [[query, passage] for passage in results_df["Documents"].tolist()]
cross_rerank_scores = cross_encoder.predict(cross_inputs)

In [146]:
cross_rerank_scores

array([-10.665933,  -8.821062, -10.903672,  -9.603291, -10.592499,
       -10.948286,  -6.821826, -11.034771, -10.938913, -10.822127],
      dtype=float32)

In [147]:
results_df["Reranked_scores"] = cross_rerank_scores

In [148]:
results_df

Unnamed: 0,Metadatas,Documents,Distances,IDs,Reranked_scores
0,"{'page_no.': 'Page 19', 'Document_Name': 'Prin...",T he Principal has complete discretion to cons...,0.238446,16,-10.665933
1,{'Document_Name': 'Principal-Sample-Life-Insur...,a . A licensed Doctor of Medicine (M.D.) or Os...,0.239111,10,-8.821062
2,"{'page_no.': 'Page 17', 'Document_Name': 'Prin...",a. be actively engaged in business for profit ...,0.239433,14,-10.903672
3,"{'page_no.': 'Page 3', 'Document_Name': 'Princ...",POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,0.239679,1,-9.603291
4,{'Document_Name': 'Principal-Sample-Life-Insur...,c . a copy of the form which contains the stat...,0.240216,15,-10.592499
5,{'Document_Name': 'Principal-Sample-Life-Insur...,Section D - Policy Renewal Article 1 - Renewal...,0.245646,22,-10.948286
6,{'Document_Name': 'Principal-Sample-Life-Insur...,"f . claim requirements listed in PART IV, Sect...",0.245844,51,-6.821826
7,"{'page_no.': 'Page 24', 'Document_Name': 'Prin...",T he Principal may terminate the Policyholder'...,0.247299,21,-11.034771
8,"{'page_no.': 'Page 62', 'Document_Name': 'Prin...",A claimant may request an appeal of a claim de...,0.247675,59,-10.938913
9,{'Document_Name': 'Principal-Sample-Life-Insur...,b. a business assignment; or c. full-time stud...,0.249203,34,-10.822127


In [149]:
# Rank by embedding distance (smallest first) and show the three closest pages.
top_3_semantic = results_df.sort_values("Distances", ascending=True)
top_3_semantic.iloc[:3]

Unnamed: 0,Metadatas,Documents,Distances,IDs,Reranked_scores
0,"{'page_no.': 'Page 19', 'Document_Name': 'Prin...",T he Principal has complete discretion to cons...,0.238446,16,-10.665933
1,{'Document_Name': 'Principal-Sample-Life-Insur...,a . A licensed Doctor of Medicine (M.D.) or Os...,0.239111,10,-8.821062
2,"{'page_no.': 'Page 17', 'Document_Name': 'Prin...",a. be actively engaged in business for profit ...,0.239433,14,-10.903672


In [150]:
# Rank by cross-encoder score (largest first) and show the three best pages.
top_3_rerank = results_df.sort_values("Reranked_scores", ascending=False)
top_3_rerank.iloc[:3]

Unnamed: 0,Metadatas,Documents,Distances,IDs,Reranked_scores
6,{'Document_Name': 'Principal-Sample-Life-Insur...,"f . claim requirements listed in PART IV, Sect...",0.245844,51,-6.821826
1,{'Document_Name': 'Principal-Sample-Life-Insur...,a . A licensed Doctor of Medicine (M.D.) or Os...,0.239111,10,-8.821062
3,"{'page_no.': 'Page 3', 'Document_Name': 'Princ...",POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,0.239679,1,-9.603291


In [151]:
# Context handed to the generator: text and metadata of the three best re-ranked pages.
top_3_RAG = top_3_rerank.iloc[:3][["Documents", "Metadatas"]]

In [152]:
top_3_RAG

Unnamed: 0,Documents,Metadatas
6,"f . claim requirements listed in PART IV, Sect...",{'Document_Name': 'Principal-Sample-Life-Insur...
1,a . A licensed Doctor of Medicine (M.D.) or Os...,{'Document_Name': 'Principal-Sample-Life-Insur...
3,POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...,"{'page_no.': 'Page 3', 'Document_Name': 'Princ..."


In [153]:
def generate_response(query, top_3_RAG, model="gpt-3.5-turbo"):
    """Answer ``query`` with an OpenAI chat model, grounded in the retrieved pages.

    Args:
        query: The user's question.
        top_3_RAG: The retrieved search results. When it offers ``to_json`` (as a
            pandas DataFrame does) it is serialised as JSON records so the model
            receives the complete cell contents; interpolating the DataFrame
            directly would insert its display form, which abbreviates long cells
            with "...". Any other object is inserted via ``str``.
        model: Name of the chat model to call. Defaults to ``"gpt-3.5-turbo"``.

    Returns:
        The model's reply split into a list of lines (empty content yields ``[""]``).
    """
    if hasattr(top_3_RAG, "to_json"):
        # One JSON object per row, keyed by column name; keep non-ASCII text readable.
        search_results = top_3_RAG.to_json(orient="records", force_ascii=False)
    else:
        search_results = str(top_3_RAG)

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant in the insurance domain who can effectively answer user queries about insurance policies and documents.",
        },
        {
            "role": "user",
            "content": f"""You are a helpful assistant in the insurance domain who can effectively answer user queries about insurance policies and documents.
                                                You have a question asked by the user in '{query}' and you have some search results from a corpus of insurance documents in the dataframe '{search_results}'. These search results are essentially one page of an insurance document that may be relevant to the user query.

                                                The column 'documents' inside this dataframe contains the actual text from the policy document and the column 'metadata' contains the policy name and source page. The text inside the document may also contain tables in the format of a list of lists where each of the nested lists indicates a row.

                                                Use the documents in '{search_results}' to answer the query '{query}'. Frame an informative answer and also, use the dataframe to return the relevant policy names and page numbers as citations.

                                                Follow the guidelines below when performing the task.
                                                1. Try to provide relevant/accurate numbers if available.
                                                2. You don’t have to necessarily use all the information in the dataframe. Only choose information that is relevant.
                                                3. If the document text has tables with relevant information, please reformat the table and return the final information in a tabular format.
                                                4. Use the Metadatas columns in the dataframe to retrieve and cite the policy name(s) and page numbers(s) as citation.
                                                5. If you can't provide the complete answer, please also provide any information that will help the user to search specific sections in the relevant cited documents.
                                                6. You are a customer facing assistant, so do not provide any information on internal workings, just answer the query directly.

                                                The generated response should answer the query directly addressing the user and avoiding additional information. If you think that the query is not relevant to the document, reply that the query is irrelevant. Provide the final response as a well-formatted and easily readable text along with the citation. Provide your complete response first with all information, and then provide the citations.
                                                """,
        },
    ]

    response = openai.chat.completions.create(model=model, messages=messages)

    # The API may return no text content; treat that as an empty answer
    # instead of failing on `.split`.
    content = response.choices[0].message.content or ""
    return content.split("\n")

In [154]:
response = generate_response(query, top_3_RAG)

In [155]:
print("\n".join(response))

The policy on eye issues states that a licensed Doctor of Medicine (M.D.) or Osteopathic Medicine (D.O.) needs to assess the eye condition. The specific claim requirements for eye issues can be found in PART IV, Section F of the insurance document "Principal-Sample-Life-Insurance-Policy." For further details and procedures regarding eye issues, refer to this section for accurate information.

**Citation:**
- Document Name: Principal-Sample-Life-Insurance-Policy
- Relevant Page Number: Page 3


In [156]:
# client.delete_collection('insurance_cache')

In [157]:
# client.delete_collection('RAG_on_Insurance')