# Process The Data 

## Load the needed libraries

In [18]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.pinecone import Pinecone
from mlflow.deployments import get_deploy_client
from langchain_community.embeddings import MlflowEmbeddings
from domino_data.vectordb import DominoPineconeConfiguration


import pinecone

import os
import random
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Set environment variables
PINECONE_ENV = os.getenv('PINECONE_API_ENV')

### Load and chunk the PDF document 

In [3]:
# Load and chunk PDF using pypdf into documents array 
# where each document contains the page content and metadata with page number.
loader = PyPDFLoader("/mnt/code/data/Northwind_Health_Plus_Benefits_Details.pdf")
texts = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0))

In [5]:
# Check the number of pages
print(f"There are {len(texts)} pages in the document")

There are 378 pages in the document


In [6]:
# Pick a sample page
print(texts[random.randint(0, len(texts))])

page_content='prior authorizat ion if it is determined that the services are medically necessary. However, \nplease contact Northwind Health Plus to ensure that the outpatient rehabilitative provider \nyou are visiting is an in -network provider.  \nLong -Term Care Services: Long -term care serv ices are covered without prior authorization \nif it is determined that the services are medically necessary. Please contact Northwind \nHealth Plus to ensure that the long -term care provider you are visiting is an in -network \nprovider.  \nHospice Care Services: Hospice care services are covered without prior authorization if it is \ndetermined that the services are medically necessary. Please contact Northwind Health Plus \nto ensure that the hospice care provider you are visiting is an in -network provider.  \nHome Hea lth Services: Home health services are covered without prior authorization if it is \ndetermined that the services are medically necessary. Please contact Northwind Health P

### Create embeddings to embed queries using Domino AI Gateway Endpoint in LangChain

In [19]:
embed = MlflowEmbeddings(
    target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
    endpoint="embedding-ada-002ja2",
)

In [9]:
# Check index current stats:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.00318,
 'namespaces': {'': {'vector_count': 318}},
 'total_vector_count': 318}

### Initialize Pinecone client connection using a Domino-specific Environment

In [8]:
datasource_name = "nwh-benefits"

#Vector Access 
conf = DominoPineconeConfiguration(datasource=datasource_name)

# api_key variable is a mandatory non-empty field used for 
# the native pinecone python client initialization 
api_key = os.environ.get("DOMINO_VECTOR_DB_METADATA", datasource_name)

# Initialize pinecone
pinecone.init(
    api_key=api_key,
    environment=PINECONE_ENV,
    openapi_config=conf
)

# Previously created index
index_name = "nwh-plus-benefits"
index = pinecone.Index(index_name)

In [10]:
#
docsearch = Pinecone.from_texts([t.page_content for t in texts], embed.embed_query, index_name=index_name)

### Test with similarity search

In [17]:
# Ask your query
query = "Does my policy cover hearing?"
# Get the closest matches to create some context and information for the answer
docs = docsearch.similarity_search(query)

In [14]:
print(docs)

[Document(page_content='other expenses that you may incur as a result of an accident. However, it is important to \nnote that UM/UIM coverage only applies when the other driver is at fault, and does not \ncover damage to your vehicle or property.  \nThere are a few exceptions to the coverage provided by UM/UIM coverage. For example, it \ndoes not cover intentional acts of another driver, damage caused by an uninsured or \nunderinsured driver in a hit -and-run accident, or damage to your vehicle or property.  \nIt is important to understand the limits of your UM/UIM coverage and any exclusions that \nmay apply. You should make sure to review your policy thoroughly to ensure that you are \nproperly covered.  \nIn addit ion to understanding the limits of your coverage, there are other tips that you can \nuse to help protect yourself in the event of an accident.  \nFirst, make sure that you have a valid driver’s license and that the other driver does as well.'), Document(page_content='Exce