In [27]:
# importing the package
import hashlib
import os

from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.vectorstores.cassandra import Cassandra

In [2]:
# Support for dataset retrieval with Hugging Face
from datasets import load_dataset

# With CassIO, the engine powering the Astra DB integration in LangChain,
# you will also initialize the DB connection:
import cassio

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

In [8]:
# Replace the following with your Astra DB connection details and your OpenAI API Key.

In [9]:
# Extract the data from the PDFs
def load_pdf(data):
    """Load the PDF files in a directory as documents.

    Args:
        data: Directory handed to ``DirectoryLoader``; files are selected
            with the ``*.pdf`` glob and read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [10]:
# Directory holding the PDFs to index. Set the PDF_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset or empty
# the original local path is used, so existing behavior is unchanged.
# NOTE: this cell runs before load_dotenv(), so PDF_DATA_DIR has to come from
# the real process environment rather than from a .env file.
extracted = load_pdf(
    os.getenv("PDF_DATA_DIR")
    or r"C:\Users\amits\Gen-AI-Projects\PDF-Query chatbot\data"
)

In [11]:
from  langchain.text_splitter import RecursiveCharacterTextSplitter
# Create text chunks
def text_split(extracted, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to
            1000, the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 200, the value that used to be hard-coded here.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted)

In [12]:
# Split the documents loaded by load_pdf into chunks for embedding.
textchunks = text_split(extracted)

In [13]:
# Number of chunks produced by the split (shown as the cell output).
len(textchunks)

3556

In [28]:
# Load settings with python-dotenv; the cells below read ASTRADB_TOKEN,
# ASTRADB_ID and OPENAI_API_KEY from the environment.
load_dotenv()

True

In [29]:
# initialize the connection to your database
# Both credentials come from the environment; a missing variable is passed
# through to cassio.init as None.
cassio.init(
    token=os.environ.get("ASTRADB_TOKEN"),
    database_id=os.environ.get("ASTRADB_ID"),
)

In [30]:
# The completion model and the embedding model both authenticate with the
# same OPENAI_API_KEY environment variable.
llm = OpenAI(
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
)
embedding = OpenAIEmbeddings(
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
)

In [20]:
# Create your LangChain vector store, backed by Astra DB

In [21]:
# Vector store over the 'qa_mini_demo' table, embedding text with `embedding`.
# session and keyspace are passed as None — presumably so the connection set
# up by cassio.init() is used (TODO confirm against the LangChain docs).
astra_vector_store = Cassandra(
    table_name='qa_mini_demo',
    embedding=embedding,
    keyspace=None,
    session=None,
)

In [22]:
# Check the container type of textchunks before handing it to the vector store.
type(textchunks)

list

In [23]:
# Derive a deterministic row id for every chunk from its metadata and text.
# Without explicit ids, each run of this cell stores a fresh copy of every
# chunk, so re-running the notebook duplicates the whole corpus in the table.
# With stable ids a re-run writes to the same rows instead.
chunk_ids = [
    hashlib.sha256(
        f"{sorted(chunk.metadata.items())!r}|{chunk.page_content}".encode(
            "utf-8", errors="replace"
        )
    ).hexdigest()
    for chunk in textchunks
]
astra_vector_store.add_documents(textchunks, ids=chunk_ids)

# Wrap the store so it can be queried together with an LLM via .query(...).
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

In [26]:
def query_and_print(query_string):
    """Ask the vector index a question and print the answer.

    Calls ``astra_vector_index.query`` with the module-level ``llm``, strips
    leading and trailing whitespace from the returned text, and prints it.

    Args:
        query_string: The question passed to the index.

    Returns:
        None; the answer is only written to standard output.
    """
    raw_answer = astra_vector_index.query(query_string, llm=llm)
    print(raw_answer.strip())

# Example usage: ask the index built from the PDF chunks about fever symptoms
# and print the answer.
query_and_print('Fever symptoms')


Fever over 101°F (38.3°C), chills, malaise, abdominal pain, nausea, vomiting, diarrhea, anxiety, shortness of breath, confusion. Not all of these symptoms are usually present.
