In [75]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [76]:
# Extract data from pdf
def load_pdf_file(data):
    loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    return loader.load()

In [77]:
extract_data = load_pdf_file(data='../Data/')

In [78]:
#split text
def text_split(extract_data):
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extract_data)

In [79]:
text_chunks=text_split(extract_data)
print("LENGTH OF TEXT CHUNKS: ", len(text_chunks))

LENGTH OF TEXT CHUNKS:  800


In [80]:
from langchain.embeddings import HuggingFaceEmbeddings

In [81]:
def download_huggingface_embedding():
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings

In [82]:
embedding = download_huggingface_embedding()

In [83]:
query = "Hello world"
query_embedding = embedding.embed_query(query)
print("QUERY EMBEDDING: ", len(query_embedding))

QUERY EMBEDDING:  384


In [84]:
import cassio
from dotenv import load_dotenv
import os
load_dotenv()

ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID_MULTI_AGENT = os.getenv("ASTRA_DB_ID_MULTI_AGENT")

ASTRA_DB_ID_MULTI_AGENT

'347103f4-38c3-48b1-99f5-e8fefdcd37f3'

In [85]:
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID_MULTI_AGENT)

In [86]:
from langchain.vectorstores.cassandra import Cassandra

In [87]:
astra_vector_store = Cassandra(
    embedding=embedding,
    table_name="medi_tbl",
    session=None,
    keyspace=None
)

In [88]:
astra_vector_store.add_documents(documents=text_chunks)

['00ad4153f6e847928561617a0b8cecab',
 '1bb873adb0574fdea653df8de3631bbb',
 '8a447bd3a5d8468883dd3b61b16b2a92',
 '9ad3de4b4cb8463fa339b12f6d1b4342',
 '8b0e62a23ba949678ad698481d8fad1b',
 '5e642c5522fb4f82b436eb4157ea3864',
 'ab939a794e4046c2a24ba168a4905881',
 'e483f16d782a4019bd2d9ca54e19e4d5',
 '3d5c8cfe1d714bd8845918e3dfd9942f',
 '030d7befe91d41bd878f202053738dc7',
 '882dc338104b49c58a18f6747c10e42f',
 'ef01a308f5eb4529969e6b896fd86665',
 '3a4a53d89f394e33b705434fcd8c39a1',
 'e7ee41084bb647ac8032c4b47bc19355',
 'c2db8e35a2384ac1bedfe779daaba5ed',
 'f13538ea68104546bf87382f63e6894b',
 '3a690c4b20c74856988f2fbe23fc7492',
 '18966b00f8584459bf6c9fd35612c28e',
 '38c876f6bfb549bf9bcf6cd3b2dfb2ed',
 'cfa7df8672c64db2b80a1d46935b75e3',
 '8d84d69c745143668c11e2869fa31aae',
 '1eadb005a26c4d9dac6c004b2f1bc2a8',
 'e405ee3ad25b4a139594f0d51eeb439a',
 '0328d20f8ca4449d9b425fb6968398c6',
 'f2c9640b642c4b728b811750f3ed4484',
 '432a692623d9440ab87373ed3265b137',
 '9d0f05b146bf4dc29fb164e9af895c0f',
 

In [89]:
retriever = astra_vector_store.as_retriever()

In [90]:
query = "what is Transsphenoidal ?"
retriever.invoke(input=query, top_k=5)

[Document(id='03f94a476289404cb9aabf78160c52a6', metadata={'author': 'Tierney, Lawrence M.; Saint, Sanjay.; Whooley, Mary A.', 'creationdate': '2010-12-22T13:15:56+00:00', 'creator': 'PyPDF', 'keywords': '0071637907\r\n9780071637909', 'moddate': '2018-08-02T09:28:12+05:30', 'page': '185.0', 'page_label': '173', 'producer': 'PyPDF', 'source': '../Data/Current Essentials of Medicine(1)(1).pdf', 'subject': 'McGraw-Hill Medical', 'title': 'Current Essentials of Medicine', 'total_pages': '607.0'}, page_content='it is an antibody to phospholipid, which produces in vitro prolongation\nof the PTT in many cases.\nReference\nFrancis L, Perl A. Pharmacotherapy of systemic lupus erythematosus. Expert Opin\nPharmacother 2009;10:1481. [PMID: 19505215]'),
 Document(id='f9f865b4fa06456b95572a715588da73', metadata={'author': 'Tierney, Lawrence M.; Saint, Sanjay.; Whooley, Mary A.', 'creationdate': '2010-12-22T13:15:56+00:00', 'creator': 'PyPDF', 'keywords': '0071637907\r\n9780071637909', 'moddate': '20

In [91]:
from langchain_openai import ChatOpenAI
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

print(OPENAI_API_KEY)

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


In [95]:
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o", temperature=0.5, max_completion_tokens=500)

In [97]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [98]:
 sytem_prompt=(
        "You are an assistent for question and answer the task"
        "use the following piece of retrieved context to answer"
        "the question , you don't now the answer, say that you"
        "don't know . Use three sentences maximum and keep the "
        "answer concise"
        "\n\n"
        "{context}"
    )

In [100]:
prompt  = ChatPromptTemplate.from_messages(
    [
        ("system",sytem_prompt),
        ("human", "{input}")
    ]
)

In [105]:
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [110]:
response = rag_chain.invoke({"input": "What is Psychiatric Disorders ?"})

In [111]:
response["answer"]

"Psychiatric disorders are mental health conditions characterized by disturbances in thought, emotion, or behavior that cause significant distress or impairment in functioning. They include a wide range of conditions such as mood disorders, anxiety disorders, psychotic disorders, and personality disorders. Diagnosis often involves assessing symptoms, history, and the impact on the individual's life."