In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
import os 
from dotenv import load_dotenv

In [2]:
# Collect the PDFs matched by "./*.pdf" inside the PDF_file folder and parse
# each of them with PyPDFLoader.
loader = DirectoryLoader(
    'PDF_file',
    glob="./*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()

In [3]:
# Cut the loaded documents into chunks of at most 1000 units with a 200-unit
# overlap between neighbours, so text near a boundary appears in both chunks.
# NOTE(review): units are whatever the splitter's default length function
# counts (presumably characters) — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

In [4]:
# Sanity check: number of chunks produced by the splitter.
len(texts)

69

In [5]:
# Peek at a single chunk to eyeball the extracted text and its metadata.
texts[3]

Document(page_content='both concurrently and  longitudinally6. This suggests that social anxiety may contribute to poorer outcomes in \nindividuals with psychosis, even when present at subclinical levels.\nParanoia, the exaggerated belief that intentional harm is done or will be done by  others7, is a common symp-\ntom of psychosis. Paranoia can manifest in milder forms as ideas of social reference or more severe forms as \npersecutory delusions 8. Albeit being distinct phenomena, paranoia and social anxiety are both characterized by \nappraisals of social threat: paranoia concerns imminent and ongoing physical, psychological or social harms by \n others7, whereas social anxiety reflects worry about rejection, embarrassment and  scrutiny9. Among individuals \nwith first-episode psychosis, those with comorbid social anxiety disorder reported more persecutory threats than \nthose without  comorbidity3. Across non-patient and community samples, correlations between subclinical levels', me

In [6]:
# Load variables from a local .env file into the process environment.
# The return value is intentionally not bound to `_`: assigning `_` in IPython
# stops the kernel from updating its "last output" shortcut, and the call is
# not the last expression of the cell, so nothing is displayed either way.
load_dotenv()

# Indexing os.environ raises KeyError right here when the token is not set.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# LLM used for answer generation, accessed through the Hugging Face Hub wrapper.
# NOTE(review): "max_length" is forwarded unchanged inside model_kwargs; confirm
# the hosted text-generation endpoint honours it (it may expect
# "max_new_tokens" instead).
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.2, "max_length": 256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
import torch
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (more slowly) on machines without CUDA.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): the checkpoint is a BGE model but it is loaded through the
# Instructor wrapper — confirm this pairing is intended. Switching wrappers
# changes the vectors, so the persisted store would have to be rebuilt.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={"device": embedding_device},
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [8]:
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

# Re-running `Chroma.from_documents` against an existing persist directory adds
# the same chunks again, so the retriever starts returning duplicates.
# Reuse the stored index when it is already on disk and only embed the chunks
# when no store exists yet.
# To force a rebuild (new PDFs, different chunking or embedding model), delete
# the `db_HuggingFace` folder and run this cell again.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(persist_directory=persist_directory,
                      embedding_function=embedding)
else:
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=persist_directory)

In [16]:
# Expose the vector store as a retriever limited to the 2 best matches per
# query, then run one lookup as a smoke test.
retriever = vectordb.as_retriever(
    search_kwargs={"k": 2},
)
docs = retriever.get_relevant_documents("What is paranoia?")

In [10]:
# Should equal the `k` configured on the retriever.
len(docs)

2

In [None]:
# Question-answering chain: the retrieved chunks are placed into a single
# prompt ("stuff") for the LLM, and the chunks used are returned with the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [22]:
def process_llm_response(llm_response):
    """Print the answer of a QA-chain response, then the metadata of its first source.

    Args:
        llm_response: Mapping produced by the QA chain. ``'result'`` holds the
            answer text. ``'source_documents'`` (optional) holds the retrieved
            documents, each exposing a ``metadata`` attribute.

    Returns:
        None. Everything is written to standard output.

    A short notice is printed instead of raising when no source document is
    available, i.e. the list is empty or the key is absent.
    """
    print(llm_response['result'])

    # Tolerate both a missing key and an empty list.
    sources = llm_response.get('source_documents') or []
    if sources:
        print(sources[0].metadata)
    else:
        print("No source documents returned.")

In [23]:
# Ask for a definition of the central concept and show the answer with its source.
query = "What is paranoia?"
process_llm_response(llm_response := qa_chain(query))

 Paranoia refers to a set of beliefs or thoughts that are irrational, suspicious, and often unfounded. It can range from mild to severe, and may involve feelings of being watched, followed, or persecuted. Paranoia is often associated with mental health conditions such as schizophrenia, anxiety disorders, and personality disorders. Research has shown that paranoia is related to factors such as social anxiety, shame, and negative beliefs about oneself and others. The study by Humphrey et al. (2021) suggests that paranoia is also associated with negative schema about the self and others. Further research is needed to better understand the causes and treatment of paranoia.
{'page': 8, 'source': 'PDF_file\\s41598-023-47912-0.pdf'}


In [None]:
# Follow-up question about the number of participants.
query = "How many young adults took part in this?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about the measurement instrument for momentary social anxiety.
query = "How do they measure Momentary social anxiety?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about how the data was collected.
query = "What is their data collection method?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Ask the chain to expand the ESM acronym.
query = "What is ESM?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about the main findings.
query = "What is the result of this study?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about the limitations of the study.
query = "What is the limitations of the current study?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about the hypothesis under test.
query = "What is the hypothesis of the study?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about the final sample size.
query = "What is the final sample size of the study?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Question about where the study was conducted.
query = "Where did the study take place?"
process_llm_response(llm_response := qa_chain(query))

In [None]:
# Inspect the retriever wired into the chain: its search type and the vector store behind it.
qa_chain.retriever.search_type , qa_chain.retriever.vectorstore

In [None]:
# Show the raw prompt text the chain fills in before calling the LLM.
# NOTE(review): `.template` is read directly from the prompt object; this
# presumably only exists on a plain (non-chat) prompt template — confirm.
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

In [None]:
# Show the first message template of the chain's prompt.
# Only chat-style prompts expose `.messages`; with a plain completion LLM the
# chain holds a single prompt template, so fall back to printing that one
# instead of raising AttributeError.
_chain_prompt = qa_chain.combine_documents_chain.llm_chain.prompt
if hasattr(_chain_prompt, "messages"):
    print(_chain_prompt.messages[0].prompt.template)
else:
    print(_chain_prompt.template)

In [None]:
# Show the second message template of the chain's prompt.
# Only chat-style prompts expose `.messages`; a plain prompt template has no
# second message, so report that instead of raising AttributeError.
_chain_prompt = qa_chain.combine_documents_chain.llm_chain.prompt
if hasattr(_chain_prompt, "messages"):
    print(_chain_prompt.messages[1].prompt.template)
else:
    print("The chain uses a single (non-chat) prompt template; there is no second message.")