In [2]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Weaviate
import weaviate
import os
import weaviate.classes as wvc
import requests

In [3]:
#Load every PDF found in a directory
def load_pdf(data):
    """Load all ``*.pdf`` files located directly under the ``data`` directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files,
        each file being parsed with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [4]:
# Read the PDFs stored in the relative "data/" folder
extracted_data = load_pdf(data="data/")

In [5]:
#Create text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        extracted_data: Documents to split (e.g. the value returned by
            ``load_pdf``); passed unchanged to ``split_documents``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            this notebook has always used.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 20, the value this notebook has always used.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [6]:
# Chunk the loaded documents and report how many chunks were produced
text_chunks = text_split(extracted_data=extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 62


In [7]:
#Build the sentence-embedding model
def download_hugging_face_embeddings():
    """Create the embedding model used throughout this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [8]:
# Instantiate the embedding model once; later cells reuse `embeddings`
# for the test query and for building the vector store.
embeddings = download_hugging_face_embeddings()

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [9]:
# Sanity check: embed a short test string and print the length of the
# returned vector (the recorded run printed 384).
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))


Length 384


In [10]:
# Weaviate connection settings are read from the environment so that no
# credentials are stored in the notebook. Export WEAVIATE_URL and
# WEAVIATE_API_KEY before starting the kernel.
weaviate_url = os.environ.get("WEAVIATE_URL")
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")
if not weaviate_url or not weaviate_api_key:
    raise RuntimeError(
        "Set the WEAVIATE_URL and WEAVIATE_API_KEY environment variables "
        "before running this cell."
    )

# Build the vector store from the chunk texts. Only `page_content` is passed,
# so the chunks' metadata is not sent to the store.
docsearch = Weaviate.from_texts(
    [t.page_content for t in text_chunks],
    embeddings,
    weaviate_url=weaviate_url,
    weaviate_api_key=weaviate_api_key,
)

            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            


In [11]:
# Quick retrieval check: fetch the 3 chunks closest to a sample question
query = "What is mental health"
docs = docsearch.similarity_search(query=query, k=3)
print("Result", docs)

Result [Document(page_content='Participating in educational and support groups for caregivers is a good way to manage your stress levels.  You are likely to meet others who are in a similar situation as you, and you can share and find ways to overcome your problems together.\nKeep up-to-date with knowledge about mental illnesses \nKeeping yourself updated with the latest information about mental illnesses can enable you to be an active'), Document(page_content='Common Challenges\nDifficulties in caring for a loved one with mental illness'), Document(page_content='DEAR CAREGIVER,\nWe understand that caring for your loved one with mental \nillness is not easy. As a caregiver, you may lack the necessary support or knowledge on how to manage your loved one’s symptoms or challenges as a result of the illness. Over time, you are at risk of feeling distressed and burnt out.\nThis Caregivers’ Guide is produced with the aim of giving you \nbasic but useful information on how to care for yoursel

In [12]:
# Raw prompt text for the question-answering chain.
# `{context}` and `{question}` are the two placeholders; they are declared as
# the template's input variables when the PromptTemplate is built.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [13]:
# Wrap the raw template text in a PromptTemplate and package it in the
# keyword-argument dict that is handed to the chain constructor.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [14]:
# Language model loaded through CTransformers from a relative model file path
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q2_K.bin",
    model_type="llama",
    config=dict(max_new_tokens=512, temperature=0.8),
)

In [15]:
# Retriever over the vector store, configured with k=2
retriever = docsearch.as_retriever(search_kwargs={"k": 2})

# "stuff" retrieval-QA chain that uses the custom prompt and also returns
# the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [16]:
# Interactive question/answer loop.
# Type "exit" or "quit" to stop; interrupting the kernel (Ctrl-C) or closing
# the input stream also ends the loop cleanly instead of raising a traceback.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        break

    command = user_input.strip().lower()
    if command in ("exit", "quit"):
        break
    if not command:
        # Nothing was typed: ask again rather than sending an empty query.
        continue

    result = qa({"query": user_input})
    print("Response : ", result["result"])