In [13]:
%%writefile requirements.txt
langchain
langchain-community
llama-parse
fastembed
chromadb
python-dotenv
langchain-groq
chainlit
unstructured[md]
groq
joblib
nest_asyncio

Overwriting requirements.txt


In [14]:
!pip install -r requirements.txt



In [15]:
import os

from llama_parse import LlamaParse

# Read the key from the environment instead of embedding it in the notebook.
# A missing LLAMA_CLOUD_API_KEY fails here with a KeyError rather than later
# with an authentication error.
parser = LlamaParse(
    api_key=os.environ["LLAMA_CLOUD_API_KEY"],
    result_type="markdown",  # "markdown" and "text" are available
    verbose=True
)

In [16]:
#from google.colab import userdata

# userdata.get() takes the NAME of a Colab secret, never the secret value itself.
#llamaparse_api_key = userdata.get('LLAMA_CLOUD_API_KEY')
#groq_api_key = userdata.get('GROQ_API_KEY')

In [17]:
##### LLAMAPARSE #####
from llama_parse import LlamaParse

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
#
from groq import Groq
from langchain_groq import ChatGroq
#
import joblib
import os
import nest_asyncio  # noqa: E402
nest_asyncio.apply()

In [18]:
#!mkdir data
#
def load_or_parse_data(data_file="./data/parsed_data.pkl",
                       pdf_path="./data/yoga-therapy.pdf"):
    """
    Return the parsed documents, using an on-disk cache when one exists.

    If ``data_file`` exists it is loaded with joblib and returned. Otherwise
    ``pdf_path`` is parsed with LlamaParse (markdown output), the result is
    dumped to ``data_file`` and then returned.

    Args:
        data_file: Path of the joblib cache for the parsed documents.
        pdf_path: Path of the PDF to parse when no cache is present.

    Returns:
        The parsed documents, as loaded from the cache or as returned by
        ``LlamaParse.load_data``.

    Raises:
        RuntimeError: If parsing is required and the LLAMA_CLOUD_API_KEY
            environment variable is not set.
    """
    if os.path.exists(data_file):
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code. Only load cache files this notebook produced itself.
        parsed_data = joblib.load(data_file)
    else:
        # The key comes from the environment so it never lives in the notebook.
        api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
        if not api_key:
            raise RuntimeError(
                "LLAMA_CLOUD_API_KEY is not set; it is required to parse the PDF."
            )

        parsing_instruction = """The provided document is a based on Yoga Therapy Training - what it is, how it can aid
        in mitigating or curing ailments like Back Pain, Allergies etc..
        Try to be precise while answering the questions"""
        parser = LlamaParse(api_key=api_key,
                            result_type="markdown",
                            parsing_instruction=parsing_instruction,
                            max_timeout=5000,)
        parsed_data = parser.load_data(pdf_path)

        # Make sure the cache directory exists before dumping into it.
        os.makedirs(os.path.dirname(data_file) or ".", exist_ok=True)
        print("Saving the parse results in .pkl format ..........")
        joblib.dump(parsed_data, data_file)

    return parsed_data

In [19]:
# Create vector database
def create_vector_database():
    """
    Build a persisted Chroma vector database from the parsed document.

    Steps:
      1. Fetch the parsed documents via ``load_or_parse_data()``.
      2. Write their text to ``data/output.md``, replacing previous content.
      3. Load that markdown file with ``UnstructuredMarkdownLoader`` and split
         it into chunks (chunk_size=2000, chunk_overlap=100).
      4. Embed the chunks with ``FastEmbedEmbeddings``
         ("BAAI/bge-base-en-v1.5") and store them in the "rag" collection
         under the ``chroma_db_llamaparse1`` directory.

    Returns:
        tuple: ``(vs, embed_model)`` - the Chroma vector store and the
        embedding model used to build it.

    Raises:
        ValueError: If ``load_or_parse_data()`` returns no documents.
    """
    markdown_path = "data/output.md"

    # Call the function to either load or parse the data
    llama_parse_documents = load_or_parse_data()
    if not llama_parse_documents:
        raise ValueError("No parsed documents available to build the vector database.")
    print(llama_parse_documents[0].text[:300])

    # Write mode, not append: appending would duplicate the whole corpus in
    # the markdown file (and in the resulting chunks) on every re-run.
    with open(markdown_path, 'w', encoding='utf-8') as f:
        for doc in llama_parse_documents:
            f.write(doc.text + '\n')

    loader = UnstructuredMarkdownLoader(markdown_path)
    documents = loader.load()

    # Split loaded documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)

    print(f"length of documents loaded: {len(documents)}")
    print(f"total number of document chunks generated :{len(docs)}")

    # Initialize Embeddings
    embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

    # Create a Chroma vector database from the chunked documents.
    # NOTE(review): if the persist directory already holds a "rag" collection,
    # from_documents presumably adds to it, so re-runs may store duplicate
    # chunks - confirm, and clear the directory before rebuilding if so.
    vs = Chroma.from_documents(
        documents=docs,
        embedding=embed_model,
        persist_directory="chroma_db_llamaparse1",  # on-disk location of the collection
        collection_name="rag"
    )

    print('Vector DB created successfully !')
    return vs,embed_model

In [20]:
# Build the vector store; the returned embedding model is reused by a later
# cell when it constructs a Chroma instance.
vs,embed_model = create_vector_database()

#

# Yoga Therapy Training

# Yoga Therapy Training

Yoga Therapy, presented by Nancy Wile at Weston Yoga, is a specialized form of yoga that can aid in mitigating or curing various ailments. It focuses on using yoga practices and techniques to address specific health issues and promote overall well
length of documents loaded: 1
total number of document chunks generated :138


Fetching 8 files: 100%|██████████| 8/8 [00:00<?, ?it/s]


Vector DB created successfully !


In [21]:
# The Groq key is read from the environment so it never lives in the notebook;
# a missing GROQ_API_KEY fails here with a KeyError.
# NOTE(review): confirm "mixtral-8x7b-32768" is still served by Groq.
chat_model = ChatGroq(temperature=0,
                      model_name="mixtral-8x7b-32768",
                      api_key=os.environ["GROQ_API_KEY"])

In [22]:
# Chroma instance over the same persist directory and collection name that
# create_vector_database() writes to.
vectorstore = Chroma(
    collection_name="rag",
    persist_directory="chroma_db_llamaparse1",
    embedding_function=embed_model,
)

# Retriever configured with k=3 in its search kwargs.
retriever = vectorstore.as_retriever(search_kwargs={'k': 3})

In [23]:
# Prompt text for the QA chain; {context} and {question} are the two
# placeholders filled in by the PromptTemplate built from this string.
custom_prompt_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

In [52]:
def set_custom_prompt():
    """
    Build the PromptTemplate used for QA retrieval.

    Wraps the module-level ``custom_prompt_template`` string, declaring
    'context' and 'question' as its input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )
# Instantiate the prompt; `prompt` is passed to the QA chain in a later cell.
prompt = set_custom_prompt()
# Bare expression so the notebook displays the template.
prompt

########################### RESPONSE ###########################
PromptTemplate(input_variables=['context', 'question'], template=custom_prompt_template)

PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of information to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nContext: {context}\nQuestion: {question}\n\nOnly return the helpful answer below and nothing else.\nHelpful answer:\n")

In [53]:
# Retrieval QA chain: the "stuff" chain type with the custom prompt, returning
# the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=chat_model,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [58]:
# Run a sample question through the QA chain.
user_query = "I have anxiety"
response = qa.invoke({"query": user_query})

In [59]:
# Print the value stored under the 'result' key of the chain's response.
print(response['result'])

You can try Viloma Pranayama, a yoga therapy technique that can help relax the body and focus the mind. Here are the steps:

1. Lie straight and relaxed on your back. Close your eyes and lie quietly for a minute.
2. Exhale completely until lungs feel empty.
3. Inhale for 2 or 3 seconds, pause and hold the breath for 2 or 3 seconds.
4. Without exhaling, inhale again for 2 or 3 seconds.
5. Repeat this process until the lungs are completely full.
6. Hold the breath for a second or two.
7. Exhale for 2 or 3 seconds, pause and hold the breath for 2 or 3 seconds.
8. Repeat this process until the lungs feel completely empty.
9. Repeat the inhale and exhale 3-4 more times.
10. After your last exhalation, gradually relax your abdomen.

Additionally, practicing seated forward folds and body scan can also be helpful for anxiety. Seated forward folds can help quiet the mind and body, while the body scan can help relax different body parts.
