In [1]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
import warnings
# Suppress all warnings
warnings.filterwarnings('ignore')

In [2]:
TEXT = ["Python is a versatile and widely used programming language known for its clean and readable syntax, which relies on indentation for code structure",
        "It is a general-purpose language suitable for web development, data analysis, AI, machine learning, and automation. Python offers an extensive standard library with modules covering a broad range of tasks, making it efficient for developers.",
        "It is cross-platform, running on Windows, macOS, Linux, and more, allowing for broad application compatibility."
        "Python has a large and active community that develops libraries, provides documentation, and offers support to newcomers.",
        "It has particularly gained popularity in data science and machine learning due to its ease of use and the availability of powerful libraries and frameworks."]

In [3]:
meta_data = [{"source": "document 1", "page": 1},
             {"source": "document 2", "page": 2},
             {"source": "document 3", "page": 3},
             {"source": "document 4", "page": 4}]

In [4]:
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2"
)

In [5]:
vector_db = Chroma.from_texts(
    texts=TEXT,
    embedding=embedding_function,
    metadatas=meta_data
)

In [6]:
combine_template = "Write a summary of the following text:\n\n{summaries}"

In [7]:
combine_prompt_template = PromptTemplate.from_template(
    template=combine_template)

In [8]:
question_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

In [9]:
question_prompt_template = PromptTemplate.from_template(
    template=question_template)

In [10]:
import os
groq_api_key = os.environ["GROQ_API_KEY"]
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

In [11]:
llm = ChatGroq(model="mixtral-8x7b-32768", temperature=0)

In [12]:
qa_chain = RetrievalQA.from_chain_type(
                                    llm=llm,
                                    retriever=vector_db.as_retriever(
                                    search_kwargs={'fetch_k': 4, 'k': 3}, search_type='mmr'),
                                    return_source_documents=True,
                                    chain_type="map_reduce",
                                    chain_type_kwargs={"question_prompt": question_prompt_template,
                                    "combine_prompt": combine_prompt_template}
                                    )
              

In [13]:
question = "What areas is Python mostly used"

In [14]:
response = qa_chain({"query": question})

In [15]:
print(response)

{'query': 'What areas is Python mostly used', 'result': "Python is a versatile and widely-used programming language, known for its simplicity and extensive libraries. It is commonly employed in areas such as web development, data analysis, artificial intelligence, machine learning, automation, scientific computing, and education. These areas often require cross-platform compatibility, and Python's large, active community provides extensive libraries, documentation, and support. Thanks for asking!", 'source_documents': [Document(page_content='It is a general-purpose language suitable for web development, data analysis, AI, machine learning, and automation. Python offers an extensive standard library with modules covering a broad range of tasks, making it efficient for developers.', metadata={'page': 2, 'source': 'document 2'}), Document(page_content='It is cross-platform, running on Windows, macOS, Linux, and more, allowing for broad application compatibility.Python has a large and acti

In [16]:
print("============================================")
print("====================Result==================")
print("============================================")
print(response["result"])


print("============================================")
print("===============Source Documents============")
print("============================================")

Python is a versatile and widely-used programming language, known for its simplicity and extensive libraries. It is commonly employed in areas such as web development, data analysis, artificial intelligence, machine learning, automation, scientific computing, and education. These areas often require cross-platform compatibility, and Python's large, active community provides extensive libraries, documentation, and support. Thanks for asking!


In [17]:
print(response["source_documents"][0])

page_content='It is a general-purpose language suitable for web development, data analysis, AI, machine learning, and automation. Python offers an extensive standard library with modules covering a broad range of tasks, making it efficient for developers.' metadata={'page': 2, 'source': 'document 2'}
