In [None]:
import os
import bs4
import cassio

from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.embeddings import OpenAIEmbeddings
from langchain.vectorstores.cassandra import Cassandra
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from dotenv import load_dotenv
# Load key/value pairs from a local .env file (if one exists) into the process
# environment, so the os.environ / os.getenv lookups in later cells can find
# the API keys without them being written into the notebook.
load_dotenv()

In [None]:
# Every credential is read from the environment (load_dotenv() above fills it
# from a local .env file) so that no secret is stored in the notebook itself.
# A missing variable raises KeyError right here, naming the absent key.
groq_api_key = os.environ['GROQ_API_KEY']

# Astra DB connection settings:
#   ASTRA_DB_APPLICATION_TOKEN - the "AstraCS:..." string from the token JSON file
#   ASTRA_DB_ID                - the ID of the target database
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
ASTRA_DB_ID = os.environ["ASTRA_DB_ID"]

# Open the connection once for the whole notebook.
# NOTE(review): the vector store below is built with session=None/keyspace=None,
# which presumably relies on the connection registered by this call - confirm.
cassio.init(token = ASTRA_DB_APPLICATION_TOKEN, database_id = ASTRA_DB_ID)

In [None]:
# Only parse the parts of the page tagged with one of these CSS classes
# (title, header and body of the post); everything else is skipped.
post_sections = bs4.SoupStrainer(
    class_=("post-title", "post-content", "post-header"),
)

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)

# Fetch the page and show the loaded documents as the cell output.
text_documents = loader.load()
text_documents

In [None]:
# Break the loaded documents into overlapping chunks (size 1000, overlap 200)
# so that each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(text_documents)

# Preview the first five chunks only.
docs[:5]

In [None]:
# Convert the chunks into vectors and store them in Astra DB.

# OpenAIEmbeddings() is created without an explicit key, so the key has to be
# available as OPENAI_API_KEY in the environment. Check that up front: the
# former `os.environ[...] = os.getenv(...)` self-assignment did nothing when
# the key was set and raised an opaque TypeError when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your environment or .env file "
        "before creating the embeddings."
    )

embeddings = OpenAIEmbeddings()

# session=None / keyspace=None: no explicit connection is passed here.
# NOTE(review): presumably falls back to the connection set up by cassio.init()
# in the earlier cell - confirm against the cassio / LangChain docs.
astra_vector_store = Cassandra(
    embedding = embeddings,
    table_name = "qa_mini_demo",
    session = None,
    keyspace = None
)

In [None]:
# Embed every chunk and write it to the Astra DB table. add_documents returns
# the IDs of the stored entries, so the count below reflects what the store
# reported rather than the size of the input list.
# NOTE(review): re-running this cell likely inserts the same chunks again -
# confirm, and clear the table first if duplicates matter.
inserted_ids = astra_vector_store.add_documents(docs)
print("Inserted %i document chunks." % len(inserted_ids))

# Wrap the store so it can be queried directly with an LLM.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

In [None]:
# Chat model served by Groq; the API key comes from the configuration cell.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="mixtral-8x7b-32768",
)

# Prompt template with two placeholders: {context} receives the retrieved
# chunks and {input} receives the user's question.
qa_template = """
Answer the following queries based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}
"""

prompt = ChatPromptTemplate.from_template(qa_template)

In [None]:
# Quick check: query the index wrapper directly, answering with the Groq LLM.
cot_query = "Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique"
astra_vector_index.query(cot_query, llm=llm)

In [None]:
# Build the retrieval pipeline in two stages:
#   1. a "stuff" chain that feeds documents plus the prompt to the LLM,
#   2. a retrieval chain that pulls documents from the vector store and hands
#      them to stage 1.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
retriever = astra_vector_store.as_retriever()
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [None]:
# Run the full retrieval chain on a sample question; the question is passed
# under the "input" key, matching the {input} placeholder of the prompt.
question = "Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique"
response = retrieval_chain.invoke({"input": question})

# Show the complete response object as the cell output.
response

In [None]:
# Display only the "answer" entry of the chain's response.
response["answer"]