In [1]:
!pip install -q -U google-generativeai qdrant-client cohere streamlit python-dotenv langchain langchain-google-genai langchain-community

In [2]:
import os
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import qdrant_client
from dotenv import load_dotenv
import cohere

# Load environment variables, then read the credentials each service needs
load_dotenv()
qdrant_url = os.environ.get("QDRANT_URL")
qdrant_api_key = os.environ.get("QDRANT_API_KEY")
cohere_api_key = os.environ.get("COHERE_API_KEY")
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Data Ingestion and Chunking
def process_and_chunk_data(text_data, chunk_size=1000, chunk_overlap=100):
    """
    Split raw text into chunks and build one metadata record per chunk.

    Args:
        text_data: The text to split.
        chunk_size: Forwarded to RecursiveCharacterTextSplitter as ``chunk_size``.
        chunk_overlap: Forwarded to RecursiveCharacterTextSplitter as ``chunk_overlap``.

    Returns:
        A ``(chunks, metadatas)`` tuple: the chunks returned by ``split_text`` and a
        parallel list of dicts with "source", "title" and "position" keys.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_text(text_data)

    # One metadata dict per chunk, keyed by the chunk's index in the list
    metadatas = []
    for position, _chunk in enumerate(chunks):
        metadatas.append({
            "source": f"chunk-{position}",
            "title": "User Uploaded Data",
            "position": position,
        })

    return chunks, metadatas

# Example usage (we will use this later in the Streamlit app)
# sample_text = "Your long text document goes here..."
# documents, metadatas = process_and_chunk_data(sample_text)
# print(f"Number of chunks: {len(documents)}")
# print(documents[0])
# print(metadatas[0])

In [None]:
# Vector Store (Qdrant)
def get_qdrant_vector_store(collection_name="user-data-collection"):
    """
    Initializes and returns a Qdrant vector store, creating the collection if needed.

    Args:
        collection_name: Name of the Qdrant collection to open.

    Returns:
        A LangChain ``Qdrant`` vector store bound to ``collection_name`` that embeds
        text with ``GoogleGenerativeAIEmbeddings`` ("models/embedding-001").
    """
    client = qdrant_client.QdrantClient(
        qdrant_url,
        api_key=qdrant_api_key
    )

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # Ask the server whether the collection exists rather than treating *any*
    # exception from get_collection() as "missing": an auth or network failure
    # must surface as an error instead of triggering collection (re)creation.
    if not client.collection_exists(collection_name=collection_name):
        models = qdrant_client.http.models
        # create_collection is used instead of recreate_collection so that an
        # existing collection can never be dropped from this code path.
        client.create_collection(
            collection_name=collection_name,
            # size must equal the embedding dimension -- presumably 768 for
            # models/embedding-001; confirm if the embedding model changes.
            vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
        )
        print(f"Collection '{collection_name}' created.")

    vector_store = Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    return vector_store

def upsert_data_to_qdrant(documents, metadatas, collection_name="user-data-collection", batch_size=32):
    """
    Write chunks and their metadata to a Qdrant collection, ``batch_size`` at a time.

    Args:
        documents: Text chunks to store.
        metadatas: Metadata dicts, sliced with the same indices as ``documents``.
        collection_name: Target Qdrant collection.
        batch_size: Number of chunks sent per ``add_texts`` call.
    """
    store = get_qdrant_vector_store(collection_name)
    total = len(documents)

    for start in range(0, total, batch_size):
        stop = start + batch_size
        store.add_texts(documents[start:stop], metadatas[start:stop])

    print(f"Upserted {total} chunks to Qdrant collection '{collection_name}'.")

# Example usage (we will use this later in the Streamlit app)
# upsert_data_to_qdrant(documents, metadatas)

In [5]:
# Retriever and Reranker
def get_retriever(collection_name="user-data-collection", top_k=5):
    """
    Build a retriever over the Qdrant vector store for ``collection_name``.

    Args:
        collection_name: Qdrant collection to search.
        top_k: Passed to the retriever as the ``k`` search argument.

    Returns:
        The object returned by ``vector_store.as_retriever``.
    """
    search_options = {"k": top_k}
    return get_qdrant_vector_store(collection_name).as_retriever(search_kwargs=search_options)

def rerank_documents(query, documents, top_n=3):
    """
    Reranks the retrieved documents using Cohere's rerank API.

    Args:
        query: The user query the documents are ranked against.
        documents: Retrieved documents; each must expose ``page_content``.
        top_n: Maximum number of documents requested from the reranker.

    Returns:
        The original document objects in the order given by the rerank results,
        or an empty list when ``documents`` is empty.
    """
    # Nothing to rank: return early instead of sending a rerank request with
    # no documents in it.
    if not documents:
        return []

    co = cohere.Client(cohere_api_key)

    texts = [doc.page_content for doc in documents]
    results = co.rerank(query=query, documents=texts, top_n=top_n, model="rerank-english-v2.0")

    # Map each result back to the original document via its index in `texts`.
    return [documents[result.index] for result in results.results]

# Example usage (we will use this later in the Streamlit app)
# retriever = get_retriever()
# retrieved_docs = retriever.get_relevant_documents("your query")
# reranked_docs = rerank_documents("your query", retrieved_docs)
# for i, doc in enumerate(reranked_docs):
#     print(f"Reranked Doc {i+1}:")
#     print(doc.page_content)
#     print(doc.metadata)
#     print("-" * 20)

In [6]:
# LLM and Answering
def get_conversational_chain():
    """
    Build a RetrievalQA chain that answers from retrieved context.

    The chain uses the "gemini-pro" chat model (temperature 0.3), the default
    retriever from ``get_retriever()``, the "stuff" chain type, and returns the
    source documents alongside the answer.
    """
    qa_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(template=qa_template, input_variables=["context", "question"])
    retriever = get_retriever()

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": qa_prompt},
        return_source_documents=True,
    )

def generate_response(query, chain):
    """
    Run the chain on ``query`` and return the answer followed by a source list.

    Args:
        query: The user's question.
        chain: Callable invoked as ``chain({"query": query})``; its result must
            contain "result" and "source_documents".

    Returns:
        The answer text, a "**Sources:**" heading, and one numbered citation line
        per source document (title and chunk position taken from its metadata).
    """
    output = chain({"query": query})
    answer = output["result"]
    sources = output["source_documents"]

    def _cite(number, doc):
        # Missing metadata keys fall back to placeholder text
        title = doc.metadata.get('title', 'Unknown Title')
        position = doc.metadata.get('position', 'N/A')
        return f"[{number}] {title} (chunk {position})"

    citation_block = "\n".join(_cite(n, doc) for n, doc in enumerate(sources, start=1))

    return f"{answer}\n\n**Sources:**\n" + citation_block

# Example usage (we will use this later in the Streamlit app)
# chain = get_conversational_chain()
# response = generate_response("your query", chain)
# print(response)

In [7]:
%%writefile app.py
import streamlit as st
import os
import time
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import qdrant_client
from dotenv import load_dotenv
import cohere

# --- Helper Functions ---

# Load environment variables, then read the credentials each service needs
load_dotenv()
qdrant_url = os.environ.get("QDRANT_URL")
qdrant_api_key = os.environ.get("QDRANT_API_KEY")
cohere_api_key = os.environ.get("COHERE_API_KEY")
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Data Ingestion and Chunking
def process_and_chunk_data(text_data, chunk_size=1000, chunk_overlap=100):
    """Split ``text_data`` into chunks and return ``(chunks, metadatas)``.

    Each metadata dict carries "source", "title" and "position" for the chunk
    at the same index.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_text(text_data)

    def _describe(position):
        # Metadata record for the chunk at this index
        return {
            "source": f"chunk-{position}",
            "title": "User Uploaded Data",
            "position": position,
        }

    return chunks, list(map(_describe, range(len(chunks))))

# Vector Store (Qdrant)
def get_qdrant_vector_store(collection_name="user-data-collection"):
    """Return a LangChain ``Qdrant`` store for ``collection_name``, creating it if absent.

    Args:
        collection_name: Name of the Qdrant collection to open.

    Returns:
        A ``Qdrant`` vector store that embeds text with
        ``GoogleGenerativeAIEmbeddings`` ("models/embedding-001").
    """
    client = qdrant_client.QdrantClient(qdrant_url, api_key=qdrant_api_key)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # Without this check the first upload against a fresh Qdrant instance has no
    # collection to write to. create_collection is only reached when the server
    # reports the collection as missing, so existing data is never dropped.
    if not client.collection_exists(collection_name=collection_name):
        models = qdrant_client.http.models
        client.create_collection(
            collection_name=collection_name,
            # size must equal the embedding dimension -- presumably 768 for
            # models/embedding-001; confirm if the embedding model changes.
            vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
        )

    vector_store = Qdrant(client=client, collection_name=collection_name, embeddings=embeddings)
    return vector_store

def upsert_data_to_qdrant(documents, metadatas, collection_name="user-data-collection"):
    """Add ``documents`` and their ``metadatas`` to the Qdrant collection in one call."""
    get_qdrant_vector_store(collection_name).add_texts(documents, metadatas)

# Retriever and Reranker
def get_retriever(collection_name="user-data-collection", top_k=5):
    """Return a retriever over ``collection_name`` with ``top_k`` passed as ``k``."""
    search_options = {"k": top_k}
    store = get_qdrant_vector_store(collection_name)
    retriever = store.as_retriever(search_kwargs=search_options)
    return retriever

def rerank_documents(query, documents, top_n=3):
    """Rerank ``documents`` against ``query`` with Cohere's rerank API.

    Args:
        query: The user query the documents are ranked against.
        documents: Retrieved documents; each must expose ``page_content``.
        top_n: Maximum number of documents requested from the reranker.

    Returns:
        The original document objects in the order given by the rerank results,
        or an empty list when ``documents`` is empty.
    """
    # Nothing to rank: return early instead of sending a rerank request with
    # no documents in it.
    if not documents:
        return []

    co = cohere.Client(cohere_api_key)
    docs = [doc.page_content for doc in documents]
    results = co.rerank(query=query, documents=docs, top_n=top_n, model="rerank-english-v2.0")
    # Map each result back to the original document via its index in `docs`.
    return [documents[result.index] for result in results.results]

# LLM and Answering
def get_conversational_chain():
    """Build a "stuff" RetrievalQA chain over the default retriever.

    Uses the "gemini-pro" chat model at temperature 0.3 and returns source
    documents together with the answer.
    """
    qa_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(template=qa_template, input_variables=["context", "question"])
    retriever = get_retriever()

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": qa_prompt},
        return_source_documents=True,
    )

def generate_response(query, chain):
    """Run ``chain`` on ``query``; return ``(answer_with_sources, source_documents)``.

    ``chain`` is invoked as ``chain({"query": query})`` and its result must
    contain "result" and "source_documents". The returned text is the answer
    followed by a "**Sources:**" heading and one numbered citation per document.
    """
    chain_output = chain({"query": query})
    answer = chain_output["result"]
    source_documents = chain_output["source_documents"]

    citation_lines = []
    for number, doc in enumerate(source_documents, start=1):
        # Missing metadata keys fall back to placeholder text
        title = doc.metadata.get('title', 'Unknown Title')
        position = doc.metadata.get('position', 'N/A')
        citation_lines.append(f"[{number}] {title} (chunk {position})")

    sources_block = "\n".join(citation_lines)
    return f"{answer}\n\n**Sources:**\n" + sources_block, source_documents

# --- Streamlit App ---

st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("RAG Chatbot: Qdrant, Gemini, and Cohere")

# --- Sidebar for Data Ingestion ---
# Lets the user upload a .txt file or paste text, then chunks it and stores the
# chunks in Qdrant. Sets st.session_state.data_processed once data is stored.
with st.sidebar:
    st.header("Data Ingestion")
    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
    text_input = st.text_area("Or paste your text here", height=200)

    if st.button("Process and Store Data"):
        # An uploaded file takes precedence over pasted text.
        if uploaded_file is not None:
            try:
                # getvalue() returns the whole buffer regardless of the current
                # read position, so processing the same upload again still
                # sees the full file.
                text_data = uploaded_file.getvalue().decode("utf-8")
            except UnicodeDecodeError:
                st.error("Could not decode the uploaded file as UTF-8 text.")
                st.stop()
            st.info("Processing uploaded file.")
        elif text_input.strip():
            text_data = text_input
            st.info("Processing pasted text.")
        else:
            st.warning("Please upload a file or paste text to process.")
            st.stop()

        with st.spinner("Chunking and upserting data to Qdrant..."):
            start_time = time.time()
            documents, metadatas = process_and_chunk_data(text_data)

            # Do not report success (or unlock the chat) when nothing was stored.
            if not documents:
                st.warning("No text chunks were produced from the input; nothing was stored.")
                st.stop()

            upsert_data_to_qdrant(documents, metadatas)
            end_time = time.time()

            st.success(f"Successfully processed and stored {len(documents)} chunks in {end_time - start_time:.2f} seconds.")
            st.session_state.data_processed = True

# --- Main Chat Interface ---
st.header("Ask Questions")

# Seed the per-session keys the first time the script runs for this session
for state_key, initial_value in (("messages", []), ("data_processed", False)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

if not st.session_state.data_processed:
    st.warning("Please process some data first using the sidebar.")

# Replay the stored conversation so earlier turns stay visible after a rerun
for message in st.session_state.messages:
    st.chat_message(message["role"]).markdown(message["content"])

# Handle one chat turn: retrieve -> rerank -> answer with the reranked context,
# then show the answer, timing, a rough token estimate, and the sources used.
if query := st.chat_input("What is your question?"):
    if not st.session_state.data_processed:
        st.warning("Please process data before asking questions.")
        st.stop()

    st.session_state.messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            start_time = time.time()

            # Retrieval
            retriever = get_retriever()
            retrieved_docs = retriever.get_relevant_documents(query)

            # Reranking (skipped when retrieval returned nothing to rank)
            reranked_docs = rerank_documents(query, retrieved_docs) if retrieved_docs else []

            # Answering
            # The RetrievalQA chain from get_conversational_chain() runs its own
            # retrieval and cannot be handed the reranked documents, so it is not
            # built here; the prompt is filled with the reranked context directly.
            context = "\n\n".join([doc.page_content for doc in reranked_docs])
            prompt_template = """
            Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
            provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
            Context:\n {context}?\n
            Question: \n{question}\n

            Answer:
            """
            model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
            prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

            from langchain.chains.llm import LLMChain
            llm_chain = LLMChain(llm=model, prompt=prompt)
            answer = llm_chain.run({"context": context, "question": query})

            # Create citations from reranked docs
            citations = [f"[{i+1}] {doc.metadata.get('title', 'Unknown Title')} (chunk {doc.metadata.get('position', 'N/A')})" for i, doc in enumerate(reranked_docs)]
            full_response = f"{answer}\n\n**Sources:**\n" + "\n".join(citations)

            end_time = time.time()

            st.markdown(full_response)

            # Display timing and cost (rough estimate)
            st.info(f"Response generated in {end_time - start_time:.2f} seconds.")
            # Whitespace-separated word counts stand in for tokens here; this is
            # a very rough estimate, not the model's real token usage.
            input_tokens = len(query.split()) + len(context.split())
            output_tokens = len(answer.split())
            st.info(f"Estimated tokens: {input_tokens + output_tokens}")


    st.session_state.messages.append({"role": "assistant", "content": full_response})

    with st.expander("Show Reranked Sources"):
        for i, doc in enumerate(reranked_docs):
            st.write(f"**Source [{i+1}]**")
            st.write(f"**Title:** {doc.metadata.get('title', 'N/A')}")
            st.write(f"**Chunk Position:** {doc.metadata.get('position', 'N/A')}")
            st.write(doc.page_content)
            st.divider()


Writing app.py
