Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-n

Collecting langchain-qdrant
  Downloading langchain_qdrant-0.2.0-py3-none-any.whl.metadata (1.8 kB)
Collecting qdrant-client
  Downloading qdrant_client-1.14.3-py3-none-any.whl.metadata (10 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant-client)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Downloading langchain_qdrant-0.2.0-py3-none-any.whl (23 kB)
Downloading qdrant_client-1.14.3-py3-none-any.whl (328 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.0/329.0 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-2.10.1-py3-none-any.whl (18 kB)
Installing collected packages: portalocker, qdrant-client, langchain-qdrant
Successfully installed langchain-qdrant-0.2.0 portalocker-2.10.1 qdrant-client-1.14.3


Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28

In [8]:
import os
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel
from dotenv import load_dotenv
from typing import List

from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


In [4]:


# Load Mistral 8B
model_name = "mistralai/Mistral-8B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
mistral_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)

# --- User & Role Setup (same as before) ---
USER_DB = {...}
ROLE_COLLECTIONS = {...}

app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

def authenticate_user(token: str = Depends(oauth2_scheme)):
    username = token
    user = USER_DB.get(username)
    if not user:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")
    return {"username": username, "role": user["role"]}

class QueryRequest(BaseModel):
    query: str

@app.post("/query")
def query_rag(request: QueryRequest, user=Depends(authenticate_user)):
    role = user["role"]
    allowed_collections = ROLE_COLLECTIONS.get(role, [])
    if not allowed_collections:
        raise HTTPException(status_code=403, detail="No access to any data")

    all_docs = []
    for collection in allowed_collections:
        vectorstore = Chroma(persist_directory=f"./db/{collection}", embedding_function=OpenAIEmbeddings())
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        docs = retriever.get_relevant_documents(request.query)
        all_docs.extend(docs)

    if not all_docs:
        return {"answer": "No relevant data found for your role.", "sources": []}

    context = "\n".join([doc.page_content for doc in all_docs])
    sources = [doc.metadata.get("source", "unknown") for doc in all_docs]

    prompt = (
        f"Answer the following question using only the provided context. Cite sources.\n\n"
        f"Context:\n{context}\n\nQuestion: {request.query}\nAnswer:"
    )
    # Use Mistral 8B for generation
    mistral_output = mistral_pipe(prompt)[0]['generated_text'].split("Answer:")[-1].strip()
    return {"answer": mistral_output, "sources": sources}


Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.1


ModuleNotFoundError: Module langchain_community.vectorstores not found. Please install langchain-community to access this module. You can install it using `pip install -U langchain-community`

In [None]:
# -*- coding: utf-8 -*-
# RBAC RAG Chatbot: FastAPI backend + Streamlit UI (simplified)

# Install dependencies:
# !pip install fastapi uvicorn streamlit langchain chromadb python-dotenv openai

import os
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from typing import List

load_dotenv()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# --- Mock user-role mapping ---
USER_DB = {
    "alice": {"password": "finance123", "role": "finance"},
    "bob": {"password": "marketing123", "role": "marketing"},
    "carol": {"password": "hr123", "role": "hr"},
    "dan": {"password": "eng123", "role": "engineering"},
    "eve": {"password": "exec123", "role": "c_level"},
    "frank": {"password": "emp123", "role": "employee"},
}

ROLE_COLLECTIONS = {
    "finance": ["finance_docs"],
    "marketing": ["marketing_docs"],
    "hr": ["hr_docs"],
    "engineering": ["engineering_docs"],
    "c_level": ["finance_docs", "marketing_docs", "hr_docs", "engineering_docs", "general_docs"],
    "employee": ["general_docs"],
}

# --- Authentication & Role Assignment ---
def authenticate_user(token: str = Depends(oauth2_scheme)):
    # In production, use JWT or OAuth2
    username = token  # For demo, token is username
    user = USER_DB.get(username)
    if not user:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")
    return {"username": username, "role": user["role"]}

# --- Data Retrieval & RAG Pipeline ---
class QueryRequest(BaseModel):
    query: str

@app.post("/query")
def query_rag(request: QueryRequest, user=Depends(authenticate_user)):
    role = user["role"]
    allowed_collections = ROLE_COLLECTIONS.get(role, [])
    if not allowed_collections:
        raise HTTPException(status_code=403, detail="No access to any data")

    # Example: Use Chroma vectorstore for each collection
    all_docs = []
    for collection in allowed_collections:
        vectorstore = Chroma(persist_directory=f"./db/{collection}", embedding_function=OpenAIEmbeddings())
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        docs = retriever.get_relevant_documents(request.query)
        all_docs.extend(docs)

    if not all_docs:
        return {"answer": "No relevant data found for your role.", "sources": []}

    # Build context for LLM
    context = "\n".join([doc.page_content for doc in all_docs])
    sources = [doc.metadata.get("source", "unknown") for doc in all_docs]

    llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=None)
    answer = qa_chain.combine_documents_chain.llm_chain.llm(
        f"Answer the following question using only the provided context. Cite sources.\n\nContext:\n{context}\n\nQuestion: {request.query}\nAnswer:"
    )
    return {"answer": answer, "sources": sources}

# --- Streamlit UI (run separately as streamlit_app.py) ---
# Save this as streamlit_app.py and run: streamlit run streamlit_app.py

"""
import streamlit as st
import requests

st.title("FinSolve RBAC RAG Chatbot")

username = st.text_input("Username")
password = st.text_input("Password", type="password")
query = st.text_input("Ask your question:")

if st.button("Submit"):
    # For demo, token is username
    headers = {"Authorization": f"Bearer {username}"}
    response = requests.post(
        "http://localhost:8000/query",
        json={"query": query},
        headers=headers
    )
    if response.status_code == 200:
        data = response.json()
        st.markdown(f"**Answer:** {data['answer']}")
        st.markdown(f"**Sources:** {', '.join(data['sources'])}")
    else:
        st.error(response.json().get("detail", "Error"))
"""

# To run backend: uvicorn main:app --reload
# To run frontend: streamlit run streamlit_app.py


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
# Install dependencies
!pip install -q fastapi uvicorn streamlit chromadb transformers accelerate langchain langchain-community langchain-qdrant qdrant-client pyngrok


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m47.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.0/329.0 kB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m16.0 MB/s[0m eta [3

In [5]:
import os
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel

from dotenv import load_dotenv
from typing import List

from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


In [9]:
from huggingface_hub import login
login(new_session=True)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
# MiniLM for embeddings
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)


  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
qdrant_url = "http://localhost:6333"

# Example: dynamically select collection per user role
def get_vectorstore(collection_name):
    return Qdrant.from_existing_collection(
        embeddings=embeddings,
        collection_name=collection_name,
        url=qdrant_url
    )


In [10]:
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
mistral_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)


tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0


In [11]:
USER_DB = {
    "alice": {"password": "finance123", "role": "finance"},
    "bob": {"password": "marketing123", "role": "marketing"},
    "carol": {"password": "hr123", "role": "hr"},
    "dan": {"password": "eng123", "role": "engineering"},
    "eve": {"password": "exec123", "role": "c_level"},
    "frank": {"password": "emp123", "role": "employee"},
}

ROLE_COLLECTIONS = {
    "finance": ["finance_docs"],
    "marketing": ["marketing_docs"],
    "hr": ["hr_docs"],
    "engineering": ["engineering_docs"],
    "c_level": ["finance_docs", "marketing_docs", "hr_docs", "engineering_docs", "general_docs"],
    "employee": ["general_docs"],
}

In [12]:


app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

def authenticate_user(token: str = Depends(oauth2_scheme)):
    username = token
    user = USER_DB.get(username)
    if not user:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")
    return {"username": username, "role": user["role"]}

class QueryRequest(BaseModel):
    query: str


In [13]:
@app.post("/query")
def query_rag(request: QueryRequest, user=Depends(authenticate_user)):
    role = user["role"]
    allowed_collections = ROLE_COLLECTIONS.get(role, [])
    if not allowed_collections:
        raise HTTPException(status_code=403, detail="No access to any data")

    all_docs = []
    for collection in allowed_collections:
        vectorstore = get_vectorstore(collection)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        docs = retriever.get_relevant_documents(request.query)
        all_docs.extend(docs)

    if not all_docs:
        return {"answer": "No relevant data found for your role.", "sources": []}

    context = "\n".join([doc.page_content for doc in all_docs])
    sources = [doc.metadata.get("source", "unknown") for doc in all_docs]

    prompt = (
        f"Answer the following question using only the provided context. Cite sources.\n\n"
        f"Context:\n{context}\n\nQuestion: {request.query}\nAnswer:"
    )
    mistral_output = mistral_pipe(prompt)[0]['generated_text'].split("Answer:")[-1].strip()
    return {"answer": mistral_output, "sources": sources}


In [None]:
#!pip install pyngrok


In [14]:
from pyngrok import ngrok
ngrok.set_auth_token("2zlAbCICGR52XgeZBMhf9IqaSjc_3p5gELGQRhrZhpKZ5oLh5")




In [15]:
from pyngrok import ngrok

# Open a tunnel on the port your FastAPI app will run (e.g., 8000)
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url.public_url}")


Public URL: https://e27c0aed551e.ngrok-free.app


In [16]:
import uvicorn
from pyngrok import ngrok

# Expose port 8000
public_url = ngrok.connect(8000)
print(f"Public FastAPI URL: {public_url}")

# Start FastAPI app in background
!uvicorn main:app --host 0.0.0.0 --port 8000 --reload --workers 1 &


Public FastAPI URL: NgrokTunnel: "https://af4b9d30c8a5.ngrok-free.app" -> "http://localhost:8000"
[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://0.0.0.0:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m5234[0m] using [36m[1mWatchFiles[0m
[31mERROR[0m:    Error loading ASGI app. Could not import module "main".




[32mINFO[0m:     Stopping reloader process [[36m[1m5234[0m]


In [19]:
import requests

url = "https://af4b9d30c8a5.ngrok-free.app"
headers = {"Authorization": "Bearer alice"}  # or "bob" depending on your USER_DB
data = {"query": "What is retrieval-augmented generation?"}

response = requests.post(url, json=data, headers=headers)
print(response.json())


JSONDecodeError: Expecting value: line 1 column 1 (char 0)