In [1]:
!pip install streamlit sentence-transformers transformers bert-score chromadb rouge-score beautifulsoup4

Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.

In [2]:
!pip install -q streamlit pyngrok


In [3]:
!pip install -q groq

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/131.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.1/131.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [11]:
%%writefile rag_module.py
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
from sklearn.metrics import f1_score
from rouge_score import rouge_scorer
from bert_score import score as bert_score
import chromadb
import torch
import numpy as np
import uuid

class RAGModule:
    """Small retrieval-augmented generation pipeline.

    Stages: scrape web pages, split them into fixed-size character chunks,
    embed and store the chunks in a Chroma collection, retrieve and rerank
    chunks for a question, generate an answer through the Groq chat
    completions endpoint, and score that answer against the retrieved context.
    """

    GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
    # Records sent per ``collection.add`` call. Keeps individual inserts small;
    # NOTE(review): Chroma limits the batch size per call - confirm the limit
    # for the deployed version.
    ADD_BATCH_SIZE = 1000

    def __init__(self, groq_api_key, model="llama3-8b-8192", request_timeout=60):
        """Load the embedding and reranking models and open the vector store.

        Args:
            groq_api_key: Bearer token sent with every Groq request.
            model: Groq chat model used for answering and judging.
            request_timeout: Seconds to wait for a Groq HTTP response.
        """
        self.groq_api_key = groq_api_key
        self.model = model
        self.request_timeout = request_timeout
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection("rag_docs")
        self.reranker = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-6-v2")

    def scrape_urls(self, urls):
        """Download each URL and return the joined ``<p>`` text of every usable page.

        Scraping is best-effort: a URL that fails to download, returns an HTTP
        error status, or yields no paragraph text is reported with ``print``
        and skipped, so the returned list can be shorter than ``urls``.
        """
        docs = []
        for url in urls:
            try:
                res = requests.get(url, timeout=10)
                # Do not ingest the body of a 4xx/5xx error page as a document.
                res.raise_for_status()
                soup = BeautifulSoup(res.text, "html.parser")
                text = " ".join(p.get_text() for p in soup.find_all("p")).strip()
            except Exception as e:
                print(f"Error scraping {url}: {e}")
                continue
            if text:
                docs.append(text)
            else:
                print(f"Error scraping {url}: no paragraph text found")
        return docs

    def chunk_and_store(self, docs, chunk_size=500):
        """Replace the collection's contents with fixed-size chunks of ``docs``.

        Args:
            docs: Iterable of document strings.
            chunk_size: Chunk length in characters; must be positive.

        Raises:
            ValueError: If ``chunk_size`` is not positive.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        # Clear whatever a previous ingestion left behind.
        existing_ids = self.collection.get()['ids']
        if existing_ids:
            self.collection.delete(ids=existing_ids)

        chunks = [
            doc[start:start + chunk_size]
            for doc in docs
            for start in range(0, len(doc), chunk_size)
        ]
        if not chunks:
            return

        # Encode all chunks in one call instead of one model call per chunk.
        vectors = self.embedder.encode(chunks)
        if hasattr(vectors, "tolist"):
            vectors = vectors.tolist()
        ids = [str(uuid.uuid4()) for _ in chunks]

        for start in range(0, len(chunks), self.ADD_BATCH_SIZE):
            stop = start + self.ADD_BATCH_SIZE
            self.collection.add(
                documents=chunks[start:stop],
                embeddings=vectors[start:stop],
                ids=ids[start:stop],
            )

    def retrieve(self, query, top_k=5, candidate_k=15):
        """Return the ``top_k`` stored chunks most relevant to ``query``.

        Up to ``candidate_k`` nearest neighbours are fetched from the vector
        store and then reordered by the cross-encoder score, highest first.
        Returns an empty list when nothing is stored or ``top_k`` is not positive.
        """
        if top_k <= 0:
            return []
        stored = self.collection.count()
        if stored == 0:
            return []

        query_emb = self.embedder.encode(query)
        if hasattr(query_emb, "tolist"):
            query_emb = query_emb.tolist()
        # Never request more neighbours than the collection holds.
        n_results = min(max(candidate_k, top_k), stored)
        results = self.collection.query(query_embeddings=[query_emb], n_results=n_results)
        candidates = results['documents'][0]
        if not candidates:
            return []

        # Score (query, chunk) as a real text pair, in one batched call.
        pairs = [{"text": query, "text_pair": chunk} for chunk in candidates]
        raw_scores = self.reranker(pairs, truncation=True)
        scores = [
            (item[0] if isinstance(item, list) else item)['score']
            for item in raw_scores
        ]
        # sorted() is stable, so equally scored chunks keep their retrieval order.
        ranked = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)
        return [chunk for _, chunk in ranked[:top_k]]

    def _chat(self, prompt, temperature):
        """Send a single-turn user prompt to Groq and return the reply text.

        Raises:
            requests.HTTPError: If Groq answers with an error status.
            RuntimeError: If the response body lacks the expected fields.
        """
        response = requests.post(
            self.GROQ_CHAT_URL,
            headers={"Authorization": f"Bearer {self.groq_api_key}"},
            json={
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": temperature
            },
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        payload = response.json()
        try:
            return payload["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as exc:
            raise RuntimeError(f"Unexpected Groq response: {payload!r}") from exc

    @staticmethod
    def _token_f1(reference, answer):
        """Whitespace-token F1 between ``reference`` and ``answer``.

        The overlap is counted as a multiset, so repeated tokens are matched
        as many times as they occur in both texts. Returns 0.0 when either
        text has no tokens.
        """
        from collections import Counter

        ref_tokens = reference.split()
        ans_tokens = answer.split()
        if not ref_tokens or not ans_tokens:
            return 0.0
        overlap = sum((Counter(ref_tokens) & Counter(ans_tokens)).values())
        # 2PR / (P + R) with P = overlap/len(ans) and R = overlap/len(ref).
        return 2 * overlap / (len(ref_tokens) + len(ans_tokens))

    def generate_answer(self, query, context_chunks):
        """Ask the Groq model to answer ``query`` using ``context_chunks`` as context."""
        context = "\n".join(context_chunks)
        prompt = f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"
        return self._chat(prompt, temperature=0.3)

    def evaluate_answer(self, query, answer, context_chunks):
        """Score ``answer`` against the joined ``context_chunks``.

        Returns:
            dict with keys "F1 Score", "ROUGE-L", "Cosine Similarity" and
            "BERTScore" (floats rounded to three decimals) and
            "LLM-as-a-Judge" (the judge model's stripped reply text).
        """
        reference = " ".join(context_chunks)

        # Token-overlap F1
        f1 = self._token_f1(reference, answer)

        # ROUGE-L (score() takes the target first, then the prediction)
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        rouge_l = scorer.score(reference, answer)['rougeL'].fmeasure

        # BERTScore (candidates first, then references)
        _, _, bert_f1 = bert_score([answer], [reference], lang="en", verbose=False)
        bert = bert_f1[0].item()

        # Cosine similarity of the sentence embeddings
        answer_emb = self.embedder.encode(answer)
        reference_emb = self.embedder.encode(reference)
        cosine = float(util.cos_sim(answer_emb, reference_emb)[0][0])

        # LLM-as-a-Judge
        judge_prompt = (
            f"Question: {query}\n"
            f"Candidate Answer: {answer}\n"
            f"Context: {reference}\n"
            'Does the answer correctly and completely answer the question '
            'based on the context? Reply with "Yes" or "No".'
        )
        llm_verdict = self._chat(judge_prompt, temperature=0).strip()

        return {
            "F1 Score": round(f1, 3),
            "ROUGE-L": round(rouge_l, 3),
            "Cosine Similarity": round(cosine, 3),
            "BERTScore": round(bert, 3),
            "LLM-as-a-Judge": llm_verdict
        }


Overwriting rag_module.py


In [7]:
%%writefile app.py
import streamlit as st
from rag_module import RAGModule

st.set_page_config(page_title="Whizzplore – Chat RAG UI", layout="wide")


@st.cache_resource(show_spinner="Loading embedding and reranking models...")
def load_rag(api_key):
    """Build the RAG pipeline once per API key.

    Streamlit re-executes this script on every widget interaction; caching
    the instance avoids constructing RAGModule (and whatever it loads) again
    on each rerun.
    """
    return RAGModule(api_key)


if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

st.title("📘 Whizzplore: Your AI-Powered RAG Chat")

groq_api_key = st.text_input("🔐 Groq API Key:", type="password")

if groq_api_key:
    rag = load_rag(groq_api_key)

    # --- Sidebar: document ingestion -------------------------------------
    with st.sidebar:
        st.header("📚 Ingest Documents")
        urls_input = st.text_area("Paste up to 10 URLs (one per line):", height=200)
        if st.button("Ingest URLs"):
            urls = [u.strip() for u in urls_input.splitlines() if u.strip()][:10]
            if not urls:
                st.warning("Please paste at least one URL.")
            else:
                with st.spinner("Scraping and indexing..."):
                    docs = rag.scrape_urls(urls)
                    rag.chunk_and_store(docs)
                st.success(f"{len(docs)} documents ingested ✅")

    # --- Sidebar: chat controls ------------------------------------------
    st.sidebar.markdown("---")
    st.sidebar.header("⚙️ Chat Controls")
    clear_history = st.sidebar.button("🗑️ Clear Chat History")
    history_filter = st.sidebar.text_input("Filter previous Q/A by keyword")

    if clear_history:
        st.session_state.chat_history = []

    # --- Question answering ----------------------------------------------
    question = st.text_input("❓ Ask a question based on ingested content:")
    if st.button("Generate Answer") and question.strip():
        try:
            with st.spinner("Retrieving context and generating answer..."):
                chunks = rag.retrieve(question)
                if chunks:
                    answer = rag.generate_answer(question, chunks)
                    metrics = rag.evaluate_answer(question, answer, chunks)
                    new_entry = {
                        "question": question,
                        "answer": answer,
                        "metrics": metrics
                    }
                else:
                    new_entry = None
        except Exception as exc:
            # Show network/API/model failures in the page instead of a raw traceback.
            st.error(f"Could not generate an answer: {exc}")
        else:
            if new_entry is None:
                st.warning("No ingested content found. Ingest some URLs first.")
            else:
                st.session_state.chat_history.append(new_entry)

    # --- Chat history ----------------------------------------------------
    st.markdown("---")
    st.header("💬 Chat History")
    history = st.session_state.chat_history
    # Match on the stripped keyword so stray spaces do not hide every entry.
    keyword = history_filter.strip().lower()
    if keyword:
        history = [
            entry for entry in history
            if keyword in entry["question"].lower()
            or keyword in entry["answer"].lower()
        ]

    if not history:
        st.info("No past conversations found.")
    else:
        # Newest conversation first.
        for entry in reversed(history):
            st.markdown(f"**Q:** {entry['question']}")
            st.markdown(f"**A:** {entry['answer']}")
            with st.expander("View evaluation metrics"):
                st.write(entry["metrics"])

else:
    st.info("Please enter your Groq API Key to begin.")


Writing app.py


In [12]:

from pyngrok import ngrok, conf
import time
import os

# Set authtoken
conf.get_default().auth_token = "30oXm17s9UmJtmQc7zhaRgWEHhk_6W4GkNki4mjHdhVHmdGjH"

# Kill any previous tunnel if running
ngrok.kill()

# Start Streamlit app
!streamlit run app.py &> /dev/null &

# Wait for the server to start
time.sleep(3)

# Start new tunnel
public_url = ngrok.connect(8501)
print(f"✅ Whizzplore is live at: {public_url}")



✅ Whizzplore is live at: NgrokTunnel: "https://ad83d325094d.ngrok-free.app" -> "http://localhost:8501"
