<a href="https://colab.research.google.com/github/joashlsk/mm-rag-test/blob/main/main_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
from pinecone import Pinecone
from groq import Groq
from fastembed import TextEmbedding
import time

# --- CONFIGURATION (Secrets) ---
# The API keys are read from st.secrets below; never hardcode them in this file.
#   * Streamlit Cloud: add GROQ_API_KEY and PINECONE_API_KEY in the app's "Secrets" menu.
#   * Local testing: put them in .streamlit/secrets.toml (keep that file out of GitHub!):
#       GROQ_API_KEY = "your-groq-key"
#       PINECONE_API_KEY = "your-pinecone-key"
# Setting os.environ here would not help: this script only looks at st.secrets.

# --- 1. SETUP UI ---
# Page config is applied before any other Streamlit call in the script.
st.set_page_config(page_title="Project Qwen RAG", layout="wide")
st.title("🤖 Qwen 2.5-32B Enterprise RAG")
st.markdown("Running on **Groq** (LLM) + **Pinecone** (Vector DB) + **Streamlit** (UI)")

# --- 2. INITIALIZE CLIENTS ---
# Every failure path ends in st.stop(): the rest of the script needs
# `groq_client` and `index`, so continuing without them would only trade a
# clear message for a NameError further down.

# Initialize Groq (The Brain)
try:
    groq_api_key = st.secrets["GROQ_API_KEY"]
except (KeyError, FileNotFoundError):
    # KeyError: the secret is not defined. FileNotFoundError: presumably no
    # secrets file exists at all -- TODO confirm against the Streamlit version in use.
    st.error("🔑 Groq API Key missing! Set it in Streamlit Secrets.")
    st.stop()
groq_client = Groq(api_key=groq_api_key)

# Initialize Pinecone (The Memory)
index_name = "rag-index"
try:
    pinecone_api_key = st.secrets["PINECONE_API_KEY"]
except (KeyError, FileNotFoundError):
    st.error("🔑 Pinecone API Key missing! Set it in Streamlit Secrets.")
    st.stop()

# Connection problems (bad key, network, ...) are reported as such instead of
# being mislabelled as a missing key.
try:
    pc = Pinecone(api_key=pinecone_api_key)
    index_exists = index_name in pc.list_indexes().names()
except Exception as exc:
    st.error(f"Could not connect to Pinecone: {exc}")
    st.stop()

# Check if index exists, connect to it
if not index_exists:
    st.warning(f"Index '{index_name}' not found. Please create it in Pinecone Console (Dim: 384).")
    st.stop()
index = pc.Index(index_name)

# Initialize Embeddings (The Translator)
# FastEmbed is used because it runs on CPU (free) and avoids downloading big models.
@st.cache_resource
def get_embedding_model():
    """Build the FastEmbed text-embedding model (BAAI/bge-small-en-v1.5).

    Decorated with st.cache_resource so the model object is created once and
    reused rather than rebuilt each time the script runs.
    """
    model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
    return model


embed_model = get_embedding_model()

# --- 3. SIDEBAR: DATA INGESTION ---
# Reads an uploaded .txt file, splits it into fixed-size chunks, embeds each
# chunk and upserts the vectors (with the chunk text as metadata) into Pinecone.
CHUNK_SIZE = 500          # characters per chunk (simple, non-overlapping)
UPSERT_BATCH_SIZE = 100   # vectors per upsert request, keeps each request small

with st.sidebar:
    st.header("Upload Knowledge")
    uploaded_file = st.file_uploader("Upload a .txt file", type="txt")

    if uploaded_file and st.button("Ingest Data"):
        with st.spinner("Embedding & Indexing..."):
            # 1. Read Text
            # "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM,
            # so the marker never ends up inside the first chunk.
            try:
                text = uploaded_file.read().decode("utf-8-sig")
            except UnicodeDecodeError:
                text = ""
                st.error("Could not read the file: it is not valid UTF-8 text.")
            else:
                if not text:
                    st.warning("The uploaded file is empty; nothing to index.")

            # Simple chunking; an empty text yields no chunks at all.
            chunks = [text[i:i + CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE)]

            if chunks:
                # 2. Embed Text (Turn into numbers)
                embeddings = list(embed_model.embed(chunks))

                # 3. Upload to Pinecone
                # One timestamp per ingestion run keeps IDs from different
                # uploads apart; the chunk position keeps them unique within a run.
                ingest_stamp = int(time.time())
                vectors = [
                    {
                        "id": f"chunk_{i}_{ingest_stamp}",
                        "values": emb.tolist(),
                        "metadata": {"text": chunk},
                    }
                    for i, (chunk, emb) in enumerate(zip(chunks, embeddings))
                ]

                # Upsert in batches so a large file is not sent as one huge request.
                for start in range(0, len(vectors), UPSERT_BATCH_SIZE):
                    index.upsert(vectors=vectors[start:start + UPSERT_BATCH_SIZE])
                st.success(f"✅ Indexed {len(chunks)} chunks!")

# --- 4. MAIN CHAT INTERFACE ---
# The conversation is kept in session state as a list of
# {"role": ..., "content": ...} dicts; create it when it is not there yet.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay every stored turn so the whole conversation is shown on each run.
for past_turn in st.session_state["messages"]:
    role, content = past_turn["role"], past_turn["content"]
    with st.chat_message(role):
        st.markdown(content)

# Handle User Input
# One chat turn: record and show the question, retrieve the closest chunks
# from Pinecone, ask the Groq-hosted model to answer using them, then show
# and record the answer. Any failure is shown as an error instead of
# aborting the run with a traceback.
if prompt := st.chat_input("Ask about your uploaded data..."):
    # 1. Show User Message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    response = None
    failure = None
    with st.spinner("Thinking..."):
        try:
            # 2. RETRIEVAL (The "R" in RAG)
            # Embed the query
            query_embedding = list(embed_model.embed([prompt]))[0].tolist()

            # Search Pinecone
            search_results = index.query(
                vector=query_embedding,
                top_k=3,
                include_metadata=True,
            )

            # Combine Context (empty string when nothing matched)
            context_text = "\n\n".join(
                match['metadata']['text'] for match in search_results['matches']
            )

            # 3. GENERATION (The "G" in RAG) using Qwen 2.5
            messages = [
                {"role": "system", "content": "You are a helpful assistant. Use the Context below to answer the user."},
                {"role": "user", "content": f"Context: {context_text}\n\nQuestion: {prompt}"},
            ]

            chat_completion = groq_client.chat.completions.create(
                messages=messages,
                # NOTE(review): confirm this model id is still served by Groq.
                model="qwen-2.5-32b",
                temperature=0.5,
                max_tokens=1024,
            )

            # Fall back to an empty string so a missing content field never
            # reaches st.markdown or the stored history as None.
            response = chat_completion.choices[0].message.content or ""
        except Exception as exc:
            # UI boundary: report the problem rather than crash the script run.
            failure = exc

    # 4. Show AI Response
    with st.chat_message("assistant"):
        if failure is None:
            st.markdown(response)
        else:
            st.error(f"Could not generate an answer: {failure}")
    # Only successful answers are stored in the history.
    if failure is None:
        st.session_state.messages.append({"role": "assistant", "content": response})

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m4.5/4.5 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[?25h‚úÖ Libraries Installed.
