This has been tested in a Colab environment.

In [None]:
import os
# Placeholder only: replace the string with a real OpenAI API key before
# running the rest of the notebook (never commit a real key).
# NOTE: this assignment unconditionally overwrites any OPENAI_API_KEY that is
# already present in the process environment.
os.environ["OPENAI_API_KEY"] = "enter-api-key-here"

In [None]:

# ======== Install dependencies ========
!pip install -q sentence-transformers faiss-cpu torch datasets evaluate rouge-score openai gradio

import os
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from openai import OpenAI
import gradio as gr

print("=== Step 1: Dependencies imported ===")

# ======== OpenAI Setup ========
# The key is taken from the OPENAI_API_KEY environment variable; when the
# variable is unset, a placeholder string is used instead.
openai_api_key = os.getenv("OPENAI_API_KEY", "your-api-key-here")
client = OpenAI(api_key=openai_api_key)
print("=== Step 2: OpenAI client configured ===")

# ======== Sample Training Documents ========
# In-memory knowledge base for the demo. Each key is a pseudo file name and
# each value is that document's text as a multi-line string. Nothing is read
# from disk here: the file names exist only as dictionary keys.
print("=== Step 3: Loading training documents ===")
enhanced_sample_texts = {
    "space_missions.txt": """
    The Apollo 11 mission launched on July 16, 1969, and landed the first humans on the Moon on July 20, 1969.
    The crew consisted of exactly three astronauts: Neil Armstrong (Commander), Buzz Aldrin (Lunar Module Pilot),
    and Michael Collins (Command Module Pilot). Neil Armstrong was the first person to walk on the Moon,
    followed by Buzz Aldrin. Michael Collins remained in lunar orbit aboard the command module Columbia.
    The mission lasted 8 days, 3 hours, 18 minutes, and 35 seconds. There was no fourth crew member on Apollo 11.
    """,
    "landmarks_architecture.txt": """
    The Eiffel Tower is a wrought-iron lattice tower located on the Champ de Mars in Paris, France.
    Construction began in 1887 and was completed in 1889 for the 1889 World's Fair.
    """,
    "programming_technologies.txt": """
    Python was created by Guido van Rossum and first released in 1991.
    It emphasizes code readability with its notable use of significant whitespace.
    """,
    "science_discoveries.txt": """
    Penicillin was discovered by Alexander Fleming in 1928 when he noticed that a mold had killed bacteria in his lab.
    """,
    "historical_events.txt": """
    World War II lasted from 1939 to 1945 and involved most of the world's nations.
    The war ended with the surrender of Germany on May 8, 1945 (Victory in Europe Day)
    and Japan on August 15, 1945, following the atomic bombings of Hiroshima and Nagasaki.
    """
}
# Reports the number of documents (dictionary entries), not the number of lines.
print(f"=== Step 3 Complete: Loaded {len(enhanced_sample_texts)} documents ===")

# ======== Prepare Corpus and FAISS Index ========
print("=== Step 4: Loading SentenceTransformer embedding model ===")
embedder = SentenceTransformer("all-MiniLM-L6-v2")
print("Embedding model loaded.")

print("=== Step 5: Preparing corpus for FAISS index ===")
# One retrieval chunk per non-blank line of each document. Every chunk is
# paired with the name of the document it came from so that `corpus` and
# `sources` stay index-aligned.
labelled_chunks = [
    (stripped, doc_name)
    for doc_name, body in enhanced_sample_texts.items()
    for stripped in map(str.strip, body.strip().split("\n"))
    if stripped
]
corpus = [chunk for chunk, _ in labelled_chunks]
sources = [doc_name for _, doc_name in labelled_chunks]
print(f"Corpus prepared with {len(corpus)} lines.")

print("=== Step 6: Generating embeddings for corpus ===")
embeddings = embedder.encode(corpus, convert_to_numpy=True)
print("Embeddings generated.")

print("=== Step 7: Creating FAISS index ===")
# The index dimensionality is taken from the second axis of the embeddings.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)
print("FAISS index created and embeddings added.")

# ======== RAG Retrieval + OpenAI Chat Function ========
VERBOSE = True  # When true, chat_fn prints the retrieved chunks for each query.

# (question, answer) tuples appended by every chat_fn call, oldest first.
# NOTE(review): this is module-level state and nothing in this block ever
# clears it, so every caller of chat_fn in this process shares one history.
chat_history = []

_HISTORY_TURNS = 3  # previous turns fed into both retrieval and the prompt
_TOP_K = 5  # maximum number of corpus chunks retrieved per query


def chat_fn(user_query, chat_history_state=None):
    """Answer ``user_query`` using retrieval-augmented generation.

    The last few turns from the module-level ``chat_history`` are combined
    with the current query to search the FAISS ``index``. The retrieved
    chunks are then sent to the chat model as context together with those
    previous turns.

    Args:
        user_query: The user's current question.
        chat_history_state: Accepted so the function can be called with a
            second positional argument (as done by the chat UI); it is not
            used. Conversation memory comes from the global ``chat_history``.

    Returns:
        The model's answer with surrounding whitespace removed, or an empty
        string if the model returned no text content.

    Side effects:
        Appends ``(user_query, answer)`` to the global ``chat_history`` and,
        when ``VERBOSE`` is true, prints the retrieved chunks to stdout.
    """
    recent_turns = chat_history[-_HISTORY_TURNS:]

    # Previous questions/answers rendered for the LLM prompt.
    history_text = "".join(
        f"Previous Q: {q}\nPrevious A: {a}\n" for q, a in recent_turns
    )

    # Retrieval query: the recent turns followed by the current question.
    retrieval_query = " ".join(
        [f"{q} {a}" for q, a in recent_turns] + [user_query]
    )

    # Never request more neighbours than there are chunks: FAISS fills the
    # missing slots with index -1, and corpus[-1] would silently return the
    # last chunk as if it were a real hit.
    top_k = min(_TOP_K, len(corpus))
    retrieved_chunks = []
    if top_k > 0:
        q_emb = embedder.encode([retrieval_query], convert_to_numpy=True)
        distances, indices = index.search(q_emb, k=top_k)
        retrieved_chunks = [
            (corpus[i], sources[i], distances[0][rank])
            for rank, i in enumerate(indices[0])
            if i >= 0  # defensive: skip FAISS "no result" padding
        ]
    context_text = "\n".join(
        f"[{src}] {chunk}" for chunk, src, _ in retrieved_chunks
    )

    if VERBOSE:
        log_lines = [
            f"\n=== Processing Query: {user_query} ===",
            "Retrieved Chunks (with L2 distance scores):",
        ]
        log_lines.extend(
            f"[{src}] Score: {score:.4f} | {chunk}"
            for chunk, src, score in retrieved_chunks
        )
        # Flush so the log appears immediately in notebook output.
        print("\n".join(log_lines), flush=True)

    # LLM prompt
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Only answer based on the provided context. If the context does not contain the answer, say 'I don't know.'"},
        {"role": "user", "content": f"{history_text}\nCurrent Query: {user_query}\nContext:\n{context_text}"}
    ]
    response = client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        temperature=0,
        max_tokens=300
    )

    # The message content may be None; treat that as an empty answer instead
    # of raising AttributeError on .strip().
    content = response.choices[0].message.content
    answer = (content or "").strip()

    # Update global chat history
    chat_history.append((user_query, answer))

    return answer


# ======== Launch Gradio Interface ========
print("=== Step 8: Creating Gradio ChatInterface ===")
model_name = "gpt-4"  # shown in the UI title only

# Example questions offered in the chat UI.
example_questions = [
    "Who were the astronauts on Apollo 11?",
    "When was the Eiffel Tower built?",
    "Who created Python?",
    "When did World War II end?",
]

# Custom stylesheet passed to the interface.
chatbox_css = """
    .gradio-chatbox {
        min-height: 500px;  /* increase height as desired */
    }
  """

chat_interface = gr.ChatInterface(
    fn=chat_fn,
    title=f"Multi-Turn RAG Demo ({model_name})",
    description="Ask questions about space missions, landmarks, programming, science, or historical events. The system will show retrieved chunks and provide answers.",
    examples=example_questions,
    theme="soft",
    css=chatbox_css,
)
print("✓ Gradio interface created")

print("\n=== Step 9: Launching Gradio app ===")
# Only start the server when this file is executed as the main module.
if __name__ == "__main__":
    print("🚀 Starting Gradio server...")
    chat_interface.launch(share=True, debug=True)