In [None]:
# works responses not great - better rag needed
# Install required packages
!pip install langchain huggingface_hub pandas faiss-cpu numpy transformers torch accelerate bitsandbytes gradio

import pandas as pd
import numpy as np
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr

def _parse_embedding(raw):
    """Convert one serialised vector ("[0.1, 0.2]" or "[0.1 0.2]") to a float32 array."""
    # Commas are treated as whitespace so list-style and NumPy-style dumps both
    # parse; str.split() also swallows newlines from wrapped multi-line dumps.
    tokens = str(raw).strip().strip("[]").replace(",", " ").split()
    return np.array([float(tok) for tok in tokens], dtype=np.float32)


def load_embeddings(csv_path, expected_dim=768):
    """Load idea texts and their precomputed embedding vectors from a CSV file.

    The CSV must provide a 'Cleaned_Ideas' column (the text) and an
    'Embeddings' column in which every vector is stored as a bracketed string,
    either comma-separated ("[0.1, 0.2]") or whitespace-separated
    ("[0.1 0.2]", possibly wrapped across several lines).

    Args:
        csv_path: Location of the CSV file (anything ``pandas.read_csv`` accepts).
        expected_dim: Required length of every embedding vector. Defaults to
            768, the dimensionality the original code expects for RoBERTa-base.

    Returns:
        A ``(texts, embeddings)`` tuple: the list of 'Cleaned_Ideas' values and
        a float32 NumPy array holding one embedding row per text.

    Raises:
        ValueError: If a required column is absent, a row has a missing text or
            embedding, an embedding cannot be parsed as numbers, or its length
            differs from ``expected_dim``.
    """
    try:
        df = pd.read_csv(csv_path)

        # Verify required columns exist
        if {"Cleaned_Ideas", "Embeddings"} - set(df.columns):
            raise ValueError("CSV must contain 'Cleaned_Ideas' and 'Embeddings' columns")

        # Empty cells are read as NaN; report them explicitly instead of
        # failing later with an unrelated AttributeError on a float.
        incomplete = int(df[["Cleaned_Ideas", "Embeddings"]].isna().any(axis=1).sum())
        if incomplete:
            raise ValueError(f"{incomplete} rows have a missing text or embedding")

        vectors = [_parse_embedding(raw) for raw in df["Embeddings"]]

        # Every vector must have the dimensionality of the embedding model.
        invalid_count = sum(len(vec) != expected_dim for vec in vectors)
        if invalid_count:
            raise ValueError(f"{invalid_count} entries have invalid embedding dimensions")

        return df["Cleaned_Ideas"].tolist(), np.array(vectors, dtype=np.float32)

    except Exception as e:
        # Surface the problem in the notebook output, then let the caller see it.
        print(f"Error loading embeddings: {str(e)}")
        raise

# Step 1: read the idea texts and their precomputed vectors from the CSV export.
texts, embeddings = load_embeddings("ideas_with_embeddings.csv")

# Step 2: build the FAISS vector store. The roberta-base embedding model is
# attached to the store; the index itself is filled from the precomputed vectors.
# NOTE(review): this presumes the CSV vectors were produced with the same
# roberta-base embedding setup that is configured here -- confirm.
try:
    embedding_model = HuggingFaceEmbeddings(
        model_name="roberta-base",
        # "show_progress_bar" is deliberately not passed (it caused duplicate
        # parameter issues).
        encode_kwargs={"normalize_embeddings": False},
        model_kwargs={"device": "cpu"},
    )

    # The index is populated from (text, embedding) pairs.
    vector_store = FAISS.from_embeddings(
        text_embeddings=[(text, vector) for text, vector in zip(texts, embeddings)],
        embedding=embedding_model,
        normalize_L2=True,  # Improves cosine similarity calculations
    )
    print(f"FAISS index created with {vector_store.index.ntotal} entries")

except Exception as err:
    # Report the failure in the cell output and propagate it unchanged.
    print(f"Vector store creation failed: {err}")
    raise

# 3. Load a smaller Hugging Face model in safetensors format (EleutherAI/gpt-neo-125M)
model_name = "EleutherAI/gpt-neo-125M"
try:
    # For a smaller model, we typically use full precision
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # trust_remote_code is intentionally NOT enabled: GPT-Neo is implemented in
    # the transformers library itself, so no code from the Hub repository has to
    # be executed to load it.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
    )
    print("GPT-Neo 125M model loaded successfully.")

except Exception as e:
    # Report the failure in the cell output and propagate it unchanged.
    print(f"Model loading failed: {str(e)}")
    raise

# Text-generation pipeline around the loaded model, then its LangChain wrapper.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Sampling configuration
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    # Output shaping
    max_new_tokens=256,
    return_full_text=False,
    # The EOS token id is reused as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)

# Expose the pipeline to LangChain as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# 4. Prompt template with two placeholders: {context} and {question}.
# The text is assembled line by line so that every newline (and the trailing
# space after "context.") is explicit. It ends with the "### Response:"
# marker followed by a newline.
template = (
    "### Instruction:\n"
    "Analyze this philosophical concept using the provided context. \n"
    "If unsure, state \"I don't have sufficient information.\"\n"
    "\n"
    "### Context:\n"
    "{context}\n"
    "\n"
    "### Question:\n"
    "{question}\n"
    "\n"
    "### Response:\n"
)

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
    template_format="f-string",
)

# 5. Retrieval QA chain ("stuff" type) over the FAISS store.

# Each retrieved document is rendered as its raw page content.
document_prompt = PromptTemplate(
    template="{page_content}",
    input_variables=["page_content"],
)

# NOTE(review): "score_threshold" is passed together with
# search_type="similarity"; confirm it filters as intended, or whether
# "similarity_score_threshold" was meant.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5, "score_threshold": 0.4},
    search_type="similarity",
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt, "document_prompt": document_prompt},
    return_source_documents=True,  # process_query reads result['source_documents']
    verbose=True,
)

# 6. Function to process queries for Gradio interface
def process_query(query):
    """Answer *query* with the QA chain and describe the retrieved sources.

    Args:
        query: The user's question. ``None``, empty and whitespace-only values
            are rejected without calling the chain.

    Returns:
        A ``(response, sources_markdown)`` tuple of strings. For a rejected
        query or when the chain raises, the first element carries the message
        and the second is an empty string.
    """
    if query is None or not query.strip():
        return "Please enter a valid question", ""

    try:
        # .invoke is the supported entry point; calling the chain object
        # directly is deprecated in current LangChain releases.
        result = qa_chain.invoke({"query": query})

        # Keep only the text after the response marker, in case the prompt
        # is echoed back in the generated text.
        response = result['result'].split("### Response:")[-1].strip()

        sources_info = _format_sources(result.get('source_documents') or [])

        return response, sources_info

    except Exception as e:
        # UI boundary: show the error to the user instead of crashing the app.
        return f"Error processing request: {str(e)}", ""


def _format_sources(documents, limit=3, excerpt_len=150):
    """Render up to *limit* retrieved documents as a Markdown "Sources" section."""
    header = "\n\n**Sources:**\n"
    if not documents:
        return header + "No sources were retrieved for this question.\n"

    entries = []
    for i, doc in enumerate(documents[:limit], 1):
        content = doc.page_content.replace("\n", " ")
        excerpt = content[:excerpt_len]
        if len(content) > excerpt_len:
            # Only mark the excerpt as shortened when text was actually cut.
            excerpt += "..."

        metadata = doc.metadata or {}
        if 'score' in metadata:
            entries.append(f"{i}. {excerpt} (Score: {metadata['score']:.2f})\n\n")
        else:
            # No score stored on the document: omit it rather than print 0.00.
            entries.append(f"{i}. {excerpt}\n\n")

    return header + "".join(entries)

# 7. Create Gradio interface
def create_gradio_interface():
    """Assemble the Gradio Blocks UI for the QA system and return it.

    The layout has a question box with a submit button, a response panel next
    to a sources panel, and a list of example questions. Clicking the button
    sends the question to ``process_query`` and shows its two return values in
    the two panels. The interface is only built here, not launched.

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    sample_questions = [
        ["What is the concept of dualism?"],
        ["Explain Kant's categorical imperative."],
        ["How does existentialism view freedom?"],
    ]

    with gr.Blocks(title="Philosophical Concepts RAG") as demo:
        gr.Markdown("# Philosophical Concepts RAG System")
        gr.Markdown("Ask questions about philosophical concepts and get answers based on the knowledge base.")

        # Input row: question box and submit button share one column.
        with gr.Row(), gr.Column(scale=4):
            question_box = gr.Textbox(
                lines=2,
                label="Your Question",
                placeholder="Enter your philosophical question here...",
            )
            ask_button = gr.Button("Submit", variant="primary")

        # Output row: answer on the left, retrieved sources on the right.
        with gr.Row():
            with gr.Column(scale=3):
                answer_panel = gr.Markdown(label="Response")
            with gr.Column(scale=2):
                sources_panel = gr.Markdown(label="Sources")

        ask_button.click(
            fn=process_query,
            inputs=[question_box],
            outputs=[answer_panel, sources_panel],
        )

        gr.Markdown("## Examples")
        # Example questions are loaded into the question box.
        gr.Examples(examples=sample_questions, inputs=question_box)

    return demo

# 8. Run the chat interface with Gradio
# The guard keeps the UI from being built and launched when this file is
# imported as a module rather than executed directly.
if __name__ == "__main__":
    # Build the Blocks UI; nothing is served until launch() is called.
    demo = create_gradio_interface()
    # NOTE(review): share=True requests a public link, so the app becomes
    # reachable from outside this machine -- confirm this is intended.
    demo.launch(share=True)  # share=True creates a public link

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Using cached gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Using cached groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Collecting pydub (from gradio)
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Using cached python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Using cached safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Using cached sema