In [None]:
# Old version - pop up
# Works, but the responses are not great - better RAG is needed
# Install required packages
!pip install langchain huggingface_hub pandas faiss-cpu numpy transformers torch accelerate bitsandbytes

import pandas as pd
import numpy as np
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Brackets and commas are treated as plain whitespace when tokenizing a cell.
_EMBEDDING_SEPARATORS = str.maketrans("[],", "   ")


def _parse_embedding(raw):
    """Convert one serialized embedding cell into a 1-D float32 array.

    Accepts list-style text such as "[0.1, 0.2]" as well as NumPy-style text
    where values are separated by spaces and line breaks: brackets and commas
    are replaced by spaces and the remaining tokens are parsed as floats.

    Raises:
        ValueError: if the cell is not a string or a token is not a number.
    """
    if not isinstance(raw, str):
        # e.g. an empty CSV cell, which pandas reads back as NaN
        raise ValueError(f"Embedding cell is not a string: {raw!r}")
    tokens = raw.translate(_EMBEDDING_SEPARATORS).split()
    return np.array([float(tok) for tok in tokens], dtype=np.float32)


def load_embeddings(csv_path, expected_dim=768):
    """Load texts and their precomputed embeddings from a CSV file.

    The CSV must contain a 'Cleaned_Ideas' column (the texts) and an
    'Embeddings' column holding each vector serialized as text.

    Args:
        csv_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
        expected_dim: Required length of every embedding vector. Defaults to
            768, the value the original script validated against.

    Returns:
        A tuple ``(texts, embeddings)`` where ``texts`` is the list of
        'Cleaned_Ideas' values and ``embeddings`` is a float32 array of shape
        ``(len(texts), expected_dim)``.

    Raises:
        ValueError: if a required column is missing, a cell cannot be parsed,
            or any vector does not have ``expected_dim`` entries.
        Exception: any error raised by ``pandas.read_csv`` is printed and
            re-raised unchanged.
    """
    try:
        df = pd.read_csv(csv_path)

        # Verify required columns exist
        if 'Cleaned_Ideas' not in df.columns or 'Embeddings' not in df.columns:
            raise ValueError("CSV must contain 'Cleaned_Ideas' and 'Embeddings' columns")

        vectors = [_parse_embedding(cell) for cell in df['Embeddings']]

        # Every vector must have the same, expected length before stacking.
        invalid_count = sum(1 for vec in vectors if len(vec) != expected_dim)
        if invalid_count:
            raise ValueError(f"{invalid_count} entries have invalid embedding dimensions")

        # reshape keeps the result 2-D even when the CSV has no data rows.
        matrix = np.asarray(vectors, dtype=np.float32).reshape(len(vectors), expected_dim)
        return df['Cleaned_Ideas'].tolist(), matrix

    except Exception as e:
        # Report the problem for interactive use, then let the caller decide.
        print(f"Error loading embeddings: {str(e)}")
        raise

# 1. Read the idea texts and their precomputed vectors from the CSV file
texts, embeddings = load_embeddings("ideas_with_embeddings.csv")

# 2. Build the FAISS vector store from the precomputed vectors. The Hugging Face
#    embedding model (roberta-base) is handed to the store alongside them.
try:
    embedding_model = HuggingFaceEmbeddings(
        # "show_progress_bar" is intentionally not passed here, to avoid
        # duplicate parameter issues.
        encode_kwargs={'normalize_embeddings': False},
        model_kwargs={'device': 'cpu'},
        model_name="roberta-base",
    )

    # The index is built from (text, embedding) pairs, one per CSV row.
    vector_store = FAISS.from_embeddings(
        embedding=embedding_model,
        text_embeddings=[(text, vector) for text, vector in zip(texts, embeddings)],
        normalize_L2=True,  # Improves cosine similarity calculations
    )
    print(f"FAISS index created with {vector_store.index.ntotal} entries")

except Exception as e:
    # Report and propagate: nothing downstream can work without the index.
    print(f"Vector store creation failed: {str(e)}")
    raise

# 3. Load a smaller Hugging Face model in safetensors format (EleutherAI/gpt-neo-125M)
model_name = "EleutherAI/gpt-neo-125M"
try:
    # For a smaller model, we typically use full precision
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # GPT-Neo is a built-in transformers architecture, so no code from the model
    # repository has to be executed. trust_remote_code is therefore left at its
    # default (off) rather than allowing repository code to run locally.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
    )
    print("GPT-Neo 125M model loaded successfully.")

except Exception as e:
    # Report and propagate: the pipeline below cannot be built without a model.
    print(f"Model loading failed: {str(e)}")
    raise

# Text-generation pipeline built on the model and tokenizer loaded above
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Output handling
    return_full_text=False,
    max_new_tokens=256,
    # Sampling settings
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    # The EOS token id doubles as the padding token id
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the pipeline so it can be used as a LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)

# 4. Custom prompt template with instruction, context, question and response
#    sections. It is written line by line so that significant whitespace (such
#    as the trailing space on the second line) stays visible.
template = (
    "### Instruction:\n"
    "Analyze this philosophical concept using the provided context. \n"
    "If unsure, state \"I don't have sufficient information.\"\n"
    "\n"
    "### Context:\n"
    "{context}\n"
    "\n"
    "### Question:\n"
    "{question}\n"
    "\n"
    "### Response:\n"
)

# {context} and {question} are the two placeholders filled in per query.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template_format="f-string",
    template=template,
)

# 5. Retrieval QA chain: retrieve documents from the vector store, "stuff" them
#    into the prompt's {context} slot, and ask the LLM.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_store.as_retriever(
        # NOTE(review): how score_threshold is applied under
        # search_type="similarity" appears to depend on the installed
        # LangChain version - confirm it filters as intended.
        search_kwargs=dict(k=5, score_threshold=0.4),
        search_type="similarity",
    ),
    chain_type="stuff",
    chain_type_kwargs=dict(
        prompt=prompt,
        # Each retrieved document is inserted as its raw text only.
        document_prompt=PromptTemplate(
            template="{page_content}",
            input_variables=["page_content"],
        ),
    ),
    # Return the retrieved documents together with the answer.
    return_source_documents=True,
    verbose=True,
)

# 6. Enhanced chat interface for interactive querying
def run_chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Reads one question per line, sends it to the module-level ``qa_chain`` and
    prints the answer followed by up to three source excerpts. The loop ends
    when the user types 'exit' or 'quit', presses Ctrl-C, or when stdin is
    closed (end of input). Errors raised while answering a question are
    printed and the loop continues with the next question.

    Returns:
        None.
    """
    # Section header that ends the prompt template; anything before it in the
    # model output is echoed prompt text rather than the answer.
    response_marker = "### Response:"

    print("Chatbot initialized. Type 'exit' to quit.")
    while True:
        try:
            query = input("\nUser: ").strip()
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: once stdin is exhausted every further
            # input() call fails again, which would otherwise spin forever.
            print("\nExiting chat.")
            break

        if query.lower() in ("exit", "quit"):
            break

        if not query:
            print("Please enter a valid question")
            continue

        try:
            result = qa_chain({"query": query})

            # Keep only the text after the template's response header
            response = result['result'].split(response_marker)[-1].strip()
            print(f"\nAssistant: {response}")

            # Display top source excerpts
            print("\nTop Sources:")
            for i, doc in enumerate(result['source_documents'][:3], 1):
                content = doc.page_content
                excerpt = content[:150].replace("\n", " ")
                if len(content) > 150:
                    # Only mark the excerpt as truncated when it really is.
                    excerpt += "..."
                # A score is shown only when the document actually carries one,
                # instead of printing a misleading default of 0.00.
                score = doc.metadata.get('score')
                if score is None:
                    print(f"{i}. {excerpt}")
                else:
                    print(f"{i}. {excerpt} (Score: {score:.2f})")

        except KeyboardInterrupt:
            print("\nExiting chat.")
            break
        except Exception as e:
            # Keep the session alive after a failed request.
            print(f"Error processing request: {str(e)}")


if __name__ == "__main__":
    run_chat()
