In [None]:
#1 Imports, CSV Loading, and Semantic Search Setup

import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import dspy
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Optimized for M2 Mac (or CPU)
huggingface_model = 'distilgpt2'
csv_path = "./151_ideas_updated.csv"

try:
    data = pd.read_csv(csv_path, on_bad_lines='skip')
    data = data.dropna().reset_index(drop=True)
    if 'Ideas' not in data.columns:
        raise ValueError("Column 'Ideas' not found in CSV.")
except FileNotFoundError:
    print(f"Error: CSV file not found at {csv_path}")
    exit(1)
except ValueError as e:
    print(f"Error loading CSV: {e}")
    exit(1)

# Semantic Search Setup (do this ONCE)
try:
    embedder = SentenceTransformer('all-mpnet-base-v2')
    idea_embeddings = embedder.encode(data['Ideas'].tolist())
except Exception as e:
    print(f"Error setting up embeddings: {e}")
    exit(1)

def get_relevant_context(query, top_k=2):
    # Encode the query using the embedder
    query_embedding = embedder.encode(query)
    
    # Calculate cosine similarities with precomputed idea embeddings
    similarities = util.cos_sim(query_embedding, idea_embeddings)[0]
    
    # Sort indices by descending similarity and fetch the top_k results
    top_indices = np.argsort(-similarities.numpy())[:top_k]  # Negate for descending order
    
    # Retrieve and return the top ideas from the data
    return [data.loc[i, 'Ideas'] for i in top_indices]




In [17]:
#2 Model Loading, DSPy Setup, and Chatbot Signature

try:
    tokenizer = AutoTokenizer.from_pretrained(huggingface_model)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    model = AutoModelForCausalLM.from_pretrained(
        huggingface_model,
        torch_dtype=torch.float16 if device == "mps" else torch.float32,
        low_cpu_mem_usage=True
    ).to(device)
    text_generator = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        device=device
    )
except Exception as e:
    print(f"Model loading error: {e}")
    import traceback
    traceback.print_exc()
    exit(1)

# Custom Language Model for DSPy
class SimpleLLM(dspy.LM):
    def __init__(self, generator):
        self.generator = generator

    def __call__(self, prompt, max_length=150, num_return_sequences=1, do_sample=True, temperature=0.7):
        try:
            responses = self.generator(
                prompt,
                max_length=max_length,
                num_return_sequences=num_return_sequences,
                do_sample=do_sample,
                temperature=temperature
            )
            return responses[0]['generated_text']
        except Exception as e:
            print(f"Generation error: {e}")
            return "I'm having trouble generating a response."


# Define chatbot signature with CoT prompting
class ChatbotSignature(dspy.Signature):
    """Generate a helpful and concise response to a user's query, using context from the dataset and thinking step by step."""
    query = dspy.InputField(desc="The user's query.")
    context = dspy.InputField(desc="Relevant context from the dataset.")
    response = dspy.OutputField(desc="A helpful and relevant answer, explained step by step.")

# Configure DSPy
dspy.settings.configure(lm=SimpleLLM(text_generator))

# Create prediction module
chatbot = dspy.Predict(ChatbotSignature)

In [18]:
#3 Interactive Chat Loop
def chat():
    print("DSPy Chatbot: Hello! I'm ready to help. Type 'exit' to quit.")
    
    simulated_inputs = iter([
        "Hello, what is DSPy?",  # Example inputs
        "Tell me more about full expression.",
        "exit"
    ])
    
    while True:
        try:
            user_input = next(simulated_inputs, 'exit')
            print(f"You: {user_input}")
            
            if user_input.lower() == 'exit':
                print("DSPy Chatbot: Goodbye!")
                break

            relevant_context = get_relevant_context(user_input)
            context_string = "\n".join(relevant_context)

            # Construct prompt
            enhanced_prompt = f"""Context from dataset:
{context_string}

User query: {user_input}

Let's think step by step to provide a helpful response:"""

            # Generate response
            response = chatbot(query=user_input, context=context_string)
            print("DSPy Chatbot:", response.response)

        except Exception as e:
            print(f"Chat error: {e}")


if __name__ == "__main__":
    chat()


DSPy Chatbot: Hello! I'm ready to help. Type 'exit' to quit.
You: Hello, what is DSPy?
Chat error: 'SimpleLLM' object has no attribute 'kwargs'
You: Tell me more about full expression.
Chat error: 'SimpleLLM' object has no attribute 'kwargs'
You: exit
DSPy Chatbot: Goodbye!
