In [None]:
#!/usr/bin/env python3
"""
Text Embedding Generator

This script generates and combines embeddings from multiple models (Ollama API and SentenceTransformer)
for text data, optimized for modern Python environments in 2025.

Features:
- Asynchronous and parallel processing
- GPU acceleration
- Robust error handling and logging
- Configurable parameters via environment variables or config files
- Progress tracking
"""

import os
import pickle
import time
import logging
import asyncio
import json
from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union, Any

import httpx
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from tqdm.asyncio import tqdm_asyncio
import yaml
from contextlib import asynccontextmanager

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("embedding_generator.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("embedding_generator")

@dataclass
class EmbeddingConfig:
    """Configuration for embedding generation process."""
    input_file: Path
    output_file: Path
    ollama_model: str = "distilroberta"
    ollama_url: str = "http://localhost:11434/api/embeddings"
    sentence_transformer_model: str = "all-MiniLM-L6-v2"
    batch_size: int = 32
    max_retries: int = 3
    retry_delay: float = 1.0
    use_gpu: bool = True
    num_workers: int = 4
    timeout: int = 60  # HTTP request timeout in seconds
    cache_dir: Optional[Path] = None
    save_interval: int = 100  # Save progress every N items
    
    @classmethod
    def from_yaml(cls, config_path: Union[str, Path]) -> "EmbeddingConfig":
        """Load configuration from YAML file."""
        config_path = Path(config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")
        
        with open(config_path, "r") as f:
            config_data = yaml.safe_load(f)
        
        # Convert string paths to Path objects
        for path_field in ["input_file", "output_file", "cache_dir"]:
            if path_field in config_data and config_data[path_field]:
                config_data[path_field] = Path(config_data[path_field])
        
        return cls(**config_data)

    @classmethod
    def from_env(cls) -> "EmbeddingConfig":
        """Load configuration from environment variables."""
        return cls(
            input_file=Path(os.getenv("EMBEDDING_INPUT_FILE", "input.pkl")),
            output_file=Path(os.getenv("EMBEDDING_OUTPUT_FILE", "embeddings.pkl")),
            ollama_model=os.getenv("EMBEDDING_OLLAMA_MODEL", "distilroberta"),
            ollama_url=os.getenv("EMBEDDING_OLLAMA_URL", "http://localhost:11434/api/embeddings"),
            sentence_transformer_model=os.getenv("EMBEDDING_SENTENCE_TRANSFORMER_MODEL", "all-MiniLM-L6-v2"),
            batch_size=int(os.getenv("EMBEDDING_BATCH_SIZE", "32")),
            max_retries=int(os.getenv("EMBEDDING_MAX_RETRIES", "3")),
            retry_delay=float(os.getenv("EMBEDDING_RETRY_DELAY", "1.0")),
            use_gpu=os.getenv("EMBEDDING_USE_GPU", "true").lower() in ("true", "1", "yes"),
            num_workers=int(os.getenv("EMBEDDING_NUM_WORKERS", "4")),
            timeout=int(os.getenv("EMBEDDING_HTTP_TIMEOUT", "60")),
            cache_dir=Path(os.getenv("EMBEDDING_CACHE_DIR")) if os.getenv("EMBEDDING_CACHE_DIR") else None,
            save_interval=int(os.getenv("EMBEDDING_SAVE_INTERVAL", "100")),
        )


class EmbeddingCache:
    """Cache for storing and retrieving embeddings."""
    
    def __init__(self, cache_dir: Optional[Path] = None):
        self.cache_dir = cache_dir
        self.in_memory_cache = {}
        
        if cache_dir:
            cache_dir.mkdir(parents=True, exist_ok=True)
            self.ollama_cache_path = cache_dir / "ollama_cache.pkl"
            self.st_cache_path = cache_dir / "sentence_transformer_cache.pkl"
            
            # Load existing caches if they exist
            self.ollama_cache = self._load_cache(self.ollama_cache_path)
            self.st_cache = self._load_cache(self.st_cache_path)
        else:
            self.ollama_cache = {}
            self.st_cache = {}
    
    def _load_cache(self, path: Path) -> Dict[str, np.ndarray]:
        """Load cache from disk if it exists."""
        if path.exists():
            try:
                with open(path, "rb") as f:
                    return pickle.load(f)
            except Exception as e:
                logger.warning(f"Failed to load cache from {path}: {e}")
        return {}
    
    def save_cache(self) -> None:
        """Save caches to disk."""
        if not self.cache_dir:
            return
        
        try:
            with open(self.ollama_cache_path, "wb") as f:
                pickle.dump(self.ollama_cache, f)
            
            with open(self.st_cache_path, "wb") as f:
                pickle.dump(self.st_cache, f)
                
            logger.info(f"Caches saved to {self.cache_dir}")
        except Exception as e:
            logger.error(f"Failed to save caches: {e}")
    
    def get_ollama_embedding(self, text: str) -> Optional[np.ndarray]:
        """Get Ollama embedding from cache."""
        return self.ollama_cache.get(text)
    
    def set_ollama_embedding(self, text: str, embedding: np.ndarray) -> None:
        """Set Ollama embedding in cache."""
        self.ollama_cache[text] = embedding
    
    def get_st_embedding(self, text: str) -> Optional[np.ndarray]:
        """Get SentenceTransformer embedding from cache."""
        return self.st_cache.get(text)
    
    def set_st_embedding(self, text: str, embedding: np.ndarray) -> None:
        """Set SentenceTransformer embedding in cache."""
        self.st_cache[text] = embedding


class EmbeddingGenerator:
    """Generates and combines embeddings from multiple models."""
    
    def __init__(self, config: EmbeddingConfig):
        self.config = config
        self.cache = EmbeddingCache(config.cache_dir)
        
        # Initialize SentenceTransformer
        device = "cuda" if config.use_gpu and torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device} for SentenceTransformer")
        self.st_model = SentenceTransformer(config.sentence_transformer_model, device=device)
        
        # For batch processing with sentence-transformers
        self.st_model.max_seq_length = 512  # Adjust as needed
    
    @asynccontextmanager
    async def get_client(self):
        """Context manager for httpx client."""
        async with httpx.AsyncClient(timeout=self.config.timeout) as client:
            yield client
    
    async def generate_ollama_embedding(self, text: str, client: httpx.AsyncClient) -> np.ndarray:
        """Generate embedding using Ollama API."""
        cached = self.cache.get_ollama_embedding(text)
        if cached is not None:
            return cached
        
        for attempt in range(self.config.max_retries):
            try:
                response = await client.post(
                    self.config.ollama_url,
                    json={"model": self.config.ollama_model, "prompt": text}
                )
                response.raise_for_status()
                data = response.json()
                embedding = np.array(data.get("embedding", []))
                
                if embedding.size == 0:
                    raise ValueError("Empty embedding received from Ollama API")
                
                self.cache.set_ollama_embedding(text, embedding)
                return embedding
            
            except Exception as e:
                if attempt < self.config.max_retries - 1:
                    delay = self.config.retry_delay * (2 ** attempt)  # Exponential backoff
                    logger.warning(f"Ollama API error (attempt {attempt+1}/{self.config.max_retries}): {e}. Retrying in {delay:.2f}s")
                    await asyncio.sleep(delay)
                else:
                    logger.error(f"Failed to generate Ollama embedding after {self.config.max_retries} attempts: {e}")
                    # Return zero vector of appropriate size on failure
                    # Using a size of 768 as a default for many transformer-based models
                    return np.zeros(768)
    
    def generate_sentence_transformer_embedding(self, text: str) -> np.ndarray:
        """Generate embedding using SentenceTransformer."""
        cached = self.cache.get_st_embedding(text)
        if cached is not None:
            return cached
        
        try:
            # SentenceTransformer automatically handles GPU acceleration if available
            embedding = self.st_model.encode(text, convert_to_numpy=True)
            self.cache.set_st_embedding(text, embedding)
            return embedding
        except Exception as e:
            logger.error(f"Failed to generate SentenceTransformer embedding: {e}")
            # Return zero vector of appropriate size
            return np.zeros(self.st_model.get_sentence_embedding_dimension())
    
    async def generate_combined_embedding(self, text: str, client: httpx.AsyncClient) -> np.ndarray:
        """Generate combined embedding from both models."""
        ollama_embedding = await self.generate_ollama_embedding(text, client)
        st_embedding = self.generate_sentence_transformer_embedding(text)
        
        # Combine embeddings (simple concatenation, can be extended with other methods)
        return np.concatenate([ollama_embedding, st_embedding])
    
    async def process_batch(self, texts: List[str], client: httpx.AsyncClient) -> List[np.ndarray]:
        """Process a batch of texts in parallel."""
        tasks = []
        for text in texts:
            task = self.generate_combined_embedding(text, client)
            tasks.append(task)
        
        return await asyncio.gather(*tasks)
    
    async def process_data(self, data: Dict[str, str]) -> Dict[str, np.ndarray]:
        """Process all text data and generate embeddings."""
        result = {}
        keys = list(data.keys())
        total_batches = (len(keys) + self.config.batch_size - 1) // self.config.batch_size
        
        async with self.get_client() as client:
            for i in range(0, len(keys), self.config.batch_size):
                batch_keys = keys[i:i + self.config.batch_size]
                batch_texts = [data[k] for k in batch_keys]
                
                batch_result = await self.process_batch(batch_texts, client)
                
                for key, embedding in zip(batch_keys, batch_result):
                    result[key] = embedding
                
                # Save progress at intervals
                if i > 0 and i % self.config.save_interval == 0:
                    self._save_checkpoint(result)
                    self.cache.save_cache()
        
        return result
    
    def _save_checkpoint(self, current_results: Dict[str, np.ndarray]) -> None:
        """Save current progress to a checkpoint file."""
        checkpoint_path = self.config.output_file.with_suffix(".checkpoint.pkl")
        try:
            with open(checkpoint_path, "wb") as f:
                pickle.dump(current_results, f)
            logger.info(f"Checkpoint saved: {len(current_results)} items processed")
        except Exception as e:
            logger.error(f"Failed to save checkpoint: {e}")


async def main():
    """Main function to run the embedding generator."""
    # Load configuration (prioritize config file, fall back to env vars)
    config_path = Path("embedding_config.yaml")
    if config_path.exists():
        config = EmbeddingConfig.from_yaml(config_path)
        logger.info(f"Loaded configuration from {config_path}")
    else:
        config = EmbeddingConfig.from_env()
        logger.info("Loaded configuration from environment variables")
    
    logger.info(f"Input file: {config.input_file}")
    logger.info(f"Output file: {config.output_file}")
    
    # Check if input file exists
    if not config.input_file.exists():
        logger.error(f"Input file not found: {config.input_file}")
        return 1
    
    # Load input data
    try:
        with open(config.input_file, "rb") as f:
            data = pickle.load(f)
        logger.info(f"Loaded {len(data)} items from {config.input_file}")
    except Exception as e:
        logger.error(f"Failed to load input file: {e}")
        return 1
    
    # Check for checkpoint
    checkpoint_path = config.output_file.with_suffix(".checkpoint.pkl")
    result = {}
    if checkpoint_path.exists():
        try:
            with open(checkpoint_path, "rb") as f:
                result = pickle.load(f)
            logger.info(f"Resuming from checkpoint with {len(result)} items already processed")
            
            # Filter data to process only remaining items
            data = {k: v for k, v in data.items() if k not in result}
            logger.info(f"{len(data)} items remaining to process")
        except Exception as e:
            logger.warning(f"Failed to load checkpoint, starting from scratch: {e}")
    
    # Initialize and run the embedding generator
    start_time = time.time()
    generator = EmbeddingGenerator(config)
    
    if not data:
        logger.info("No new data to process")
    else:
        logger.info(f"Starting embedding generation for {len(data)} items")
        
        # Process data
        new_results = await generator.process_data(data)
        result.update(new_results)
        
        # Save cache for future runs
        generator.cache.save_cache()
    
    # Save final results
    try:
        with open(config.output_file, "wb") as f:
            pickle.dump(result, f)
        
        # Remove checkpoint file if exists
        if checkpoint_path.exists():
            checkpoint_path.unlink()
        
        elapsed_time = time.time() - start_time
        logger.info(f"Completed successfully. Processed {len(result)} items in {elapsed_time:.2f} seconds")
        logger.info(f"Results saved to {config.output_file}")
    except Exception as e:
        logger.error(f"Failed to save results: {e}")
        return 1
    
    return 0


if __name__ == "__main__":
    try:
        exit_code = asyncio.run(main())
        exit(exit_code)
    except KeyboardInterrupt:
        logger.info("Process interrupted by user")
        exit(130)
    except Exception as e:
        logger.critical(f"Unhandled exception: {e}", exc_info=True)
        exit(1)

In [1]:
#!/usr/bin/env python3
"""
CSV Embedding Generator

This script generates embeddings for text data from a CSV file using HuggingFace models,
optimized for modern Python environments.

Features:
- Batch processing
- GPU acceleration
- Robust error handling and logging
- Configurable parameters via environment variables or config files
- Progress tracking and checkpointing
"""

import os
import logging
import asyncio
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Union

import pandas as pd
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from tqdm import tqdm
import yaml

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("embedding_generator.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("csv_embedding_generator")

@dataclass
class EmbeddingConfig:
    """Configuration for embedding generation process."""
    input_file: Path
    output_file: Path
    huggingface_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    batch_size: int = 32
    use_gpu: bool = True
    save_interval: int = 100  # Save progress every N batches
    text_column: str = "articles"
    
    @classmethod
    def from_yaml(cls, config_path: Union[str, Path]) -> "EmbeddingConfig":
        """Load configuration from YAML file."""
        config_path = Path(config_path)
        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")
        
        with open(config_path, "r") as f:
            config_data = yaml.safe_load(f)
        
        # Convert string paths to Path objects
        for path_field in ["input_file", "output_file"]:
            if config_data.get(path_field):
                config_data[path_field] = Path(config_data[path_field])
        
        return cls(**config_data)

    @classmethod
    def from_env(cls) -> "EmbeddingConfig":
        """Load configuration from environment variables."""
        return cls(
            input_file=Path(os.getenv("EMBEDDING_INPUT_FILE", "articles.csv")),
            output_file=Path(os.getenv("EMBEDDING_OUTPUT_FILE", "embeddings.csv")),
            huggingface_model=os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"),
            batch_size=int(os.getenv("EMBEDDING_BATCH_SIZE", "32")),
            use_gpu=os.getenv("EMBEDDING_USE_GPU", "true").lower() in ("true", "1", "yes"),
            save_interval=int(os.getenv("EMBEDDING_SAVE_INTERVAL", "100")),
            text_column=os.getenv("EMBEDDING_TEXT_COLUMN", "articles"),
        )


class EmbeddingGenerator:
    """Generates embeddings using HuggingFace models."""
    
    def __init__(self, config: EmbeddingConfig):
        self.config = config
        self.model = HuggingFaceEmbeddings(model_name=config.huggingface_model)
        
        if config.use_gpu:
            logger.info("Using GPU acceleration")
            self.model.client = self.model.client.to("cuda")

    def generate_batch(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a batch of texts."""
        return self.model.embed_documents(texts)


async def main():
    """Main function to run the embedding generator."""
    # Load configuration
    config_path = Path("embedding_config.yaml")
    if config_path.exists():
        config = EmbeddingConfig.from_yaml(config_path)
        logger.info(f"Loaded configuration from {config_path}")
    else:
        config = EmbeddingConfig.from_env()
        logger.info("Loaded configuration from environment variables")
    
    logger.info(f"Input file: {config.input_file}")
    logger.info(f"Output file: {config.output_file}")
    
    # Check if input file exists
    if not config.input_file.exists():
        logger.error(f"Input file not found: {config.input_file}")
        return 1

    # Load CSV data
    try:
        df = pd.read_csv("151_ideas_updated2.csv")
        logger.info(f"Loaded {len(df)} rows from CSV")
        
        if config.text_column not in df.columns:
            raise ValueError(f"CSV file missing required column: {config.text_column}")
            
        texts = df[config.text_column].tolist()
    except Exception as e:
        logger.error(f"Failed to load input data: {e}")
        return 1

    # Initialize embedding generator
    generator = EmbeddingGenerator(config)
    
    # Checkpoint setup
    checkpoint_path = config.output_file.with_suffix(".checkpoint.csv")
    start_idx = 0
    
    # Resume from checkpoint if available
    if checkpoint_path.exists():
        try:
            checkpoint_df = pd.read_csv(checkpoint_path)
            start_idx = len(checkpoint_df)
            df.iloc[:start_idx] = checkpoint_df
            logger.info(f"Resuming from checkpoint at row {start_idx}")
        except Exception as e:
            logger.warning(f"Failed to load checkpoint: {e}")

    # Process batches with progress bar
    try:
        with tqdm(total=len(df), initial=start_idx, desc="Generating embeddings") as pbar:
            for batch_start in range(start_idx, len(df), config.batch_size):
                batch_end = min(batch_start + config.batch_size, len(df))
                batch_texts = texts[batch_start:batch_end]
                
                # Generate embeddings
                embeddings = generator.generate_batch(batch_texts)
                
                # Update DataFrame
                df.loc[batch_start:batch_end-1, "embeddings"] = embeddings
                
                # Update progress
                pbar.update(len(batch_texts))
                
                # Save checkpoint
                if (batch_end // config.batch_size) % config.save_interval == 0:
                    df.to_csv(checkpoint_path, index=False)
                    logger.info(f"Saved checkpoint at row {batch_end}")

        # Save final results
        df.to_csv(config.output_file, index=False)
        if checkpoint_path.exists():
            checkpoint_path.unlink()
        logger.info(f"Successfully processed {len(df)} rows")
        
    except Exception as e:
        logger.error(f"Processing failed: {e}")
        logger.info(f"Saving final checkpoint to {checkpoint_path}")
        df.to_csv(checkpoint_path, index=False)
        return 1

    return 0


if __name__ == "__main__":
    try:
        exit_code = asyncio.run(main())
        exit(exit_code)
    except KeyboardInterrupt:
        logger.info("Process interrupted by user")
        exit(130)
    except Exception as e:
        logger.critical(f"Unhandled exception: {e}", exc_info=True)
        exit(1)

2025-03-02 14:27:10,643 - csv_embedding_generator - CRITICAL - Unhandled exception: asyncio.run() cannot be called from a running event loop
Traceback (most recent call last):
  File "/var/folders/49/6ydqkbq172ngzt6p49xfm6b00000gn/T/ipykernel_2495/2335792938.py", line 184, in <module>
    exit_code = asyncio.run(main())
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/nunu24/lib/python3.12/asyncio/runners.py", line 190, in run
    raise RuntimeError(
RuntimeError: asyncio.run() cannot be called from a running event loop
  exit(1)


In [2]:
import pandas as pd
import re
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

# Step 1: Load data from CSV
df = pd.read_csv('151_ideas_updated2.csv')

# Step 2: Text preprocessing
def preprocess_text(text):
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\s+', ' ', text)     # Remove extra whitespace
    return text.strip()

# Apply preprocessing
df['Cleaned_Ideas'] = df['Ideas'].apply(preprocess_text)

# Step 3: Initialize RoBERTa model
model_name = 'roberta-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Step 4: Embedding generation function
def get_roberta_embeddings(texts, batch_size=32):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        inputs = tokenizer(
            batch,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors='pt'
        ).to(device)
        
        with torch.no_grad():
            outputs = model(**inputs)
        
        # Use mean pooling for sentence embeddings
        batch_embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
        embeddings.extend(batch_embeddings)
    
    return np.array(embeddings)

# Step 5: Generate embeddings in batches
text_list = df['Cleaned_Ideas'].tolist()
embeddings = get_roberta_embeddings(text_list)

# Step 6: Save embeddings back to DataFrame
df['Embeddings'] = embeddings.tolist()

# Step 7: Save to new CSV
df.to_csv('ideas_with_embeddings.csv', index=False)

print("Embeddings generated and saved successfully!")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embeddings generated and saved successfully!


In [4]:
from huggingface_hub import hf_hub_download
model_path = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF",
    filename="llama-3.2-3b-instruct-uncensored.Q4_K_M.gguf"
)

EntryNotFoundError: 404 Client Error. (Request ID: Root=1-67c4b83a-54fb0709422d4ed82cb266b8;1c2c4067-e0a0-4b69-be1d-8d677c1844cb)

Entry Not Found for url: https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF/resolve/main/llama-3.2-3b-instruct-uncensored.Q4_K_M.gguf.

In [6]:
# First install required packages
!pip install langchain huggingface_hub pandas faiss-cpu numpy transformers torch

import pandas as pd
import numpy as np
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# 1. Load and prepare embeddings
def load_embeddings(csv_path):
    df = pd.read_csv(csv_path)
    
    # Convert string embeddings to numpy arrays
    df['embedding'] = df['embedding'].apply(
        lambda x: np.fromstring(x.strip("[]"), sep=", ", dtype=np.float32)
    
    texts = df['text_column'].tolist()  # Replace with your text column name
    embeddings = np.array(df['embedding'].tolist())
    
    return texts, embeddings

texts, embeddings = load_embeddings("ideas_with_embeddings.csv")

# 2. Create FAISS vector store with your RoBERTa embeddings
# Initialize with matching RoBERTa config
embedding_model = HuggingFaceEmbeddings(
    model_name="roberta-base",  # Use the exact model you used for embeddings
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': False}
)

vector_store = FAISS.from_embeddings(
    text_embeddings=list(zip(texts, embeddings)),
    embedding=embedding_model
)

# 3. Set up DeepSeek-R1-1.5B
model_name = "deepseek-ai/deepseek-llm-1.5b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    device_map="auto",  # Will use GPU if available
)

llm = HuggingFacePipeline(pipeline=pipe)

# 4. Create custom prompt template for DeepSeek
template = """### Instruction:
Use the following context to answer the question. 
If you don't know the answer, say you don't know. Keep answers concise.

Context: {context}

### Question:
{question}

### Response:
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

# 5. Build RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

# 6. Run the chatbot
while True:
    query = input("\nUser: ")
    if query.lower() in ["exit", "quit"]:
        break
    
    result = qa_chain({"query": query})
    print(f"\nAssistant: {result['result']}")
    print("\nSources:")
    for doc in result['source_documents']:
        print(f"- {doc.page_content[:100]}...")

SyntaxError: '(' was never closed (2343090427.py, line 18)

In [None]:
# New Cell: RAG System using Existing Embeddings
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
import numpy as np
from ipywidgets import interact, widgets, Layout, Output, VBox, HTML

# Convert DataFrame embeddings to numpy arrays
df['Embeddings'] = df['Embeddings'].apply(eval).apply(np.array)

# Create FAISS vector store directly from existing embeddings
vector_store = FAISS.from_embeddings(
    text_embeddings=zip(df['Cleaned_Ideas'].tolist(), df['Embeddings'].tolist()),
    embedding=HuggingFaceEmbeddings()  # Dummy embedding, not actually used
)

# Initialize Llama-3.2-3B model (make sure you've downloaded the GGUF file)
model_path = "llama-3.2-3b-instruct-uncensored.Q4_K_M.gguf"  # Update with actual path

llm = LlamaCpp(
    model_path=model_path,
    temperature=0.7,
    max_tokens=2000,
    n_ctx=4096,
    n_gpu_layers=40,  # Adjust based on your GPU
    n_batch=512,
    verbose=False,
)

# Custom prompt template for Llama-3.2
prompt_template = """[INST] <<SYS>>
You are a helpful AI assistant that provides ideas based on context.
Use the following pieces of context to answer the question at the end.
<</SYS>>

Context: {context}
Question: {question} 
[/INST]"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Create retrieval chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True
)

# React-style UI components
output = Output()
question_input = widgets.Text(
    placeholder='Enter your question...',
    layout=Layout(width='80%', height='40px')
)
submit_button = widgets.Button(description="Ask", button_style='success')

def on_submit(_):
    with output:
        output.clear_output()
        query = question_input.value
        if query:
            # Preprocess query same as original data
            cleaned_query = preprocess_text(query)
            
            result = qa_chain({"query": cleaned_query})
            
            # Display formatted response
            html = f"""
            <div style='padding: 15px; border-radius: 8px; background: #f0f3f6; margin: 15px 0;'>
                <h4 style='color: #2c3e50;'>Question:</h4>
                <p>{query}</p>
                <h4 style='color: #2c3e50; margin-top: 15px;'>Answer:</h4>
                <p>{result['result']}</p>
            </div>
            <div style='margin-top: 20px; background: #f8f9fa; padding: 15px; border-radius: 8px;'>
                <h4 style='color: #2c3e50;'>Relevant Ideas:</h4>
                <ul>
            """
            
            for doc in result['source_documents']:
                html += f"<li style='margin: 8px 0;'>{doc.page_content}</li>"
            
            html += "</ul></div>"
            display(HTML(html))
            question_input.value = ''

submit_button.on_click(on_submit)

# Display the UI
display(VBox([
    HTML("<h1 style='color: #2c3e50; margin-bottom: 20px;'>Ideas RAG Chatbot</h1>"),
    question_input,
    submit_button,
    output
]))