# In [None]:
import google.generativeai as genai
import logging
import pandas as pd
import os
from sentence_transformers import SentenceTransformer, util
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import numpy as np
import csv
from ratelimit import limits, sleep_and_retry

# Enable logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configure Gemini API.
# The key is taken from the environment so it never has to be stored in the
# source file; when neither variable is set the value is '' exactly as before.
API_KEY = os.environ.get('GEMINI_API_KEY') or os.environ.get('GOOGLE_API_KEY', '')
if not API_KEY:
    # Surface the problem at start-up instead of after several failed API retries.
    logging.warning("No Gemini API key configured; set GEMINI_API_KEY or GOOGLE_API_KEY.")
genai.configure(api_key=API_KEY)

# Load sentence transformer for semantic similarity
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# CSV file to store uses and check duplicates
CSV_FILE = 'paperclip_uses.csv'

# In [None]:
# Rate limit applied to Gemini calls: at most CALLS_PER_MINUTE calls within each
# PERIOD-second window (15 per 60 s, i.e. one call every 4 seconds on average).
CALLS_PER_MINUTE = 15
PERIOD = 60  # seconds

# Prompts
# Template for requesting new categories. The `{previous_categories}` placeholder
# is filled with str.format() before the prompt is sent.
CATEGORY_GENERATION_PROMPT = """
You are a creative problem-solver in a conversation about innovative categories for uses of a paperclip.

Previous categories you suggested:
{previous_categories}

Generate 5 new unique categories for uses, each in 3-5 words, that are highly distinct from previous ones. Format as a plain text list, one category per line. Ensure novelty, specificity, and diversity (e.g., mechanical, structural, artistic, household, organizational, or recreational uses).

Examples of desired categories:
- Miniature crafting tools
- Household organization aids
- Artistic sculpture components
- Mechanical linkage solutions

IMPORTANT: Do NOT create categories related to electrical conductivity, circuits, electronics, or any electrical applications. Focus strictly on non-electrical uses.
"""

# Template for requesting uses within one category. The `{previous_uses}` and
# `{category}` placeholders are filled with str.format() before the prompt is sent.
USE_GENERATION_PROMPT = """
Continuing our conversation about uses for a small, bendable metal wire (3-5 cm long, thin, non-conductive).

Previous uses you suggested:
{previous_uses}

Now, focusing on the category: {category}, generate 5 new unique uses, each 1-5 words, that are distinct from the previous ones. Avoid mentioning paperclips. Format as a plain text list, one use per line. Ensure novelty and specificity.

IMPORTANT: Do not create uses related to electrical conductivity, circuits, electronics, or any electrical applications. Focus on mechanical, structural, artistic, household, or other non-electrical uses.
"""

class WireUsesGenerator:
    """Grow a CSV of short, mutually distinct uses for a small bendable wire.

    Categories and uses are requested from Gemini in small batches. A candidate
    is kept only when the cosine similarity between its sentence embedding and
    every previously accepted item stays below ``similarity_threshold``.
    Accepted uses are appended to ``CSV_FILE``, which is read back at the start
    of :meth:`generate_all` so an earlier run can be continued.
    """

    # Substrings that mark a category as electrical; such categories are rejected.
    ELECTRICAL_KEYWORDS = (
        'circuit', 'electrical', 'conductive', 'electronics',
        'current', 'wireless', 'battery', 'voltage',
    )
    # A category is dropped once it has failed or produced nothing this many times.
    MAX_CATEGORY_RETRIES = 3

    def __init__(self, target_uses=1000, similarity_threshold=0.75):
        """Create an empty generator.

        Args:
            target_uses: Number of unique uses at which generation stops.
            similarity_threshold: Cosine similarity at or above which a
                candidate counts as a duplicate of an existing item.
        """
        self.target_uses = target_uses
        self.similarity_threshold = similarity_threshold
        self.existing_uses = []          # every accepted use, in acceptance order
        self.existing_embeddings = None  # 2-D NumPy array, one row per existing use
        self.categories = []             # queue of categories still to be processed
        self.previous_uses = []          # accepted uses; the tail is fed back into prompts
        self.previous_categories = []    # recent categories fed back into prompts
        self.category_retries = {}       # category -> number of fruitless attempts

    @staticmethod
    def _encode(texts):
        """Embed a list of strings as a 2-D NumPy array (one row per string).

        NumPy output is used everywhere so that rows can be stacked with
        ``np.vstack`` regardless of the device the model runs on.
        """
        return embedding_model.encode(
            texts,
            convert_to_numpy=True,
            show_progress_bar=False,
        )

    @staticmethod
    def _max_similarity(query_embedding, reference_embeddings):
        """Return the highest cosine similarity between the query and any reference row."""
        return util.cos_sim(query_embedding, reference_embeddings)[0].max().item()

    @staticmethod
    def _parse_items(text):
        """Split a model response into clean list items.

        Blank lines are dropped, and a leading bullet (``- ``, ``* ``, ``• ``)
        or short numbering prefix (``1. ``) is removed from each line.
        """
        items = []
        for line in text.strip().split('\n'):
            item = line.strip()
            # The category prompt shows "- " bullets, so responses may echo them.
            if item[:2] in ('- ', '* ', '• '):
                item = item[2:].strip()
            if '. ' in item[:5]:
                item = item.split('. ', 1)[-1].strip()
            if item:
                items.append(item)
        return items

    def load_existing_data(self):
        """Load existing uses and categories from CSV with error handling and filtering.

        Rows with a missing value or an electrical category are ignored. On any
        read/parse error the in-memory state is reset to empty.
        """
        if not os.path.exists(CSV_FILE):
            logging.info("No existing data found, starting fresh")
            return
        try:
            df = pd.read_csv(CSV_FILE, on_bad_lines='skip')
            if 'Use' not in df.columns or 'Category' not in df.columns:
                logging.warning(f"{CSV_FILE} exists but has no 'Use' or 'Category' column. Treating as empty.")
                return
            # Drop incomplete rows so NaN is not loaded as the literal string 'nan'.
            df = df.dropna(subset=['Use', 'Category'])
            keyword_pattern = '|'.join(self.ELECTRICAL_KEYWORDS)
            is_electrical = (
                df['Category'].astype(str).str.lower().str.contains(keyword_pattern, na=False)
            )
            df = df[~is_electrical]
            self.existing_uses = df['Use'].astype(str).tolist()
            self.categories = df['Category'].astype(str).unique().tolist()
            self.existing_embeddings = (
                self._encode(self.existing_uses) if self.existing_uses else None
            )
            logging.info(f"Loaded {len(self.existing_uses)} existing uses and {len(self.categories)} categories after filtering")
        except Exception as e:
            logging.error(f"Error loading {CSV_FILE}: {e}. Consider resetting the CSV file.")
            self.existing_uses = []
            self.existing_embeddings = None
            self.categories = []

    def _novel_use_embedding(self, use):
        """Return the embedding of ``use`` if it is novel, otherwise ``None``.

        Returning the embedding lets the caller store it without encoding the
        same text a second time.
        """
        embedding = self._encode([use])
        if self.existing_embeddings is None or len(self.existing_embeddings) == 0:
            return embedding
        max_similarity = self._max_similarity(embedding, self.existing_embeddings)
        if max_similarity >= self.similarity_threshold:
            logging.debug(f"Rejected (similarity {max_similarity:.2f}): {use}")
            return None
        return embedding

    def is_unique_use(self, use):
        """Return True when ``use`` is not semantically close to any existing use."""
        if not self.existing_uses:
            return True
        return self._novel_use_embedding(use) is not None

    def is_unique_category(self, category):
        """Return True when ``category`` is not semantically close to any queued category."""
        if not self.categories:
            return True
        max_similarity = self._max_similarity(
            self._encode([category]),
            self._encode(self.categories),
        )
        if max_similarity >= self.similarity_threshold:
            logging.debug(f"Rejected category (similarity {max_similarity:.2f}): {category}")
            return False
        return True

    # Decorator order matters: ``retry`` is outermost so that every retry
    # attempt passes through the rate limiter instead of bypassing it.
    @retry(
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=2, max=15),
        retry=retry_if_exception_type(Exception)
    )
    @sleep_and_retry
    @limits(calls=CALLS_PER_MINUTE, period=PERIOD)
    def generate_batch(self, prompt):
        """Generate a batch from Gemini with retry logic and rate limiting.

        Args:
            prompt: Fully formatted prompt text.

        Returns:
            The non-empty response lines with list markers removed.

        Raises:
            Exception: Any API or parsing error is logged and re-raised so the
                retry decorator can try again; once the attempts are exhausted
                the retry decorator's own error propagates to the caller.
        """
        try:
            logging.info("Calling Gemini API...")
            model = genai.GenerativeModel('gemini-2.5-flash-lite')
            response = model.generate_content(prompt)
            items = self._parse_items(response.text)
            logging.info(f"Generated {len(items)} raw items")
            return items
        except Exception as e:
            logging.error(f"API call failed: {e}")
            raise

    def generate_categories(self):
        """Have the LLM generate new categories, using limited previous ones in context.

        Every accepted category is appended to ``self.categories`` (the work
        queue) and to ``self.previous_categories``.

        Returns:
            The list of newly accepted categories (possibly empty).
        """
        previous_categories_text = "\n".join(self.previous_categories[-5:]) if self.previous_categories else "None"
        prompt = CATEGORY_GENERATION_PROMPT.format(previous_categories=previous_categories_text)
        new_categories = self.generate_batch(prompt)

        unique_categories = []
        for cat in new_categories:
            lowered = cat.lower()
            if any(keyword in lowered for keyword in self.ELECTRICAL_KEYWORDS):
                logging.debug(f"Rejected electrical category: {cat}")
                continue
            if cat not in self.categories and self.is_unique_category(cat):
                unique_categories.append(cat)
                self.categories.append(cat)
                self.previous_categories.append(cat)

        # Only a short history is kept for prompt context.
        self.previous_categories = self.previous_categories[-10:]
        logging.info(f"Generated {len(unique_categories)} new unique categories")
        return unique_categories

    def process_and_save_batch(self, new_uses, category):
        """Filter a batch of candidate uses and append the accepted ones to the CSV.

        Args:
            new_uses: Candidate use strings as returned by :meth:`generate_batch`.
            category: Category the candidates were generated for.

        Returns:
            Number of uses accepted and written.
        """
        batch_data = []
        for use in new_uses:
            # Commas are removed from the stored text; surrounding blanks are trimmed.
            use = use.replace(',', '').strip()
            if not use or len(use.split()) > 5 or len(use) > 50:
                logging.debug(f"Rejected (invalid format): {use}")
                continue
            if use in self.existing_uses:
                continue
            embedding = self._novel_use_embedding(use)
            if embedding is None:
                continue
            self.existing_uses.append(use)
            self.previous_uses.append(use)
            if self.existing_embeddings is None:
                self.existing_embeddings = embedding
            else:
                self.existing_embeddings = np.vstack([self.existing_embeddings, embedding])
            batch_data.append({'Use': use, 'Category': category})

        added_count = len(batch_data)
        if batch_data:
            # A header is needed when the file is new or exists but is still empty.
            write_header = not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
            pd.DataFrame(batch_data).to_csv(
                CSV_FILE,
                mode='a',
                header=write_header,
                index=False,
                quoting=csv.QUOTE_NONNUMERIC,
            )
            logging.info(f"✓ Saved {added_count} new uses from '{category}' (Total: {len(self.existing_uses)})")
        else:
            logging.warning("No unique uses from this batch")
        return added_count

    def _requeue_or_discard(self, category):
        """Count a fruitless attempt for ``category`` and requeue it unless the limit is reached."""
        attempts = self.category_retries.get(category, 0) + 1
        self.category_retries[category] = attempts
        if attempts < self.MAX_CATEGORY_RETRIES:
            self.categories.append(category)
            logging.info(f"Requeued category '{category}' (retry {attempts})")
        else:
            logging.warning(f"Discarded category '{category}' after {attempts} retries")

    def generate_all(self):
        """Generate uses until ``target_uses`` is reached or no new categories appear.

        Returns:
            A DataFrame read back from ``CSV_FILE``, or an empty DataFrame when
            the file does not exist.

        Raises:
            Exception: Errors from category generation are not handled here and
                propagate to the caller.
        """
        self.load_existing_data()
        while len(self.existing_uses) < self.target_uses:
            if not self.categories:
                # generate_categories() already enqueues what it accepts, so
                # extending the queue again would process each category twice.
                if not self.generate_categories():
                    logging.warning("Failed to generate new categories. Stopping.")
                    break
            category = self.categories.pop(0)
            try:
                previous_uses_text = "\n".join(self.previous_uses[-5:]) if self.previous_uses else "None"
                prompt = USE_GENERATION_PROMPT.format(previous_uses=previous_uses_text, category=category)
                new_uses = self.generate_batch(prompt)
                added = self.process_and_save_batch(new_uses, category)
            except Exception as e:
                logging.error(f"Failed for category '{category}': {e}")
                self._requeue_or_discard(category)
                continue
            if added == 0:
                self._requeue_or_discard(category)
        return pd.read_csv(CSV_FILE, on_bad_lines='skip') if os.path.exists(CSV_FILE) else pd.DataFrame()

# Script entry point: run the generator and print a summary of the resulting CSV.
if __name__ == "__main__":
    try:
        generator = WireUsesGenerator(target_uses=1000, similarity_threshold=0.75)
        df = generator.generate_all()
        print("\n" + "="*60)
        print("CREATIVE USES FOR A SMALL METAL WIRE")
        print("="*60)
        if not df.empty:
            print(f"\n✓ Total unique uses generated: {len(df)}")
            print(f"✓ Categories covered: {df['Category'].nunique()}")
            print("\n📊 Uses per category:")
            # Only the ten most populated categories are listed.
            print(df['Category'].value_counts().head(10).to_string())
            print("\n📝 Sample uses:")
            # Random sample of at most 20 uses.
            print(df['Use'].sample(min(20, len(df))).to_string(index=False))
        else:
            print("No uses generated.")
    except Exception as e:
        print(f"Error: {e}")
        # exc_info=True attaches the traceback, so the log entry really does
        # contain the full error details and not just the message.
        logging.error(f"Full error details: {e}", exc_info=True)