In [None]:
# Import required libraries for API calls, data handling, and rate limiting
import csv
import logging
import os
import re

import google.generativeai as genai
import numpy as np
import pandas as pd
from ratelimit import limits, sleep_and_retry
from sentence_transformers import SentenceTransformer, util
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

# Configure logging for debugging and tracking progress
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configure the Gemini API key.
# The key is taken from the environment first (GEMINI_API_KEY, then GOOGLE_API_KEY)
# so it does not have to be stored in the notebook; replacing the fallback literal
# below still works for a quick local run.
API_KEY = (
    os.environ.get('GEMINI_API_KEY')
    or os.environ.get('GOOGLE_API_KEY')
    or 'Your-Api-Key'  # Replace with your actual Gemini API key
)
# Fail fast on a missing key or the untouched placeholder instead of only
# discovering the problem after several retried API calls.
if not API_KEY or API_KEY == 'Your-Api-Key':
    raise ValueError(
        "API_KEY must be provided (set GEMINI_API_KEY or replace the placeholder)"
    )
genai.configure(api_key=API_KEY)

# Initialize sentence transformer for semantic similarity checks
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Define constants
CSV_FILE = 'paperclip_uses.csv'  # Single file holding uses, categories and validation results
CALLS_PER_MINUTE = 15  # API rate limit: 15 calls per minute
PERIOD = 60  # Rate limit period in seconds
BATCH_SIZE = 15  # Number of uses to generate and validate per cycle
TARGET_USES = 15  # Total number of uses at which generation stops
SIMILARITY_THRESHOLD = 0.75  # Cosine similarity at or above which an item counts as a duplicate

In [None]:
# Prompt templates for category generation, use generation, and validation.
# Each template is a plain string filled in with str.format(), so the names in
# curly braces below are the keyword arguments the caller must supply.
# The three prompts are kept separate to avoid cross-contamination.

# Prompt for generating new categories.
# Placeholder: {previous_categories} - newline-joined recent categories, or "None".
CATEGORY_GENERATION_PROMPT = """
You are a creative problem-solver in a conversation about innovative categories for uses of a small, bendable metal wire (3-5 cm long, thin, non-conductive).

Previous categories you suggested:
{previous_categories}

Generate 5 new unique categories for uses, each in 3-5 words, that are highly distinct from previous ones. Format as a plain text list, one category per line. Ensure novelty, specificity, and diversity (e.g., mechanical, structural, artistic, household, organizational, or recreational uses).

Examples of desired categories:
- Miniature crafting tools
- Household organization aids
- Artistic sculpture components
- Mechanical linkage solutions

IMPORTANT: Do NOT create categories related to electrical conductivity, circuits, electronics, or any electrical applications. Focus strictly on non-electrical uses.
"""

# Prompt for generating uses within a category.
# Placeholders: {previous_uses} - newline-joined recent uses, or "None";
#               {category}      - the category the new uses must belong to.
USE_GENERATION_PROMPT = """
Continuing our conversation about uses for a small, bendable metal wire (3-5 cm long, thin, non-conductive).

Previous uses you suggested:
{previous_uses}

Now, focusing on the category: {category}, generate 5 new unique uses, each 1-5 words, that are distinct from the previous ones. Avoid mentioning paperclips. Format as a plain text list, one use per line. Ensure novelty and specificity.

IMPORTANT: Do not create uses related to electrical conductivity, circuits, electronics, or any electrical applications. Focus on mechanical, structural, artistic, household, or other non-electrical uses.
"""

# Prompt for validating a single use.
# Placeholders: {use} and {category}.
# The reply is expected to contain a "Valid:" line and an "Explanation:" line,
# which is the format the response parser looks for.
VALIDATION_PROMPT = """
You are an expert validator for creative uses of a small, bendable metal wire (3-5 cm long, thin, non-conductive).

Evaluate the following use: "{use}" in the category: "{category}"

Determine if this is a valid, feasible, and novel use for such a wire. Consider whether the use is practical and makes sense given the wire's properties (small, bendable, non-conductive). Avoid electrical or unrealistic uses.

Output in this exact format:
Valid: Yes/No
Explanation: [Your brief explanation]
"""


In [None]:
# Class to handle generation of categories and uses
class WireUsesGenerator:
    """Generate de-duplicated categories and uses for the wire via the Gemini API.

    The generator keeps every known use together with its sentence embedding so
    that new candidates can be rejected when they are semantically too close to
    something already stored (cosine similarity >= ``similarity_threshold``).

    Attributes:
        similarity_threshold: Similarity at or above which an item is a duplicate.
        existing_uses: All known uses (loaded from the CSV plus newly accepted).
        existing_embeddings: 2-D NumPy array with one row per entry of
            ``existing_uses`` (same order), or ``None`` when there are no uses.
        categories: Queue of categories available for use generation.
        previous_uses / previous_categories: Recent items fed back into prompts.
        category_retries: Number of failed attempts per category.
    """

    # Substrings (matched case-insensitively) that mark an item as electrical.
    ELECTRICAL_KEYWORDS = (
        'circuit', 'electrical', 'conductive', 'electronics',
        'current', 'wireless', 'battery', 'voltage',
    )

    # Leading list decoration produced by the model: "- ", "* ", "• ", "1. ", "2) ".
    _LIST_MARKER_RE = re.compile(r'^\s*(?:[-*\u2022]+|\d+[.)])\s+')

    def __init__(self, similarity_threshold):
        """Initialize generator with empty state for uses and categories"""
        self.similarity_threshold = similarity_threshold
        self.existing_uses = []  # List of all uses in CSV
        self.existing_embeddings = None  # Embeddings for duplicate checks
        self.categories = []  # Available categories
        self.previous_uses = []  # Recent uses for prompt context
        self.previous_categories = []  # Recent categories for prompt context
        self.category_retries = {}  # Track retries per category

    @staticmethod
    def _embed(texts):
        """Return a 2-D NumPy array with one embedding row per text.

        NumPy output is used everywhere so that cached and freshly computed
        embeddings always have the same type and can be stacked with
        ``np.vstack`` regardless of the device the model runs on.
        """
        return embedding_model.encode(
            texts,
            convert_to_numpy=True,
            show_progress_bar=False
        )

    @staticmethod
    def _max_similarity(embedding, reference_embeddings):
        """Return the highest cosine similarity between one embedding and a set."""
        return util.cos_sim(embedding, reference_embeddings)[0].max().item()

    @classmethod
    def _is_electrical(cls, text):
        """Return True when the text contains any electrical keyword."""
        lowered = text.lower()
        return any(keyword in lowered for keyword in cls.ELECTRICAL_KEYWORDS)

    def load_existing_data(self):
        """Load existing uses and categories from CSV, filtering out electrical ones.

        Rows whose category matches an electrical keyword are ignored, as are
        rows without a use. If the file is absent or lacks the required columns
        the current state is left untouched; on a read error the state is reset.
        """
        try:
            if not os.path.exists(CSV_FILE):
                logging.info("No existing data found, starting fresh")
                return
            df = pd.read_csv(CSV_FILE, on_bad_lines='skip')
            if 'Use' not in df.columns or 'Category' not in df.columns:
                logging.warning(f"{CSV_FILE} missing required columns. Starting fresh.")
                return
            # Filter out electrical-related entries
            electrical_pattern = '|'.join(self.ELECTRICAL_KEYWORDS)
            is_electrical = df['Category'].astype(str).str.lower().str.contains(
                electrical_pattern, na=False
            )
            df = df[~is_electrical]
            # A missing use would otherwise be loaded as the literal string "nan".
            df = df.dropna(subset=['Use'])
            self.existing_uses = df['Use'].astype(str).tolist()
            self.categories = df['Category'].dropna().astype(str).unique().tolist()
            self.existing_embeddings = (
                self._embed(self.existing_uses) if self.existing_uses else None
            )
            logging.info(f"Loaded {len(self.existing_uses)} existing uses and {len(self.categories)} categories")
        except Exception as e:
            logging.error(f"Error loading {CSV_FILE}: {e}")
            self.existing_uses = []
            self.existing_embeddings = None
            self.categories = []

    def is_unique_use(self, use):
        """Check if a use is unique using semantic similarity.

        Returns True when no uses are stored yet or when the closest stored use
        is below the similarity threshold.
        """
        if not self.existing_uses or self.existing_embeddings is None:
            return True
        max_similarity = self._max_similarity(self._embed([use]), self.existing_embeddings)
        if max_similarity >= self.similarity_threshold:
            logging.debug(f"Rejected use (similarity {max_similarity:.2f}): {use}")
            return False
        return True

    def is_unique_category(self, category):
        """Check if a category is unique using semantic similarity.

        The comparison is made against the categories currently queued on the
        generator; an empty queue means every category is accepted.
        """
        if not self.categories:
            return True
        max_similarity = self._max_similarity(
            self._embed([category]), self._embed(self.categories)
        )
        if max_similarity >= self.similarity_threshold:
            logging.debug(f"Rejected category (similarity {max_similarity:.2f}): {category}")
            return False
        return True

    # NOTE: the retry decorator is innermost, so all retry attempts of one call
    # happen inside a single pass through the rate limiter.
    @sleep_and_retry
    @limits(calls=CALLS_PER_MINUTE, period=PERIOD)
    @retry(
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=2, max=15),
        retry=retry_if_exception_type(Exception)
    )
    def generate_batch(self, prompt):
        """Generate a batch of items (categories or uses) via Gemini API.

        Args:
            prompt: Fully formatted prompt text.

        Returns:
            The non-empty response lines with surrounding whitespace and any
            leading bullet or numbering removed.

        Raises:
            Exception: Any API error is logged and re-raised so the retry
                decorator can try again.
        """
        try:
            logging.info("Calling Gemini API for generation...")
            model = genai.GenerativeModel('gemini-2.5-flash-lite')
            response = model.generate_content(prompt)
            items = []
            for line in response.text.splitlines():
                # Remove list decoration (e.g. '1. Item' or '- Item' -> 'Item')
                cleaned = self._LIST_MARKER_RE.sub('', line.strip(), count=1).strip()
                if cleaned:
                    items.append(cleaned)
            logging.info(f"Generated {len(items)} raw items")
            return items
        except Exception as e:
            logging.error(f"API call failed: {e}")
            raise

    def generate_categories(self):
        """Generate new unique categories, filtering out electrical ones.

        Accepted categories are appended to ``self.categories`` and to the
        prompt context before being returned.

        Returns:
            The list of newly accepted categories (possibly empty).
        """
        previous_categories_text = "\n".join(self.previous_categories[-5:]) if self.previous_categories else "None"
        prompt = CATEGORY_GENERATION_PROMPT.format(previous_categories=previous_categories_text)
        new_categories = self.generate_batch(prompt)

        unique_categories = []
        for cat in new_categories:
            if self._is_electrical(cat):
                logging.debug(f"Rejected electrical category: {cat}")
                continue
            if cat not in self.categories and self.is_unique_category(cat):
                unique_categories.append(cat)
                self.categories.append(cat)
                self.previous_categories.append(cat)

        self.previous_categories = self.previous_categories[-10:]  # Limit context
        logging.info(f"Generated {len(unique_categories)} new unique categories")
        return unique_categories

    def generate_uses(self, category, count=5):
        """Generate new uses for a given category.

        Args:
            category: Category the uses must belong to.
            count: Maximum number of response lines to consider.

        Returns:
            The accepted uses: at most 5 words and 50 characters each, not
            already known, and below the similarity threshold against every
            stored use. Accepted uses are also added to the generator state.
        """
        previous_uses_text = "\n".join(self.previous_uses[-5:]) if self.previous_uses else "None"
        prompt = USE_GENERATION_PROMPT.format(previous_uses=previous_uses_text, category=category)
        new_uses = self.generate_batch(prompt)

        unique_uses = []
        for raw_use in new_uses[:count]:
            use = raw_use.replace(',', '').strip()  # Clean up commas
            if not use:
                continue
            if len(use.split()) > 5 or len(use) > 50:
                logging.debug(f"Rejected use (invalid format): {use}")
                continue
            if use in self.existing_uses:
                continue
            # Encode once and reuse the vector for both the check and the cache.
            embedding = self._embed([use])
            if self.existing_embeddings is not None:
                max_similarity = self._max_similarity(embedding, self.existing_embeddings)
                if max_similarity >= self.similarity_threshold:
                    logging.debug(f"Rejected use (similarity {max_similarity:.2f}): {use}")
                    continue
            unique_uses.append(use)
            self.existing_uses.append(use)
            self.previous_uses.append(use)
            if self.existing_embeddings is None:
                self.existing_embeddings = embedding
            else:
                self.existing_embeddings = np.vstack([self.existing_embeddings, embedding])

        self.previous_uses = self.previous_uses[-10:]  # Limit context
        logging.info(f"Generated {len(unique_uses)} unique uses for '{category}'")
        return unique_uses

    def save_uses(self, batch_data):
        """Save a batch of uses to CSV.

        New rows are appended to the existing file (created if absent) with
        empty ``Valid`` and ``Explanation`` columns so they can be picked up
        for validation later.

        Args:
            batch_data: List of dicts with ``Use`` and ``Category`` keys.

        Returns:
            Number of rows written, or 0 when nothing was saved.
        """
        if not batch_data:
            return 0
        df_batch = pd.DataFrame(batch_data)
        # New rows always start unvalidated.
        df_batch['Valid'] = ''
        df_batch['Explanation'] = ''
        try:
            if os.path.exists(CSV_FILE):
                df_existing = pd.read_csv(CSV_FILE, on_bad_lines='skip')
                for column in ('Valid', 'Explanation'):
                    if column not in df_existing.columns:
                        df_existing[column] = ''
                df_out = pd.concat([df_existing, df_batch], ignore_index=True)
            else:
                df_out = df_batch
            df_out.to_csv(CSV_FILE, index=False, quoting=csv.QUOTE_NONNUMERIC)
            logging.info(f"Saved {len(batch_data)} new uses to {CSV_FILE}")
            return len(batch_data)
        except Exception as e:
            logging.error(f"Error saving to {CSV_FILE}: {e}")
            return 0

In [None]:
# Class to handle validation of uses
class WireUsesValidator:
    """Validate stored uses with the Gemini API and record the verdicts in the CSV."""

    # Columns every frame returned by this class is guaranteed to have.
    RESULT_COLUMNS = ['Use', 'Category', 'Valid', 'Explanation']

    def __init__(self):
        """Initialize validator"""
        self.validated_data = []  # Track validated uses

    def _empty_frame(self):
        """Return an empty frame with the expected columns."""
        return pd.DataFrame(columns=self.RESULT_COLUMNS)

    def load_data(self):
        """Load CSV and ensure required columns exist.

        Returns:
            The CSV contents with ``Valid`` and ``Explanation`` present and
            normalised to stripped strings (missing values become ``''``), or
            an empty frame when the file is absent, unreadable or lacks the
            ``Use``/``Category`` columns.
        """
        try:
            if not os.path.exists(CSV_FILE):
                logging.error(f"{CSV_FILE} does not exist.")
                return self._empty_frame()
            df = pd.read_csv(CSV_FILE, on_bad_lines='skip')
            if 'Use' not in df.columns or 'Category' not in df.columns:
                logging.error(f"{CSV_FILE} missing required columns.")
                return self._empty_frame()
            # Ensure Valid and Explanation columns exist, then clean them
            for column in ('Valid', 'Explanation'):
                if column not in df.columns:
                    df[column] = ''
                df[column] = df[column].fillna('').astype(str).str.strip()
            logging.info(f"Loaded {len(df)} uses for validation")
            return df
        except Exception as e:
            logging.error(f"Error loading {CSV_FILE}: {e}")
            return self._empty_frame()

    @sleep_and_retry
    @limits(calls=CALLS_PER_MINUTE, period=PERIOD)
    @retry(
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=2, max=15),
        retry=retry_if_exception_type(Exception)
    )
    def validate_use(self, use, category):
        """Validate a single use using Gemini API.

        Returns:
            The stripped raw response text.

        Raises:
            Exception: Any API error is logged and re-raised so the retry
                decorator can try again.
        """
        try:
            prompt = VALIDATION_PROMPT.format(use=use, category=category)
            logging.info(f"Validating use: {use}")
            model = genai.GenerativeModel('gemini-2.5-flash-lite')
            response = model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            logging.error(f"Validation failed for '{use}': {e}")
            raise

    def parse_validation_response(self, response):
        """Parse Gemini API response into Valid and Explanation.

        Labels are matched case-insensitively and may be indented or wrapped in
        markdown emphasis (e.g. ``**Valid:** Yes``). The verdict is normalised
        to exactly ``'Yes'`` or ``'No'``; anything unrecognised keeps the
        default ``'No'``. Lines following the explanation label are treated as
        a continuation of the explanation.

        Returns:
            Tuple ``(valid, explanation)``; the explanation defaults to
            ``'No response parsed'`` when none is found.
        """
        valid = 'No'
        explanation_parts = []
        in_explanation = False
        for raw_line in (response or '').splitlines():
            line = raw_line.strip()
            if not line:
                continue
            head, separator, tail = line.partition(':')
            label = head.strip(' *#_-`').lower() if separator else ''
            value = tail.strip().lstrip('*').strip()
            if label == 'valid':
                in_explanation = False
                verdict = value.split()[0].strip('.,;!*"\'').lower() if value else ''
                if verdict == 'yes':
                    valid = 'Yes'
                elif verdict == 'no':
                    valid = 'No'
            elif label == 'explanation':
                in_explanation = True
                explanation_parts = [value] if value else []
            elif in_explanation:
                explanation_parts.append(line)
        explanation = ' '.join(explanation_parts) if explanation_parts else 'No response parsed'
        return valid, explanation

    def _merge_into_stored(self, validated):
        """Return the frame that should be written after validating a batch.

        When the validated rows line up with rows of the CSV (same index labels
        and same ``Use`` text) the verdicts are merged into the full stored
        frame so that rows outside the batch are preserved. Otherwise the
        validated frame is returned unchanged.
        """
        if not validated.index.is_unique:
            return validated
        stored = self.load_data()
        if stored.empty or not validated.index.isin(stored.index).all():
            return validated
        stored_uses = stored.loc[validated.index, 'Use'].astype(str)
        if not stored_uses.eq(validated['Use'].astype(str)).all():
            return validated
        stored.loc[validated.index, ['Valid', 'Explanation']] = validated[['Valid', 'Explanation']]
        return stored

    def validate_batch(self, df_to_validate):
        """Validate a batch of uses and update the DataFrame.

        Each row is sent to the API; a row whose validation fails is marked
        ``'No'`` with the error as explanation. The results are then persisted
        to the CSV without discarding rows that were not part of the batch.

        Args:
            df_to_validate: Frame with at least ``Use`` and ``Category`` columns.

        Returns:
            A copy of the input with ``Valid`` and ``Explanation`` filled in
            (the input itself when it is empty).
        """
        if df_to_validate.empty:
            logging.info("No uses to validate in this batch")
            return df_to_validate

        df = df_to_validate.copy()
        for column in ('Valid', 'Explanation'):
            if column not in df.columns:
                df[column] = ''
            # Object dtype so string verdicts can be stored in any input frame.
            df[column] = df[column].astype(object)

        for index, row in df.iterrows():
            use = row['Use']
            category = row['Category']
            try:
                raw_response = self.validate_use(use, category)
                valid, explanation = self.parse_validation_response(raw_response)
                df.at[index, 'Valid'] = valid.strip()
                df.at[index, 'Explanation'] = explanation.strip()
            except Exception as e:
                logging.warning(f"Failed to validate '{use}': {e}. Marking as invalid.")
                df.at[index, 'Valid'] = 'No'
                df.at[index, 'Explanation'] = f"Validation failed: {str(e)}"

        # Save the validated batch
        try:
            self._merge_into_stored(df).to_csv(CSV_FILE, index=False, quoting=csv.QUOTE_NONNUMERIC)
            logging.info(f"Saved {len(df)} validated uses to {CSV_FILE}")
        except Exception as e:
            logging.error(f"Error saving validated batch: {e}")
        return df

In [None]:
# Coordinates generation and validation in batches, ensuring no cross-contamination
class WireUsesManager:
    """Coordinate generation and validation of uses in batches.

    Attributes:
        target_uses: Total number of uses at which generation stops.
        batch_size: Maximum number of uses generated and validated per cycle.
        generator: ``WireUsesGenerator`` producing categories and uses.
        validator: ``WireUsesValidator`` checking the stored uses.
    """

    # A category is dropped once it has failed this many times.
    MAX_CATEGORY_RETRIES = 3

    def __init__(self, target_uses, batch_size, similarity_threshold):
        """Initialize manager with generator and validator"""
        self.target_uses = target_uses
        self.batch_size = batch_size
        self.generator = WireUsesGenerator(similarity_threshold)
        self.validator = WireUsesValidator()

    def _requeue_or_discard(self, category):
        """Count a failed attempt for a category and requeue it if retries remain."""
        retries = self.generator.category_retries
        retries[category] = retries.get(category, 0) + 1
        if retries[category] < self.MAX_CATEGORY_RETRIES:
            self.generator.categories.append(category)
            logging.info(f"Requeued category '{category}' (retry {retries[category]})")
        else:
            logging.warning(f"Discarded category '{category}' after {self.MAX_CATEGORY_RETRIES} retries")

    def _collect_new_uses(self):
        """Generate up to ``batch_size`` new uses and return them as row dicts."""
        generator = self.generator
        batch_data = []
        while len(batch_data) < self.batch_size and len(generator.existing_uses) < self.target_uses:
            # Generate categories if none available
            if not generator.categories:
                new_cats = generator.generate_categories()
                if not new_cats:
                    logging.warning("Failed to generate new categories. Stopping batch.")
                    break
                # The generator normally queues accepted categories itself; only
                # add the ones that are missing so none is processed twice.
                generator.categories.extend(
                    cat for cat in new_cats if cat not in generator.categories
                )

            # Pick a category and generate uses
            category = generator.categories.pop(0)
            try:
                new_uses = generator.generate_uses(category, count=5)
            except Exception as e:
                logging.error(f"Failed for category '{category}': {e}")
                self._requeue_or_discard(category)
                continue

            batch_data.extend({'Use': use, 'Category': category} for use in new_uses)
            # Retry category if no unique uses generated
            if not new_uses:
                self._requeue_or_discard(category)
        return batch_data

    def generate_and_validate_batch(self):
        """Generate and validate a single batch of uses.

        Returns:
            The full contents of the CSV after this cycle, or an empty frame
            when nothing could be loaded.
        """
        # Step 1: Generate batch
        self.generator.load_existing_data()
        batch_data = self._collect_new_uses()
        # Persist whatever was collected, even when the loop stopped early
        # (target reached or categories exhausted); otherwise those uses are lost.
        if batch_data:
            self.generator.save_uses(batch_data)

        # Step 2: Validate the batch
        df = self.validator.load_data()
        if df.empty:
            return df
        # Select only new uses (those with empty Valid/Explanation)
        df_to_validate = df[(df['Valid'] == '') | (df['Explanation'] == '')].head(self.batch_size)
        logging.info(f"Validating {len(df_to_validate)} uses in this batch")
        df_validated = self.validator.validate_batch(df_to_validate)

        # Update full DataFrame with validated rows
        if not df_validated.empty:
            df.loc[df_validated.index] = df_validated
            try:
                df.to_csv(CSV_FILE, index=False, quoting=csv.QUOTE_NONNUMERIC)
                logging.info(f"Final save for batch: Updated {CSV_FILE}")
            except Exception as e:
                logging.error(f"Error saving batch to {CSV_FILE}: {e}")

        return df

    @staticmethod
    def _count_pending(df):
        """Return how many rows still have an empty Valid or Explanation value."""
        if 'Valid' not in df.columns or 'Explanation' not in df.columns:
            return 0
        return int(((df['Valid'] == '') | (df['Explanation'] == '')).sum())

    def run(self):
        """Run generation and validation in batches until target is reached.

        The loop ends when no data is available, when the target number of uses
        is stored and all of them are validated, or when a cycle makes no
        progress (same row count and same number of unvalidated rows as the
        previous cycle), which prevents an endless loop of API calls.

        Returns:
            The frame produced by the last cycle.
        """
        last_progress = None
        while True:
            df = self.generate_and_validate_batch()
            if df.empty:
                break
            pending = self._count_pending(df)
            if len(df) >= self.target_uses and pending == 0:
                break
            progress = (len(df), pending)
            if progress == last_progress:
                logging.warning("No progress in the last batch; stopping before the target was reached.")
                break
            last_progress = progress

        # Final summary
        if not df.empty:
            print("\n" + "="*60)
            print("CREATIVE USES FOR A SMALL METAL WIRE")
            print("="*60)
            print(f"\n✓ Total unique uses: {len(df)}")
            print(f"✓ Valid uses: {len(df[df['Valid'] == 'Yes'])}")
            print(f"✓ Categories covered: {df['Category'].nunique()}")
            print(f"\n📊 Uses per category:")
            print(df['Category'].value_counts().head(10).to_string())
            print(f"\n📝 Sample validated uses:")
            print(df[['Use', 'Category', 'Valid', 'Explanation']].sample(min(20, len(df))).to_string(index=False))
        else:
            print("No uses generated or validated.")
        return df

In [None]:
# Execute the batch generation and validation process
if __name__ == "__main__":
    try:
        # Initialize and run the manager
        manager = WireUsesManager(
            target_uses=TARGET_USES,
            batch_size=BATCH_SIZE,
            similarity_threshold=SIMILARITY_THRESHOLD
        )
        df = manager.run()
    except Exception as e:
        # Top-level boundary: show a short message to the user and log the
        # full traceback so the failure can actually be diagnosed.
        print(f"Error: {e}")
        logging.exception("Full error details: %s", e)