In [None]:
import google.generativeai as genai
import logging
import pandas as pd
import os
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from ratelimit import limits, sleep_and_retry
import csv

# Enable logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configure Gemini API.
# The key is read from the GOOGLE_API_KEY environment variable so that it never
# has to be pasted into (and accidentally committed with) this file. When the
# variable is unset the key is the empty string, exactly as before.
API_KEY = os.getenv('GOOGLE_API_KEY', '')
if not API_KEY:
    logging.warning("GOOGLE_API_KEY is not set; Gemini requests will fail until an API key is provided.")
genai.configure(api_key=API_KEY)

# CSV file to read and update
CSV_FILE = 'paperclip_uses.csv'

In [None]:
# Rate limit handed to the rate limiter as calls/period: at most 15 API calls
# within any 60-second window.
CALLS_PER_MINUTE = 15
PERIOD = 60  # seconds

# Number of processed rows between incremental saves of the CSV
SAVE_INTERVAL = 15

# Prompt for validating a use. It is filled with str.format, so the only
# placeholders are {use} and {category}. The wire is described as conductive
# in both places it is characterised, so the model is not given contradictory
# properties to reason about.
VALIDATION_PROMPT = """
You are an expert validator for creative uses of a small, bendable metal wire (3-5 cm long, thin, conductive), similar to a paperclip but without mentioning 'paperclip'.

Evaluate the following use: "{use}" in the category: "{category}"

Determine if this is a valid, feasible, and novel use for such a wire. Consider whether the use is practical and makes sense given the wire's properties (small, bendable, conductive). Avoid electrical or unrealistic uses.

Output in this exact format:
Valid: Yes/No
Explanation: [Your brief explanation]
"""

class WireUsesValidator:
    """Validate the uses listed in ``CSV_FILE`` with the Gemini API.

    The CSV must contain ``Use`` and ``Category`` columns. Each row whose
    ``Valid`` or ``Explanation`` cell is empty is sent to the model, and the
    parsed verdict is written back into the same CSV file.
    """

    def __init__(self):
        # Not read by any method of this class; kept so that existing code
        # accessing the attribute keeps working.
        self.validated_data = []

    def load_data(self):
        """Load uses and categories from the input CSV.

        Returns:
            The loaded DataFrame with ``Valid`` and ``Explanation`` columns
            present and normalised to stripped strings, or an empty DataFrame
            when the file is missing, unreadable, or lacks the ``Use`` /
            ``Category`` columns.
        """
        if not os.path.exists(CSV_FILE):
            logging.error(f"{CSV_FILE} does not exist. Run the generation script first.")
            return pd.DataFrame()

        try:
            df = pd.read_csv(CSV_FILE, on_bad_lines='skip')
            if 'Use' not in df.columns or 'Category' not in df.columns:
                logging.error(f"{CSV_FILE} missing required columns.")
                return pd.DataFrame()

            # Ensure the result columns exist and hold clean strings, so that
            # "not yet validated" is always represented by ''.
            for column in ('Valid', 'Explanation'):
                if column not in df.columns:
                    df[column] = ''
                df[column] = df[column].fillna('').astype(str).str.strip()

            logging.info(f"Loaded {len(df)} uses for validation")
            return df
        except Exception as e:
            logging.error(f"Error loading {CSV_FILE}: {e}")
            return pd.DataFrame()

    # Decorator order matters: the rate limiter sits directly on the API call
    # and the retry wrapper is outermost, so every retry attempt is counted
    # against the rate limit as well. reraise=True makes the last underlying
    # error propagate instead of tenacity's RetryError wrapper, which keeps
    # the message stored in the CSV meaningful.
    @retry(
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=2, max=15),
        retry=retry_if_exception_type(Exception),
        reraise=True,
    )
    @sleep_and_retry
    @limits(calls=CALLS_PER_MINUTE, period=PERIOD)
    def validate_use(self, use, category):
        """Validate a single use using the Gemini API.

        Args:
            use: Text of the use to evaluate.
            category: Category the use belongs to.

        Returns:
            The stripped text of the model response.

        Raises:
            Exception: The error of the final attempt, after up to five tries.
        """
        try:
            prompt = VALIDATION_PROMPT.format(use=use, category=category)
            logging.info(f"Validating use: {use}")
            model = genai.GenerativeModel('gemini-2.5-flash-lite')
            response = model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            logging.error(f"Validation failed for '{use}': {e}")
            raise

    @staticmethod
    def _normalize_verdict(value):
        """Map a raw verdict such as ``yes``, ``Yes.`` or ``**No**`` to ``Yes``/``No``.

        Anything that does not start with the word yes or no is returned
        stripped but otherwise unchanged.
        """
        text = value.strip().strip('*_`').strip()
        if not text:
            return text
        first_word = text.split(None, 1)[0].rstrip('.,;:!').casefold()
        if first_word == 'yes':
            return 'Yes'
        if first_word == 'no':
            return 'No'
        return text

    def parse_validation_response(self, response):
        """Parse the LLM response into structured data.

        Labels are matched case-insensitively and may be indented or wrapped
        in markdown emphasis (for example ``**Valid:** Yes``). Non-empty lines
        that follow the ``Explanation:`` line are treated as its continuation.

        Args:
            response: Raw model text; ``None`` or ``''`` yields the defaults.

        Returns:
            A ``(valid, explanation)`` tuple, defaulting to
            ``('No', 'No response parsed')`` when a label is not found.
        """
        valid = 'No'
        explanation = 'No response parsed'
        in_explanation = False

        for raw_line in (response or '').splitlines():
            stripped = raw_line.strip()
            # Drop leading markdown decoration before looking for the label.
            label, sep, value = stripped.lstrip('*_#>- ').partition(':')
            key = label.strip(' *_').casefold() if sep else ''

            if key == 'valid':
                valid = self._normalize_verdict(value)
                in_explanation = False
            elif key == 'explanation':
                explanation = value.lstrip(' *_').strip()
                in_explanation = True
            elif in_explanation and stripped:
                explanation = f"{explanation} {stripped}".strip()

        return valid, explanation

    def _write_csv(self, df):
        """Write *df* to ``CSV_FILE`` through a temporary file.

        The data is written next to the target first and then moved over it
        with ``os.replace``, so an interrupted write does not leave a
        half-written ``CSV_FILE`` behind.
        """
        tmp_path = f"{CSV_FILE}.tmp"
        df.to_csv(tmp_path, index=False, quoting=csv.QUOTE_NONNUMERIC)
        os.replace(tmp_path, CSV_FILE)

    def validate_all(self):
        """Validate uses without existing validations and update the existing CSV.

        Returns:
            The full DataFrame including the new ``Valid`` / ``Explanation``
            values, or an empty DataFrame when nothing could be loaded.
        """
        df = self.load_data()
        if df.empty:
            return pd.DataFrame()

        # Identify rows needing validation (Valid or Explanation is empty)
        needs_validation = (df['Valid'] == '') | (df['Explanation'] == '')
        df_to_validate = df[needs_validation]
        logging.info(f"Found {len(df_to_validate)} uses needing validation")

        # Log skipped rows for debugging; skip the whole loop unless DEBUG
        # output is actually enabled.
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            for index, row in df[~needs_validation].iterrows():
                logging.debug(f"Skipping already validated row {index}: Use='{row['Use']}', Valid='{row['Valid']}', Explanation='{row['Explanation']}'")

        validation_count = 0
        for index, row in df_to_validate.iterrows():
            use = row['Use']
            category = row['Category']
            try:
                raw_response = self.validate_use(use, category)
                valid, explanation = self.parse_validation_response(raw_response)
                df.at[index, 'Valid'] = valid
                df.at[index, 'Explanation'] = explanation
            except Exception as e:
                # NOTE: a row marked this way has both cells filled, so it is
                # not picked up again on a later run.
                logging.warning(f"Failed to validate '{use}': {e}. Marking as invalid.")
                df.at[index, 'Valid'] = 'No'
                df.at[index, 'Explanation'] = f"Validation failed: {str(e)}"

            validation_count += 1
            # Save every SAVE_INTERVAL validations
            if validation_count % SAVE_INTERVAL == 0:
                try:
                    self._write_csv(df)
                    logging.info(f"Incremental save after {validation_count} validations to {CSV_FILE}")
                except Exception as e:
                    logging.error(f"Error during incremental save after {validation_count} validations: {e}")

        # Final save
        try:
            self._write_csv(df)
            logging.info(f"Final save: Updated {CSV_FILE} with {len(df)} validated uses")
        except Exception as e:
            logging.error(f"Error saving {CSV_FILE}: {e}")

        return df

# Run the script: validate every pending use, then print a summary report.
if __name__ == "__main__":
    try:
        validator = WireUsesValidator()
        df_validated = validator.validate_all()

        banner = "=" * 60
        print("\n" + banner)
        print("VALIDATED CREATIVE USES FOR A SMALL METAL WIRE")
        print(banner)

        if not df_validated.empty:
            valid_count = len(df_validated[df_validated['Valid'] == 'Yes'])
            sample_size = min(20, len(df_validated))

            print(f"\n✓ Total uses validated: {len(df_validated)}")
            print(f"✓ Valid uses: {valid_count}")
            print(f"✓ Categories covered: {df_validated['Category'].nunique()}")
            print("\n📊 Uses per category:")
            print(df_validated['Category'].value_counts().head(10).to_string())
            print("\n📝 Sample validated uses:")
            print(df_validated[['Use', 'Category', 'Valid', 'Explanation']].sample(sample_size).to_string(index=False))
        else:
            print("No uses validated.")

    except Exception as e:
        print(f"Error: {e}")
        # logging.exception appends the traceback, so the log really does
        # carry the full error details and not just the message.
        logging.exception(f"Full error details: {e}")