# 🎭 Gemini Mood Caption Generator - Research Execution (English)

## Script to run automated English caption generation using 4 different prompting techniques:
1. **Zero-Shot** - Direct instruction without examples
2. **Few-Shot** - Instruction with example captions
3. **Chain-of-Thought** - Step-by-step analysis approach
4. **Persona** - Role-playing as an Influencer Specialist

## Research Design:
- **Dataset**: `data/raw/filenames_with_mood.csv`
- **Images per mood**: 40 images
- **Total unique images**: 120 images
- **Processing**: Each image processed with 4 techniques
- **Total captions**: 480 English captions
- **Output**: `data/hasil_mood_captions_prompting_techniques_english.csv`

In [None]:
# Import libraries
import os
import pandas as pd
import google.generativeai as genai
from PIL import Image
import time
from tqdm import tqdm
import warnings
import logging
from typing import Dict, List, Optional, Tuple
import json
from datetime import datetime
import gc
import random

# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
random.seed(42)

# Status messages (emoji restored from mis-decoded UTF-8).
print("✅ Libraries imported successfully!")
print("🇺🇸 Configured for English caption generation")

In [None]:
# Setup logging
# Messages go to a log file and to the notebook output. The file handler is
# opened as UTF-8 explicitly because the log messages contain emoji, which a
# platform-default encoding such as cp1252 cannot represent.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('mood_caption_prompting_techniques_english.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Set working directory to project root
# Remember the directory this cell first ran in, so that re-running the cell
# resolves the same target instead of climbing two more levels every time.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, '..', '..'))
print(f"Current working directory: {os.getcwd()}")
print("✅ Logging setup completed!")

In [None]:
# ==========================================
# RESEARCH CONFIGURATION
# ==========================================
# The API key is read from the environment so that no credential lives in the
# notebook or its version history. Any key that was ever committed in this
# file must be treated as leaked and revoked.
API_KEY = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Gemini API key not found. Set the GOOGLE_API_KEY (or GEMINI_API_KEY) "
        "environment variable before running this notebook."
    )

CONFIG = {
    # Input / output locations
    'csv_input': "data/raw/filenames_with_mood.csv",
    'folder_gambar': "scaled_images",
    'output_file': "data/hasil_mood_captions_prompting_techniques_english.csv",
    'log_file': "data/mood_processing_prompting_techniques_english_log.json",
    'selection_file': "data/mood_prompting_techniques_english_selection.json",

    # Research configuration - each image processed with 4 prompting techniques
    'images_per_mood': 40,                # 40 images per mood for research
    'total_unique_images': 120,           # 40 x 3 moods = 120 unique images
    'total_captions': 480,                # 120 images x 4 techniques = 480 English captions
    'random_seed': 42,                    # For reproducibility

    # Rate limiting for free tier
    'base_delay': 2.0,
    'max_delay': 20.0,
    'retry_attempts': 3,
    'batch_size': 1,

    # Memory management
    'max_image_size': (1024, 1024),
    'gc_interval': 5,

    # Mood configuration - according to filenames_with_mood.csv dataset
    'moods': {
        'mood_1': 'joy',
        'mood_2': 'sad',
        'mood_3': 'surprised'
    },

    # Prompting techniques
    'prompting_techniques': [
        'zero-shot',
        'few-shot',
        'chain-of-thought',
        'persona'
    ]
}

# Gemini configuration
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('gemini-1.5-flash')

print("🎭 Gemini Mood Caption Generator - English Research Configuration")
print("=" * 60)
print(f"📊 Dataset: {CONFIG['csv_input']}")
print(f"📊 Images per mood: {CONFIG['images_per_mood']}")
print(f"📊 Total unique images: {CONFIG['total_unique_images']}")
print(f"📊 Total English captions: {CONFIG['total_captions']}")
print(f"📊 Techniques: {', '.join(CONFIG['prompting_techniques'])}")
print(f"📊 Moods: {', '.join(CONFIG['moods'].values())}")
# The estimate assumes roughly 3 seconds per caption.
print(f"⏱️ Estimated time: {CONFIG['total_captions'] * 3 / 60:.1f} minutes")
print("=" * 60)
print("✅ Configuration completed!")

In [None]:
# ==========================================
# ENGLISH PROMPTING TECHNIQUES FUNCTIONS
# ==========================================

def get_prompting_technique_prompt(mood: str, technique: str) -> str:
    """Return the English prompt for a mood under a given prompting technique.

    Args:
        mood: One of ``"joy"``, ``"sad"`` or ``"surprised"``.
        technique: One of ``"zero-shot"``, ``"few-shot"``,
            ``"chain-of-thought"`` or ``"persona"``.

    Returns:
        The prompt text for the (technique, mood) pair. If the pair is not
        defined, the zero-shot prompt for ``mood`` is returned; if ``mood``
        is unknown as well, the zero-shot ``"joy"`` prompt is returned.
    """

    # The few-shot prompts share the same three example captions and differ
    # only in the final instruction, so they are built from one template.
    # The example emoji are real characters (they were previously mis-decoded
    # text that would have been sent to the model verbatim).
    few_shot_template = """Follow the caption style based on these examples:
            Mood: Joyful -> 'What an amazing day to start a new adventure! ✨😊'
            Mood: Sad -> 'Sometimes silence is the best companion for reflection. 💙😔'
            Mood: Surprised -> 'Wow, this beauty is truly unexpected! 😱✨'
            
            Now create an English caption for {mood_label} mood ->"""

    prompting_techniques = {
        "zero-shot": {
            "joy": "Create a short English caption for this image with a joyful and cheerful mood.",
            "sad": "Create a short English caption for this image with a sad and melancholic mood.",
            "surprised": "Create a short English caption for this image with a surprised and amazed mood."
        },

        "few-shot": {
            "joy": few_shot_template.format(mood_label="joyful"),
            "sad": few_shot_template.format(mood_label="sad"),
            "surprised": few_shot_template.format(mood_label="surprised")
        },

        "chain-of-thought": {
            "joy": """Analyze this image with the following steps:
            1. Describe the main visual atmosphere in this image
            2. Connect that atmosphere with joyful and happy emotions
            3. Create one final English caption that best fits based on this analysis with a cheerful mood""",

            "sad": """Analyze this image with the following steps:
            1. Describe the main visual atmosphere in this image
            2. Connect that atmosphere with sad and melancholic emotions
            3. Create one final English caption that best fits based on this analysis with a sad mood""",

            "surprised": """Analyze this image with the following steps:
            1. Describe the main visual atmosphere in this image
            2. Connect that atmosphere with surprised and amazed emotions
            3. Create one final English caption that best fits based on this analysis with a surprised mood"""
        },

        "persona": {
            "joy": """You are an Influencer Specialist who is expert in audience psychology and an experienced content creator. 
            Create a highly engaging English caption for this image with a joyful and cheerful impression that can increase engagement rate.""",

            "sad": """You are an Influencer Specialist who is expert in audience psychology and an experienced content creator. 
            Create a highly engaging English caption for this image with a sad and melancholic impression that can touch the audience's heart.""",

            "surprised": """You are an Influencer Specialist who is expert in audience psychology and an experienced content creator. 
            Create a highly engaging English caption for this image with a surprised and amazed impression that can captivate the audience."""
        }
    }

    try:
        return prompting_techniques[technique][mood]
    except KeyError:
        # Unknown technique or mood: fall back to zero-shot, then to "joy".
        zero_shot = prompting_techniques["zero-shot"]
        return zero_shot.get(mood, zero_shot["joy"])

print("✅ English prompting techniques functions defined!")

# Test prompting technique
# Preview the first 100 characters of the 'joy' prompt for the first two
# configured techniques as a quick sanity check.
print("\n💡 Example English prompts:")
mood = 'joy'
for technique in CONFIG['prompting_techniques'][:2]:  # Show first 2 techniques
    prompt = get_prompting_technique_prompt(mood, technique)
    print(f"\n🔧 {technique.upper()} - {mood.upper()}:")
    print(f"   {prompt[:100]}...")

In [None]:
# ==========================================
# IMAGE SELECTION FUNCTIONS
# ==========================================

def select_research_images(df_input: pd.DataFrame, config: Dict) -> Dict[str, Dict]:
    """Randomly assign existing images to each configured mood and save the choice.

    Filenames are read from the ``filename`` column of ``df_input``; only files
    that exist inside ``config['folder_gambar']`` are considered, and repeated
    filenames are counted once. The remaining files are shuffled with a RNG
    seeded from ``config['random_seed']`` and split into consecutive,
    non-overlapping slices of ``config['images_per_mood']`` files, one slice
    per entry of ``config['moods']``.

    Side effects:
        * If fewer than ``config['total_unique_images']`` files are available,
          ``config`` is updated in place: ``images_per_mood``,
          ``total_unique_images`` and ``total_captions`` are reduced to fit.
        * The selection is written as JSON to ``config['selection_file']``.

    Args:
        df_input: Dataset with a ``filename`` column.
        config: Research configuration dictionary (see keys above, plus
            ``prompting_techniques``).

    Returns:
        Mapping of mood key (e.g. ``"mood_1"``) to a dict with ``mood_value``
        (the mood label) and ``files`` (the selected filenames).
    """

    # Get all available images. dict.fromkeys drops duplicate CSV rows while
    # keeping first-seen order, so one image cannot be selected twice.
    image_folder = config['folder_gambar']
    all_available_files = [
        filename
        for filename in dict.fromkeys(df_input['filename'])
        if os.path.exists(os.path.join(image_folder, filename))
    ]

    logger.info(f"📊 Total available images: {len(all_available_files)}")

    # Check if we have enough images
    total_needed = config['total_unique_images']
    if len(all_available_files) < total_needed:
        logger.warning(f"⚠️ Not enough images! Available: {len(all_available_files)}, Needed: {total_needed}")
        # Adjust images per mood
        adjusted_per_mood = len(all_available_files) // len(config['moods'])
        logger.info(f"📊 Adjusting to {adjusted_per_mood} images per mood")
        config['images_per_mood'] = adjusted_per_mood
        config['total_unique_images'] = adjusted_per_mood * len(config['moods'])
        config['total_captions'] = config['total_unique_images'] * len(config['prompting_techniques'])

    # Shuffle with a dedicated RNG seeded from the config, so the selection is
    # the same no matter what has consumed the global random state before.
    random.Random(config['random_seed']).shuffle(all_available_files)

    # Select images for each mood: consecutive slices of the shuffled list
    mood_selections = {}
    current_idx = 0

    for mood_name, mood_value in config['moods'].items():
        end_idx = current_idx + config['images_per_mood']
        selected_files = all_available_files[current_idx:end_idx]

        mood_selections[mood_name] = {
            'mood_value': mood_value,
            'files': selected_files
        }

        logger.info(f"🎭 {mood_name} ({mood_value}): Selected {len(selected_files)} images")
        logger.info(f"   Sample files: {selected_files[:3]}...")

        current_idx = end_idx

    # Save selection for reproducibility
    selection_data = {
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'random_seed': config['random_seed'],
        'images_per_mood': config['images_per_mood'],
        'total_unique_images': config['total_unique_images'],
        'total_captions': config['total_captions'],
        'total_available': len(all_available_files),
        'research_design': 'Each image processed with 4 English prompting techniques',
        'language': 'English',
        'selections': mood_selections
    }

    with open(config['selection_file'], 'w', encoding='utf-8') as f:
        json.dump(selection_data, f, indent=2)

    logger.info(f"💾 Selection saved to: {config['selection_file']}")

    return mood_selections

print("✅ Image selection functions defined!")

In [None]:
# ==========================================
# ENGLISH CAPTION PROCESSOR CLASS
# ==========================================

class EnglishPromptingTechniquesCaptionProcessor:
    """Generate English captions for images with every configured prompting technique.

    The processor sends each image with a technique-specific prompt to the
    module-level Gemini ``model``, retries failed requests, paces requests
    with an adaptive delay, and keeps success/error counters overall and per
    technique.
    """

    def __init__(self, config: Dict):
        """Store the configuration and reset all counters.

        Args:
            config: Research configuration. Keys read by this class:
                ``base_delay``, ``max_delay``, ``retry_attempts``,
                ``max_image_size``, ``folder_gambar``,
                ``prompting_techniques`` and ``random_seed``.
        """
        self.config = config
        self.model = model
        self.current_delay = config['base_delay']
        self.success_count = 0
        self.error_count = 0
        self.start_time = time.time()

        # Initialize stats for each technique
        self.technique_stats = {
            technique: {'success': 0, 'error': 0}
            for technique in config['prompting_techniques']
        }

        # Re-seeds the global RNG as a side effect of creating a processor.
        random.seed(config['random_seed'])

    def load_and_optimize_image(self, image_path: str) -> "Optional[Image.Image]":
        """Load an image, shrink it to ``max_image_size`` and return it as RGB.

        The file is opened in a ``with`` block so its handle is released; the
        returned image is an independent in-memory copy.

        Returns:
            The prepared image, or ``None`` if loading failed for any reason
            (the error is logged).
        """
        try:
            max_width, max_height = self.config['max_image_size']
            with Image.open(image_path) as source:
                # Resize if too large (thumbnail keeps the aspect ratio)
                if source.size[0] > max_width or source.size[1] > max_height:
                    source.thumbnail(self.config['max_image_size'], Image.Resampling.LANCZOS)
                    logger.debug(f"Resized image {image_path} to {source.size}")

                # Convert to RGB if needed; otherwise copy so the result
                # stays usable after the source file is closed.
                if source.mode != 'RGB':
                    return source.convert('RGB')
                return source.copy()
        except Exception as e:
            logger.error(f"Error loading image {image_path}: {e}")
            return None

    def adaptive_delay(self, success: bool):
        """Adjust the inter-request delay and sleep for it.

        After a success the delay shrinks by 2% (never below ``base_delay``);
        after a failure it grows by 20% (never above ``max_delay``).
        """
        if success:
            self.current_delay = max(self.config['base_delay'], self.current_delay * 0.98)
        else:
            self.current_delay = min(self.config['max_delay'], self.current_delay * 1.2)

        time.sleep(self.current_delay)

    def _request_caption(self, img: "Image.Image", mood: str, technique: str, filename: str) -> Tuple[str, bool]:
        """Request one caption, retrying on failure.

        Returns:
            ``(caption, True)`` on success, or ``(error_message, False)`` once
            every attempt has failed. Reporting success explicitly avoids
            guessing it from the caption text.
        """
        prompt = get_prompting_technique_prompt(mood, technique)
        # Always make at least one attempt so the method never falls through.
        attempts = max(1, int(self.config['retry_attempts']))
        stats = self.technique_stats.setdefault(technique, {'success': 0, 'error': 0})
        last_error = None

        for attempt in range(attempts):
            try:
                response = self.model.generate_content([prompt, img])
                result = response.text.strip()
            except Exception as e:
                last_error = e
                logger.warning(f"⚠️ {technique}-{mood} English caption failed for {filename} (attempt {attempt + 1}): {e}")
                # Exponential backoff between attempts (none after the last).
                if attempt < attempts - 1:
                    time.sleep((2 ** attempt) * self.config['base_delay'])
                continue

            # Clean up result: drop one pair of surrounding double quotes
            if result.startswith('"') and result.endswith('"'):
                result = result[1:-1]

            logger.info(f"✅ {technique}-{mood} English caption success for {filename} (attempt {attempt + 1})")
            stats['success'] += 1
            self.adaptive_delay(True)
            return result, True

        stats['error'] += 1
        self.adaptive_delay(False)
        return f"Error generating {technique}-{mood} English caption: {str(last_error)[:50]}...", False

    def generate_english_mood_caption(self, img: "Image.Image", mood: str, technique: str, filename: str) -> str:
        """Generate English caption for specific mood and technique with retry mechanism.

        Returns:
            The caption text, or a message starting with
            ``"Error generating"`` if every attempt failed.
        """
        caption, _ = self._request_caption(img, mood, technique, filename)
        return caption

    def _build_result(self, filename: str, mood_name: str, mood_value: str, technique: str, *,
                      image_exists: bool, duration: float, success: bool, caption: str) -> Dict:
        """Build one output row; the timestamp is taken when the row is built."""
        return {
            'filename': filename,
            'mood_type': mood_value,
            'mood_column': mood_name,
            'prompting_technique': technique,
            'processing_timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'image_exists': image_exists,
            'processing_duration': duration,
            'success': success,
            'language': 'English',
            'caption': caption
        }

    def _failed_rows(self, filename: str, mood_name: str, mood_value: str, *,
                     image_exists: bool, caption: str) -> List[Dict]:
        """Build one failed row per technique and count each as an error."""
        rows = []
        for technique in self.config['prompting_techniques']:
            # Count these failures too, so the statistics agree with the
            # number of unsuccessful rows in the output.
            self.error_count += 1
            self.technique_stats.setdefault(technique, {'success': 0, 'error': 0})['error'] += 1
            rows.append(self._build_result(
                filename, mood_name, mood_value, technique,
                image_exists=image_exists, duration=0, success=False, caption=caption,
            ))
        return rows

    def process_image_with_all_techniques(self, filename: str, mood_name: str, mood_value: str) -> List[Dict]:
        """Caption one image with every configured prompting technique.

        Args:
            filename: Image file name inside ``config['folder_gambar']``.
            mood_name: Mood key stored in the ``mood_column`` field.
            mood_value: Mood label used to pick the prompt and stored in
                ``mood_type``.

        Returns:
            One result dict per technique. If the image is missing or cannot
            be loaded, every row has ``success=False`` and an explanatory
            caption.
        """
        techniques = self.config['prompting_techniques']
        image_path = os.path.join(self.config['folder_gambar'], filename)

        logger.info(f"🖼️ Processing {filename} for mood {mood_value} with all {len(techniques)} English techniques")

        # Check if image exists
        if not os.path.exists(image_path):
            logger.warning(f"❌ Image not found: {image_path}")
            return self._failed_rows(
                filename, mood_name, mood_value,
                image_exists=False, caption=f"Image not found: {filename}",
            )

        # Load image once for all techniques
        img = self.load_and_optimize_image(image_path)
        if img is None:
            logger.warning(f"❌ Failed to load image: {image_path}")
            return self._failed_rows(
                filename, mood_name, mood_value,
                image_exists=True, caption=f"Failed to load image: {filename}",
            )

        results = []
        try:
            # Process with each technique
            for position, technique in enumerate(techniques, start=1):
                start_time = time.time()
                logger.info(f"  🔧 Technique {position}/{len(techniques)}: {technique} (English)")

                # Generate English caption
                caption, success = self._request_caption(img, mood_value, technique, filename)

                if success:
                    self.success_count += 1
                else:
                    self.error_count += 1

                results.append(self._build_result(
                    filename, mood_name, mood_value, technique,
                    image_exists=True, duration=time.time() - start_time,
                    success=success, caption=caption,
                ))
        finally:
            # Release the image even if something unexpected is raised.
            img.close()

        return results

    def get_processing_stats(self) -> Dict:
        """Get current processing statistics.

        ``avg_time_per_image`` is the elapsed time divided by the number of
        counted captions (successes plus errors), not by the number of images.
        """
        elapsed_time = time.time() - self.start_time
        total_processed = self.success_count + self.error_count

        return {
            'total_processed': total_processed,
            'success_count': self.success_count,
            'error_count': self.error_count,
            'success_rate': self.success_count / max(total_processed, 1),
            'elapsed_time': elapsed_time,
            'avg_time_per_image': elapsed_time / max(total_processed, 1),
            'current_delay': self.current_delay,
            'language': 'English',
            'technique_stats': self.technique_stats
        }

print("✅ English caption processor class defined!")

In [None]:
# ==========================================
# LOAD DATASET & PREPARE FOR PROCESSING
# ==========================================

# Banner summarising the research design that is about to run.
print("🎭 Gemini Mood Caption Generator - English Research Prompting Techniques Comparison")
print("=" * 80)
print("📊 Research Design: Each image processed with 4 English prompting techniques")
print(f"📊 Dataset: {CONFIG['csv_input']}")
print(f"📊 Images per mood: {CONFIG['images_per_mood']}")
print(f"📊 Total unique images: {CONFIG['total_unique_images']}")
print(f"📊 Total English captions: {CONFIG['total_captions']}")
print(f"📊 Techniques: {', '.join(CONFIG['prompting_techniques'])}")
print(f"📊 Moods: {', '.join(CONFIG['moods'].values())}")
print(f"⏰ Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 80)

# Load dataset
logger.info(f"📂 Loading dataset: {CONFIG['csv_input']}")
df_input = pd.read_csv(CONFIG['csv_input'])
logger.info(f"📊 Dataset loaded: {len(df_input)} images available")

print("\n📊 Dataset Info:")
print(f"   - Total images in dataset: {len(df_input)}")
print(f"   - Dataset columns: {df_input.columns.tolist()}")
print("   - Sample data:")
print(df_input.head())

# Check images folder: count files with a recognised image extension and
# compare against the number of images the research design requires.
if os.path.exists(CONFIG['folder_gambar']):
    image_files = [f for f in os.listdir(CONFIG['folder_gambar']) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    print(f"\n📊 Available images in {CONFIG['folder_gambar']}: {len(image_files)}")
    print(f"📊 Required images: {CONFIG['total_unique_images']}")
    print(f"📊 Sufficient images: {'✅' if len(image_files) >= CONFIG['total_unique_images'] else '❌'}")
else:
    print(f"\n❌ Images folder '{CONFIG['folder_gambar']}' not found!")
    print("💡 Please create the folder and add your images there.")

print("\n✅ Dataset loaded and ready for English processing!")

In [None]:
# ==========================================
# EXECUTE ENGLISH RESEARCH PROCESSING
# ==========================================
# UNCOMMENT THE CODE BELOW TO START PROCESSING

# # Initialize English processor
# processor = EnglishPromptingTechniquesCaptionProcessor(CONFIG)

# # Create research image selection
# logger.info("🎲 Creating research image selection for English captions...")
# mood_selections = select_research_images(df_input, CONFIG)

# # Process each mood
# all_results = []

# for mood_name, mood_data in mood_selections.items():
#     mood_value = mood_data['mood_value']
#     selected_files = mood_data['files']
    
#     logger.info(f"\n🎭 Processing mood: {mood_name} ({mood_value}) - {len(selected_files)} images for English captions")
    
#     # Process each image with all 4 English techniques
#     for i, filename in enumerate(tqdm(selected_files, desc=f"Processing {mood_value} images (English)")):
#         logger.info(f"\n📸 Image {i+1}/{len(selected_files)}: {filename} (English)")
        
#         # Process this image with all 4 English techniques
#         image_results = processor.process_image_with_all_techniques(filename, mood_name, mood_value)
#         all_results.extend(image_results)
        
#         # Save progress after every 5 images
#         if (i + 1) % 5 == 0:
#             temp_df = pd.DataFrame(all_results)
#             temp_df.to_csv(CONFIG['output_file'], index=False)
            
#             # Save stats
#             stats = processor.get_processing_stats()
#             with open(CONFIG['log_file'], 'w') as f:
#                 json.dump(stats, f, indent=2)
            
#             logger.info(f"💾 Progress saved: {len(all_results)} total English captions")
#             logger.info(f"📈 Current success rate: {stats['success_rate']:.2%}")
        
#         # Periodic garbage collection
#         if (i + 1) % CONFIG['gc_interval'] == 0:
#             gc.collect()
#             logger.info(f"🧹 Garbage collection performed")

# Usage hints. Folder name, caption count and time estimate are taken from
# CONFIG (same ~3 s per caption assumption as the configuration summary), so
# they stay correct if the configuration changes.
print("\n💡 Uncomment the code above to start English processing!")
print(f"💡 Make sure the '{CONFIG['folder_gambar']}' folder contains your images")
print(f"💡 Processing will take approximately {CONFIG['total_captions'] * 3 / 60:.0f} minutes for {CONFIG['total_captions']} English captions")
print("💡 Results will be saved to:", CONFIG['output_file'])

In [None]:
# ==========================================
# FINAL ENGLISH RESULTS SUMMARY
# ==========================================
# This cell will run after processing is complete

# # Final summary
# if all_results:
#     final_df = pd.DataFrame(all_results)
#     final_df.to_csv(CONFIG['output_file'], index=False)
    
#     final_stats = processor.get_processing_stats()
    
#     print("\n" + "=" * 80)
#     print("🎉 ENGLISH RESEARCH PROCESSING COMPLETED!")
#     print("=" * 80)
#     print(f"📊 Total processed: {final_stats['total_processed']} English captions")
#     print(f"📊 Unique images: {CONFIG['total_unique_images']}")
#     print(f"✅ Success: {final_stats['success_count']}")
#     print(f"❌ Errors: {final_stats['error_count']}")
#     print(f"📈 Overall success rate: {final_stats['success_rate']:.2%}")
#     print(f"⏱️ Total time: {final_stats['elapsed_time']/60:.1f} minutes")
#     print(f"🇺🇸 Language: English")
    
#     print(f"\n🔧 Final Technique Breakdown:")
#     technique_summary = final_df['prompting_technique'].value_counts()
#     for technique, count in technique_summary.items():
#         print(f"   {technique.capitalize()}: {count} English captions")
    
#     # Final breakdown by mood
#     print(f"\n🎭 Final Mood Breakdown:")
#     mood_summary = final_df['mood_type'].value_counts()
#     for mood, count in mood_summary.items():
#         print(f"   {mood.capitalize()}: {count} English captions")
    
#     # Research analysis summary
#     print(f"\n📊 English Research Summary:")
#     print(f"   - Each of {CONFIG['total_unique_images']} images processed with 4 English techniques")
#     print(f"   - Perfect for comparative analysis of English prompting techniques")
#     print(f"   - Each technique tested on identical image set")
#     print(f"   - All captions generated in English")
    
#     print(f"\n💾 English results saved to: {CONFIG['output_file']}")
#     print(f"🎲 Selection saved to: {CONFIG['selection_file']}")
#     print("=" * 80)
    
#     # Display sample results
#     print("\n📊 Sample English Results:")
#     print(final_df.head(10))

# Final status messages (emoji restored from mis-decoded UTF-8).
print("\n✅ English notebook ready for execution!")
print("💡 Uncomment the processing code above to start the English research")