# 🧪 Initial Test - English Prompting Techniques

## Purpose:
This notebook is designed for **initial testing** of the English prompting techniques research before running the full study.

## Test Configuration:
- **Images per mood**: 1 image (quota-friendly)
- **Total unique images**: 3 images (1 × 3 moods)
- **Total captions**: 12 English captions (3 × 4 techniques)
- **Estimated time**: ~1 minute (vs 24 minutes for full study)

## 4 Prompting Techniques Tested:
1. **Zero-Shot** - Direct English instruction without examples
2. **Few-Shot** - English instruction with example captions
3. **Chain-of-Thought** - Step-by-step English analysis approach
4. **Persona** - Role-playing as English-speaking Influencer Specialist

## Output:
- **File**: `data/initial_test_english_prompting_results.csv`
- **Purpose**: Validate prompts, API connectivity, and expected output quality

---
**💡 Run this test first to ensure everything works before the full research!**

In [None]:
# Import libraries
import os
import pandas as pd
import google.generativeai as genai
from PIL import Image
import time
from tqdm import tqdm
import warnings
import logging
from typing import Dict, List, Optional
import json
from datetime import datetime
import random

# Silence library warnings so the notebook output stays readable
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
random.seed(42)

print("üß™ Initial Test - English Prompting Techniques")
print("=" * 50)
print("‚úÖ Libraries imported successfully!")
print("üá∫üá∏ Configured for English caption testing")
# Keep this banner in sync with the test scale: the initial test uses
# 1 image per mood (TEST_CONFIG is not defined yet at this point in the notebook).
print("üî¨ Test mode: 1 image per mood")

In [None]:
# Configure logging for the test run: every record goes both to a log file
# and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler('initial_test_english_prompting.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Move two directory levels up so the relative data paths used later resolve
# against the project root.
# NOTE(review): this is relative to the current directory, so re-running the
# cell climbs two more levels each time - confirm this is acceptable.
os.chdir('../../')
print("Current working directory: " + os.getcwd())
print("‚úÖ Logging setup completed!")

In [None]:
# ==========================================
# INITIAL TEST CONFIGURATION
# ==========================================
# SECURITY: a real API key must never be committed to source control. The key is
# read from the environment (GEMINI_API_KEY, falling back to GOOGLE_API_KEY).
# Any key that was previously hard-coded here should be treated as leaked and
# revoked in the Google Cloud / AI Studio console.
API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") or ""
if not API_KEY:
    # Warn instead of raising so the remaining cells can still be defined;
    # API requests will fail until a key is provided.
    print("WARNING: no Gemini API key found - set GEMINI_API_KEY before running the test")

# Moods, prompting techniques and the per-mood sample size are declared once so
# that the derived totals inside TEST_CONFIG cannot drift out of sync with them.
_TEST_MOODS = {
    'mood_1': 'joy',
    'mood_2': 'sad',
    'mood_3': 'surprised',
}
_TEST_TECHNIQUES = [
    'zero-shot',
    'few-shot',
    'chain-of-thought',
    'persona',
]
_TEST_IMAGES_PER_MOOD = 1  # Only 1 image per mood for testing

# Central configuration for the initial test run (paths, scale, rate limiting).
TEST_CONFIG = {
    'csv_input': "data/raw/filenames_with_mood.csv",
    'folder_gambar': "scaled_images",  # image folder ("gambar" = image)
    'output_file': "data/initial_test_english_prompting_results.csv",
    'log_file': "data/initial_test_english_prompting_log.json",
    'selection_file': "data/initial_test_english_prompting_selection.json",

    # TEST configuration - smaller scale for initial testing
    'images_per_mood': _TEST_IMAGES_PER_MOOD,
    # images_per_mood x number of moods (1 x 3 = 3 unique images)
    'total_unique_images': _TEST_IMAGES_PER_MOOD * len(_TEST_MOODS),
    # unique images x number of techniques (3 x 4 = 12 English captions)
    'total_captions': _TEST_IMAGES_PER_MOOD * len(_TEST_MOODS) * len(_TEST_TECHNIQUES),
    'random_seed': 42,                    # For reproducibility

    # Quota-friendly rate limiting
    'base_delay': 3.0,      # Pause between requests (3 seconds for quota safety)
    'max_delay': 15.0,      # Maximum pause when consecutive errors occur
    'retry_attempts': 1,    # Fewer retries to save quota
    'batch_size': 1,

    # Safety & memory
    'max_image_size': (1024, 1024),
    'gc_interval': 5,       # Memory cleanup every 5 images

    # Mood configuration
    'moods': _TEST_MOODS,

    # Prompting techniques
    'prompting_techniques': _TEST_TECHNIQUES,
}

# Gemini configuration: register the API key with the SDK, then create the
# 'gemini-2.5-flash' model handle that generate_caption_with_gemini() calls.
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('gemini-2.5-flash')

print("üß™ Initial Test Configuration - English Prompting Techniques")
print("=" * 60)
print(f"üìä Dataset: {TEST_CONFIG['csv_input']}")
print(f"üî¨ Images per mood: {TEST_CONFIG['images_per_mood']} (TEST MODE)")
print(f"üìä Total unique images: {TEST_CONFIG['total_unique_images']}")
print(f"üìä Total English captions: {TEST_CONFIG['total_captions']}")
print(f"üìä Techniques: {', '.join(TEST_CONFIG['prompting_techniques'])}")
print(f"üìä Moods: {', '.join(TEST_CONFIG['moods'].values())}")
print(f"‚è±Ô∏è Estimated time: {TEST_CONFIG['total_captions'] * 2 / 60:.1f} minutes")
print(f"üéØ Purpose: Initial testing before full research")
print("=" * 60)
print("‚úÖ Test configuration completed!")

In [None]:
# ==========================================
# ENGLISH PROMPTING TECHNIQUES (TEST)
# ==========================================

def get_english_test_prompt(mood: str, technique: str) -> str:
    """Return the English test prompt for a mood / prompting-technique pair.

    Args:
        mood: One of "joy", "sad" or "surprised".
        technique: One of "zero-shot", "few-shot", "chain-of-thought" or "persona".

    Returns:
        The prompt text for the pair. If the technique or the mood is not
        recognised, the zero-shot prompt for ``mood`` is returned instead, and
        the zero-shot "joy" prompt when the mood is unknown as well.
    """
    # Example block shared by all few-shot prompts; only the closing line varies.
    few_shot_examples = (
        "Follow the caption style based on these examples:\n"
        "Mood: Joyful -> 'What an amazing day to start a new adventure! ‚ú®üòä'\n"
        "Mood: Sad -> 'Sometimes silence is the best companion for reflection. üíôüòî'\n"
        "Mood: Surprised -> 'Wow, this beauty is truly unexpected! üò±‚ú®'\n\n"
    )

    # mood -> (tone, few-shot label, chain-of-thought emotions,
    #          chain-of-thought closing mood, persona goal)
    mood_wording = {
        "joy": ("joyful and cheerful", "joyful", "joyful and happy",
                "cheerful", "increase engagement rate"),
        "sad": ("sad and melancholic", "sad", "sad and melancholic",
                "sad", "touch the audience's heart"),
        "surprised": ("surprised and amazed", "surprised", "surprised and amazed",
                      "surprised", "captivate the audience"),
    }

    prompts = {"zero-shot": {}, "few-shot": {}, "chain-of-thought": {}, "persona": {}}
    for mood_key, (tone, label, emotions, closing, goal) in mood_wording.items():
        prompts["zero-shot"][mood_key] = (
            f"Create a short English caption for this image with a {tone} mood."
        )
        prompts["few-shot"][mood_key] = (
            few_shot_examples + f"Now create an English caption for {label} mood ->"
        )
        prompts["chain-of-thought"][mood_key] = (
            "Analyze this image with the following steps:\n"
            "1. Describe the main visual atmosphere in this image\n"
            f"2. Connect that atmosphere with {emotions} emotions\n"
            "3. Create one final English caption that best fits based on this "
            f"analysis with a {closing} mood"
        )
        prompts["persona"][mood_key] = (
            "You are an Influencer Specialist who is expert in audience psychology "
            "and an experienced content creator. Create a highly engaging English "
            f"caption for this image with a {tone} impression that can {goal}."
        )

    try:
        return prompts[technique][mood]
    except KeyError:
        # Unknown technique or mood: fall back to zero-shot, then to "joy".
        zero_shot = prompts["zero-shot"]
        return zero_shot.get(mood, zero_shot["joy"])

print("‚úÖ English test prompting techniques functions defined!")

# Preview: show the first 80 characters of the "joy" prompt for the first
# two configured techniques.
print("\nüí° Test Prompt Examples:")
for technique in TEST_CONFIG['prompting_techniques'][:2]:
    mood = 'joy'
    prompt = get_english_test_prompt(mood, technique)
    print(f"\nüîß {technique.upper()} - {mood.upper()}:")
    print(f"   {prompt[:80]}...")

In [None]:
# ==========================================
# DATA LOADING AND IMAGE SELECTION
# ==========================================

def load_and_select_test_images():
    """Load the dataset and randomly pick the test images for every mood.

    Reads the CSV at ``TEST_CONFIG['csv_input']`` and, for each mood in
    ``TEST_CONFIG['moods']``, draws ``TEST_CONFIG['images_per_mood']`` entries
    from its ``filename`` column. The random generator is seeded with
    ``TEST_CONFIG['random_seed']`` first so the selection is reproducible.

    Images already assigned to an earlier mood are excluded from later draws,
    so the moods use distinct images (matching ``total_unique_images``). If
    the dataset is too small for that, images are reused and a warning is
    printed.

    The selection is also written as JSON to ``TEST_CONFIG['selection_file']``.

    Returns:
        tuple: ``(test_combinations, selected_images)``. ``test_combinations``
        is a list of dicts with keys ``image_filename``, ``mood`` and
        ``technique`` (one per image/technique pair); ``selected_images`` maps
        each mood name to its list of selected filenames.

    Raises:
        ValueError: Propagated from ``random.sample`` when the dataset holds
            fewer filenames than ``images_per_mood``.
    """
    # Load dataset
    print("üìä Loading dataset...")
    df = pd.read_csv(TEST_CONFIG['csv_input'])
    print(f"‚úÖ Dataset loaded: {len(df)} total images")

    # Set random seed for reproducibility
    random.seed(TEST_CONFIG['random_seed'])

    images_per_mood = TEST_CONFIG['images_per_mood']

    # Every image is eligible for every mood in this dataset. Repeated
    # filenames are collapsed (order preserved) so one draw cannot return
    # the same file twice.
    available_images = list(dict.fromkeys(df['filename'].tolist()))

    selected_images = {}
    already_selected = set()

    for mood_name in TEST_CONFIG['moods'].values():
        # Prefer images no other mood has taken yet.
        pool = [name for name in available_images if name not in already_selected]
        if len(pool) < images_per_mood:
            print(f"WARNING: not enough unused images for mood '{mood_name}'; reusing images")
            pool = available_images

        selected = random.sample(pool, images_per_mood)
        already_selected.update(selected)
        selected_images[mood_name] = selected

        print(f"üé≠ {mood_name.upper()}: Selected {', '.join(map(str, selected))}")

    # One test case per (mood, image, technique) triple.
    test_combinations = [
        {
            'image_filename': image_filename,
            'mood': mood_name,
            'technique': technique,
        }
        for mood_name, images in selected_images.items()
        for image_filename in images
        for technique in TEST_CONFIG['prompting_techniques']
    ]

    print(f"\nüî¨ Test combinations created: {len(test_combinations)} total")
    print(f"üìä Breakdown: {len(selected_images)} moods √ó {TEST_CONFIG['images_per_mood']} images √ó {len(TEST_CONFIG['prompting_techniques'])} techniques")

    # Save selection for reproducibility
    selection_data = {
        'timestamp': datetime.now().isoformat(),
        'config': TEST_CONFIG,
        'selected_images': selected_images,
        'total_combinations': len(test_combinations)
    }

    # Make sure the target directory exists before writing the selection file.
    selection_dir = os.path.dirname(TEST_CONFIG['selection_file'])
    if selection_dir:
        os.makedirs(selection_dir, exist_ok=True)

    with open(TEST_CONFIG['selection_file'], 'w', encoding='utf-8') as f:
        json.dump(selection_data, f, indent=2)

    print(f"üíæ Selection saved to: {TEST_CONFIG['selection_file']}")

    return test_combinations, selected_images

# Load and select images. Both results are kept as module-level names because
# the run and save functions in later cells read them as globals.
test_combinations, selected_images = load_and_select_test_images()

In [None]:
# ==========================================
# GEMINI API FUNCTIONS
# ==========================================

def generate_caption_with_gemini(image_path: str, prompt: str, max_retries: int = 2) -> dict:
    """Generate a caption for one image with Gemini, retrying on failure.

    The image is opened (and downscaled in place when it exceeds
    ``TEST_CONFIG['max_image_size']``), then sent to the module-level
    ``model`` together with ``prompt``. Exceptions and empty responses both
    count as a failed attempt. Between attempts the function sleeps for
    ``base_delay * 2 ** attempt`` seconds, capped at ``max_delay``.

    Args:
        image_path: Path of the image file to caption.
        prompt: Prompt text sent alongside the image.
        max_retries: Number of retries after the first attempt, so up to
            ``max_retries + 1`` requests are made.

    Returns:
        dict with keys ``caption`` (stripped text or None), ``success``
        (bool), ``error`` (message of the last failure, None on success),
        ``attempts`` (number of attempts made) and ``response_time``
        (seconds for the successful attempt, None on failure).
    """
    result = {
        'caption': None,
        'success': False,
        'error': None,
        'attempts': 0,
        'response_time': None
    }

    for attempt in range(max_retries + 1):
        result['attempts'] = attempt + 1

        try:
            start_time = time.time()

            # The context manager closes the underlying file handle even when
            # the request raises; the original left it open.
            with Image.open(image_path) as image:
                # Resize if image is too large
                max_width, max_height = TEST_CONFIG['max_image_size']
                if image.width > max_width or image.height > max_height:
                    image.thumbnail(TEST_CONFIG['max_image_size'], Image.Resampling.LANCZOS)

                # Generate content with Gemini
                response = model.generate_content([prompt, image])

            # Extract text from response
            if response.text:
                result['caption'] = response.text.strip()
                result['success'] = True
                result['error'] = None  # drop the message of an earlier failed attempt
                result['response_time'] = time.time() - start_time
                return result

            result['error'] = "Empty response from Gemini"
            logger.warning(f"Attempt {attempt + 1} returned an empty response for {image_path}")

        except Exception as e:
            # Broad on purpose: any SDK, network or image error is recorded
            # and retried rather than aborting the whole test run.
            result['error'] = str(e)
            logger.warning(f"Attempt {attempt + 1} failed for {image_path}: {e}")

        # Exponential backoff before the next attempt, applied to both kinds
        # of failure (exception and empty response).
        if attempt < max_retries:
            wait_time = TEST_CONFIG['base_delay'] * (2 ** attempt)
            time.sleep(min(wait_time, TEST_CONFIG['max_delay']))

    return result

def safe_api_call_with_rate_limit(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and pause afterwards for rate limiting.

    After the call returns, the function sleeps for
    ``TEST_CONFIG['base_delay']`` seconds and then hands back the call's
    return value unchanged. If ``func`` raises, the exception propagates and
    no pause happens.
    """
    outcome = func(*args, **kwargs)

    # Fixed pause between consecutive API calls.
    pause_seconds = TEST_CONFIG['base_delay']
    time.sleep(pause_seconds)

    return outcome

# Confirm the API helpers are defined and echo the retry / rate-limit / image
# size settings taken from TEST_CONFIG.
print("‚úÖ Gemini API functions defined!")
print(f"üîß Max retries: {TEST_CONFIG['retry_attempts']}")
print(f"‚è±Ô∏è Base delay: {TEST_CONFIG['base_delay']}s")
print(f"üñºÔ∏è Max image size: {TEST_CONFIG['max_image_size']}")

In [None]:
# ==========================================
# MAIN TEST EXECUTION FUNCTION
# ==========================================

def run_initial_test():
    """Generate a caption for every entry of the global ``test_combinations``.

    For each combination the image path is built from
    ``TEST_CONFIG['folder_gambar']``, the prompt is looked up with
    ``get_english_test_prompt`` and the request is sent through
    ``safe_api_call_with_rate_limit``. Missing image files are recorded as
    failures without calling the API.

    Returns:
        tuple: ``(results, failed_attempts)``. ``results`` holds one dict per
        successful caption (``image_filename``, ``mood``, ``technique``,
        ``caption``, ``success``, ``attempts``, ``response_time``,
        ``timestamp``). ``failed_attempts`` holds one dict per failure
        (``image_filename``, ``mood``, ``technique``, ``error``,
        ``attempts``, ``timestamp``); ``attempts`` is 0 when the image file
        was missing.
    """
    print("üöÄ Starting Initial Test - English Prompting Techniques")
    print("=" * 60)

    # Initialize results storage; the list lengths double as success/failure counters.
    results = []
    failed_attempts = []

    total_combinations = len(test_combinations)

    print(f"üìä Total combinations to process: {total_combinations}")
    print(f"‚è±Ô∏è Estimated time: {total_combinations * TEST_CONFIG['base_delay'] / 60:.1f} minutes")
    print("\nüîÑ Starting caption generation...\n")

    # Process each combination
    for i, combination in enumerate(tqdm(test_combinations, desc="Generating captions"), 1):

        image_filename = combination['image_filename']
        mood = combination['mood']
        technique = combination['technique']

        # Construct image path
        image_path = os.path.join(TEST_CONFIG['folder_gambar'], image_filename)

        if not os.path.exists(image_path):
            # Missing file: record the failure with the same keys as an API
            # failure (attempts = 0, since no request was made).
            logger.error(f"Image not found: {image_path}")
            failed_attempts.append({
                'image_filename': image_filename,
                'mood': mood,
                'technique': technique,
                'error': 'Image file not found',
                'attempts': 0,
                'timestamp': datetime.now().isoformat()
            })
        else:
            # Get prompt for this combination
            prompt = get_english_test_prompt(mood, technique)

            # Progress info
            print(f"\nüì∏ [{i}/{total_combinations}] Processing: {image_filename}")
            print(f"üé≠ Mood: {mood.upper()} | üîß Technique: {technique.upper()}")

            # Generate caption with API call
            api_result = safe_api_call_with_rate_limit(
                generate_caption_with_gemini,
                image_path,
                prompt,
                TEST_CONFIG['retry_attempts']
            )

            timestamp = datetime.now().isoformat()

            if api_result['success']:
                results.append({
                    'image_filename': image_filename,
                    'mood': mood,
                    'technique': technique,
                    'caption': api_result['caption'],
                    'success': True,
                    'attempts': api_result['attempts'],
                    'response_time': api_result['response_time'],
                    'timestamp': timestamp
                })

                # Show preview of generated caption (first 100 characters)
                caption_preview = api_result['caption'][:100] + "..." if len(api_result['caption']) > 100 else api_result['caption']
                print(f"‚úÖ SUCCESS: \"{caption_preview}\"")
                print(f"‚è±Ô∏è Response time: {api_result['response_time']:.2f}s | Attempts: {api_result['attempts']}")

            else:
                failed_attempts.append({
                    'image_filename': image_filename,
                    'mood': mood,
                    'technique': technique,
                    'error': api_result['error'],
                    'attempts': api_result['attempts'],
                    'timestamp': timestamp
                })

                print(f"‚ùå FAILED: {api_result['error']}")
                print(f"üîÑ Attempts made: {api_result['attempts']}")

        # Progress summary every 10 items and after the last one. This runs
        # for missing-image items too, so the final summary is always printed.
        if i % 10 == 0 or i == total_combinations:
            success_rate = (len(results) / i) * 100
            print(f"\nüìä Progress: {i}/{total_combinations} | Success: {len(results)} | Failed: {len(failed_attempts)} | Rate: {success_rate:.1f}%")

    return results, failed_attempts

# Confirm that run_initial_test() is defined; nothing is executed yet.
print("‚úÖ Main test execution function defined!")
print("üéØ Ready to run initial test with Gemini API")

In [None]:
# ==========================================
# RESULTS SAVING FUNCTIONS
# ==========================================

def save_initial_test_results_enhanced(results: List[dict], failed_attempts: List[dict]):
    """Save the test results as CSV plus a JSON log and print a summary.

    Successful captions are written to ``TEST_CONFIG['output_file']`` (only
    when ``results`` is non-empty). A JSON log with the configuration, all
    successes, all failures and the global ``selected_images`` is always
    written to ``TEST_CONFIG['log_file']``. Parent directories of both files
    are created when missing.

    Args:
        results: Result dicts of successful captions, as produced by
            ``run_initial_test``.
        failed_attempts: Failure dicts, as produced by ``run_initial_test``.

    Returns:
        dict: The log structure that was written to the JSON log file. Its
        ``test_info['success_rate']`` is the percentage of successful
        captions relative to the global ``test_combinations``.
    """
    print("\nüíæ Saving Initial Test Results...")

    # Ensure the output directories exist so the writes below cannot fail on
    # a missing folder.
    for target_path in (TEST_CONFIG['output_file'], TEST_CONFIG['log_file']):
        parent_dir = os.path.dirname(target_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    csv_written = False

    if results:
        # Create DataFrame with results
        results_df = pd.DataFrame(results)

        # Reorder columns for better readability
        column_order = [
            'image_filename',
            'mood',
            'technique',
            'caption',
            'success',
            'response_time',
            'attempts',
            'timestamp'
        ]

        # Keep only the columns that are actually present
        available_columns = [col for col in column_order if col in results_df.columns]
        results_df = results_df[available_columns]

        # Save to CSV with specific filename for initial test
        csv_filename = TEST_CONFIG['output_file']
        results_df.to_csv(csv_filename, index=False, encoding='utf-8')
        csv_written = True

        print(f"‚úÖ Initial Test Results saved to: {csv_filename}")
        print(f"üìä Total successful captions: {len(results)}")
        print(f"üìã Columns: {', '.join(results_df.columns)}")

        # Show sample of saved data (first 3 rows, captions cut at 100 chars)
        print("\nüìã Sample of Saved Captions:")
        print("=" * 80)
        for _, row in results_df.head(3).iterrows():
            print(f"\nüñºÔ∏è  Image: {row['image_filename']}")
            print(f"üé≠ Mood: {row['mood'].upper()} | üîß Technique: {row['technique'].upper()}")
            caption_text = row['caption'][:100] + "..." if len(row['caption']) > 100 else row['caption']
            print(f"üí¨ Caption: \"{caption_text}\"")
            print(f"‚è±Ô∏è  Time: {row['response_time']:.2f}s")

        if len(results_df) > 3:
            print(f"\n... and {len(results_df) - 3} more captions saved to CSV")

        # Create summary by technique
        print("\nüìä Summary by Technique:")
        technique_summary = results_df.groupby('technique').size()
        for technique, count in technique_summary.items():
            print(f"   {technique}: {count} captions")

        # Create summary by mood
        print("\nüé≠ Summary by Mood:")
        mood_summary = results_df.groupby('mood').size()
        for mood, count in mood_summary.items():
            print(f"   {mood}: {count} captions")

    else:
        print("‚ö†Ô∏è No successful results to save to CSV")

    # Totals used by both the log and the printed summary
    total = len(test_combinations)
    success = len(results)
    failed = len(failed_attempts)
    success_rate = (success / total) * 100 if total > 0 else 0
    # Computed from the failure count itself: on a partial or empty run,
    # 100 - success_rate would overstate the failures.
    failed_rate = (failed / total) * 100 if total > 0 else 0

    # Save comprehensive log
    log_data = {
        'test_info': {
            'test_type': 'Initial Test - English Prompting Techniques',
            'timestamp': datetime.now().isoformat(),
            'config': TEST_CONFIG,
            'total_combinations': total,
            'successful_captions': success,
            'failed_captions': failed,
            'success_rate': success_rate
        },
        'successful_results': results,
        'failed_attempts': failed_attempts,
        'selected_images': selected_images
    }

    with open(TEST_CONFIG['log_file'], 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=2, ensure_ascii=False)

    print(f"\nüìù Complete log saved to: {TEST_CONFIG['log_file']}")

    # Final Summary
    print("\nüìä INITIAL TEST SUMMARY")
    print("=" * 50)
    print(f"üéØ Total combinations: {total}")
    print(f"‚úÖ Successful: {success} ({success_rate:.1f}%)")
    print(f"‚ùå Failed: {failed} ({failed_rate:.1f}%)")
    print(f"‚è±Ô∏è Test completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if success > 0:
        avg_response_time = sum(r.get('response_time', 0) for r in results) / len(results)
        print(f"üìà Average response time: {avg_response_time:.2f}s")

    print(f"\nüìÅ Files Created:")
    if csv_written:
        print(f"   üìä CSV Results: {TEST_CONFIG['output_file']}")
    else:
        # Do not advertise a CSV that was never written.
        print("   CSV Results: not written (no successful captions)")
    print(f"   üìù JSON Log: {TEST_CONFIG['log_file']}")
    print(f"   üîç Selection: {TEST_CONFIG['selection_file']}")

    return log_data

# Confirm the saving function is defined and show the configured output paths.
print("‚úÖ Enhanced CSV saving function defined!")
print(f"üìÅ CSV Output: {TEST_CONFIG['output_file']}")
print(f"üìù JSON Log: {TEST_CONFIG['log_file']}")
print(f"üîç Selection Log: {TEST_CONFIG['selection_file']}")

In [None]:
# ==========================================
# RUN THE INITIAL TEST
# ==========================================

print("üß™ INITIAL TEST - ENGLISH PROMPTING TECHNIQUES")
print("=" * 60)
print("üéØ This is a test run with limited data before the full research")
print(f"üìä Testing {len(test_combinations)} combinations")
print(f"üîß Using {len(TEST_CONFIG['prompting_techniques'])} techniques on {TEST_CONFIG['total_unique_images']} images")
print("\n‚ö†Ô∏è  IMPORTANT: This will make API calls to Google Gemini")
print(f"üí∞ Estimated API calls: {len(test_combinations)}")
print(f"‚è±Ô∏è Estimated time: {len(test_combinations) * TEST_CONFIG['base_delay'] / 60:.1f} minutes")

# Confirmation before starting
print("\n" + "="*60)
print("üöÄ Starting test in 3 seconds...")
print("   (Stop the cell if you want to cancel)")
print("="*60)

time.sleep(3)

try:
    # Run the test
    start_time = time.time()
    results, failed_attempts = run_initial_test()
    end_time = time.time()
    
    # Save results
    log_data = save_initial_test_results_enhanced(results, failed_attempts)
    
    # Final summary
    total_time = end_time - start_time
    print(f"\nüèÅ INITIAL TEST COMPLETED!")
    print(f"‚è±Ô∏è Total execution time: {total_time/60:.1f} minutes")
    print(f"üìä Success rate: {log_data['test_info']['success_rate']:.1f}%")
    
    if log_data['test_info']['success_rate'] >= 80:
        print("\nüéâ EXCELLENT! Test passed with high success rate")
        print("‚úÖ Ready to proceed with full research")
    elif log_data['test_info']['success_rate'] >= 60:
        print("\n‚ö†Ô∏è MODERATE: Test passed but with some issues")
        print("üîç Review failed attempts before full research")
    else:
        print("\n‚ùå LOW SUCCESS RATE: Issues detected")
        print("üõ†Ô∏è Fix issues before running full research")
    
    print(f"\nüìÅ Check output files:")
    print(f"   üìä Results: {TEST_CONFIG['output_file']}")
    print(f"   üìù Full log: {TEST_CONFIG['log_file']}")
    
except KeyboardInterrupt:
    print("\n‚èπÔ∏è Test interrupted by user")
    print("üíæ Saving partial results...")
    if 'results' in locals():
        save_initial_test_results_enhanced(results, failed_attempts if 'failed_attempts' in locals() else [])
        
except Exception as e:
    print(f"\nüí• Test failed with error: {e}")
    logger.error(f"Test execution failed: {e}")
    if 'results' in locals():
        print("üíæ Saving partial results...")
        save_initial_test_results_enhanced(results, failed_attempts if 'failed_attempts' in locals() else [])