In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from datetime import datetime
import warnings
# Ignore every warning for the rest of the session (no category filter is
# given, so this also hides deprecation notices) - presumably to keep the
# cell outputs uncluttered.
warnings.filterwarnings('ignore')

# Visible confirmation that the import cell ran to completion.
print("✅ All imports successful!")


✅ All imports successful!


In [2]:
# Locations used by the rest of the notebook.
# NOTE: VAL_DIR is an absolute, machine-specific path.
RESULTS_DIR = "results/validation_analysis/"
VAL_DIR = r"C:\Users\egese\Desktop\dataset\val\SAP"
REFERENCE_OUTPUT = "results/reference_solutions/"

# Make sure the output directory exists before anything is written to it.
os.makedirs(REFERENCE_OUTPUT, exist_ok=True)

# Discover the available result / candidate files. `latest_results` and
# `latest_candidates` are only bound when a matching file exists; the loading
# cell checks for their presence.
if not os.path.exists(RESULTS_DIR):
    print("❌ Results directory not found! Run validation analysis first.")
else:
    result_files = [
        name for name in os.listdir(RESULTS_DIR)
        if name.startswith('validation_claude_results_')
    ]
    candidate_files = [
        name for name in os.listdir(RESULTS_DIR)
        if name.startswith('reference_candidates_')
    ]

    print(f"📁 Results directory: {RESULTS_DIR}")
    print(f"📄 Available result files: {len(result_files)}")
    print(f"🎯 Available candidate files: {len(candidate_files)}")

    # The lexicographically greatest name is taken as the latest file.
    if len(result_files) > 0:
        latest_results = max(result_files)
        print(f"\n📊 Latest results: {latest_results}")

    if len(candidate_files) > 0:
        latest_candidates = max(candidate_files)
        print(f"🎯 Latest candidates: {latest_candidates}")


📁 Results directory: results/validation_analysis/
📄 Available result files: 1
🎯 Available candidate files: 1

📊 Latest results: validation_claude_results_20250619_191555.json
🎯 Latest candidates: reference_candidates_claude_20250619_191555.json


In [3]:
# CELL 4: Load validation results
# Start from empty containers so later cells can run even if nothing is loaded.
validation_results = []
reference_candidates = {}

if 'latest_results' not in locals():
    # The discovery cell found no result file (or was never executed).
    print("❌ No validation results found!")
    print("💡 Please run validation_claude_analysis.ipynb first!")
else:
    results_path = os.path.join(RESULTS_DIR, latest_results)
    print(f"📄 Loading: {results_path}")

    with open(results_path, 'r', encoding='utf-8') as results_file:
        validation_results = json.load(results_file)

    print(f"✅ Loaded validation results: {len(validation_results)} images")

    # The candidate file is optional.
    if 'latest_candidates' not in locals():
        print("⚠️ No candidate file found, will generate from results")
    else:
        candidates_path = os.path.join(RESULTS_DIR, latest_candidates)
        with open(candidates_path, 'r', encoding='utf-8') as candidates_file:
            reference_candidates = json.load(candidates_file)

        print(f"🎯 Loaded reference candidates: {len(reference_candidates)} categories")

📄 Loading: results/validation_analysis/validation_claude_results_20250619_191555.json
✅ Loaded validation results: 10 images
🎯 Loaded reference candidates: 3 categories


In [4]:
# CELL 5: Analyze validation results and create selection tool
def analyze_validation_results(results):
    """Group successful validation results by CNN category and rank them.

    Args:
        results: List of result dicts. Only entries whose ``'status'`` is
            ``'success'`` are used; each of those must provide
            ``'filename'``, ``'cnn_prediction'`` and ``'cnn_confidence'``.
            ``'claude_analysis'`` is optional.

    Returns:
        Dict mapping category -> stats dict with the keys ``'count'``,
        ``'items'`` (sorted by ``'combined_score'``, best first),
        ``'avg_cnn_confidence'``, ``'claude_success_rate'`` (percent) and
        ``'avg_claude_score'`` (mean over Claude-successful items, else 0).
        An empty dict is returned when ``results`` is empty.

    A Claude analysis counts as successful only when it is a dict without an
    ``'error'`` key and with a numeric (or numerically parseable)
    ``'gesamt_score'``. Anything else (``None``, a non-dict payload, an
    unparseable score) is treated as a Claude failure instead of raising.
    """

    if not results:
        print("❌ No validation results to analyze!")
        return {}

    successful_results = [r for r in results if r.get('status') == 'success']

    print(f"📊 ANALYZING {len(results)} VALIDATION RESULTS")
    print(f"✅ Successful: {len(successful_results)}")
    print(f"❌ Failed: {len(results) - len(successful_results)}")

    # Category statistics
    category_stats = {}

    for result in successful_results:
        category = result['cnn_prediction']
        stats = category_stats.setdefault(category, {'count': 0, 'items': []})
        stats['count'] += 1

        # CNN confidence
        cnn_conf = result['cnn_confidence']

        # Claude analysis: the key may be missing, None, or not a dict at all.
        claude_analysis = result.get('claude_analysis')
        claude_score = 0
        claude_success = False

        if (isinstance(claude_analysis, dict)
                and 'error' not in claude_analysis
                and 'gesamt_score' in claude_analysis):
            raw_score = claude_analysis['gesamt_score']
            if isinstance(raw_score, (int, float)):
                # Keep numeric scores untouched.
                claude_score = raw_score
                claude_success = True
            else:
                # Scores delivered as text (e.g. "8") are coerced; values that
                # cannot be parsed count as a Claude failure.
                try:
                    claude_score = float(raw_score)
                    claude_success = True
                except (TypeError, ValueError):
                    pass

        # Combined score for ranking: Claude (scaled from /10 to 0..1) is
        # weighted 0.7 against 0.3 for the CNN; without a Claude score the
        # CNN confidence is halved.
        if claude_success:
            combined_score = (cnn_conf * 0.3) + (claude_score/10 * 0.7)
        else:
            combined_score = cnn_conf * 0.5

        stats['items'].append({
            'filename': result['filename'],
            'cnn_confidence': cnn_conf,
            'claude_score': claude_score,
            'claude_success': claude_success,
            'combined_score': combined_score,
            'result': result
        })

    # Sort items by combined score and derive per-category averages
    for category, stats in category_stats.items():
        stats['items'] = sorted(stats['items'], key=lambda x: x['combined_score'], reverse=True)

        # A category is only created together with its first item, so
        # count > 0 always holds; the guard is kept for safety.
        if stats['count'] > 0:
            stats['avg_cnn_confidence'] = sum(item['cnn_confidence'] for item in stats['items']) / stats['count']

            claude_successful = [item for item in stats['items'] if item['claude_success']]
            stats['claude_success_rate'] = len(claude_successful) / stats['count'] * 100

            if claude_successful:
                stats['avg_claude_score'] = sum(item['claude_score'] for item in claude_successful) / len(claude_successful)
            else:
                stats['avg_claude_score'] = 0

    return category_stats

# Run the analysis on the loaded results and print one summary per category.
category_analysis = analyze_validation_results(validation_results)

if category_analysis:
    print("\n📊 CATEGORY ANALYSIS FOR REFERENCE SELECTION")
    print("=" * 60)

    # Categories are reported in alphabetical order.
    for category in sorted(category_analysis):
        stats = category_analysis[category]
        shown_candidates = min(5, len(stats['items']))

        print(f"\n📁 {category}:")
        print(f"   Total Images: {stats['count']}")
        print(f"   Avg CNN Confidence: {stats['avg_cnn_confidence']:.2%}")
        print(f"   Avg Claude Score: {stats['avg_claude_score']:.1f}/10")
        print(f"   Claude Success Rate: {stats['claude_success_rate']:.1f}%")
        print(f"   Top candidates available: {shown_candidates}")

📊 ANALYZING 10 VALIDATION RESULTS
✅ Successful: 10
❌ Failed: 0

📊 CATEGORY ANALYSIS FOR REFERENCE SELECTION

📁 Data Source:
   Total Images: 2
   Avg CNN Confidence: 71.20%
   Avg Claude Score: 0.0/10
   Claude Success Rate: 0.0%
   Top candidates available: 2

📁 Excel-Tabelle:
   Total Images: 2
   Avg CNN Confidence: 99.23%
   Avg Claude Score: 8.0/10
   Claude Success Rate: 100.0%
   Top candidates available: 2

📁 Info-Object:
   Total Images: 6
   Avg CNN Confidence: 67.34%
   Avg Claude Score: 7.0/10
   Claude Success Rate: 33.3%
   Top candidates available: 5


In [5]:
# CELL 6: Print copy-paste instructions for manual reference selection
if not category_analysis:
    print("❌ No categories available for selection!")
else:
    print("🎯 MANUAL REFERENCE SELECTION")
    print("=" * 40)

    categories = list(category_analysis)
    print("📋 Available categories:")
    for position, name in enumerate(categories, start=1):
        print(f"   {position}. {name} ({category_analysis[name]['count']} images)")

    # Emit a ready-to-run snippet that lists the top candidates of one category.
    print("\n🔍 TO VIEW CANDIDATES FOR A CATEGORY:")
    if len(categories) > 0:
        example_cat = categories[0]
        print(f"# View top 3 candidates for {example_cat}")
        print(f"if '{example_cat}' in category_analysis:")
        print(f"    stats = category_analysis['{example_cat}']")
        print(f"    candidates = stats['items'][:3]")
        print(f"    for i, candidate in enumerate(candidates, 1):")
        print(f"        print(f'{{i}}. {{candidate[\"filename\"]}} - CNN: {{candidate[\"cnn_confidence\"]:.2%}} - Claude: {{candidate[\"claude_score\"]:.1f}}/10')")

    # Emit a dict template with one commented-out entry per category.
    print("\n💡 TO SELECT A REFERENCE:")
    print("# Add your manual selections here:")
    print("selected_references = {")
    for name in categories:
        print(f"    # '{name}': 'your_chosen_filename.jpg',")
    print("}")

🎯 MANUAL REFERENCE SELECTION
📋 Available categories:
   1. Info-Object (6 images)
   2. Excel-Tabelle (2 images)
   3. Data Source (2 images)

🔍 TO VIEW CANDIDATES FOR A CATEGORY:
# View top 3 candidates for Info-Object
if 'Info-Object' in category_analysis:
    stats = category_analysis['Info-Object']
    candidates = stats['items'][:3]
    for i, candidate in enumerate(candidates, 1):
        print(f'{i}. {candidate["filename"]} - CNN: {candidate["cnn_confidence"]:.2%} - Claude: {candidate["claude_score"]:.1f}/10')

💡 TO SELECT A REFERENCE:
# Add your manual selections here:
selected_references = {
    # 'Info-Object': 'your_chosen_filename.jpg',
    # 'Excel-Tabelle': 'your_chosen_filename.jpg',
    # 'Data Source': 'your_chosen_filename.jpg',
}


In [6]:
# CELL 7: Quick test - preview the top three candidates of the first category
if category_analysis:
    first_category = next(iter(category_analysis))
    print(f"🔍 QUICK VIEW: {first_category}")
    print("=" * 40)

    stats = category_analysis[first_category]
    candidates = stats['items'][:3]

    for rank, entry in enumerate(candidates, start=1):
        print(f"\n{rank}. {entry['filename']}")
        print(f"   📊 CNN Confidence: {entry['cnn_confidence']:.2%}")
        if not entry['claude_success']:
            print(f"   🤖 Claude Score: ❌ Failed")
        else:
            print(f"   🤖 Claude Score: {entry['claude_score']:.1f}/10")
        print(f"   ⭐ Combined Score: {entry['combined_score']:.3f}")

    # Items are already sorted best-first, so index 0 is the top candidate.
    print(f"\n💡 To select the best one, you can manually choose:")
    print(f"# Example: selected_references = {{'{first_category}': '{candidates[0]['filename']}'}}")

🔍 QUICK VIEW: Info-Object

1. 005a2c943c5a4344bc058a3680a62c43.png
   📊 CNN Confidence: 96.58%
   🤖 Claude Score: 8.0/10
   ⭐ Combined Score: 0.850

2. 0056d6ebd8c045ea8bac5910b8aee061.jpeg
   📊 CNN Confidence: 89.97%
   🤖 Claude Score: 6.0/10
   ⭐ Combined Score: 0.690

3. 001e53127c8049dc94ead93b884b92fa.jpeg
   📊 CNN Confidence: 91.61%
   🤖 Claude Score: ❌ Failed
   ⭐ Combined Score: 0.458

💡 To select the best one, you can manually choose:
# Example: selected_references = {'Info-Object': '005a2c943c5a4344bc058a3680a62c43.png'}


In [7]:
def automatic_reference_selection(validation_results, min_score_threshold=0.4,
                                  cnn_weight=0.3, claude_weight=0.7,
                                  claude_failure_penalty=0.5):
    """Fully automatic reference selection.

    For every CNN category, picks the candidate with the highest combined
    score and keeps it if that score reaches ``min_score_threshold``.

    Args:
        validation_results: List of result dicts. Only entries whose
            ``'status'`` is ``'success'`` are considered; each of those must
            provide ``'filename'``, ``'cnn_prediction'`` and
            ``'cnn_confidence'``. ``'claude_analysis'`` is optional.
        min_score_threshold: Minimum combined score a category's best
            candidate needs in order to be selected.
        cnn_weight: Weight of the CNN confidence when a Claude score exists.
        claude_weight: Weight of the Claude score (scaled from /10 to 0..1).
        claude_failure_penalty: Factor applied to the CNN confidence when no
            usable Claude score exists.

    Returns:
        Dict mapping category -> selection dict with the keys ``'filename'``,
        ``'selection_reason'``, ``'cnn_confidence'``, ``'claude_score'``,
        ``'claude_success'``, ``'combined_score'``, ``'selected_at'`` (ISO
        timestamp) and ``'selection_method'``. Empty when
        ``validation_results`` is empty or no candidate reaches the threshold.

    A Claude analysis counts as successful only when it is a dict without an
    ``'error'`` key and with a numeric (or numerically parseable)
    ``'gesamt_score'``; ``None``, non-dict payloads and unparseable scores
    are treated as a Claude failure instead of raising.
    """

    if not validation_results:
        print("❌ No validation results!")
        return {}

    print("🤖 AUTOMATIC REFERENCE SELECTION ENGINE")
    print("=" * 50)

    # Successful results only
    successful_results = [r for r in validation_results if r.get('status') == 'success']
    print(f"📊 Processing {len(successful_results)} successful results")

    # Group scored candidates by category (insertion order = first appearance)
    categories = {}

    for result in successful_results:
        category = result['cnn_prediction']
        cnn_conf = result['cnn_confidence']

        # The key may be missing, None, or not a dict at all.
        claude_analysis = result.get('claude_analysis')
        claude_score = 0
        claude_success = False
        if (isinstance(claude_analysis, dict)
                and 'error' not in claude_analysis
                and 'gesamt_score' in claude_analysis):
            raw_score = claude_analysis['gesamt_score']
            if isinstance(raw_score, (int, float)):
                # Keep numeric scores untouched.
                claude_score = raw_score
                claude_success = True
            else:
                # Scores delivered as text (e.g. "8") are coerced; values
                # that cannot be parsed count as a Claude failure.
                try:
                    claude_score = float(raw_score)
                    claude_success = True
                except (TypeError, ValueError):
                    pass

        # Combined score calculation
        if claude_success:
            combined_score = (cnn_conf * cnn_weight) + (claude_score/10 * claude_weight)
        else:
            combined_score = cnn_conf * claude_failure_penalty  # Penalty for Claude failure

        categories.setdefault(category, []).append({
            'filename': result['filename'],
            'cnn_confidence': cnn_conf,
            'claude_score': claude_score,
            'claude_success': claude_success,
            'combined_score': combined_score
        })

    # Select best reference for each category
    selected_references = {}

    print(f"\n🎯 AUTOMATIC SELECTION RESULTS:")
    print("-" * 50)

    for category, candidates in categories.items():
        # max() returns the first candidate with the highest score, which is
        # the same pick a stable descending sort would put at index 0.
        best_candidate = max(candidates, key=lambda x: x['combined_score'])

        # Quality check
        if best_candidate['combined_score'] >= min_score_threshold:
            selected_references[category] = {
                'filename': best_candidate['filename'],
                'selection_reason': f"Highest combined score: {best_candidate['combined_score']:.3f}",
                'cnn_confidence': best_candidate['cnn_confidence'],
                'claude_score': best_candidate['claude_score'],
                'claude_success': best_candidate['claude_success'],
                'combined_score': best_candidate['combined_score'],
                'selected_at': datetime.now().isoformat(),
                'selection_method': 'automatic_best_score'
            }

            print(f"✅ {category}:")
            print(f"   📄 File: {best_candidate['filename']}")
            print(f"   📊 CNN: {best_candidate['cnn_confidence']:.2%}")
            if best_candidate['claude_success']:
                print(f"   🤖 Claude: {best_candidate['claude_score']:.1f}/10")
            else:
                print(f"   🤖 Claude: ❌ Failed")
            print(f"   ⭐ Score: {best_candidate['combined_score']:.3f}")
            print()
        else:
            print(f"⚠️ {category}: Best score {best_candidate['combined_score']:.3f} below threshold {min_score_threshold}")

    return selected_references

# Run the automatic selection with its default settings and report how many
# categories received a reference.
automatic_references = automatic_reference_selection(validation_results)

print("🎉 AUTOMATIC SELECTION COMPLETE!")
print("📁 Selected references for {} categories".format(len(automatic_references)))

🤖 AUTOMATIC REFERENCE SELECTION ENGINE
📊 Processing 10 successful results

🎯 AUTOMATIC SELECTION RESULTS:
--------------------------------------------------
✅ Info-Object:
   📄 File: 005a2c943c5a4344bc058a3680a62c43.png
   📊 CNN: 96.58%
   🤖 Claude: 8.0/10
   ⭐ Score: 0.850

✅ Excel-Tabelle:
   📄 File: 0042c7e49bb143ca9b50b3d6336c003b.png
   📊 CNN: 99.86%
   🤖 Claude: 8.0/10
   ⭐ Score: 0.860

✅ Data Source:
   📄 File: 008e8269e68341848a3680ebc4f11910.jpeg
   📊 CNN: 99.82%
   🤖 Claude: ❌ Failed
   ⭐ Score: 0.499

🎉 AUTOMATIC SELECTION COMPLETE!
📁 Selected references for 3 categories


In [8]:
# Save automatic reference selections (JSON for downstream use + text summary)
if automatic_references:
    # Capture a single instant for the whole cell so that the file names, the
    # JSON 'created_at' field and the summary's "Selection Time" all agree.
    selection_time = datetime.now()
    timestamp = selection_time.strftime("%Y%m%d_%H%M%S")
    output_filename = f"reference_solutions_automatic_{timestamp}.json"
    output_path = os.path.join(REFERENCE_OUTPUT, output_filename)

    # Prepare final reference data.
    # NOTE: 'selection_criteria' is written as literals; these values match
    # the weights/threshold automatic_reference_selection applies by default
    # and must be updated by hand if the selection is run with other values.
    reference_data = {
        'created_at': timestamp,
        'selection_method': 'automatic_best_score',
        'total_categories': len(automatic_references),
        'total_references': len(automatic_references),
        'selection_criteria': {
            'cnn_weight': 0.3,
            'claude_weight': 0.7,
            'min_threshold': 0.4,
            'penalty_for_claude_failure': 0.5
        },
        # Each category maps to a one-element list holding its selection.
        'references': {
            category: [selection] for category, selection in automatic_references.items()
        }
    }

    # Save JSON file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(reference_data, f, ensure_ascii=False, indent=2)

    print(f"✅ AUTOMATIC REFERENCES SAVED!")
    print(f"📄 File: {output_filename}")
    print(f"📊 Categories: {len(automatic_references)}")

    # Create summary file
    summary_filename = f"automatic_selection_summary_{timestamp}.txt"
    summary_path = os.path.join(REFERENCE_OUTPUT, summary_filename)

    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("AUTOMATIC REFERENCE SELECTION SUMMARY\n")
        f.write("=" * 40 + "\n\n")
        f.write(f"Selection Time: {selection_time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Selection Method: Automatic (Best Combined Score)\n")
        f.write(f"Total Categories: {len(automatic_references)}\n\n")

        for category, selection in automatic_references.items():
            f.write(f"📁 {category}:\n")
            f.write(f"   ✅ {selection['filename']}\n")
            f.write(f"   📊 CNN: {selection['cnn_confidence']:.2%}\n")
            if selection['claude_success']:
                f.write(f"   🤖 Claude: {selection['claude_score']:.1f}/10\n")
            else:
                f.write(f"   🤖 Claude: ❌ Failed\n")
            f.write(f"   ⭐ Combined Score: {selection['combined_score']:.3f}\n")
            f.write(f"   📝 Reason: {selection['selection_reason']}\n\n")

    print(f"📋 Summary: {summary_filename}")

    # Show final statistics. The candidate count comes from category_analysis
    # (built by the analysis cell), restricted to the selected categories.
    total_candidates = sum(len(category_analysis[cat]['items']) for cat in automatic_references.keys())
    avg_score = sum(ref['combined_score'] for ref in automatic_references.values()) / len(automatic_references)
    claude_success_rate = sum(1 for ref in automatic_references.values() if ref['claude_success']) / len(automatic_references) * 100

    print(f"\n📈 SELECTION STATISTICS:")
    print(f"   🎯 Categories processed: {len(automatic_references)}")
    print(f"   📊 Total candidates considered: {total_candidates}")
    print(f"   ⭐ Average combined score: {avg_score:.3f}")
    print(f"   🤖 Claude success rate: {claude_success_rate:.1f}%")

    print(f"\n🚀 NEXT STEPS:")
    print(f"   1️⃣ References are ready for comparison_engine.ipynb")
    print(f"   2️⃣ Run aufgabe4_final_evaluation.ipynb for testing")
    print(f"   3️⃣ Reference file: {output_filename}")

else:
    print("❌ No references were automatically selected!")
    print("💡 Check validation results and try lowering the threshold")

✅ AUTOMATIC REFERENCES SAVED!
📄 File: reference_solutions_automatic_20250619_193609.json
📊 Categories: 3
📋 Summary: automatic_selection_summary_20250619_193609.txt

📈 SELECTION STATISTICS:
   🎯 Categories processed: 3
   📊 Total candidates considered: 10
   ⭐ Average combined score: 0.736
   🤖 Claude success rate: 66.7%

🚀 NEXT STEPS:
   1️⃣ References are ready for comparison_engine.ipynb
   2️⃣ Run aufgabe4_final_evaluation.ipynb for testing
   3️⃣ Reference file: reference_solutions_automatic_20250619_193609.json


In [9]:
# Verify automatic selections: confirm each chosen file exists in VAL_DIR and
# repeat its quality scores.
if not automatic_references:
    print("❌ No automatic selections available")
else:
    print("🔍 QUALITY VERIFICATION OF AUTOMATIC SELECTIONS")
    print("=" * 50)

    for category in automatic_references:
        selection = automatic_references[category]
        print(f"\n📁 {category}:")
        print(f"   📄 Selected: {selection['filename']}")

        # Existence check, with the size reported in KB (bytes / 1024).
        file_path = os.path.join(VAL_DIR, selection['filename'])
        if not os.path.exists(file_path):
            print(f"   ❌ File not found!")
        else:
            file_size = os.path.getsize(file_path) / 1024
            print(f"   ✅ File exists ({file_size:.1f} KB)")

        # Quality scores
        print(f"   📊 Quality scores:")
        print(f"      CNN Confidence: {selection['cnn_confidence']:.2%}")
        if not selection['claude_success']:
            print(f"      Claude Score: ❌ Failed (using CNN only)")
            print(f"      Combined Score: {selection['combined_score']:.3f} (penalized)")
        else:
            print(f"      Claude Score: {selection['claude_score']:.1f}/10")
            print(f"      Combined Score: {selection['combined_score']:.3f}")

    print(f"\n🎉 AUTOMATIC REFERENCE SELECTION COMPLETED!")
    print(f"✅ Ready for Aufgabe 4 comparison engine!")

🔍 QUALITY VERIFICATION OF AUTOMATIC SELECTIONS

📁 Info-Object:
   📄 Selected: 005a2c943c5a4344bc058a3680a62c43.png
   ✅ File exists (34.3 KB)
   📊 Quality scores:
      CNN Confidence: 96.58%
      Claude Score: 8.0/10
      Combined Score: 0.850

📁 Excel-Tabelle:
   📄 Selected: 0042c7e49bb143ca9b50b3d6336c003b.png
   ✅ File exists (254.1 KB)
   📊 Quality scores:
      CNN Confidence: 99.86%
      Claude Score: 8.0/10
      Combined Score: 0.860

📁 Data Source:
   📄 Selected: 008e8269e68341848a3680ebc4f11910.jpeg
   ✅ File exists (88.9 KB)
   📊 Quality scores:
      CNN Confidence: 99.82%
      Claude Score: ❌ Failed (using CNN only)
      Combined Score: 0.499 (penalized)

🎉 AUTOMATIC REFERENCE SELECTION COMPLETED!
✅ Ready for Aufgabe 4 comparison engine!
