In [1]:
import os
import json
import base64
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from PIL import Image
from tqdm import tqdm
import warnings
import time
warnings.filterwarnings('ignore')

# Anthropic API
import anthropic

# Local imports
from predict import predict_image
from prompt_templates import prompt_templates

print("✅ All imports successful!")


ModuleNotFoundError: No module named 'anthropic'

In [None]:
class ClaudeSonnetAnalyzer:
    """Small client for single-image analysis through the Anthropic Messages API.

    The analyzer base64-encodes an image, sends it together with a text prompt
    and returns the reply as a dict: ``{"response": <text>}`` on success or
    ``{"error": <message>}`` on failure. ``parse_json_response`` then extracts
    the JSON object embedded in that text.
    """

    # Model used when the caller does not pass one explicitly.
    # NOTE(review): dated Claude 3 snapshots get retired over time - confirm this
    # ID is still served before starting a long batch run.
    DEFAULT_MODEL = "claude-3-sonnet-20240229"

    # Seconds to wait before retrying after a rate limit / a generic failure.
    RATE_LIMIT_WAIT_SECONDS = 10
    RETRY_WAIT_SECONDS = 5

    # File extension -> media type sent with the image block.
    MEDIA_TYPES = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    def __init__(self, api_key=None, model=None):
        """
        Claude Sonnet 3 Vision API client

        Args:
            api_key: Anthropic API key. Falls back to the ANTHROPIC_API_KEY
                environment variable when omitted.
            model: Optional model ID; defaults to ``DEFAULT_MODEL``.

        Raises:
            ValueError: if no API key is available from either source.
        """
        # API key setup
        key = api_key or os.getenv('ANTHROPIC_API_KEY')
        if not key:
            raise ValueError("❌ Anthropic API key required!")

        self.client = anthropic.Anthropic(api_key=key)
        self.model = model or self.DEFAULT_MODEL

        print("✅ Claude Sonnet 3 API initialized")

    def encode_image(self, image_path):
        """Return the file at ``image_path`` as a base64 string, or None on failure."""
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')
        except Exception as e:
            # Best effort: the caller turns None into an error dict.
            print(f"❌ Image encoding error: {e}")
            return None

    def get_image_media_type(self, image_path):
        """Map the file extension to a media type; unknown extensions default to JPEG."""
        ext = os.path.splitext(image_path)[1].lower()
        return self.MEDIA_TYPES.get(ext, "image/jpeg")

    def analyze_image(self, image_path, prompt, max_retries=3):
        """Send one image plus ``prompt`` to the model.

        Args:
            image_path: Path of the image file to analyse.
            prompt: Text instruction sent after the image.
            max_retries: Maximum number of request attempts.

        Returns:
            ``{"response": text}`` on success, otherwise ``{"error": message}``.
        """
        base64_image = self.encode_image(image_path)
        if not base64_image:
            return {"error": "Failed to encode image"}

        media_type = self.get_image_media_type(image_path)
        user_content = [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": base64_image
                }
            },
            {
                "type": "text",
                "text": prompt
            }
        ]

        for attempt in range(max_retries):
            is_last_attempt = attempt >= max_retries - 1
            try:
                message = self.client.messages.create(
                    model=self.model,
                    max_tokens=1500,
                    temperature=0.1,
                    messages=[{"role": "user", "content": user_content}]
                )

                # Collect text blocks instead of assuming content[0] is one.
                text_parts = [
                    block.text for block in message.content
                    if getattr(block, "type", None) == "text"
                ]
                if not text_parts:
                    return {"error": "Claude returned no text content"}
                return {"response": "".join(text_parts)}

            # RateLimitError must be handled before APIError: it is a subclass.
            except anthropic.RateLimitError:
                print(f"⏳ Rate limit hit, waiting... (attempt {attempt + 1})")
                if not is_last_attempt:
                    # No point in sleeping when no further attempt follows.
                    time.sleep(self.RATE_LIMIT_WAIT_SECONDS)
                continue
            except anthropic.APIError as e:
                print(f"❌ API Error: {e}")
                return {"error": f"API Error: {str(e)}"}
            except Exception as e:
                print(f"❌ Request error: {e}")
                if not is_last_attempt:
                    time.sleep(self.RETRY_WAIT_SECONDS)
                    continue
                return {"error": f"Request failed: {str(e)}"}

        return {"error": "Max retries exceeded"}

    def parse_json_response(self, response):
        """Extract the JSON object contained in an ``analyze_image`` result.

        Error dicts are passed through unchanged. Otherwise the first JSON
        object starting at the first ``{`` of the reply text is decoded; any
        text after that object (explanations, code fences, stray braces) is
        ignored.

        Returns:
            The decoded object, or a dict with an ``"error"`` key describing
            why nothing could be decoded.
        """
        try:
            if "error" in response:
                return response

            # Text part of the Claude response
            text_output = response.get('response', '')

            start_idx = text_output.find('{')
            if start_idx == -1:
                return {
                    "error": "JSON not found in response",
                    "raw_text": text_output[:500]  # first 500 characters
                }

            # raw_decode stops at the end of the first complete object, so
            # trailing prose containing '}' no longer breaks parsing.
            parsed, _end = json.JSONDecoder().raw_decode(text_output, start_idx)
            return parsed

        except json.JSONDecodeError as e:
            return {
                "error": f"JSON parse error: {str(e)}",
                "raw_response": str(response.get('response', ''))[:500]
            }
        except Exception as e:
            return {
                "error": f"Unexpected error: {str(e)}",
                "raw_response": str(response)[:500]
            }

# Initialise the analyzer.
# SECURITY: never paste an API key into the notebook - notebooks get shared and
# committed. The key is read from the ANTHROPIC_API_KEY environment variable.
# The key that used to be hardcoded here must be treated as leaked and revoked.
# NOTE(review): that key carried an "sk-or-" prefix, which looks like an
# OpenRouter key rather than an Anthropic one - confirm which provider is
# intended before relying on the Anthropic client.
API_KEY = os.getenv("ANTHROPIC_API_KEY")

try:
    claude_analyzer = ClaudeSonnetAnalyzer(api_key=API_KEY)
    print("🎯 Claude Sonnet 3 ready to analyze images!")
except Exception as e:
    print(f"❌ Setup failed: {e}")
    print("💡 Make sure you have: pip install anthropic")
    print("💡 Make sure the ANTHROPIC_API_KEY environment variable is set")


In [None]:
# Dataset paths.
# VAL_DIR can be overridden through the SAP_VAL_DIR environment variable so the
# notebook also runs on machines other than the original author's; the default
# is the original location.
VAL_DIR = os.environ.get("SAP_VAL_DIR", r"C:\Users\egese\Desktop\dataset\val\SAP")
OUTPUT_DIR = "results/validation_analysis/"

# Create the output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# List the images in the validation set.
# os.listdir returns entries in arbitrary order; sorting makes image_files[0]
# and image_files[:limit] select the same images on every run.
image_files = sorted(
    f for f in os.listdir(VAL_DIR)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

print(f"📁 Validation directory: {VAL_DIR}")
print(f"📊 Total images found: {len(image_files)}")
print(f"💾 Output directory: {OUTPUT_DIR}")

# Show the first few file names
print("\n📋 Sample files:")
for i, filename in enumerate(image_files[:5]):
    print(f"  {i+1}. {filename}")
if len(image_files) > 5:
    print(f"  ... and {len(image_files) - 5} more")


In [None]:
def test_single_image_analysis(image_filename):
    """Tek görsel ile test analizi"""
    image_path = os.path.join(VAL_DIR, image_filename)
    
    print(f"🔍 Testing with: {image_filename}")
    
    try:
        # 1. CNN Prediction
        print("1️⃣ CNN Analysis...")
        predicted_class, confidence = predict_image(image_path)
        print(f"   ✅ CNN Result: {predicted_class} ({confidence:.2%} confidence)")
        
        # 2. Claude Vision Analysis
        print("2️⃣ Claude Vision Analysis...")
        prompt = prompt_templates[predicted_class]
        claude_response = claude_analyzer.analyze_image(image_path, prompt)
        claude_analysis = claude_analyzer.parse_json_response(claude_response)
        
        print("   ✅ Claude Analysis completed")
        
        # 3. Results
        result = {
            "filename": image_filename,
            "cnn_prediction": predicted_class,
            "cnn_confidence": float(confidence),
            "claude_analysis": claude_analysis,
            "timestamp": datetime.now().isoformat(),
            "status": "success"
        }
        
        # Sonuçları güzel formatta yazdır
        print("\n📊 ANALYSIS RESULTS:")
        print(f"   Category: {predicted_class}")
        print(f"   CNN Confidence: {confidence:.2%}")
        
        if "error" not in claude_analysis:
            if "gesamt_score" in claude_analysis:
                print(f"   Claude Score: {claude_analysis['gesamt_score']}/10")
            print("   Claude Analysis: ✅ Success")
        else:
            print(f"   Claude Analysis: ❌ {claude_analysis['error']}")
        
        return result
        
    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        return None

# Smoke-test the pipeline on the first image
if not image_files:
    print("❌ No images found in validation directory!")
else:
    test_result = test_single_image_analysis(image_files[0])

    # Fully successful only when both the CNN and Claude steps worked
    if not (test_result and test_result.get('status') == 'success'):
        print("\n⚠️ Test failed. Please check configuration.")
    else:
        claude_analysis = test_result.get('claude_analysis', {})
        claude_success = 'error' not in claude_analysis

        if not claude_success:
            print("\n⚠️ Test partially successful: CNN works but Claude failed.")
            print("🔧 You can still continue with CNN-only analysis if needed.")
        else:
            print("\n🎉 Test successful! Both CNN and Claude working. Ready for batch processing.")


In [None]:
def process_single_image(image_filename):
    """Run the full CNN + Claude analysis for one image.

    Args:
        image_filename: File name relative to VAL_DIR.

    Returns:
        A dict with status "success" (prediction, confidence, parsed Claude
        analysis) or status "error" (exception text). A failed Claude call
        still yields status "success"; the failure is then recorded inside
        the "claude_analysis" entry.
    """
    image_path = os.path.join(VAL_DIR, image_filename)

    try:
        # CNN classification picks the prompt template for Claude
        predicted_class, confidence = predict_image(image_path)
        class_prompt = prompt_templates[predicted_class]

        # Claude vision analysis, parsed into a dict
        raw_reply = claude_analyzer.analyze_image(image_path, class_prompt)
        parsed_reply = claude_analyzer.parse_json_response(raw_reply)

        return dict(
            filename=image_filename,
            image_path=image_path,
            cnn_prediction=predicted_class,
            cnn_confidence=float(confidence),
            claude_analysis=parsed_reply,
            timestamp=datetime.now().isoformat(),
            status="success",
        )

    except Exception as exc:
        return dict(
            filename=image_filename,
            status="error",
            error=str(exc),
            timestamp=datetime.now().isoformat(),
        )

def run_batch_analysis(limit=None, save_every=10):
    """Analyse the validation images one by one and collect the results.

    Args:
        limit: Maximum number of images to process. None processes all
            images; 0 processes none.
        save_every: Write all results collected so far to an interim JSON
            file after every ``save_every`` images. 0 or None disables
            interim saving.

    Returns:
        List of result dicts as produced by ``process_single_image``.
    """
    # Compare against None: a plain truthiness test would treat limit=0 as
    # "no limit" and process the whole data set.
    files_to_process = image_files if limit is None else image_files[:limit]
    total = len(files_to_process)
    interim_saving = bool(save_every) and save_every > 0

    print(f"🔄 Starting batch analysis of {total} images")
    if interim_saving:
        print(f"💾 Saving intermediate results every {save_every} images")
    else:
        print("💾 Intermediate saving disabled")
    print("🤖 Using Claude Sonnet 3 Vision API")

    all_results = []

    for i, image_file in enumerate(tqdm(files_to_process, desc="Processing images")):

        result = process_single_image(image_file)
        all_results.append(result)

        # Per-image status line
        if result.get('status') == 'success':
            cnn_pred = result['cnn_prediction']
            cnn_conf = result['cnn_confidence']
            claude_status = "✅" if 'error' not in result.get('claude_analysis', {}) else "❌"
            print(f"   {i+1:3d}. {image_file[:30]:30s} | {cnn_pred:15s} ({cnn_conf:.2%}) | Claude: {claude_status}")
        else:
            print(f"   {i+1:3d}. {image_file[:30]:30s} | ❌ ERROR")

        # Interim save (guarded so save_every=0 cannot divide by zero)
        if interim_saving and (i + 1) % save_every == 0:
            interim_filename = f"interim_results_{i+1}.json"
            interim_path = os.path.join(OUTPUT_DIR, interim_filename)

            # Do not rely on an earlier cell having created the directory.
            os.makedirs(OUTPUT_DIR, exist_ok=True)
            with open(interim_path, 'w', encoding='utf-8') as f:
                json.dump(all_results, f, ensure_ascii=False, indent=2)

            # Progress info
            success_count = sum(1 for r in all_results if r.get('status') == 'success')
            claude_success = sum(
                1 for r in all_results
                if r.get('status') == 'success' and 'error' not in r.get('claude_analysis', {})
            )
            print(f"\n📊 Progress: {i+1}/{total} | Success: {success_count} | Claude Success: {claude_success} | Saved: {interim_filename}")

            # Short pause for the Claude API after each interim save,
            # skipped after the very last image.
            if i < total - 1:
                print("⏳ Pausing 2 seconds for rate limiting...")
                time.sleep(2)

    return all_results

# Start the batch run
print("\n".join((
    "⚡ Choose your processing option:",
    "1. Test run (first 5 images)",
    "2. Medium run (first 20 images)",
    "3. Full run (all images)",
)))

# Number of images to process; change this value to switch to a larger run
PROCESSING_LIMIT = 10  # start with 10 images

print(f"\n🚀 Starting processing with limit: {PROCESSING_LIMIT}")
batch_results = run_batch_analysis(limit=PROCESSING_LIMIT)


In [None]:
# Persist the final results under a timestamped file name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
final_filename = f"validation_claude_results_{timestamp}.json"
final_path = os.path.join(OUTPUT_DIR, final_filename)

with open(final_path, 'w', encoding='utf-8') as f:
    json.dump(batch_results, f, ensure_ascii=False, indent=2)

print(f"✅ Final results saved: {final_path}")
# Size of the file that was just written, in KB
print(f"📁 File size: {os.stat(final_path).st_size / 1024:.1f} KB")

# Analiz fonksiyonu
def analyze_batch_results(results):
    """Print a detailed report for a batch run and return per-category stats.

    Args:
        results: List of result dicts as produced by ``run_batch_analysis``.

    Returns:
        Dict mapping each CNN category to
        ``{'count', 'confidences', 'claude_scores'}``. An empty dict is
        returned when there is nothing to analyse (no results or no
        successful results).
    """

    def _numeric_score(value):
        """Return ``value`` as a number, or None when it is not usable.

        The score comes from model-generated JSON, so it may arrive as a
        string such as "8" or as free text.
        """
        if isinstance(value, bool):
            return None
        if isinstance(value, (int, float)):
            return value if value == value else None  # drop NaN
        try:
            parsed = float(value)
        except (TypeError, ValueError):
            return None
        return parsed if parsed == parsed else None

    successful_results = [r for r in results if r.get('status') == 'success']
    error_count = len(results) - len(successful_results)

    print("="*60)
    print("📊 VALIDATION SET ANALYSIS REPORT - CLAUDE SONNET 3")
    print("="*60)

    print("\n📈 Processing Summary:")
    print(f"   Total Images: {len(results)}")
    print(f"   Successful: {len(successful_results)}")
    print(f"   Errors: {error_count}")
    if results:
        print(f"   Success Rate: {len(successful_results)/len(results)*100:.1f}%")
    else:
        # Avoid dividing by zero for an empty batch.
        print("   Success Rate: n/a")

    if not successful_results:
        print("❌ No successful results to analyze!")
        return {}

    # Claude success rate: successful records whose analysis has no 'error' key
    claude_successful = [r for r in successful_results if 'error' not in r.get('claude_analysis', {})]
    claude_success_rate = len(claude_successful) / len(successful_results) * 100
    print(f"   Claude Success Rate: {claude_success_rate:.1f}%")

    # Per-category statistics
    category_stats = {}
    all_confidences = []
    all_claude_scores = []

    for result in successful_results:
        category = result['cnn_prediction']
        confidence = result['cnn_confidence']
        all_confidences.append(confidence)

        stats = category_stats.setdefault(category, {
            'count': 0,
            'confidences': [],
            'claude_scores': [],
        })
        stats['count'] += 1
        stats['confidences'].append(confidence)

        # Only analyses without an error and with a usable numeric score count
        claude_analysis = result.get('claude_analysis', {})
        if 'error' not in claude_analysis and 'gesamt_score' in claude_analysis:
            score = _numeric_score(claude_analysis['gesamt_score'])
            if score is not None:
                stats['claude_scores'].append(score)
                all_claude_scores.append(score)

    print("\n🔍 Category Analysis:")
    print("-" * 60)

    for category, stats in sorted(category_stats.items()):
        avg_conf = sum(stats['confidences']) / len(stats['confidences'])
        # Share of images in this category that produced a usable score
        claude_success = len(stats['claude_scores']) / stats['count'] * 100

        print(f"\n📁 {category}:")
        print(f"   Images: {stats['count']}")
        print(f"   Avg CNN Confidence: {avg_conf:.2%}")
        if stats['claude_scores']:
            avg_score = sum(stats['claude_scores']) / len(stats['claude_scores'])
            print(f"   Avg Claude Score: {avg_score:.1f}/10")
        else:
            # "0.0/10" would read as a real, very bad score.
            print("   Avg Claude Score: n/a")
        print(f"   Claude Success Rate: {claude_success:.1f}%")

    # Overall statistics
    print("\n📊 Overall Statistics:")
    print("-" * 30)
    if all_confidences:
        print(f"   Avg CNN Confidence: {sum(all_confidences)/len(all_confidences):.2%}")
        print(f"   Min CNN Confidence: {min(all_confidences):.2%}")
        print(f"   Max CNN Confidence: {max(all_confidences):.2%}")

    if all_claude_scores:
        print(f"   Avg Claude Score: {sum(all_claude_scores)/len(all_claude_scores):.1f}/10")
        print(f"   Min Claude Score: {min(all_claude_scores):.1f}/10")
        print(f"   Max Claude Score: {max(all_claude_scores):.1f}/10")

    return category_stats

# Run the analysis on the batch results and keep the per-category statistics
category_statistics = analyze_batch_results(batch_results)


In [None]:
def identify_reference_candidates(results, top_n=3):
    """Pick the best-scoring images of every category as reference candidates.

    Each successful result gets a combined score: 30% CNN confidence plus
    70% Claude score (normalised to 0-1). Results without a usable Claude
    score are ranked by half their CNN confidence instead.

    Args:
        results: List of result dicts as produced by ``run_batch_analysis``.
        top_n: Number of candidates to keep per category.

    Returns:
        Dict mapping category -> list of the ``top_n`` result dicts, best
        first. Empty dict when there are no successful results.
    """

    def _usable_score(analysis):
        """Return the numeric gesamt_score of ``analysis`` or None.

        None is returned when the analysis carries an error, has no score,
        or the score (model-generated JSON) is not numeric.
        """
        if 'error' in analysis or 'gesamt_score' not in analysis:
            return None
        value = analysis['gesamt_score']
        if isinstance(value, bool):
            return None
        if not isinstance(value, (int, float)):
            try:
                value = float(value)
            except (TypeError, ValueError):
                return None
        return value if value == value else None  # drop NaN

    successful_results = [r for r in results if r.get('status') == 'success']

    if not successful_results:
        print("❌ No successful results for reference selection!")
        return {}

    reference_candidates = {}

    # Group results by predicted category
    by_category = {}
    for result in successful_results:
        by_category.setdefault(result['cnn_prediction'], []).append(result)

    print("🎯 REFERENCE SOLUTION CANDIDATES - CLAUDE ANALYZED")
    print("="*60)

    for category, items in by_category.items():
        print(f"\n📁 {category} ({len(items)} images):")

        # Combined score: CNN confidence + Claude score
        scored_items = []
        for item in items:
            cnn_conf = item['cnn_confidence']
            claude_score = _usable_score(item.get('claude_analysis', {}))

            if claude_score is not None:
                # Normalise to 0-1 and weight Claude more heavily
                combined_score = (cnn_conf * 0.3) + ((claude_score / 10) * 0.7)
            else:
                # CNN only, with a penalty
                combined_score = cnn_conf * 0.5

            scored_items.append((item, combined_score, claude_score))

        # Highest combined scores first
        top_items = sorted(scored_items, key=lambda entry: entry[1], reverse=True)[:top_n]
        reference_candidates[category] = []

        for i, (item, score, claude_score) in enumerate(top_items, 1):
            reference_candidates[category].append(item)

            # Show the same Claude information that was used for ranking
            claude_analysis = item.get('claude_analysis', {})
            if claude_score is not None:
                claude_info = f", Claude: {claude_score:.1f}/10"
            elif 'error' in claude_analysis:
                claude_info = ", Claude: ERROR"
            elif 'gesamt_score' in claude_analysis:
                claude_info = ", Claude: n/a"
            else:
                claude_info = ""

            print(f"   {i}. {item['filename']}")
            print(f"      Combined Score: {score:.3f} (CNN: {item['cnn_confidence']:.2%}{claude_info})")

    return reference_candidates

# Select the reference candidates
reference_candidates = identify_reference_candidates(batch_results, top_n=3)

# Save the candidates next to the other result files
reference_filename = f"reference_candidates_claude_{timestamp}.json"
reference_path = os.path.join(OUTPUT_DIR, reference_filename)

with open(reference_path, 'w', encoding='utf-8') as f:
    json.dump(reference_candidates, f, ensure_ascii=False, indent=2)

print(f"\n✅ Reference candidates saved: {reference_path}")

# Summary report for the whole run
summary = {
    "analysis_timestamp": timestamp,
    "model_used": "Claude Sonnet 3 + EfficientNet-B0",
    "total_images_processed": len(batch_results),
    "successful_analyses": sum(
        1 for r in batch_results if r.get('status') == 'success'
    ),
    "claude_successful_analyses": sum(
        1 for r in batch_results
        if r.get('status') == 'success' and 'error' not in r.get('claude_analysis', {})
    ),
    "categories_found": list(reference_candidates),
    "reference_candidates_per_category": dict(
        (k, len(v)) for k, v in reference_candidates.items()
    ),
    "files_generated": {
        "full_results": final_filename,
        "reference_candidates": reference_filename
    }
}

summary_filename = f"analysis_summary_claude_{timestamp}.json"
summary_path = os.path.join(OUTPUT_DIR, summary_filename)

with open(summary_path, 'w', encoding='utf-8') as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)

print(f"📋 Summary saved: {summary_path}")


In [None]:
print("\n" + "="*60)
print("🎉 VALIDATION SET ANALYSIS COMPLETED - CLAUDE SONNET 3!")
print("="*60)

# Detailed result summary
total_processed = len(batch_results)
successful = sum(1 for r in batch_results if r.get('status') == 'success')
claude_successful = sum(
    1 for r in batch_results
    if r.get('status') == 'success' and 'error' not in r.get('claude_analysis', {})
)

# Guard the percentages: an empty batch (e.g. no images found) would
# otherwise raise ZeroDivisionError and hide the rest of the report.
cnn_success_pct = successful / total_processed * 100 if total_processed else 0.0
claude_success_pct = claude_successful / total_processed * 100 if total_processed else 0.0

print("\n📊 Analysis Summary:")
print(f"   ✅ Total Processed: {total_processed} images")
print(f"   📊 CNN Successful: {successful} ({cnn_success_pct:.1f}%)")
print(f"   🤖 Claude Successful: {claude_successful} ({claude_success_pct:.1f}%)")
print(f"   📁 Categories found: {len(reference_candidates)} categories")
print(f"   🎯 Reference candidates: {sum(len(v) for v in reference_candidates.values())} total")

print("\n💾 Files Generated:")
print(f"   📄 {final_filename} - Full analysis results")
print(f"   🎯 {reference_filename} - Reference solution candidates")
print(f"   📋 {summary_filename} - Analysis summary")

print("\n🚀 Next Steps for Aufgabe 4:")
print("   1️⃣ Review reference candidates manually")
print("   2️⃣ Select final Musterlösung for each category")
print("   3️⃣ Implement comparison engine using comparison_prompt")
print("   4️⃣ Test student vs reference comparison with Claude")

print("\n💡 Advantages of Claude Sonnet 3:")
print("   • Excellent vision capabilities")
print("   • Consistent JSON output parsing")
print("   • High-quality analysis of SAP screenshots")
print("   • Perfect for comparison tasks")

print("\n🔧 Ready for Aufgabe 4 - Comparison Engine with Claude!")

# Quick stats per category
if reference_candidates:
    print("\n📊 Quick Category Overview:")
    for category, candidates in reference_candidates.items():
        print(f"   📁 {category}: {len(candidates)} reference candidates")


In [None]:
# 🔧 Anthropic Paketi Kurulumu
# Bu hücreyi sadece ilk çalıştırmada kullanın

import subprocess
import sys

def install_package(package):
    """Install ``package`` with pip for the interpreter running this kernel.

    Args:
        package: Name of the package to install.

    Returns:
        True when pip exited successfully, False when it reported an error.
    """
    # Use the kernel's own interpreter so the package lands in its environment
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as exc:
        print(f"❌ Failed to install {package}: {exc}")
        return False
    print(f"✅ {package} successfully installed!")
    return True

# Make sure the anthropic package is available, installing it if necessary
print("🔄 Checking anthropic package...")
try:
    import anthropic
except ImportError:
    print("📦 Installing anthropic package...")
    success = install_package("anthropic")
    if not success:
        # Automatic installation failed: show the manual alternatives
        print("❌ Manual installation required. Please run in terminal:")
        print("   py -m pip install anthropic")
        print("   OR python -m pip install anthropic")
        print("   OR pip install anthropic")
    else:
        print("🔄 Restarting kernel may be required...")
else:
    print("✅ Anthropic already installed!")

🔄 Checking anthropic package...
📦 Installing anthropic package...
