# 📊 Validation Set Batch Analysis - Aufgabe 4

Bu notebook ile `dataset/val/SAP/` klasöründeki unlabeled görselleri:
1. **EfficientNet-B0** ile classify edeceğiz
2. **Qwen2.5 VL 32B Instruct** ile detaylı analiz yapacağız
3. **Student submissions** için category statistics çıkaracağız
4. **Reference solution** adayları belirleyeceğiz


In [1]:
import os
import json
import base64
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from PIL import Image
from tqdm import tqdm
import warnings
import time
warnings.filterwarnings('ignore')

# Local imports
from predict import predict_image
from prompt_templates import prompt_templates

print("✅ All imports successful!")


✅ All imports successful!


In [6]:
class QwenVLAnalyzer:
    def __init__(self, hf_token=None):
        """
        Qwen2.5 VL 32B Instruct API client
        
        Args:
            hf_token: Hugging Face API token (eğer None ise environment'dan alır)
        """
        self.api_url = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-VL-32B-Instruct"
        
        # Token setup
        token = 'hf_token' or os.getenv('HF_TOKEN')
        if not token:
            raise ValueError("❌ Hugging Face token required! Set HF_TOKEN environment variable or pass hf_token parameter")
        
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }
        
        print("✅ Qwen2.5 VL API initialized")
    
    def encode_image(self, image_path):
        """Görseli base64'e çevir"""
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')
        except Exception as e:
            print(f"❌ Image encoding error: {e}")
            return None
    
    def analyze_image(self, image_path, prompt, max_retries=3):
        """Qwen2.5 VL ile görsel analizi"""
        base64_image = self.encode_image(image_path)
        if not base64_image:
            return {"error": "Failed to encode image"}
        
        payload = {
            "inputs": {
                "image": base64_image,
                "text": prompt
            },
            "parameters": {
                "max_new_tokens": 1000,
                "temperature": 0.1,  # Consistent results için düşük
                "do_sample": False
            }
        }
        
        for attempt in range(max_retries):
            try:
                response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=30)
                
                if response.status_code == 200:
                    return response.json()
                elif response.status_code == 503:  # Model loading
                    print(f"⏳ Model loading, waiting... (attempt {attempt + 1})")
                    time.sleep(20)
                    continue
                else:
                    print(f"❌ API Error: {response.status_code} - {response.text}")
                    return {"error": f"API Error {response.status_code}: {response.text}"}
                    
            except Exception as e:
                print(f"❌ Request error: {e}")
                if attempt < max_retries - 1:
                    time.sleep(5)
                    continue
                return {"error": f"Request failed: {str(e)}"}
        
        return {"error": "Max retries exceeded"}
    
    def parse_json_response(self, response):
        """Qwen response'u JSON olarak parse et"""
        try:
            if "error" in response:
                return response
            
            # Qwen response'undan text kısmını al
            text_output = ""
            if isinstance(response, list) and len(response) > 0:
                text_output = response[0].get('generated_text', '')
            elif isinstance(response, dict):
                text_output = response.get('generated_text', '')
            
            # JSON kısmını extract et (curly braces arasında)
            start_idx = text_output.find('{')
            end_idx = text_output.rfind('}') + 1
            
            if start_idx != -1 and end_idx > start_idx:
                json_str = text_output[start_idx:end_idx]
                return json.loads(json_str)
            else:
                return {
                    "error": "JSON not found in response", 
                    "raw_text": text_output[:500]  # İlk 500 char
                }
                
        except json.JSONDecodeError as e:
            return {
                "error": f"JSON parse error: {str(e)}", 
                "raw_response": str(response)[:500]
            }
        except Exception as e:
            return {
                "error": f"Unexpected error: {str(e)}", 
                "raw_response": str(response)[:500]
            }

# Test the API setup
try:
    # HF token'ı buraya girin veya environment variable olarak set edin
    os.environ['HF_TOKEN'] = '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'
    qwen_analyzer = QwenVLAnalyzer()
    print("🎯 Ready to analyze images!")
except Exception as e:
    print(f"❌ Setup failed: {e}")
    print("💡 Set your HF token: os.environ['HF_TOKEN'] = 'your_token_here'")


✅ Qwen2.5 VL API initialized
🎯 Ready to analyze images!


In [7]:
# Dataset paths
VAL_DIR = r"C:\Users\egese\Desktop\dataset\val\SAP"
OUTPUT_DIR = "results/validation_analysis/"

# Output directory oluştur
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Validation set'teki görselleri listele
image_files = [f for f in os.listdir(VAL_DIR) 
               if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

print(f"📁 Validation directory: {VAL_DIR}")
print(f"📊 Total images found: {len(image_files)}")
print(f"💾 Output directory: {OUTPUT_DIR}")

# İlk birkaç dosya adını göster
print("\n📋 Sample files:")
for i, filename in enumerate(image_files[:5]):
    print(f"  {i+1}. {filename}")
if len(image_files) > 5:
    print(f"  ... and {len(image_files) - 5} more")


📁 Validation directory: C:\Users\egese\Desktop\dataset\val\SAP
📊 Total images found: 3135
💾 Output directory: results/validation_analysis/

📋 Sample files:
  1. 001e53127c8049dc94ead93b884b92fa.jpeg
  2. 001e80166ed44602808dc75c92d81d3e.jpeg
  3. 002f7cca46e7451797539bfc9520b39f.png
  4. 0042c7e49bb143ca9b50b3d6336c003b.png
  5. 0048fa9d132c47cf8e7b7e7a2397c340.png
  ... and 3130 more


In [9]:
def test_single_image_analysis(image_filename):
    """Tek görsel ile test analizi"""
    image_path = os.path.join(VAL_DIR, image_filename)
    
    print(f"🔍 Testing with: {image_filename}")
    
    try:
        # 1. CNN Prediction
        print("1️⃣ CNN Analysis...")
        predicted_class, confidence = predict_image(image_path)
        print(f"   ✅ CNN Result: {predicted_class} ({confidence:.2%} confidence)")
        
        # 2. LLM Analysis
        print("2️⃣ LLM Analysis...")
        prompt = prompt_templates[predicted_class]
        llm_response = qwen_analyzer.analyze_image(image_path, prompt)
        llm_analysis = qwen_analyzer.parse_json_response(llm_response)
        
        print("   ✅ LLM Analysis completed")
        
        # 3. Results
        result = {
            "filename": image_filename,
            "cnn_prediction": predicted_class,
            "cnn_confidence": float(confidence),
            "llm_analysis": llm_analysis,
            "timestamp": datetime.now().isoformat(),
            "status": "success"
        }
        
        # Sonuçları güzel formatta yazdır
        print("\n📊 ANALYSIS RESULTS:")
        print(f"   Category: {predicted_class}")
        print(f"   Confidence: {confidence:.2%}")
        
        if "error" not in llm_analysis:
            if "gesamt_score" in llm_analysis:
                print(f"   LLM Score: {llm_analysis['gesamt_score']}/10")
            print("   LLM Analysis: ✅ Success")
        else:
            print(f"   LLM Analysis: ❌ {llm_analysis['error']}")
        
        return result
        
    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        return None

# Test ile ilk görsel
if image_files:
    test_result = test_single_image_analysis(image_files[0])
    
    if test_result and test_result.get('status') == 'success':
        print("\n🎉 Test successful! Ready for batch processing.")
    else:
        print("\n⚠️ Test failed. Please check configuration.")
else:
    print("❌ No images found in validation directory!")


🔍 Testing with: 001e53127c8049dc94ead93b884b92fa.jpeg
1️⃣ CNN Analysis...
   ✅ CNN Result: Info-Object (91.61% confidence)
2️⃣ LLM Analysis...
❌ API Error: 401 - {"error":"Invalid credentials in Authorization header"}
   ✅ LLM Analysis completed

📊 ANALYSIS RESULTS:
   Category: Info-Object
   Confidence: 91.61%

🎉 Test successful! Ready for batch processing.


In [10]:
def process_single_image(image_filename):
    """Tek görsel için tam analiz"""
    image_path = os.path.join(VAL_DIR, image_filename)
    
    try:
        # 1. CNN Prediction
        predicted_class, confidence = predict_image(image_path)
        
        # 2. LLM Analysis
        prompt = prompt_templates[predicted_class]
        llm_response = qwen_analyzer.analyze_image(image_path, prompt)
        llm_analysis = qwen_analyzer.parse_json_response(llm_response)
        
        result = {
            "filename": image_filename,
            "image_path": image_path,
            "cnn_prediction": predicted_class,
            "cnn_confidence": float(confidence),
            "llm_analysis": llm_analysis,
            "timestamp": datetime.now().isoformat(),
            "status": "success"
        }
        
        return result
        
    except Exception as e:
        return {
            "filename": image_filename,
            "status": "error",
            "error": str(e),
            "timestamp": datetime.now().isoformat()
        }

def run_batch_analysis(limit=None, save_every=10):
    """Batch analiz - tüm validation set"""
    
    # Limit uygula
    files_to_process = image_files[:limit] if limit else image_files
    
    print(f"🔄 Starting batch analysis of {len(files_to_process)} images")
    print(f"💾 Saving intermediate results every {save_every} images")
    
    all_results = []
    
    # Progress bar ile işlem
    for i, image_file in enumerate(tqdm(files_to_process, desc="Processing images")):
        
        result = process_single_image(image_file)
        all_results.append(result)
        
        # Interim save
        if (i + 1) % save_every == 0:
            interim_filename = f"interim_results_{i+1}.json"
            interim_path = os.path.join(OUTPUT_DIR, interim_filename)
            
            with open(interim_path, 'w', encoding='utf-8') as f:
                json.dump(all_results, f, ensure_ascii=False, indent=2)
            
            # Progress info
            success_count = len([r for r in all_results if r.get('status') == 'success'])
            print(f"\n📊 Progress: {i+1}/{len(files_to_process)} | Success: {success_count} | Saved: {interim_filename}")
    
    return all_results

# BATCH PROCESSING BAŞLAT
print("⚡ Choose your processing option:")
print("1. Test run (first 5 images)")
print("2. Medium run (first 20 images)")
print("3. Full run (all images)")

# Test run ile başla - Bu değeri değiştirerek full run yapabilirsiniz
PROCESSING_LIMIT = 5  

print(f"\n🚀 Starting processing with limit: {PROCESSING_LIMIT}")
batch_results = run_batch_analysis(limit=PROCESSING_LIMIT)


⚡ Choose your processing option:
1. Test run (first 5 images)
2. Medium run (first 20 images)
3. Full run (all images)

🚀 Starting processing with limit: 5
🔄 Starting batch analysis of 5 images
💾 Saving intermediate results every 10 images


Processing images:  20%|█████████████                                                    | 1/5 [00:01<00:04,  1.01s/it]

❌ API Error: 401 - {"error":"Invalid credentials in Authorization header"}


Processing images:  40%|██████████████████████████                                       | 2/5 [00:02<00:03,  1.00s/it]

❌ API Error: 401 - {"error":"Invalid credentials in Authorization header"}


Processing images:  60%|███████████████████████████████████████                          | 3/5 [00:03<00:02,  1.08s/it]

❌ API Error: 401 - {"error":"Invalid credentials in Authorization header"}


Processing images:  80%|████████████████████████████████████████████████████             | 4/5 [00:04<00:01,  1.06s/it]

❌ API Error: 401 - {"error":"Invalid credentials in Authorization header"}


Processing images: 100%|█████████████████████████████████████████████████████████████████| 5/5 [00:05<00:00,  1.06s/it]

❌ API Error: 401 - {"error":"Invalid credentials in Authorization header"}





In [None]:
# Final sonuçları kaydet
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
final_filename = f"validation_batch_results_{timestamp}.json"
final_path = os.path.join(OUTPUT_DIR, final_filename)

with open(final_path, 'w', encoding='utf-8') as f:
    json.dump(batch_results, f, ensure_ascii=False, indent=2)

print(f"✅ Final results saved: {final_path}")
print(f"📁 File size: {os.path.getsize(final_path) / 1024:.1f} KB")


In [None]:
def analyze_batch_results(results):
    """Batch sonuçlarının detaylı analizi"""
    
    successful_results = [r for r in results if r.get('status') == 'success']
    error_count = len(results) - len(successful_results)
    
    print("="*60)
    print("📊 VALIDATION SET ANALYSIS REPORT")
    print("="*60)
    
    print(f"\n📈 Processing Summary:")
    print(f"   Total Images: {len(results)}")
    print(f"   Successful: {len(successful_results)}")
    print(f"   Errors: {error_count}")
    print(f"   Success Rate: {len(successful_results)/len(results)*100:.1f}%")
    
    if not successful_results:
        print("❌ No successful results to analyze!")
        return
    
    # Kategori istatistikleri
    category_stats = {}
    all_confidences = []
    all_llm_scores = []
    
    for result in successful_results:
        category = result['cnn_prediction']
        confidence = result['cnn_confidence']
        all_confidences.append(confidence)
        
        if category not in category_stats:
            category_stats[category] = {
                'count': 0,
                'confidences': [],
                'gesamt_scores': [],
                'llm_success_rate': 0
            }
        
        category_stats[category]['count'] += 1
        category_stats[category]['confidences'].append(confidence)
        
        # LLM analysis kontrol et
        llm_analysis = result.get('llm_analysis', {})
        if 'error' not in llm_analysis and 'gesamt_score' in llm_analysis:
            score = llm_analysis['gesamt_score']
            category_stats[category]['gesamt_scores'].append(score)
            all_llm_scores.append(score)
    
    # Kategori başına istatistikler
    print(f"\n🔍 Category Analysis:")
    print("-" * 60)
    
    for category, stats in sorted(category_stats.items()):
        avg_conf = sum(stats['confidences']) / len(stats['confidences'])
        avg_score = sum(stats['gesamt_scores']) / len(stats['gesamt_scores']) if stats['gesamt_scores'] else 0
        llm_success = len(stats['gesamt_scores']) / stats['count'] * 100
        
        print(f"\n📁 {category}:")
        print(f"   Images: {stats['count']}")
        print(f"   Avg CNN Confidence: {avg_conf:.2%}")
        print(f"   Avg LLM Score: {avg_score:.1f}/10")
        print(f"   LLM Success Rate: {llm_success:.1f}%")
    
    # Genel istatistikler
    print(f"\n📊 Overall Statistics:")
    print("-" * 30)
    if all_confidences:
        print(f"   Avg CNN Confidence: {sum(all_confidences)/len(all_confidences):.2%}")
        print(f"   Min CNN Confidence: {min(all_confidences):.2%}")
        print(f"   Max CNN Confidence: {max(all_confidences):.2%}")
    
    if all_llm_scores:
        print(f"   Avg LLM Score: {sum(all_llm_scores)/len(all_llm_scores):.1f}/10")
        print(f"   Min LLM Score: {min(all_llm_scores):.1f}/10")
        print(f"   Max LLM Score: {max(all_llm_scores):.1f}/10")
    
    return category_stats

# Analizi çalıştır
category_statistics = analyze_batch_results(batch_results)


In [None]:
def identify_reference_candidates(results, top_n=3):
    """Her kategoriden en iyi örnekleri referans aday olarak seç"""
    
    successful_results = [r for r in results if r.get('status') == 'success']
    
    if not successful_results:
        print("❌ No successful results for reference selection!")
        return {}
    
    reference_candidates = {}
    
    # Kategori başına grup
    by_category = {}
    for result in successful_results:
        category = result['cnn_prediction']
        if category not in by_category:
            by_category[category] = []
        by_category[category].append(result)
    
    print("🎯 REFERENCE SOLUTION CANDIDATES")
    print("="*50)
    
    for category, items in by_category.items():
        print(f"\n📁 {category} ({len(items)} images):")
        
        # Kombinasyon skoru: CNN confidence + LLM score
        scored_items = []
        for item in items:
            cnn_conf = item['cnn_confidence']
            llm_analysis = item.get('llm_analysis', {})
            
            if 'error' not in llm_analysis and 'gesamt_score' in llm_analysis:
                llm_score = llm_analysis['gesamt_score'] / 10  # 0-1 aralığına normalize
                combined_score = (cnn_conf * 0.4) + (llm_score * 0.6)  # LLM'e ağırlık ver
            else:
                combined_score = cnn_conf * 0.7  # Sadece CNN score
            
            scored_items.append((item, combined_score))
        
        # En yüksek skorlular
        top_items = sorted(scored_items, key=lambda x: x[1], reverse=True)[:top_n]
        reference_candidates[category] = []
        
        for i, (item, score) in enumerate(top_items, 1):
            reference_candidates[category].append(item)
            
            llm_info = ""
            llm_analysis = item.get('llm_analysis', {})
            if 'gesamt_score' in llm_analysis:
                llm_info = f", LLM: {llm_analysis['gesamt_score']:.1f}/10"
            
            print(f"   {i}. {item['filename']}")
            print(f"      Score: {score:.3f} (CNN: {item['cnn_confidence']:.2%}{llm_info})")
    
    return reference_candidates

# Reference candidates seç
reference_candidates = identify_reference_candidates(batch_results, top_n=3)


In [None]:
# Reference candidates'ı ayrı dosyaya kaydet
reference_filename = f"reference_candidates_{timestamp}.json"
reference_path = os.path.join(OUTPUT_DIR, reference_filename)

with open(reference_path, 'w', encoding='utf-8') as f:
    json.dump(reference_candidates, f, ensure_ascii=False, indent=2)

print(f"✅ Reference candidates saved: {reference_path}")

# Summary raporu
summary = {
    "analysis_timestamp": timestamp,
    "total_images_processed": len(batch_results),
    "successful_analyses": len([r for r in batch_results if r.get('status') == 'success']),
    "categories_found": list(reference_candidates.keys()),
    "reference_candidates_per_category": {k: len(v) for k, v in reference_candidates.items()},
    "files_generated": {
        "full_results": final_filename,
        "reference_candidates": reference_filename
    }
}

summary_filename = f"analysis_summary_{timestamp}.json"
summary_path = os.path.join(OUTPUT_DIR, summary_filename)

with open(summary_path, 'w', encoding='utf-8') as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)

print(f"📋 Summary saved: {summary_path}")


In [None]:
print("\n" + "="*60)
print("🎉 VALIDATION SET ANALYSIS COMPLETED!")
print("="*60)

print(f"\n📊 Analysis Summary:")
print(f"   ✅ Processed: {len(batch_results)} images")
print(f"   📁 Categories found: {len(reference_candidates)} categories")
print(f"   🎯 Reference candidates: {sum(len(v) for v in reference_candidates.values())} total")

print(f"\n💾 Files Generated:")
print(f"   📄 {final_filename} - Full analysis results")
print(f"   🎯 {reference_filename} - Reference solution candidates")
print(f"   📋 {summary_filename} - Analysis summary")

print(f"\n🚀 Next Steps for Aufgabe 4:")
print(f"   1️⃣ Review reference candidates manually")
print(f"   2️⃣ Select final Musterlösung for each category")
print(f"   3️⃣ Implement comparison engine")
print(f"   4️⃣ Test student vs reference comparison")

print(f"\n💡 Tips:")
print(f"   • High CNN confidence + High LLM score = Good reference candidate")
print(f"   • Check images manually before using as Musterlösung")
print(f"   • Use comparison_prompt from prompt_templates.py for next step")

print("\n🔧 Ready for Aufgabe 4 - Comparison Engine Implementation!")


In [None]:
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