# GRPO Reranking Kiértékelése - CourtRankRL Projekt

Ez a notebook a CourtRankRL GRPO alapú reranking komponensét értékeli ki. Az agents.md specifikáció alapján a reinforcement learning reranking teljesítményét és hatékonyságát teszteli.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import torch
from pathlib import Path
from typing import Dict, Any, List
import time

# Plot styling shared by every figure in this notebook.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

# Make the project packages importable and load the configuration.
import sys

# ``__file__`` only exists when this code is executed as a script; notebook
# kernels and interactive sessions do not define it, so fall back to the
# current working directory there instead of failing with NameError.
try:
    _start_dir = Path(__file__).resolve().parent
except NameError:
    _start_dir = Path.cwd()

# Walk upwards to the first directory that holds the ``configs`` module or
# package. If none is found, keep the original assumption that the project
# root is the parent of the directory this file lives in.
project_root = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / "configs").is_dir() or (candidate / "configs.py").is_file()
    ),
    _start_dir.parent,
)
sys.path.insert(0, str(project_root))
from configs import config
from src.search.hybrid_search import HybridRetriever
from src.search.grpo_reranker import GRPOReranker

# Echo the artefact locations this evaluation will read.
print("CourtRankRL - GRPO Reranking Evaluation")
print(f"RL Policy: {config.RL_POLICY_PATH}")
print(f"BM25 index: {config.BM25_INDEX_PATH}")
print(f"FAISS index: {config.FAISS_INDEX_PATH}")

## 1. Komponensek Betöltése

A GRPO reranker és retrieval komponensek betöltése.

In [None]:
# Component handles start out as "unavailable"; the later cells check them
# against None / False before doing any work.
retriever = None
reranker = None
policy_loaded = False

print("Komponensek bet√∂lt√©se...")

# --- Hybrid retriever -------------------------------------------------------
try:
    retriever = HybridRetriever()
    print("‚úÖ Hybrid retriever bet√∂ltve")
except Exception as retriever_err:
    print(f"‚ùå Hybrid retriever hiba: {retriever_err}")
    retriever = None

# --- GRPO reranker and its trained policy -----------------------------------
try:
    reranker = GRPOReranker()
    print("‚úÖ GRPO reranker inicializ√°lva")

    if not config.RL_POLICY_PATH.exists():
        # No policy file on disk: the reranker object is kept, but
        # policy_loaded stays False.
        print(f"‚ö†Ô∏è RL policy nem tal√°lhat√≥: {config.RL_POLICY_PATH}")
        print("Policy n√©lk√ºli reranker m≈±k√∂d√©s (baseline)")
    else:
        reranker.load_policy(config.RL_POLICY_PATH)
        policy_loaded = True
        print("‚úÖ RL policy bet√∂ltve")

except Exception as reranker_err:
    # Any failure (construction or policy loading) disables the reranker.
    print(f"‚ùå GRPO reranker hiba: {reranker_err}")
    reranker = None

# Summarise the RL hyper-parameters taken from the project configuration.
if reranker is not None:
    print(f"\nReranker konfigur√°ci√≥:")
    for _label, _attr in (
        ("Learning rate", "RL_LEARNING_RATE"),
        ("Batch size", "RL_BATCH_SIZE"),
        ("Hidden dim", "RL_HIDDEN_DIM"),
    ):
        print(f"  {_label}: {getattr(config, _attr)}")
    print(f"  Policy loaded: {policy_loaded}")

## 2. Reranking Teljesítmény Tesztelése

A GRPO reranking teljesítményének és hatékonyságának tesztelése.

In [None]:
# Benchmark candidate retrieval, RRF fusion and GRPO reranking on a few
# queries, then tabulate and plot the timings. Requires both components.
if retriever is not None and reranker is not None:
    print("üéØ Reranking teljes√≠tm√©ny tesztel√©se:")

    # Queries used for the latency / overlap comparison.
    test_queries = [
        "szerz≈ëd√©s felmond√°sa",
        "k√°rt√©r√≠t√©s",
        "csal√°di jog",
        "munkajog",
        "ingatlan tulajdonjog"
    ]

    # One row per query whose retrieval stages completed.
    results_summary = []

    for query in test_queries:
        print(f"\nüîç Teszt lek√©rdez√©s: '{query}'")

        # Reset for every query: the top-5 overlap is only computed when
        # both rankings are non-empty, and a value left over from an
        # earlier query must never be recorded for this one.
        overlap = 0

        try:
            # Stage 1: candidate retrieval (BM25 + dense).
            # perf_counter() is a monotonic high-resolution clock, which
            # suits interval timing better than wall-clock time.time().
            start_time = time.perf_counter()
            bm25_results, dense_results = retriever.retrieve_candidates(query, top_k=config.TOP_K_BASELINE)
            baseline_time = time.perf_counter() - start_time

            print(f"  Baseline retrieval: {len(bm25_results)} BM25 + {len(dense_results)} dense = {len(bm25_results) + len(dense_results)} candidate")
            print(f"  Baseline id≈ë: {baseline_time*1000:.1f}ms")

            # Stage 2: baseline fusion (RRF).
            start_time = time.perf_counter()
            baseline_fusion = retriever.retrieve(query, top_k=config.TOP_K_RERANKED, fusion_method="rrf")
            fusion_time = time.perf_counter() - start_time

            print(f"  Baseline fusion (RRF): {len(baseline_fusion)} eredm√©ny, {fusion_time*1000:.1f}ms")

            # Stage 3: GRPO reranking of the stage-1 candidates.
            start_time = time.perf_counter()
            try:
                reranked_results = reranker.rerank(bm25_results, dense_results)
                reranking_time = time.perf_counter() - start_time

                print(f"  GRPO reranking: {len(reranked_results)} eredm√©ny, {reranking_time*1000:.1f}ms")

                # Compare the two top-5 lists.
                if reranked_results and baseline_fusion:
                    reranked_set = {doc_id for doc_id, _ in reranked_results[:5]}
                    baseline_set = set(baseline_fusion[:5])

                    # Share of the baseline top-5 that survives reranking.
                    overlap = len(reranked_set & baseline_set) / len(baseline_set) if baseline_set else 0
                    print(f"  √Åtfed√©s baseline vs reranked (top 5): {overlap:.2f}")

                    # Documents that appear in only one of the two lists.
                    only_reranked = reranked_set - baseline_set
                    only_baseline = baseline_set - reranked_set

                    if only_reranked:
                        print(f"  Csak reranked-ben: {list(only_reranked)}")
                    if only_baseline:
                        print(f"  Csak baseline-ben: {list(only_baseline)}")

                    # Show the reranked top-5 with scores.
                    print(f"\nTop 5 reranked eredm√©ny:")
                    for i, (doc_id, score) in enumerate(reranked_results[:5], 1):
                        print(f"  {i}. {doc_id} (score: {score:.4f})")

                # Summary row (times in milliseconds).
                results_summary.append({
                    'query': query,
                    'baseline_results': len(baseline_fusion),
                    'baseline_time': baseline_time * 1000,
                    'fusion_time': fusion_time * 1000,
                    'reranking_time': reranking_time * 1000,
                    'total_reranking_time': (baseline_time + reranking_time) * 1000,
                    'overlap_ratio': overlap
                })

            except Exception as rerank_e:
                # Reranking failed: still record the retrieval timings.
                print(f"‚ùå Reranking hiba: {rerank_e}")
                results_summary.append({
                    'query': query,
                    'baseline_results': len(baseline_fusion),
                    'baseline_time': baseline_time * 1000,
                    'fusion_time': fusion_time * 1000,
                    'reranking_time': 0,
                    'total_reranking_time': baseline_time * 1000,
                    'overlap_ratio': 0
                })

        except Exception as e:
            # Retrieval or fusion failed: no summary row for this query.
            print(f"‚ùå Teszt hiba: {e}")

    # Summary table
    if results_summary:
        results_df = pd.DataFrame(results_summary)
        print("\nüìä Reranking teljes√≠tm√©ny √∂sszefoglal√≥:")
        display(results_df.round(2))

        # Mean timings across the recorded queries.
        print("\nüìà √Åtlagos teljes√≠tm√©ny:")
        print(f"  Baseline retrieval: {results_df['baseline_time'].mean():.1f}ms √°tlag")
        print(f"  Fusion: {results_df['fusion_time'].mean():.1f}ms √°tlag")
        print(f"  Reranking: {results_df['reranking_time'].mean():.1f}ms √°tlag")
        print(f"  √ñsszes reranking: {results_df['total_reranking_time'].mean():.1f}ms √°tlag")
        print(f"  √Åtfed√©s ar√°ny: {results_df['overlap_ratio'].mean():.2f} √°tlag")

        # Grouped bar chart of the three stage timings.
        plt.figure(figsize=(14, 6))

        # Size the x axis from the rows actually recorded: queries that
        # failed during retrieval have no row, so len(test_queries) could
        # disagree with the length of the plotted columns.
        x = np.arange(len(results_df))
        width = 0.2

        plt.bar(x - width, results_df['baseline_time'], width, label='Baseline', alpha=0.7)
        plt.bar(x, results_df['fusion_time'], width, label='Fusion', alpha=0.7)
        plt.bar(x + width, results_df['reranking_time'], width, label='Reranking', alpha=0.7)

        plt.xlabel('Teszt lek√©rdez√©s')
        plt.ylabel('Id≈ë (ms)')
        plt.title('Reranking teljes√≠tm√©ny √∂sszehasonl√≠t√°sa')
        # Label each bar group with the query's position in test_queries so
        # the Qn labels stay correct even when some queries were skipped.
        tick_labels = [f'Q{test_queries.index(q) + 1}' for q in results_df['query']]
        plt.xticks(x, tick_labels, rotation=45)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
else:
    print("‚ùå Reranking tesztel√©s nem el√©rhet≈ë - hi√°nyz√≥ komponensek")
    if retriever is None:
        print("üí° Sz√ºks√©ges: Hybrid retriever - futtassa: uv run courtrankrl build")
    if reranker is None:
        print("üí° Sz√ºks√©ges: GRPO reranker - ellen≈ërizze a GRPOReranker implement√°ci√≥t")

## 3. Policy Elemzése

A betanított RL policy tulajdonságainak és teljesítményének elemzése.

In [None]:
def _format_value(value, spec):
    """Format *value* with format spec *spec*, falling back to ``str(value)``.

    Numeric format specs such as ``','`` or ``'.4f'`` raise ``ValueError`` /
    ``TypeError`` for strings (e.g. the ``'N/A'`` placeholder) or ``None``;
    one such entry must not abort the whole report.
    """
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return str(value)


# Report what the loaded policy says about itself. Requires a reranker with
# a successfully loaded policy.
if reranker is not None and policy_loaded:
    print("üß† Policy elemz√©se:")

    try:
        # Policy properties
        policy_info = reranker.get_policy_info()

        print(f"Policy inform√°ci√≥k:")
        print(f"  Policy t√≠pusa: {policy_info.get('type', 'N/A')}")
        print(f"  Input dimenzi√≥: {policy_info.get('input_dim', 'N/A')}")
        print(f"  Hidden dimenzi√≥: {policy_info.get('hidden_dim', config.RL_HIDDEN_DIM)}")
        # The thousands separator is only valid for numbers; a missing
        # 'param_count' yields the 'N/A' string, which is printed verbatim.
        print(f"  Param√©terek sz√°ma: {_format_value(policy_info.get('param_count', 'N/A'), ',')}")
        print(f"  Tan√≠t√°si id≈ë: {policy_info.get('training_time', 'N/A')}")

        # Per-epoch training metrics
        if 'metrics' in policy_info:
            print(f"\nTan√≠t√°si metrik√°k:")
            metrics = policy_info['metrics']
            for epoch, epoch_metrics in metrics.items():
                print(f"  Epoch {epoch}:")
                for metric, value in epoch_metrics.items():
                    print(f"    {metric}: {_format_value(value, '.4f')}")

        # Feature weights (present for a linear policy)
        if 'feature_weights' in policy_info:
            print(f"\nFeature s√∫lyok:")
            weights = policy_info['feature_weights']

            # Horizontal bar chart of the weights
            plt.figure(figsize=(10, 6))
            features = list(weights.keys())
            weight_values = list(weights.values())

            plt.barh(features, weight_values)
            plt.title('Feature s√∫lyok a policy-ben')
            plt.xlabel('S√∫ly')
            plt.ylabel('Feature')
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.show()

            # Five features with the largest absolute weight
            sorted_weights = sorted(weights.items(), key=lambda x: abs(x[1]), reverse=True)
            print(f"\nLegfontosabb feature-√∂k:")
            for i, (feature, weight) in enumerate(sorted_weights[:5]):
                print(f"  {i+1}. {feature}: {weight:.4f}")

        # Policy stability metrics
        if 'stability_metrics' in policy_info:
            print(f"\nPolicy stabilit√°s:")
            stability = policy_info['stability_metrics']
            for metric, value in stability.items():
                print(f"  {metric}: {_format_value(value, '.4f')}")

    except Exception as e:
        print(f"‚ùå Policy elemz√©s hiba: {e}")
        print("Policy metrik√°k nem el√©rhet≈ëek")
else:
    print("‚ùå Policy elemz√©s nem el√©rhet≈ë - hi√°nyz√≥ policy")
    if not policy_loaded:
        print("üí° Policy bet√∂lt√©se sz√ºks√©ges: futtassa a train parancsot")

## 4. Feature Elemzése

A GRPO reranking által használt feature-ök elemzése.

In [None]:
# Inspect the feature matrix the reranker builds for one query: summary
# statistics, correlations, histograms and (with a policy) the top scores.
if retriever is None or reranker is None:
    print("‚ùå Feature elemz√©s nem el√©rhet≈ë - hi√°nyz√≥ komponensek")
else:
    print("üìä Feature elemz√©se:")

    # Query used for the feature inspection
    test_query = "szerz≈ëd√©s felmond√°sa"

    try:
        # Fetch candidates from both retrievers
        bm25_results, dense_results = retriever.retrieve_candidates(test_query, top_k=10)

        if bm25_results and dense_results:
            # Build the feature matrix
            features = reranker.extract_features(bm25_results, dense_results, test_query)

            print(f"\nTeszt lek√©rdez√©s: '{test_query}'")
            print(f"Feature m√°trix: {features.shape}")

            # Column-wise summary statistics
            feature_stats = pd.DataFrame({
                stat_name: getattr(features, stat_name)(axis=0)
                for stat_name in ('mean', 'std', 'min', 'max')
            })

            print(f"\nFeature statisztik√°k:")
            display(feature_stats.round(4))

            n_features = features.shape[1]

            # Correlation heatmap (needs at least two feature columns)
            if n_features > 1:
                correlation_matrix = np.corrcoef(features.T)
                axis_labels = [f'F{col}' for col in range(n_features)]

                plt.figure(figsize=(8, 6))
                sns.heatmap(
                    correlation_matrix,
                    annot=True,
                    cmap='coolwarm',
                    center=0,
                    xticklabels=axis_labels,
                    yticklabels=list(axis_labels),
                )
                plt.title('Feature korrel√°ci√≥ m√°trix')
                plt.tight_layout()
                plt.show()

            # Histograms for at most the first six features (2x3 grid)
            plt.figure(figsize=(14, 8))
            for col in range(min(6, n_features)):
                plt.subplot(2, 3, col + 1)
                plt.hist(features[:, col], bins=20, alpha=0.7)
                plt.title(f'Feature {col} eloszl√°sa')
                plt.xlabel(f'Feature {col} √©rt√©k')
                plt.ylabel('Kandid√°tusok sz√°ma')
                plt.grid(True, alpha=0.3)

            plt.tight_layout()
            plt.show()

            # Policy scores for the same candidates
            if policy_loaded:
                try:
                    scores = reranker.policy.predict(features)

                    print(f"\nPolicy score statisztik√°k:")
                    print(f"  √Åtlag score: {scores.mean():.4f}")
                    print(f"  Score tartom√°ny: [{scores.min():.4f}, {scores.max():.4f}]")

                    # Five highest-scoring candidates
                    print(f"\nTop 5 scoring kandid√°tus:")
                    top_indices = np.argsort(scores)[::-1][:5]
                    n_bm25 = len(bm25_results)
                    for rank, idx in enumerate(top_indices, 1):
                        # NOTE(review): assumes feature rows are ordered as
                        # the BM25 candidates followed by the dense ones;
                        # confirm against extract_features().
                        if idx < n_bm25:
                            doc_id = bm25_results[idx]
                        else:
                            doc_id = dense_results[idx - n_bm25]
                        print(f"  {rank}. {doc_id}: {scores[idx]:.4f}")
                        print(f"      Features: {features[idx]}")

                except Exception as score_e:
                    print(f"‚ùå Score sz√°m√≠t√°s hiba: {score_e}")

    except Exception as e:
        print(f"‚ùå Feature elemz√©s hiba: {e}")

## 5. Ranking Összehasonlítás

A baseline és GRPO reranked eredmények összehasonlítása.

In [None]:
# Compare the RRF baseline ranking with the GRPO reranked ranking on a few
# queries: top-5 overlap, Kendall tau on shared documents, top-1 changes.
if retriever is not None and reranker is not None and policy_loaded:
    print("üìä Ranking √∂sszehasonl√≠t√°s:")

    # Queries for the detailed comparison
    test_queries_detailed = [
        "szerz≈ëd√©s felmond√°sa",
        "k√°rt√©r√≠t√©s m√©rt√©ke",
        "csal√°djogi √ºgyek"
    ]

    comparison_results = []

    for query in test_queries_detailed:
        try:
            # Baseline ranking (RRF fusion)
            baseline_results = retriever.retrieve(query, top_k=config.TOP_K_RERANKED, fusion_method="rrf")

            # Reranked ranking
            bm25_results, dense_results = retriever.retrieve_candidates(query, top_k=config.TOP_K_BASELINE)
            reranked_results = reranker.rerank(bm25_results, dense_results)

            if baseline_results and reranked_results:
                # rerank() yields (doc_id, score) pairs; keep the ids alone
                # so positions can be looked up without knowing the score.
                reranked_ids = [doc_id for doc_id, _ in reranked_results]

                baseline_set = set(baseline_results[:5])
                reranked_set = set(reranked_ids[:5])

                # Share of the baseline top-5 that is also in the reranked top-5
                overlap = len(baseline_set & reranked_set) / len(baseline_set) if baseline_set else 0

                print(f"\nüîç Lek√©rdez√©s: '{query}'")
                print(f"\nBaseline ranking:")
                for i, doc_id in enumerate(baseline_results[:5], 1):
                    print(f"  {i}. {doc_id}")

                print(f"\nReranked ranking:")
                for i, (doc_id, score) in enumerate(reranked_results[:5], 1):
                    print(f"  {i}. {doc_id} (score: {score:.4f})")

                print(f"\n√Åtfed√©s (top 5): {overlap:.2f}")

                # Kendall tau rank correlation (optional; needs scipy)
                try:
                    from scipy.stats import kendalltau

                    # Documents present in both top-5 lists, in baseline
                    # order and without duplicates.
                    common_docs = list(dict.fromkeys(
                        doc for doc in baseline_results[:5] if doc in reranked_set
                    ))
                    if len(common_docs) >= 3:  # need at least 3 documents for a correlation
                        baseline_ranks = [baseline_results.index(doc) for doc in common_docs]
                        # Look the position up by id. Searching the
                        # (doc_id, score) pairs for (doc, 0) would only
                        # match entries whose score is exactly 0.
                        reranked_ranks = [reranked_ids.index(doc) for doc in common_docs]

                        tau, p_value = kendalltau(baseline_ranks, reranked_ranks)
                        print(f"  Kendall tau korrel√°ci√≥: {tau:.3f} (p-value: {p_value:.3f})")

                except ImportError:
                    print("  (scipy nem el√©rhet≈ë a Kendall tau korrel√°ci√≥hoz)")
                except Exception as kendall_e:
                    print(f"  Kendall tau sz√°m√≠t√°s hiba: {kendall_e}")

                comparison_results.append({
                    'query': query,
                    'overlap': overlap,
                    'baseline_top1': baseline_results[0] if len(baseline_results) > 0 else None,
                    'reranked_top1': reranked_ids[0] if len(reranked_ids) > 0 else None
                })

        except Exception as e:
            print(f"‚ùå √ñsszehasonl√≠t√°s hiba: {e}")

    # Summary
    if comparison_results:
        comparison_df = pd.DataFrame(comparison_results)
        print(f"\nüìã √ñsszehasonl√≠t√°s √∂sszefoglal√≥:")
        display(comparison_df)

        print(f"\n√Åtlagos √°tfed√©s: {comparison_df['overlap'].mean():.2f}")

        # Number of queries whose top-1 document changed after reranking
        top1_changes = comparison_df['baseline_top1'] != comparison_df['reranked_top1']
        print(f"Top-1 v√°ltoz√°s: {top1_changes.sum()}/{len(comparison_df)} lek√©rdez√©s")
else:
    print("‚ùå Ranking √∂sszehasonl√≠t√°s nem el√©rhet≈ë - hi√°nyz√≥ komponensek vagy policy")

## 6. Training Data Elemzése

A GRPO training során használt adatok és folyamat elemzése.

In [None]:
# Report what the reranker exposes about its training run: basic facts,
# metric curves, the latest metric values and qrels statistics.
if reranker is None:
    print("‚ùå Training data elemz√©s nem el√©rhet≈ë - hi√°nyz√≥ reranker")
else:
    print("üéì Training data elemz√©se:")

    try:
        # Training run description supplied by the reranker
        training_info = reranker.get_training_info()

        if not training_info:
            print("‚ö†Ô∏è Training inform√°ci√≥k nem el√©rhet≈ëek")
            print("A policy betan√≠t√°sa ut√°n lesznek el√©rhet≈ëek a r√©szletes metrik√°k")
        else:
            print(f"Training inform√°ci√≥k:")
            for info_label, info_key in (
                ("Qrels f√°jl", 'qrels_file'),
                ("Training mint√°k", 'num_samples'),
                ("Epoch-ok", 'epochs'),
            ):
                print(f"  {info_label}: {training_info.get(info_key, 'N/A')}")
            # Batch size falls back to the configured value, not 'N/A'.
            print(f"  Batch size: {training_info.get('batch_size', config.RL_BATCH_SIZE)}")

            # Training metrics
            if 'training_metrics' in training_info:
                print(f"\nTraining metrik√°k:")
                metrics = training_info['training_metrics']

                # Curves are drawn only when an 'epochs' axis is available.
                if isinstance(metrics, dict) and 'epochs' in metrics:
                    epochs = metrics['epochs']

                    plt.figure(figsize=(14, 8))

                    # (metric key, panel title, y-axis label); the position
                    # in this tuple is the slot in the 2x2 grid.
                    curve_specs = (
                        ('loss', 'Training Loss', 'Loss'),
                        ('reward', 'Average Reward', 'Reward'),
                        ('ndcg', 'nDCG Score', 'nDCG'),
                        ('entropy', 'Policy Entropy', 'Entropy'),
                    )
                    for slot, (curve_key, curve_title, y_label) in enumerate(curve_specs, start=1):
                        if curve_key not in metrics:
                            continue
                        plt.subplot(2, 2, slot)
                        plt.plot(epochs, metrics[curve_key])
                        plt.title(curve_title)
                        plt.xlabel('Epoch')
                        plt.ylabel(y_label)
                        plt.grid(True, alpha=0.3)

                    plt.tight_layout()
                    plt.show()

                # Most recent value of every metric
                print(f"\nLegutols√≥ epoch metrik√°k:")
                for metric_name, series in metrics.items():
                    if isinstance(series, list):
                        if not series:
                            # Empty history: nothing to report.
                            continue
                        latest = series[-1]
                    else:
                        latest = series
                    print(f"  {metric_name}: {latest:.4f}")

            # Qrels statistics
            if 'qrels_stats' in training_info:
                qrels_stats = training_info['qrels_stats']
                print(f"\nQrels statisztik√°k:")
                for stat_name, stat_value in qrels_stats.items():
                    print(f"  {stat_name}: {stat_value}")

    except Exception as e:
        print(f"‚ùå Training data elemz√©s hiba: {e}")
        print("Training metrik√°k nem el√©rhet≈ëek")

## 7. Ablation Study

A GRPO komponensek hatásának vizsgálata.

In [None]:
# Ablation on one query: RRF baseline vs full reranking vs reranking with
# only BM25 or only dense candidates. Without a policy, only the raw
# BM25 / dense top-5 are compared with the baseline.
if retriever is None or reranker is None:
    print("‚ùå Ablation study nem el√©rhet≈ë - hi√°nyz√≥ komponensek")
else:
    print("üî¨ Ablation study:")

    # Query under study
    test_query = "szerz≈ëd√©s felmond√°sa"

    try:
        # Candidates from both retrievers
        bm25_results, dense_results = retriever.retrieve_candidates(test_query, top_k=config.TOP_K_BASELINE)

        # Baseline (RRF fusion only)
        baseline_results = retriever.retrieve(test_query, top_k=config.TOP_K_RERANKED, fusion_method="rrf")

        if not policy_loaded:
            print("‚ö†Ô∏è Ablation study policy n√©lk√ºl nem teljes - csak baseline vs fusion √∂sszehasonl√≠t√°s")

            print(f"\nBaseline vs Fusion √∂sszehasonl√≠t√°s:")
            baseline_results = retriever.retrieve(test_query, top_k=config.TOP_K_RERANKED, fusion_method="rrf")

            print(f"\nBaseline (RRF) eredm√©nyek:")
            for position, doc_id in enumerate(baseline_results[:5], 1):
                print(f"  {position}. {doc_id}")

            # Raw retriever output vs the fused baseline
            bm25_results, dense_results = retriever.retrieve_candidates(test_query, top_k=20)

            bm25_set = set(bm25_results[:5])
            dense_set = set(dense_results[:5])
            baseline_set = set(baseline_results[:5])

            print(f"\nBM25 top 5: {list(bm25_set)}")
            print(f"Dense top 5: {list(dense_set)}")

            print(f"\n√Åtfed√©sek:")
            for pair_label, retriever_set in (("BM25", bm25_set), ("Dense", dense_set)):
                print(f"  {pair_label} vs Baseline: {len(retriever_set & baseline_set)}/5")
        else:
            # Full reranking (BM25 + dense + policy)
            reranked_results = reranker.rerank(bm25_results, dense_results)

            # BM25 only: the dense candidate list is left empty
            bm25_only_results = reranker.rerank(bm25_results[:10], [])

            # Dense only: the BM25 candidate list is left empty
            dense_only_results = reranker.rerank([], dense_results[:10])

            print(f"\nAblation study - '{test_query}':")

            # Top-5 document ids per variant
            ablation_results = {
                'Baseline (RRF)': baseline_results[:5],
                'Reranked (full)': [doc for doc, _score in reranked_results[:5]],
                'BM25 only': [doc for doc, _score in bm25_only_results[:5]],
                'Dense only': [doc for doc, _score in dense_only_results[:5]]
            }

            # One set per variant, in the dict's insertion order
            baseline_set, reranked_set, bm25_set, dense_set = (
                set(doc_ids) for doc_ids in ablation_results.values()
            )

            print(f"\n√Åtfed√©sek (top 5):")
            for pair_label, left, right in (
                ("Baseline vs Reranked", baseline_set, reranked_set),
                ("Baseline vs BM25", baseline_set, bm25_set),
                ("Baseline vs Dense", baseline_set, dense_set),
                ("BM25 vs Dense", bm25_set, dense_set),
            ):
                print(f"  {pair_label}: {len(left & right)}/5")

            # List every variant's top-5
            print(f"\nEredm√©nyek:")
            for variant, doc_ids in ablation_results.items():
                print(f"\n{variant}:")
                for position, doc_id in enumerate(doc_ids, 1):
                    print(f"  {position}. {doc_id}")

            # Number of distinct documents per variant
            print(f"\nAblation metrik√°k:")
            for variant_label, doc_set in (
                ("Baseline", baseline_set),
                ("Reranked", reranked_set),
                ("BM25", bm25_set),
                ("Dense", dense_set),
            ):
                print(f"  {variant_label} diverzit√°s: {len(doc_set)} egyedi dokumentum")

            # Baseline/reranked overlap as a fraction of the baseline top-5
            if baseline_set:
                combo_advantage = len(baseline_set & reranked_set) / len(baseline_set)
            else:
                combo_advantage = 0
            print(f"  Kombin√°ci√≥ el≈ënye: {combo_advantage:.2f} (baseline vs reranked √°tfed√©s)")

    except Exception as e:
        print(f"‚ùå Ablation study hiba: {e}")

## 8. Következtetések

A GRPO reranking kiértékelésének összefoglalása.

In [None]:
# Final summary: component status, an end-to-end smoke test of the reranking
# pipeline, and recommendations.
print("=== GRPO RERANKING ELEMZ√âS √ñSSZEFOGLAL√ì ===")
print("\n‚úÖ Komponensek √°llapota:")

if retriever is not None:
    print(f"   üéØ Hybrid retriever: m≈±k√∂d≈ëk√©pes")
else:
    print(f"   ‚ùå Hybrid retriever: nem el√©rhet≈ë")

if reranker is not None:
    print(f"   üß† GRPO reranker: m≈±k√∂d≈ëk√©pes")
else:
    print(f"   ‚ùå GRPO reranker: nem el√©rhet≈ë")

if policy_loaded:
    print(f"   üìà RL policy: bet√∂ltve")
else:
    print(f"   ‚ö†Ô∏è RL policy: nincs bet√∂ltve")

# The smoke test below calls the retriever as well as the reranker, so the
# pipeline only counts as ready when all three pieces are present. Checking
# the reranker and policy alone would report "ready" with a missing
# retriever and then fail inside the test.
pipeline_ready = retriever is not None and reranker is not None and policy_loaded

print("\nüìã Agents.md specifik√°ci√≥ ellen≈ërz√©s:")
if pipeline_ready:
    print("   ‚úÖ GRPO reranking komponens m≈±k√∂d≈ëk√©pes")
    print("   üéØ Features: dense similarity, BM25 score, rank difference")
    print("   üèÜ Reward: nDCG@10 group level")
    print("   ü§ñ Policy: linear/shallow MLP")
    print("   üìä Groupwise softmax")

    # End-to-end smoke test on one query
    try:
        test_query = "szerz≈ëd√©s felmond√°sa"
        bm25_results, dense_results = retriever.retrieve_candidates(test_query, top_k=10)
        reranked_results = reranker.rerank(bm25_results, dense_results)

        if reranked_results:
            print("   ‚úÖ Reranking m≈±k√∂dik - eredm√©nyek gener√°lva")

            # Sanity check: the policy should assign more than one
            # distinct score across the top results.
            scores = [score for _, score in reranked_results[:10]]
            if len(set(scores)) > 1:
                print("   ‚úÖ Policy differenci√°l - v√°ltozatos score-ok")
            else:
                print("   ‚ö†Ô∏è Policy egys√©ges - score-ok nem differenci√°lnak")
        else:
            print("   ‚ùå Reranking nem gener√°l eredm√©nyeket")
    except Exception as e:
        print(f"   ‚ùå Reranking m≈±k√∂d√©si hiba: {e}")
else:
    # List exactly which pieces are missing.
    missing_components = []
    if retriever is None:
        missing_components.append("Hybrid retriever")
    if reranker is None:
        missing_components.append("GRPO reranker")
    if not policy_loaded:
        missing_components.append("RL policy")

    print(f"   ‚ùå Hi√°nyz√≥ komponensek: {', '.join(missing_components)}")
    print("   üí° Sz√ºks√©ges: uv run courtrankrl build + uv run courtrankrl train")

print("\nüí° Aj√°nl√°sok:")
if pipeline_ready:
    print("   ‚úÖ GRPO reranking haszn√°latra k√©sz")
    print("   üöÄ Haszn√°lat: uv run courtrankrl query \"lek√©rdez√©s\" --rerank")
    print("   üìà Tov√°bbi jav√≠t√°s: t√∂bb training data, finomhangolt hyperparameters")
else:
    if not policy_loaded:
        print("   üéì Policy betan√≠t√°sa sz√ºks√©ges: uv run courtrankrl train")
    print("   üîß Komponensek inicializ√°l√°sa sz√ºks√©ges")

print("\nüéØ GRPO reranking elemz√©se k√©sz!")