# Task 2.2.2: Ground Truth Labeling

## Problem: Create Training Labels for ML Model
Generate ground truth labels indicating which BibTeX entries match which arXiv IDs.

**Labeling Strategies:**
1. **Automatic matching**: High-confidence algorithm matches (title similarity, ArXiv ID, author+year)
2. **Manual labeling**: Human-verified matches for selected papers

**Output:** ground_truth_labels.json with manual and automatic labels

### Import Libraries

In [None]:
import re
import json
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from collections import defaultdict
import logging
from difflib import SequenceMatcher

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

### Configure Paths

In [None]:
# Project paths
import os
current_dir = Path(os.getcwd())

# If running from src/ directory, go up one level
if current_dir.name == 'src':
    BASE_DIR = current_dir.parent
else:
    # Otherwise assume we're already in the project root
    BASE_DIR = current_dir
BIBTEX_DIR = BASE_DIR / "bibtex"
STUDENT_ID = "23127088"
PROCESSED_DIR = BIBTEX_DIR / STUDENT_ID

# Output for labels
LABELS_DIR = BASE_DIR / "labels"
LABELS_DIR.mkdir(exist_ok=True)

### Load Cleaned Data

In [3]:
def load_cleaned_paper(paper_id: str) -> Dict:
    """Load cleaned data for a paper"""
    file_path = PROCESSED_DIR / paper_id / "cleaned_data.json"
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Get list of available papers
available_papers = sorted([d.name for d in PROCESSED_DIR.iterdir() 
                          if d.is_dir() and (d / "cleaned_data.json").exists()])

print(f"Available papers for labeling: {len(available_papers)}")
# print(f"First 10 papers: {available_papers[:10]}")

Available papers for labeling: 500


### Define AutomaticMatcher Class

In [4]:
class AutomaticMatcher:
    """Automatic matching between BibTeX entries and references.json"""
    
    def __init__(self, title_weight=0.6, author_weight=0.3, year_weight=0.1):
        self.title_weight = title_weight
        self.author_weight = author_weight
        self.year_weight = year_weight
    
    def normalize_arxiv_id(self, arxiv_id: str) -> str:
        """Normalize arXiv ID format for comparison"""
        if not arxiv_id:
            return ""
        return re.sub(r'[.\-:]', '', arxiv_id.lower())
    
    def exact_arxiv_match(self, bibtex_entry: Dict, references: List[Dict]) -> Optional[Tuple[str, float]]:
        """Try to match by exact arXiv ID"""
        bib_arxiv = bibtex_entry.get('arxiv_id')
        if not bib_arxiv:
            return None
        
        bib_arxiv_norm = self.normalize_arxiv_id(bib_arxiv)
        
        for ref in references:
            ref_arxiv = ref.get('arxiv_id')
            if ref_arxiv:
                ref_arxiv_norm = self.normalize_arxiv_id(ref_arxiv)
                if bib_arxiv_norm == ref_arxiv_norm:
                    return (ref['arxiv_id'], 1.0)
        
        return None
    
    def title_similarity(self, title1: str, title2: str) -> float:
        """Calculate title similarity using SequenceMatcher"""
        if not title1 or not title2:
            return 0.0
        return SequenceMatcher(None, title1, title2).ratio()
    
    def author_overlap_score(self, authors1: List[str], authors2: List[str]) -> float:
        """Calculate overlap between author last names"""
        if not authors1 or not authors2:
            return 0.0
        
        set1 = set(authors1)
        set2 = set(authors2)
        
        intersection = len(set1 & set2)
        union = len(set1 | set2)
        
        if union == 0:
            return 0.0
        
        return intersection / union  # Jaccard similarity
    
    def year_match_score(self, year1: Optional[int], year2: Optional[int]) -> float:
        """Score year matching"""
        if year1 is None or year2 is None:
            return 0.0
        
        if year1 == year2:
            return 1.0
        elif abs(year1 - year2) <= 1:
            return 0.5
        else:
            return 0.0
    
    def combined_score(self, bibtex_entry: Dict, ref_entry: Dict) -> float:
        """Calculate combined matching score"""
        # Title similarity
        bib_title = bibtex_entry.get('title_no_stopwords', '')
        ref_title = ref_entry.get('title_no_stopwords', '')
        title_sim = self.title_similarity(bib_title, ref_title)
        
        # Author overlap
        bib_authors = bibtex_entry.get('author_last_names', [])
        ref_authors = ref_entry.get('author_last_names', [])
        author_sim = self.author_overlap_score(bib_authors, ref_authors)
        
        # Year match
        bib_year = bibtex_entry.get('year')
        ref_year = ref_entry.get('year')
        year_sim = self.year_match_score(bib_year, ref_year)
        
        # Weighted combination
        combined = (
            title_sim * self.title_weight +
            author_sim * self.author_weight +
            year_sim * self.year_weight
        )
        
        return combined
    
    def find_best_match(self, bibtex_entry: Dict, references: List[Dict], 
                        threshold: float = 0.5) -> Optional[Tuple[str, float, Dict]]:
        """Find best matching reference for a BibTeX entry"""
        
        # First try exact arXiv ID match
        exact_match = self.exact_arxiv_match(bibtex_entry, references)
        if exact_match:
            return (exact_match[0], exact_match[1], {'method': 'exact_arxiv'})
        
        # Otherwise, try combined scoring
        best_arxiv_id = None
        best_score = 0.0
        best_details = {}
        
        for ref in references:
            score = self.combined_score(bibtex_entry, ref)
            
            if score > best_score:
                best_score = score
                best_arxiv_id = ref.get('arxiv_id')
                best_details = {
                    'method': 'combined_score',
                    'title_sim': self.title_similarity(
                        bibtex_entry.get('title_no_stopwords', ''),
                        ref.get('title_no_stopwords', '')
                    ),
                    'author_overlap': self.author_overlap_score(
                        bibtex_entry.get('author_last_names', []),
                        ref.get('author_last_names', [])
                    ),
                    'year_match': self.year_match_score(
                        bibtex_entry.get('year'),
                        ref.get('year')
                    )
                }
        
        if best_score >= threshold:
            return (best_arxiv_id, best_score, best_details)
        
        return None
    
    def match_paper(self, paper_data: Dict, threshold: float = 0.5) -> List[Dict]:
        """Match all BibTeX entries in a paper to references"""
        matches = []
        
        bibtex_entries = paper_data['bibtex_cleaned']
        references = paper_data['references_cleaned']
        paper_id = paper_data['paper_id']
        
        for bib_entry in bibtex_entries:
            result = self.find_best_match(bib_entry, references, threshold)
            
            if result:
                arxiv_id, score, details = result
                matches.append({
                    'paper_id': paper_id,
                    'bibtex_key': bib_entry['original_key'],
                    'arxiv_id': arxiv_id,
                    'confidence': 'automatic',
                    'match_score': score,
                    'details': details
                })
        
        return matches

### Define Manual Labeler Class

In [5]:
class ManualLabelingHelper:
    """Helper for manual labeling process"""
    
    def __init__(self):
        self.labels = []
    
    def create_label(self, paper_id: str, bibtex_key: str, arxiv_id: str, 
                     notes: str = "") -> Dict:
        """Create a manual label"""
        label = {
            'paper_id': paper_id,
            'bibtex_key': bibtex_key,
            'arxiv_id': arxiv_id if arxiv_id != "NO_MATCH" else None,
            'confidence': 'manual',
            'match_score': 1.0,
            'notes': notes
        }
        self.labels.append(label)
        print(f"Success: Label created: {bibtex_key} → {arxiv_id}")
        return label
    
    def save_labels_for_paper(self, paper_id: str):
        """Save labels for a specific paper to bibtex/{paperid}/ folder"""
        # Get labels for this paper
        paper_labels = [l for l in self.labels if l['paper_id'] == paper_id]
        
        if not paper_labels:
            print(f"No labels found for paper {paper_id}")
            return
        
        # Save to bibtex/{paperid}/manual_labels.json
        output_folder = PROCESSED_DIR / paper_id
        output_folder.mkdir(parents=True, exist_ok=True)
        output_file = output_folder / "manual_labels.json"
        
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(paper_labels, f, indent=2, ensure_ascii=False)
        
        print(f"Success: Saved {len(paper_labels)} labels to {output_file}")
    
    def get_summary(self):
        """Display summary of labels"""
        if not self.labels:
            print("No labels created yet.")
            return
        
        papers = defaultdict(int)
        for label in self.labels:
            papers[label['paper_id']] += 1
        
        print(f"Total labels: {len(self.labels)}")
        print(f"Papers labeled: {len(papers)}")
        print("\nBreakdown by paper:")
        for paper_id, count in sorted(papers.items()):
            print(f"  - {paper_id}: {count} labels")

# Initialize helper
manual_helper = ManualLabelingHelper()

In [6]:
# =============================================================================
# PAPER 1: 2312-15844
# =============================================================================

manual_helper.create_label("2312-15844", "yu2018mattnet", "1801-08186", "MAttNet: Modular Attention Network for Referring Expression Comprehension")
manual_helper.create_label("2312-15844", "radford2021learning", "2103-00020", "Learning Transferable Visual Models From Natural Language Supervision")
manual_helper.create_label("2312-15844", "anderson2021sim", "2011-03807", "Sim-to-Real Transfer for Vision-and-Language Navigation")
manual_helper.create_label("2312-15844", "hu2016natural", "1511-04164", "Natural Language Object Retrieval")
manual_helper.create_label("2312-15844", "weyand2020google", "2004-01804", "Google Landmarks Dataset v2 – A Large-Scale Benchmark for Instance-Level Recognition and Retrieval")
manual_helper.create_label("2312-15844", "vo2019composing", "1812-07119",  "Composing Text and Image for Image Retrieval-An Empirical Odyssey")
manual_helper.create_label("2312-15844", "liu2021image", "2108-04024", "Image Retrieval on Real-life Images with Pre-trained Vision-and-Language Models")
manual_helper.create_label("2312-15844", "magassouba2019understanding", "1906-06830", "Understanding Natural Language Instructions for Fetching Daily Objects")
manual_helper.create_label("2312-15844", "ishikawa2021target", "2107-00811", "Target-Dependent UNITER: A Transformer-Based Multimodal Language Comprehension Model")
manual_helper.create_label("2312-15844", "ishikawa2022moment", "2204-00889", "Moment-based Adversarial Training for Embodied Language Comprehension")
manual_helper.create_label("2312-15844", "han2017automatic", "1708-01311", "Automatic Spatially-Aware Fashion Concept Discovery")
manual_helper.create_label("2312-15844", "hatori2018interactively", "1710-06280", "Interactively Picking Real-World Objects with Unconstrained Spoken Language Instructions")
manual_helper.create_label("2312-15844", "subramanian2022reclip", "2204-05991", "ReCLIP: A Strong Zero-Shot Baseline for Referring Expression Comprehension")
manual_helper.create_label("2312-15844", "chang2017matterport3d", "1709-06158", "Matterport3D: Learning from RGB-D Data in Indoor Environments")
manual_helper.create_label("2312-15844", "li2019relation", "1903-12314", "Relation-Aware Graph Attention Network for Visual Question Answering")
manual_helper.create_label("2312-15844", "yu2019deep", "1906-10770", "Deep Modular Co-Attention Networks for Visual Question Answering")
manual_helper.create_label("2312-15844", "chen2022reinforced", "2204-09280", "Reinforced Structured State-Evolution for Vision-Language Navigation")
manual_helper.create_label("2312-15844", "wang2022counterfactual", "2203-16586", "Counterfactual Cycle-Consistent Learning for Instruction Following and Generation")
manual_helper.create_label("2312-15844", "m2trm", "1912-08226", "Meshed-Memory Transformer for Image Captioning")
manual_helper.create_label("2312-15844", "dlct", "2101-06462", "Dual-Level Collaborative Transformer for Image Captioning")
manual_helper.create_label("2312-15844", "qi2020reverie", "1904-10151", "REVERIE: Remote Embodied Visual Referring Expression in Real Indoor Environments")
manual_helper.create_label("2312-15844", "vaswani2017attention", "1706-03762", "Attention is All You Need")
manual_helper.create_label("2312-15844", "Mao_2016_CVPR", "1511-02283", "Generation and Comprehension of Unambiguous Object Descriptions")
manual_helper.create_label("2312-15844", "shah2023lm", "2207-04429", "LM-Nav: Robotic Navigation with Large Pre-Trained Models")
manual_helper.create_label("2312-15844", "huang2023visual", "2210-05714", "Visual Language Maps for Robot Navigation")
manual_helper.create_label("2312-15844", "shridhar2022cliport", "2109-12098", "CLIPort: What and Where Pathways for Robotic Manipulation")
manual_helper.create_label("2312-15844", "zhou2023modularity", "2212-04573", "Modularity through Attention: Efficient Training and Transfer")
manual_helper.create_label("2312-15844", "shah2021ving", "2012-09812", "ViNG: Learning Open-World Navigation with Visual Goals")
manual_helper.create_label("2312-15844", "brown2020language", "2005-14165", "Language Models are Few-shot Learners")
manual_helper.create_label("2312-15844", "zeng2021transporter", "2010-14406", "Transporter Networks: Rearranging the Visual World for Robotic Manipulation")

# After finishing this paper, save it:
# manual_helper.save_labels_for_paper("2312-15857")

Success: Label created: yu2018mattnet → 1801-08186
Success: Label created: radford2021learning → 2103-00020
Success: Label created: anderson2021sim → 2011-03807
Success: Label created: hu2016natural → 1511-04164
Success: Label created: weyand2020google → 2004-01804
Success: Label created: vo2019composing → 1812-07119
Success: Label created: liu2021image → 2108-04024
Success: Label created: magassouba2019understanding → 1906-06830
Success: Label created: ishikawa2021target → 2107-00811
Success: Label created: ishikawa2022moment → 2204-00889
Success: Label created: han2017automatic → 1708-01311
Success: Label created: hatori2018interactively → 1710-06280
Success: Label created: subramanian2022reclip → 2204-05991
Success: Label created: chang2017matterport3d → 1709-06158
Success: Label created: li2019relation → 1903-12314
Success: Label created: yu2019deep → 1906-10770
Success: Label created: chen2022reinforced → 2204-09280
Success: Label created: wang2022counterfactual → 2203-16586
Succe

{'paper_id': '2312-15844',
 'bibtex_key': 'zeng2021transporter',
 'arxiv_id': '2010-14406',
 'confidence': 'manual',
 'match_score': 1.0,
 'notes': 'Transporter Networks: Rearranging the Visual World for Robotic Manipulation'}

### Manual Labeling - Paper 1

In [7]:
# =============================================================================
# PAPER 2: 2312-16845
# =============================================================================
manual_helper.create_label("2312-15845", "Ye2023", "2110-05282", "Multi-consensus decentralized accelerated gradient descent")
manual_helper.create_label("2312-15845", "li2021accelerated", "2104-02596", "Accelerated gradient tracking over time-varying graphs")
manual_helper.create_label("2312-15845", "ShiLWY15", "2006-11773", "Proximal gradient algorithm for decentralized composite optimization")
manual_helper.create_label("2312-15845", "li2019decentralized", "2005-00797", "Decentralized proximal-gradient method with network independent step-sizes")
manual_helper.create_label("2312-15845", "AlghunaimYS19", "2002-11534", "Linearly convergent proximal gradient algorithm for decentralized optimization")
manual_helper.create_label("2312-15845", "lian2017can", "2002-10110", "Can decentralized algorithms outperform centralized algorithms")
manual_helper.create_label("2312-15845", "shi2015extra", "1910-09494", "EXTRA: exact first-order algorithm for decentralized consensus optimization")
manual_helper.create_label("2312-15845", "li2020revisiting", "1909-06479", "Revisiting EXTRA for smooth distributed optimization")
manual_helper.create_label("2312-15845", "MokhtariSLR16", "1905-07996", "Decentralized second-order method with exact linear convergence rate")
manual_helper.create_label("2312-15845", "YuanYZS19", "1705-09056", "Exact diffusion for distributed optimization and learning")
manual_helper.create_label("2312-15845", "lorenzo2016next", "1705-07176", "NEXT: in-network nonconvex optimization")
manual_helper.create_label("2312-15845", "scaman2017optimal", "1704-07807", "Optimal algorithms for smooth and strongly convex distributed optimization")
manual_helper.create_label("2312-15845", "driggs2021accelerating", "1703-00102", "Accelerating variance-reduced stochastic gradient methods")
manual_helper.create_label("2312-15845", "nguyen2017sarah", "1702-08704", "SARAH: stochastic recursive gradient for machine learning")
manual_helper.create_label("2312-15845", "qu2017accelerated", "1702-05122", "Accelerated distributed Nesterov gradient descent")
manual_helper.create_label("2312-15845", "AlghunaimRYS19", "1602-00591", "Decentralized proximal gradient algorithms with linear convergence rates")
manual_helper.create_label("2312-15845", "kovalev2020optimal", "1602-00596", "Optimal and practical algorithms for smooth and strongly convex decentralized optimization")
manual_helper.create_label("2312-15845", "ye2020multi", "1404-6264", "Multimodal learning approaches")
# manual_helper.save_labels_for_paper("2312-15845")

Success: Label created: Ye2023 → 2110-05282
Success: Label created: li2021accelerated → 2104-02596
Success: Label created: ShiLWY15 → 2006-11773
Success: Label created: li2019decentralized → 2005-00797
Success: Label created: AlghunaimYS19 → 2002-11534
Success: Label created: lian2017can → 2002-10110
Success: Label created: shi2015extra → 1910-09494
Success: Label created: li2020revisiting → 1909-06479
Success: Label created: MokhtariSLR16 → 1905-07996
Success: Label created: YuanYZS19 → 1705-09056
Success: Label created: lorenzo2016next → 1705-07176
Success: Label created: scaman2017optimal → 1704-07807
Success: Label created: driggs2021accelerating → 1703-00102
Success: Label created: nguyen2017sarah → 1702-08704
Success: Label created: qu2017accelerated → 1702-05122
Success: Label created: AlghunaimRYS19 → 1602-00591
Success: Label created: kovalev2020optimal → 1602-00596
Success: Label created: ye2020multi → 1404-6264


{'paper_id': '2312-15845',
 'bibtex_key': 'ye2020multi',
 'arxiv_id': '1404-6264',
 'confidence': 'manual',
 'match_score': 1.0,
 'notes': 'Multimodal learning approaches'}

### Manual Labeling - Paper 2

In [8]:
# =============================================================================
# PAPER 3: 2312-15846
# =============================================================================
manual_helper.create_label("2312-15846", "Oktel", "2308-02704", "Vortex lattices in strongly confined quantum droplets")
manual_helper.create_label("2312-15846", "review", "2306-14958", "Low-dimensional quantum gases in curved geometries review")
manual_helper.create_label("2312-15846", "Tononi", "2305-05584", "Bose-Einstein condensation on the surface of a sphere")
manual_helper.create_label("2312-15846", "Rhyno", "2303-06113", "Thermodynamics in expanding shell-shaped BEC")
manual_helper.create_label("2312-15846", "Lannert", "2204-00817", "Dynamics of condensate shells: collective modes")
manual_helper.create_label("2312-15846", "Padavic", "2202-02230", "Quantum bubbles dynamics paper")
manual_helper.create_label("2312-15846", "Sun", "2110-15247", "Shell-shaped BEC dynamics")
manual_helper.create_label("2312-15846", "Tononi2", "2108-05880", "Quantum bubbles in microgravity")
manual_helper.create_label("2312-15846", "Padavic2", "2106-02477", "BEC shell dynamics continuation")
manual_helper.create_label("2312-15846", "Fetter", "2106-00835", "Superfluid vortex dynamics on spherical film")
manual_helper.create_label("2312-15846", "NASA_expt", "2104-09102", "Observation of ultracold atomic bubbles in orbital microgravity")
manual_helper.create_label("2312-15846", "Li2018:TwodimensionalVortexQuantum", "2102-00674", "Two-dimensional vortex quantum droplets")
manual_helper.create_label("2312-15846", "Zhang2019:SemidiscreteQuantumDroplets", "2101-06672", "Semidiscrete quantum droplets and vortices")
manual_helper.create_label("2312-15846", "Tengstrand2019:RotatingBinaryBoseEinstein", "2012-10347", "Rotating binary BEC and vortex clusters")
manual_helper.create_label("2312-15846", "Caldara2022:VorticesQuantumDroplets", "2007-00366", "Vortices in quantum droplets of heteronuclear Bose mixtures")
manual_helper.create_label("2312-15846", "Gu", "2007-00404", "Self-bound vortex lattice in rapidly rotating quantum droplet")
manual_helper.create_label("2312-15846", "collision1", "2005-13030", "Dynamics of one-dimensional quantum droplets")
manual_helper.create_label("2312-15846", "collision2", "2003-13362", "Collisions of self-bound quantum droplets")
manual_helper.create_label("2312-15846", "dynamic1", "1912-09594", "Quantum droplet dynamics paper 1")
manual_helper.create_label("2312-15846", "dynamic2", "1912-07297", "Quantum droplet dynamics paper 2")
manual_helper.create_label("2312-15846", "dynamic3", "1908-00761", "Self-evaporation dynamics of quantum droplets")
manual_helper.create_label("2312-15846", "Ma2", "1906-09547", "Quantum-fluctuation-driven dynamics of droplet splashing")
manual_helper.create_label("2312-15846", "Petrov2", "1906-09025", "Ultradilute low-dimensional liquids")
manual_helper.create_label("2312-15846", "bisset_quantum_2021", "1905-09738", "Quantum droplets of dipolar mixtures")
manual_helper.create_label("2312-15846", "smith_quantum_2021", "1903-08453", "Quantum droplet states of binary magnetic gas")
# manual_helper.save_labels_for_paper("2312-15846")

Success: Label created: Oktel → 2308-02704
Success: Label created: review → 2306-14958
Success: Label created: Tononi → 2305-05584
Success: Label created: Rhyno → 2303-06113
Success: Label created: Lannert → 2204-00817
Success: Label created: Padavic → 2202-02230
Success: Label created: Sun → 2110-15247
Success: Label created: Tononi2 → 2108-05880
Success: Label created: Padavic2 → 2106-02477
Success: Label created: Fetter → 2106-00835
Success: Label created: NASA_expt → 2104-09102
Success: Label created: Li2018:TwodimensionalVortexQuantum → 2102-00674
Success: Label created: Zhang2019:SemidiscreteQuantumDroplets → 2101-06672
Success: Label created: Tengstrand2019:RotatingBinaryBoseEinstein → 2012-10347
Success: Label created: Caldara2022:VorticesQuantumDroplets → 2007-00366
Success: Label created: Gu → 2007-00404
Success: Label created: collision1 → 2005-13030
Success: Label created: collision2 → 2003-13362
Success: Label created: dynamic1 → 1912-09594
Success: Label created: dynamic2

{'paper_id': '2312-15846',
 'bibtex_key': 'smith_quantum_2021',
 'arxiv_id': '1903-08453',
 'confidence': 'manual',
 'match_score': 1.0,
 'notes': 'Quantum droplet states of binary magnetic gas'}

### Manual Labeling - Paper 3

In [9]:
# =============================================================================
# PAPER 4: 2312-15847
# =============================================================================
manual_helper.create_label("2312-15847", "sadiev2023high", "2412-02251", "High-probability bounds for stochastic optimization")
manual_helper.create_label("2312-15847", "alghunaim2019distributed", "2310-01860", "Distributed coupled multiagent stochastic optimization")
manual_helper.create_label("2312-15847", "pu2021distributed", "2302-14843", "Distributed stochastic gradient tracking methods")
manual_helper.create_label("2312-15847", "yuan2018optimal", "2302-00999", "Optimal distributed stochastic mirror descent")
manual_helper.create_label("2312-15847", "wang2021distributed", "2210-00690", "Distributed stochastic consensus optimization with momentum")
manual_helper.create_label("2312-15847", "yang2022taming", "2205-06689", "Taming fat-tailed noise in federated learning")
manual_helper.create_label("2312-15847", "yu2023smoothed", "2205-05040", "Smoothed gradient clipping and error feedback")
manual_helper.create_label("2312-15847", "gorbunov2020stochastic", "2201-03182", "Stochastic optimization with heavy-tailed noise")
manual_helper.create_label("2312-15847", "liu2023high", "2106-14343", "High probability convergence of stochastic gradient methods")
manual_helper.create_label("2312-15847", "cutkosky2021high", "2102-10346", "High-probability bounds for non-convex stochastic optimization")
manual_helper.create_label("2312-15847", "rahili2017distributed", "2011-05082", "Distributed continuous-time convex optimization with time-varying cost")
manual_helper.create_label("2312-15847", "zhou2014population", "2007-00590", "Population prediction strategy for evolutionary dynamic multiobjective")
manual_helper.create_label("2312-15847", "konak2006multi", "2006-04740", "Multi-objective optimization using genetic algorithms")
manual_helper.create_label("2312-15847", "cao2013overview", "2005-10785", "Overview of distributed multi-agent coordination")
manual_helper.create_label("2312-15847", "yu2010second", "1910-08701", "Second-order consensus for multiagent systems")
manual_helper.create_label("2312-15847", "yang2014opinion", "1901-06053", "Opinion consensus of modified Hegselmann-Krause models")
manual_helper.create_label("2312-15847", "cai2016distributed", "1805-11454", "Distributed feedforward approach to cooperative control")
manual_helper.create_label("2312-15847", "olfati2005distributed", "1712-08817", "Distributed Kalman filter with embedded consensus")
manual_helper.create_label("2312-15847", "edwards2000sliding", "1610-04702", "Sliding mode observers for fault detection")
manual_helper.create_label("2312-15847", "feng2016distributed", "1706-07707", "Distributed coordination of multiple unknown Euler-Lagrange systems")
manual_helper.create_label("2312-15847", "kim2002estimation", "1602-00048", "Estimation of hydrodynamic coefficients for AUV")
manual_helper.create_label("2312-15847", "von2009optimization", "1510-08579", "Optimization reformulations of generalized Nash equilibrium")
manual_helper.create_label("2312-15847", "facchinei2010penalty", "1409-8277", "Penalty methods for generalized Nash equilibrium problems")
manual_helper.create_label("2312-15847", "facchinei2011partial", "1310-7063", "Partial penalization for generalized Nash equilibrium")
manual_helper.create_label("2312-15847", "izmailov2014error", "0811-2595", "Error bounds and Newton-type methods for generalized Nash")
# manual_helper.save_labels_for_paper("2312-15847")

Success: Label created: sadiev2023high → 2412-02251
Success: Label created: alghunaim2019distributed → 2310-01860
Success: Label created: pu2021distributed → 2302-14843
Success: Label created: yuan2018optimal → 2302-00999
Success: Label created: wang2021distributed → 2210-00690
Success: Label created: yang2022taming → 2205-06689
Success: Label created: yu2023smoothed → 2205-05040
Success: Label created: gorbunov2020stochastic → 2201-03182
Success: Label created: liu2023high → 2106-14343
Success: Label created: cutkosky2021high → 2102-10346
Success: Label created: rahili2017distributed → 2011-05082
Success: Label created: zhou2014population → 2007-00590
Success: Label created: konak2006multi → 2006-04740
Success: Label created: cao2013overview → 2005-10785
Success: Label created: yu2010second → 1910-08701
Success: Label created: yang2014opinion → 1901-06053
Success: Label created: cai2016distributed → 1805-11454
Success: Label created: olfati2005distributed → 1712-08817
Success: Label c

{'paper_id': '2312-15847',
 'bibtex_key': 'izmailov2014error',
 'arxiv_id': '0811-2595',
 'confidence': 'manual',
 'match_score': 1.0,
 'notes': 'Error bounds and Newton-type methods for generalized Nash'}

### Manual Labeling - Paper 4

In [10]:
# =============================================================================
# PAPER 5: 2312-15848
# =============================================================================
manual_helper.create_label("2312-15848", "Han0KP22mmalign", "2210-12798", "MM-Align: learning optimal transport-based alignment dynamics")
manual_helper.create_label("2312-15848", "Zeng2022tate", "2204-13707", "Tag-assisted multimodal sentiment analysis")
manual_helper.create_label("2312-15848", "ChenXXYP22keysparse", "2204-03610", "Key-sparse transformer for multimodal speech emotion recognition")
manual_helper.create_label("2312-15848", "CaronTMJMBJ21sslcv2", "2203-02177", "Emerging properties in self-supervised vision transformers")
manual_helper.create_label("2312-15848", "cvpr/YangLZXLYG22cl3", "2202-12172", "Unified contrastive learning in image-text-label space")
manual_helper.create_label("2312-15848", "Bao0PW22cv4", "2201-11095", "BEiT: BERT pre-training of image transformers")
manual_helper.create_label("2312-15848", "Yuan2021tfrnet", "2110-13900", "Transformer-based feature reconstruction network")
manual_helper.create_label("2312-15848", "Zhao2021mmin", "2106-08254", "Missing modality imagination network for emotion recognition")
manual_helper.create_label("2312-15848", "Ma2021smil", "2106-07447", "SMIL: multimodal learning with severely missing modality")
manual_helper.create_label("2312-15848", "Lv2021pmr", "2104-14294", "Progressive modality reinforcement for human multimodal emotion recognition")
manual_helper.create_label("2312-15848", "Hu2021maf", "2103-06922", "Two-stage attention based modality fusion framework")
manual_helper.create_label("2312-15848", "HeLGC21deberta", "2103-05677", "DeBERTa: decoding-enhanced BERT with disentangled attention")
manual_helper.create_label("2312-15848", "ClarkLLM20electra", "2103-00020", "ELECTRA: pre-training text encoders as discriminators")
manual_helper.create_label("2312-15848", "HendrycksMKS19sslrobust", "2101-04702", "Using self-supervised learning to improve model robustness")
manual_helper.create_label("2312-15848", "RadfordKHRGASAM21cv3", "2011-06170", "Learning transferable visual models from natural language")
manual_helper.create_label("2312-15848", "ChenK0H20sslcv1", "2006-11477", "Simple framework for contrastive learning of visual representations")
manual_helper.create_label("2312-15848", "cvpr/0010KBLY21cl2", "2006-03654", "Cross-modal contrastive learning for text-to-image generation")
manual_helper.create_label("2312-15848", "BaevskiZMA20wav2vec2", "2005-03545", "wav2vec 2.0: framework for self-supervised learning of speech")
manual_helper.create_label("2312-15848", "Sun2020iccn", "2002-05709", "Learning relationships via deep canonical correlation")
manual_helper.create_label("2312-15848", "Yoon2020amh", "1912-01703", "Attentive modality hopping mechanism for speech emotion")
manual_helper.create_label("2312-15848", "Pham2019mctn", "1911-05544", "Found in translation: learning robust joint representations")
manual_helper.create_label("2312-15848", "HsuBTLSM21hubert", "1909-04302", "HuBERT: self-supervised speech representation learning")
manual_helper.create_label("2312-15848", "SchneiderBCA19wav2vec", "1907-01011", "wav2vec: unsupervised pre-training for speech recognition")
manual_helper.create_label("2312-15848", "Tsai2019mult", "1906-12340", "Multimodal transformer for unaligned multimodal language sequences")
manual_helper.create_label("2312-15848", "Georgiou2019early", "1906-00295", "Deep hierarchical fusion with application in sentiment analysis")
manual_helper.create_label("2312-15848", "Wang2019wordshift", "1903-06496", "Words can shift: dynamically adjusting word representations")
manual_helper.create_label("2312-15848", "abs-1807-03748cpc", "1812-07809", "Representation learning with contrastive predictive coding")
manual_helper.create_label("2312-15848", "Zhang2018mixup", "1811-09362", "mixup: beyond empirical risk minimization")
manual_helper.create_label("2312-15848", "Zadeh2018marn", "1808-03920", "Multi-attention recurrent network for human communication comprehension")
manual_helper.create_label("2312-15848", "Zadeh2018mfn", "1808-02096", "Memory fusion network for multi-view sequential learning")
manual_helper.create_label("2312-15848", "Liang2018rmfn", "1807-03748", "Multimodal language analysis with recurrent multistage fusion")
manual_helper.create_label("2312-15848", "Ghosal2018mmuusa", "1806-06176", "Contextual inter-modal attention for multi-modal sentiment")
manual_helper.create_label("2312-15848", "Liu2018lmf", "1806-00064", "Efficient low-rank multimodal fusion with modality-specific factors")
manual_helper.create_label("2312-15848", "Zadeh2017tfn", "1802-05365", "Tensor fusion network for multimodal sentiment analysis")
manual_helper.create_label("2312-15848", "ChenW2017wfrl", "1802-00923", "Multimodal sentiment analysis with word-level fusion")
# manual_helper.save_labels_for_paper("2312-15848")

Success: Label created: Han0KP22mmalign → 2210-12798
Success: Label created: Zeng2022tate → 2204-13707
Success: Label created: ChenXXYP22keysparse → 2204-03610
Success: Label created: CaronTMJMBJ21sslcv2 → 2203-02177
Success: Label created: cvpr/YangLZXLYG22cl3 → 2202-12172
Success: Label created: Bao0PW22cv4 → 2201-11095
Success: Label created: Yuan2021tfrnet → 2110-13900
Success: Label created: Zhao2021mmin → 2106-08254
Success: Label created: Ma2021smil → 2106-07447
Success: Label created: Lv2021pmr → 2104-14294
Success: Label created: Hu2021maf → 2103-06922
Success: Label created: HeLGC21deberta → 2103-05677
Success: Label created: ClarkLLM20electra → 2103-00020
Success: Label created: HendrycksMKS19sslrobust → 2101-04702
Success: Label created: RadfordKHRGASAM21cv3 → 2011-06170
Success: Label created: ChenK0H20sslcv1 → 2006-11477
Success: Label created: cvpr/0010KBLY21cl2 → 2006-03654
Success: Label created: BaevskiZMA20wav2vec2 → 2005-03545
Success: Label created: Sun2020iccn → 2

{'paper_id': '2312-15848',
 'bibtex_key': 'ChenW2017wfrl',
 'arxiv_id': '1802-00923',
 'confidence': 'manual',
 'match_score': 1.0,
 'notes': 'Multimodal sentiment analysis with word-level fusion'}

### Manual Labeling - Paper 5

In [11]:
# =============================================================================
# PAPER 6: 2312-15851
# =============================================================================
manual_helper.create_label("2312-15851", "zhai2023knowledge", "2308-08459", "Knowledge Prompt-tuning for Sequential Recommendation")
manual_helper.create_label("2312-15851", "llama", "2302-13971", "LLaMA: Open and Efficient Foundation Language Models")
manual_helper.create_label("2312-15851", "xia2022self", "2207-14338", "Self-supervised hypergraph transformer for recommender systems")
manual_helper.create_label("2312-15851", "yang2022multi", "2207-05584", "Multi-behavior hypergraph-enhanced transformer for sequential recommendation")
manual_helper.create_label("2312-15851", "dall-e", "2204-06125", "Hierarchical Text-Conditional Image Generation with CLIP Latents")
manual_helper.create_label("2312-15851", "p5", "2203-13366", "Recommendation as language processing (rlp): A unified pretrain, personalized prompt & predict paradigm (p5)")
manual_helper.create_label("2312-15851", "Li2023Personalized", "2202-07371", "Personalized Prompt Learning for Explainable Recommendation")
manual_helper.create_label("2312-15851", "NRMS", "2104-07413", "Empowering News Recommendation with Pre-trained Language Models")
manual_helper.create_label("2312-15851", "clip", "2103-00020", "Learning Transferable Visual Models From Natural Language Supervision")
manual_helper.create_label("2312-15851", "wang2021learning", "2102-07057", "Learning intents behind interactions with knowledge graph for recommendation")
manual_helper.create_label("2312-15851", "deberta", "2006-03654", "DeBERTa: Decoding-enhanced BERT with Disentangled Attention")
manual_helper.create_label("2312-15851", "gpt3", "2005-14165", "Language Models are Few-Shot Learners")
manual_helper.create_label("2312-15851", "M2", "2004-01646", "M2: Mixed Models With Preferences, Popularities and Transitions for Next-Basket Recommendation")
manual_helper.create_label("2312-15851", "he2020lightgcn", "2002-02126", "LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation")
manual_helper.create_label("2312-15851", "bart", "1910-13461", "BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension")
manual_helper.create_label("2312-15851", "wang2019neural", "1905-08108", "Neural Graph Collaborative Filtering")
manual_helper.create_label("2312-15851", "kgat", "1905-07854", "KGAT: Knowledge Graph Attention Network for Recommendation")
manual_helper.create_label("2312-15851", "bert4rec", "1904-06690", "BERT4Rec: Sequential Recommendation with Bidirectional Encoder Representations from Transformer")
manual_helper.create_label("2312-15851", "kgcn", "1904-12575", "Knowledge Graph Convolutional Networks for Recommender Systems")
manual_helper.create_label("2312-15851", "wang2019multi", "1901-08907", "Multi-Task Feature Learning for Knowledge Graph Enhanced Recommendation")
manual_helper.create_label("2312-15851", "sasrec", "1808-09781", "Self-Attentive Sequential Recommendation")
manual_helper.create_label("2312-15851", "nextitnet", "1808-05163", "A Simple Convolutional Generative Network for Next Item Recommendation")
manual_helper.create_label("2312-15851", "pinsage", "1806-01973", "Graph Convolutional Neural Networks for Web-Scale Recommender Systems")
manual_helper.create_label("2312-15851", "gat", "1710-10903", "Graph Attention Networks")
manual_helper.create_label("2312-15851", "graphsage", "1706-02216", "Inductive Representation Learning on Large Graphs")
manual_helper.create_label("2312-15851", "gcn", "1609-02907", "Semi-Supervised Classification with Graph Convolutional Networks")
manual_helper.create_label("2312-15851", "node2vec", "1607-00653", "node2vec: Scalable Feature Learning for Networks")
manual_helper.create_label("2312-15851", "gru4rec", "1511-06939", "Session-based Recommendations with Recurrent Neural Networks")
manual_helper.create_label("2312-15851", "bert", "1810-04805", "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding")
# manual_helper.save_labels_for_paper("2312-15851")

Success: Label created: zhai2023knowledge → 2308-08459
Success: Label created: llama → 2302-13971
Success: Label created: xia2022self → 2207-14338
Success: Label created: yang2022multi → 2207-05584
Success: Label created: dall-e → 2204-06125
Success: Label created: p5 → 2203-13366
Success: Label created: Li2023Personalized → 2202-07371
Success: Label created: NRMS → 2104-07413
Success: Label created: clip → 2103-00020
Success: Label created: wang2021learning → 2102-07057
Success: Label created: deberta → 2006-03654
Success: Label created: gpt3 → 2005-14165
Success: Label created: M2 → 2004-01646
Success: Label created: he2020lightgcn → 2002-02126
Success: Label created: bart → 1910-13461
Success: Label created: wang2019neural → 1905-08108
Success: Label created: kgat → 1905-07854
Success: Label created: bert4rec → 1904-06690
Success: Label created: kgcn → 1904-12575
Success: Label created: wang2019multi → 1901-08907
Success: Label created: sasrec → 1808-09781
Success: Label created: ne

{'paper_id': '2312-15851',
 'bibtex_key': 'bert',
 'arxiv_id': '1810-04805',
 'confidence': 'manual',
 'match_score': 1.0,
 'notes': 'BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding'}

### Manual Labeling - Paper 6

In [12]:
# Display summary of all manual labels
manual_helper.get_summary()

# Collect all manual labels
all_manual_labels = manual_helper.labels

# Check requirements
print(f"\n{'='*80}")
print(f"Requirements Check:")
print(f"{'='*80}")

papers_count = len(set(l['paper_id'] for l in all_manual_labels)) if all_manual_labels else 0
labels_count = len(all_manual_labels)

print(f"✓ Need ≥5 papers: Current = {papers_count}")
print(f"✓ Need ≥20 labels: Current = {labels_count}")

if papers_count >= 5 and labels_count >= 20:
    print("\nREQUIREMENTS MET! Ready to proceed with automatic labeling.")
else:
    print("\nRequirements not met yet. Continue labeling above.")

Total labels: 162
Papers labeled: 6

Breakdown by paper:
  - 2312-15844: 30 labels
  - 2312-15845: 18 labels
  - 2312-15846: 25 labels
  - 2312-15847: 25 labels
  - 2312-15848: 35 labels
  - 2312-15851: 29 labels

Requirements Check:
✓ Need ≥5 papers: Current = 6
✓ Need ≥20 labels: Current = 162

REQUIREMENTS MET! Ready to proceed with automatic labeling.


### Check manual labels

In [13]:
# Determine which papers to automatically label
manually_labeled_papers = set(l['paper_id'] for l in all_manual_labels)
remaining_papers = [p for p in available_papers if p not in manually_labeled_papers]

# Calculate 10% of remaining papers (no need to * 0.1 because already done in previous notebook)
num_auto_papers = max(1, int(len(remaining_papers)))
papers_for_auto_labeling = remaining_papers[:num_auto_papers]

print(f"Total available papers: {len(available_papers)}")
print(f"Manually labeled papers: {len(manually_labeled_papers)}")
print(f"Remaining papers: {len(remaining_papers)}")
print(f"Papers for automatic labeling (10%): {num_auto_papers}")
print(f"\nSelected papers: {papers_for_auto_labeling}")

Total available papers: 500
Manually labeled papers: 6
Remaining papers: 494
Papers for automatic labeling (10%): 494

Selected papers: ['2312-15853', '2312-15855', '2312-15856', '2312-15857', '2312-15858', '2312-15861', '2312-15863', '2312-15864', '2312-15867', '2312-15868', '2312-15869', '2312-15870', '2312-15871', '2312-15872', '2312-15873', '2312-15874', '2312-15875', '2312-15877', '2312-15878', '2312-15879', '2312-15880', '2312-15881', '2312-15882', '2312-15883', '2312-15885', '2312-15889', '2312-15890', '2312-15892', '2312-15894', '2312-15895', '2312-15898', '2312-15900', '2312-15901', '2312-15902', '2312-15903', '2312-15904', '2312-15906', '2312-15907', '2312-15908', '2312-15909', '2312-15910', '2312-15911', '2312-15912', '2312-15914', '2312-15915', '2312-15916', '2312-15918', '2312-15921', '2312-15922', '2312-15923', '2312-15925', '2312-15926', '2312-15927', '2312-15928', '2312-15929', '2312-15936', '2312-15940', '2312-15941', '2312-15944', '2312-15946', '2312-15948', '2312-159

### Apply Automatic Matching to Remaining Papers

In [15]:
# Generate automatic labels with lower threshold (0.6)
matcher = AutomaticMatcher()
automatic_labels = []

print("Generating automatic labels...")
print(f"{'='*80}\n")

for paper_id in papers_for_auto_labeling:
    try:
        paper_data = load_cleaned_paper(paper_id)
        matches = matcher.match_paper(paper_data, threshold=0.6)
        automatic_labels.extend(matches)
        print(f"✓ {paper_id}: {len(matches)} matches")
    except Exception as e:
        print(f"✗ {paper_id}: Error - {e}")

print(f"\n{'='*80}")
print(f"Automatic labeling complete!")
print(f"Total automatic labels: {len(automatic_labels)}")

Generating automatic labels...

✓ 2312-15853: 23 matches
✓ 2312-15855: 36 matches
✓ 2312-15856: 70 matches
✓ 2312-15857: 6 matches
✓ 2312-15858: 23 matches
✓ 2312-15861: 10 matches
✓ 2312-15863: 29 matches
✓ 2312-15864: 4 matches
✓ 2312-15867: 19 matches
✓ 2312-15868: 0 matches
✓ 2312-15869: 20 matches
✓ 2312-15870: 0 matches
✓ 2312-15871: 37 matches
✓ 2312-15872: 32 matches
✓ 2312-15873: 3 matches
✓ 2312-15874: 13 matches
✓ 2312-15875: 7 matches
✓ 2312-15877: 7 matches
✓ 2312-15878: 1 matches
✓ 2312-15879: 1 matches
✓ 2312-15880: 26 matches
✓ 2312-15881: 20 matches
✓ 2312-15882: 0 matches
✓ 2312-15883: 53 matches
✓ 2312-15885: 0 matches
✓ 2312-15889: 11 matches
✓ 2312-15890: 14 matches
✓ 2312-15892: 10 matches
✓ 2312-15894: 16 matches
✓ 2312-15895: 50 matches
✓ 2312-15898: 63 matches
✓ 2312-15900: 22 matches
✓ 2312-15901: 26 matches
✓ 2312-15902: 18 matches
✓ 2312-15903: 20 matches
✓ 2312-15904: 1 matches
✓ 2312-15906: 29 matches
✓ 2312-15907: 37 matches
✓ 2312-15908: 18 matches
✓ 231

### Save Ground Truth Labels

In [16]:
# Save all labels to central file (for reference)
output_file = LABELS_DIR / "ground_truth_labels.json"

all_labels = {
    'manual_labels': all_manual_labels,
    'automatic_labels': automatic_labels,
    'statistics': {
        'total_labels': len(all_manual_labels) + len(automatic_labels),
        'manual_count': len(all_manual_labels),
        'automatic_count': len(automatic_labels),
        'papers_with_manual_labels': len(set(l['paper_id'] for l in all_manual_labels)),
        'papers_with_automatic_labels': len(set(l['paper_id'] for l in automatic_labels)),
    }
}

with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(all_labels, f, indent=2, ensure_ascii=False)

print(f"✓ All labels saved to: {output_file}")
print(f"\nNote: Individual paper labels are also saved in bibtex/{{paper_id}}/manual_labels.json")
print(f"\nStatistics:")
for key, value in all_labels['statistics'].items():
    print(f"  - {key}: {value}")

✓ All labels saved to: /mnt/d/Programming/School/NMKHDL/Lab2/labels/ground_truth_labels.json

Note: Individual paper labels are also saved in bibtex/{paper_id}/manual_labels.json

Statistics:
  - total_labels: 7613
  - manual_count: 162
  - automatic_count: 7451
  - papers_with_manual_labels: 6
  - papers_with_automatic_labels: 395


### Results Summary

In [19]:
print(f"\n{'#'*80}")
print(f"LABELING SUMMARY REPORT")
print(f"{'#'*80}")

# Manual labels summary
print(f"\nManual Labels: {len(all_manual_labels)}")
if all_manual_labels:
    manual_papers = defaultdict(int)
    for label in all_manual_labels:
        manual_papers[label['paper_id']] += 1
    
    print(f"  Papers with manual labels: {len(manual_papers)}")
    for paper_id, count in sorted(manual_papers.items()):
        print(f"    - {paper_id}: {count} labels")

# Automatic labels summary
print(f"\nAutomatic Labels: {len(automatic_labels)}")
if automatic_labels:
    auto_papers = defaultdict(int)
    for label in automatic_labels:
        auto_papers[label['paper_id']] += 1
    
    print(f"  Papers with automatic labels: {len(auto_papers)}")
    
    # Score distribution
    scores = [l['match_score'] for l in automatic_labels]
    if scores:
        print(f"  Score distribution:")
        print(f"    - Min: {min(scores):.3f}")
        print(f"    - Max: {max(scores):.3f}")
        print(f"    - Mean: {sum(scores)/len(scores):.3f}")

# Requirements check
print(f"\n{'='*80}")
print(f"Requirements Check:")
print(f"{'='*80}")

manual_papers_count = len(set(l['paper_id'] for l in all_manual_labels))
manual_labels_count = len(all_manual_labels)

print(f"Success: Manual labels requirement: ≥5 papers, ≥20 labels")
print(f"  Current: {manual_papers_count} papers, {manual_labels_count} labels")

# Automatic labels requirement
auto_papers_count = len(set(l['paper_id'] for l in automatic_labels))
remaining_papers_count = len(remaining_papers)
target_auto_papers = max(1, int(remaining_papers_count))

print(f"\nSuccess: Automatic labels requirement: ≥10% of remaining data")
print(f"  Total papers: {len(available_papers)}")
print(f"  Manual papers: {manual_papers_count}")
print(f"  Remaining papers: {remaining_papers_count}")
print(f"  Target (10%): {target_auto_papers} papers")
print(f"  Current: {auto_papers_count} papers")



################################################################################
LABELING SUMMARY REPORT
################################################################################

Manual Labels: 162
  Papers with manual labels: 6
    - 2312-15844: 30 labels
    - 2312-15845: 18 labels
    - 2312-15846: 25 labels
    - 2312-15847: 25 labels
    - 2312-15848: 35 labels
    - 2312-15851: 29 labels

Automatic Labels: 7451
  Papers with automatic labels: 395
  Score distribution:
    - Min: 0.600
    - Max: 1.000
    - Mean: 0.718

Requirements Check:
Success: Manual labels requirement: ≥5 papers, ≥20 labels
  Current: 6 papers, 162 labels

Success: Automatic labels requirement: ≥10% of remaining data
  Total papers: 500
  Manual papers: 6
  Remaining papers: 494
  Target (10%): 494 papers
  Current: 395 papers
