In [11]:
def extract_introduction_text(content_json):
    """
    Safely extract the '1 INTRODUCTION' section from a paper content.json.
    """
    # Try 'metadata' block first, fallback to root
    sections = []

    if "metadata" in content_json and isinstance(content_json["metadata"], dict):
        sections = content_json["metadata"].get("sections", [])
    else:
        sections = content_json.get("sections", [])

    for section in sections:
        heading = section.get("heading")
        if heading:
            heading_clean = heading.strip().upper()
            if heading_clean.startswith("1 INTRODUCTION"):
                return section.get("text", "")

    return ""  # return empty string if not found


In [26]:
import os
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

# Load Sentence-BERT model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Paths

base_dir = "../data/dataset"
conferences = [
    "ICLR_2017", "ICLR_2018", "ICLR_2019", "ICLR_2020",
    "NIPS_2016", "NIPS_2017", "NIPS_2018", "NIPS_2019"
]

titles = []
paper_ids = []
title_intro_texts = []
paper_decisions = {}
review_scores = {}

for conf in conferences:
    paper_dir = os.path.join(base_dir, f"{conf}/{conf}_paper")
    content_dir = os.path.join(base_dir, f"{conf}/{conf}_content")

    for fname in os.listdir(paper_dir):
        if fname.endswith(".json"):
            with open(os.path.join(paper_dir, fname), "r", encoding="utf-8") as f:
                paper = json.load(f)
                pid = paper.get("id")
                title = paper.get("title", "")
                decision = paper.get("decision", "Unknown")

                # Load introduction section
                intro_text = ""
                content_path = os.path.join(content_dir, f"{pid}.json")
                if os.path.exists(content_path):
                    with open(content_path, "r", encoding="utf-8") as cfile:
                        content = json.load(cfile)
                        intro_text = extract_introduction_text(content)

                if pid and title:
                    titles.append(title)
                    paper_ids.append(pid)
                    paper_decisions[pid] = decision
                    title_intro_texts.append(f"Title: {title}\n\nIntroduction: {intro_text}")


In [27]:
# Generate new embeddings
title_intro_embeddings = model.encode(title_intro_texts, convert_to_tensor=False)
title_intro_embeddings_np = np.array(title_intro_embeddings, dtype='float32')

# Create new FAISS index
index = faiss.IndexFlatL2(title_intro_embeddings_np.shape[1])
index.add(title_intro_embeddings_np)


In [29]:
review_scores = {}

for conf in conferences:
    review_dir = os.path.join(base_dir, f"{conf}/{conf}_review")

    for fname in os.listdir(review_dir):
        if fname.endswith(".json"):
            with open(os.path.join(review_dir, fname), "r", encoding="utf-8") as f:
                data = json.load(f)
                pid = data.get("id")
                reviews = data.get("reviews", [])

                ratings, confidences = [], []

                for review in reviews:
                    try:
                        ratings.append(int(review.get("rating", "").split(":")[0]))
                        confidences.append(int(review.get("confidence", "").split(":")[0]))
                    except:
                        continue

                if ratings:
                    review_scores[pid] = {
                        "avg_rating": round(sum(ratings) / len(ratings), 2),
                        "avg_confidence": round(sum(confidences) / len(confidences), 2) if confidences else None,
                        "review_count": len(ratings)
                    }


In [43]:
def normalize_decision(raw_decision):
    """
    Normalize decision string into one of: 'Accept', 'Reject', or 'Other'.
    - Accept if it starts with 'Accept'
    - Reject if it starts with 'Reject'
    - Otherwise, Other
    """
    if not raw_decision:
        return "Other"

    lower_decision = raw_decision.strip().lower()
    if lower_decision.startswith("accept"):
        return "Accept"
    elif lower_decision.startswith("reject"):
        return "Reject"
    else:
        return "Other"

In [44]:
def estimate_weighted_decision_and_scores(query_text, k=10):
    query_vec = model.encode(query_text, convert_to_tensor=False)
    query_vec_np = np.array([query_vec], dtype='float32')
    D, I = index.search(query_vec_np, k + 1)
    top_k_indices = I[0][1:]
    distances = D[0][1:]

    # Compute weights (softmax over negative distance)
    similarities = -distances
    weights = np.exp(similarities) / np.sum(np.exp(similarities))

    decision_weights = {}
    weighted_rating = 0
    weighted_conf = 0
    weight_total_rating = 0
    weight_total_conf = 0

    for i, idx in enumerate(top_k_indices):
        pid = paper_ids[idx]
        decision = normalize_decision(paper_decisions.get(pid, "Unknown"))
        weight = weights[i]

        # Weight decision
        decision_weights[decision] = decision_weights.get(decision, 0) + weight

        # Weight scores
        review = review_scores.get(pid, {})
        rating = review.get("avg_rating")
        confidence = review.get("avg_confidence")

        if rating is not None:
            weighted_rating += weight * rating
            weight_total_rating += weight

        if confidence is not None:
            weighted_conf += weight * confidence
            weight_total_conf += weight

    # Pick decision with highest total weight
    estimated_decision = max(decision_weights.items(), key=lambda x: x[1])[0]

    est_rating = round(weighted_rating / weight_total_rating, 2) if weight_total_rating else None
    est_conf = round(weighted_conf / weight_total_conf, 2) if weight_total_conf else None

    return {
        "estimated_decision": estimated_decision,
        "estimated_rating": est_rating,
        "estimated_confidence": est_conf,
        "decision_distribution": decision_weights
    }


In [45]:
from collections import Counter

def evaluate_accuracy_with_intro(n=100, k=10):
    correct = 0
    total = 0
    confusion = Counter()

    for i in range(min(n, len(titles))):
        query_text = title_intro_texts[i]
        query_pid = paper_ids[i]
        true_decision = normalize_decision(paper_decisions.get(query_pid, "Unknown"))

        result = estimate_weighted_decision_and_scores(query_text, k)
        pred_decision = result["estimated_decision"]

        if pred_decision == true_decision:
            correct += 1
        confusion[(true_decision, pred_decision)] += 1
        total += 1

    accuracy = correct / total if total > 0 else 0

    print(f"\n✅ Initial Results Accuracy (title+intro, top-{k}): {accuracy:.2f}")
    print("\n📊 Confusion Matrix (True → Predicted):")
    for (true, pred), count in confusion.items():
        print(f"{true:25s} → {pred:25s}: {count}")

    return accuracy, confusion


In [46]:
def evaluate_single(i, k=10):
    query_text = title_intro_texts[i]
    query_pid = paper_ids[i]
    true_decision = paper_decisions.get(query_pid, "Unknown")
    query_title = titles[i]

    # Estimate using weighted method
    result = estimate_weighted_decision_and_scores(query_text, k)
    pred_decision = result["estimated_decision"]
    decision_weights = result["decision_distribution"]

    print(f"\n🔍 Original Paper [{query_pid}]:")
    print(f"Title: {query_title}")
    print(f"True Decision: {true_decision}")
    print(f"Predicted Decision: {pred_decision}")
    print("\n📋 Similar Papers and Their Decisions:")

    # Retrieve nearest neighbors and distances
    query_vec = model.encode(query_text, convert_to_tensor=False)
    query_vec_np = np.array([query_vec], dtype='float32')
    D, I = index.search(query_vec_np, k + 1)
    top_k_indices = I[0][1:]  # skip self
    distances = D[0][1:]

    similarities = -distances
    weights = np.exp(similarities) / np.sum(np.exp(similarities))

    for rank, (idx, weight) in enumerate(zip(top_k_indices, weights), start=1):
        pid = paper_ids[idx]
        title = titles[idx]
        decision = paper_decisions.get(pid, "Unknown")
        print(f"{rank}. {title} [{pid}]")
        print(f"   Decision: {decision} | Weight: {weight:.3f}")

    print("\n📊 Weighted Decision Totals:")
    for label, w in sorted(decision_weights.items(), key=lambda x: -x[1]):
        print(f"{label}: {w:.3f}")


In [47]:
evaluate_single(12,5)


🔍 Original Paper [ICLR_2017_111]:
Title: Trusting SVM for Piecewise Linear CNNs
True Decision: Accept (Poster)
Predicted Decision: Accept

📋 Similar Papers and Their Decisions:
1. A Unified View of Piecewise Linear Neural Network Verification [NIPS_2018_442]
   Decision: Accept | Weight: 0.229
2. Piecewise Linear Neural Networks verification: A comparative study [ICLR_2018_713]
   Decision: Reject | Weight: 0.228
3. Piecewise Strong Convexity of Neural Networks [NIPS_2019_1163]
   Decision: Accept | Weight: 0.187
4. SVCCA: Singular Vector Canonical Correlation Analysis for Deep Learning Dynamics and Interpretability [NIPS_2017_582]
   Decision: Accept | Weight: 0.178
5. Cautious Deep Learning [ICLR_2019_1140]
   Decision: Reject | Weight: 0.178

📊 Weighted Decision Totals:
Accept: 0.594
Reject: 0.406


In [49]:
evaluate_accuracy_with_intro(n=len(titles), k=10)



✅ Initial Results Accuracy (title+intro, top-10): 0.60

📊 Confusion Matrix (True → Predicted):
Accept                    → Reject                   : 1301
Accept                    → Accept                   : 4088
Other                     → Accept                   : 92
Other                     → Reject                   : 44
Reject                    → Accept                   : 2126
Reject                    → Reject                   : 1207


(0.5977647324452472,
 Counter({('Accept', 'Accept'): 4088,
          ('Reject', 'Accept'): 2126,
          ('Accept', 'Reject'): 1301,
          ('Reject', 'Reject'): 1207,
          ('Other', 'Accept'): 92,
          ('Other', 'Reject'): 44}))