In [1]:
# Upgrade transformers to the newest release pip can find (-U); restart the kernel
# afterwards if the package was already imported in this session.
%pip install -U transformers

Note: you may need to restart the kernel to use updated packages.


In [1]:
%pip install ipywidgets

from huggingface_hub import login
login("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")

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Install the hf_xet package (presumably used by huggingface_hub to speed up
# model downloads; TODO confirm it is actually required here).
%pip install hf_xet

Note: you may need to restart the kernel to use updated packages.


In [3]:
%pip install torch torchvision transformers accelerate bitsandbytes pandas openpyxl


Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd

# Read the review spreadsheet, keep only rows that have a title, an abstract and
# a review, and strip the "Abstract:###" marker from the abstract text.
df = (
    pd.read_excel("tp_2017conference.xlsx")
    .dropna(subset=["title", "abstract", "review"])
    .assign(
        abstract=lambda frame: frame["abstract"]
        .str.replace("Abstract:###", "", regex=False)
        .str.strip()
    )
)

# One row per paper: rows sharing a title are collapsed, their reviews joined
# with a blank line, and the per-review scores collected into lists.
grouped = (
    df.groupby("title")
    .agg(
        abstract=("abstract", "first"),      # assumes every row of a paper carries the same abstract
        review=("review", "\n\n".join),      # concatenate all reviews of the paper
        rate=("rate", list),
        confidence=("confidence", list),
        decision=("decision", "first"),
    )
    .reset_index()
)


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the papers for testing, then carve 10% of the remainder off
# as a validation set. Both splits use the same fixed seed.
train_val, test = train_test_split(grouped, test_size=0.2, random_state=42)
train, val = train_test_split(train_val, test_size=0.1, random_state=42)

# Persist each split for later cells/notebooks.
# Note: list-valued columns (rate, confidence) are written as their string form.
for split_name, split_df in (("train", train), ("val", val), ("test", test)):
    split_df.to_csv(f"{split_name}.csv", index=False)

In [6]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "meta-llama/Llama-3.2-3B-Instruct"

# 4-bit NF4 quantisation with float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" leaves device placement to the library instead of pinning
# the model to a specific device here.
model_load_kwargs = {
    "device_map": "auto",
    "quantization_config": bnb_config,
    "torch_dtype": torch.float16,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

ZERO-SHOT BASELINE EVAL:
Here’s the template structure you should follow (based on the format Llama models expect):

prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
Title: {paper_title}
Abstract: {paper_abstract}
What are the main strengths and weaknesses of this paper?<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>"""


In training, the target (label) for each prompt will be the `review_summary` (or the review).

First, clean the dataset by deduplicating the papers: group by title, keep the first abstract, and merge the reviews.

1. Generate Zero-Shot Predictions
Use your cleaned dataset to prompt the base model without fine-tuning, and generate its assessment for each paper.

In [4]:
def generate_zero_shot(paper, max_new_tokens=256, *, llm=None, tok=None):
    """Generate a zero-shot strengths/weaknesses assessment for one paper.

    Args:
        paper: Mapping-like object (dict or DataFrame row) providing the
            "title" and "abstract" entries used to build the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.
        llm: Causal language model exposing ``.device`` and ``.generate``.
            Defaults to the notebook-level ``model``.
        tok: Tokenizer matching ``llm``. Defaults to the notebook-level
            ``tokenizer``.

    Returns:
        str: Only the text generated by the model (the prompt is not echoed
        back), with special tokens such as ``<|eot_id|>`` removed and
        surrounding whitespace stripped.

    Note:
        Sampling is enabled (temperature 0.7, top-p 0.9), so repeated calls on
        the same paper give different text unless a seed is set by the caller.
    """
    llm = model if llm is None else llm
    tok = tokenizer if tok is None else tok

    # NOTE(review): the reference Llama 3 chat template appears to put a blank
    # line after each header; the single-newline layout is kept here so the
    # prompt stays identical to the template used elsewhere in this notebook.
    prompt = (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
        f"Title: {paper['title']}\nAbstract: {paper['abstract']}\n"
        "What are the strengths and weaknesses of this paper?<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )

    # The prompt already starts with <|begin_of_text|>; letting the tokenizer
    # add its own special tokens would produce a duplicated BOS token.
    inputs = tok(prompt, return_tensors="pt", add_special_tokens=False).to(llm.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        outputs = llm.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tok.eos_token_id,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    # Decode only the newly generated tokens. Slicing by prompt length is more
    # reliable than splitting the decoded text on the assistant header, and
    # skipping special tokens keeps "<|eot_id|>" out of the returned review.
    completion_ids = outputs[0][prompt_len:]
    return tok.decode(completion_ids, skip_special_tokens=True).strip()


In [None]:
import gc
import os

import pandas as pd

OUTPUT_PATH = "zero_shot_predictions.csv"
BATCH_SIZE = 20
ERROR_PREFIX = "[ERROR:"  # marker written in place of a review when generation fails

# Resume from where we left off if a checkpoint file exists.
if os.path.exists(OUTPUT_PATH):
    done_df = pd.read_csv(OUTPUT_PATH)

    # Rows whose generation failed were checkpointed with an "[ERROR: ...]"
    # placeholder. Drop them so those papers are retried instead of being
    # treated as finished forever.
    failed_mask = done_df["zero_shot_review"].astype(str).str.startswith(ERROR_PREFIX)
    done_df = done_df[~failed_mask].reset_index(drop=True)

    processed_titles = set(done_df["title"].tolist())
    grouped_remaining = grouped[~grouped["title"].isin(processed_titles)]
    print(f"🔁 Resuming from checkpoint: {len(done_df)} already processed.")
    if failed_mask.any():
        print(f"   ↩️ Retrying {int(failed_mask.sum())} previously failed papers.")
else:
    done_df = pd.DataFrame()
    grouped_remaining = grouped
    print(f"🆕 Starting fresh: {len(grouped)} papers.")

# Process the remaining papers in batches, checkpointing after every batch so
# an interrupted run loses at most BATCH_SIZE generations.
total = len(grouped_remaining)
for start in range(0, total, BATCH_SIZE):
    batch = grouped_remaining.iloc[start:start + BATCH_SIZE].copy()
    print(f"\n🚀 Batch {start}-{start + len(batch) - 1}/{total}")

    batch_results = []
    for _, row in batch.iterrows():
        try:
            print(f"   🔹 {row['title'][:60]}...")
            review = generate_zero_shot(row)
        except Exception as e:
            # Best effort: keep going, but surface the failure and record a
            # placeholder that the resume logic above recognises and retries.
            review = f"{ERROR_PREFIX} {e}]"
            print(f"      ⚠️ Generation failed: {e}")
        batch_results.append(review)
        gc.collect()

    batch["zero_shot_review"] = batch_results

    # Save and update the checkpoint.
    done_df = pd.concat([done_df, batch], ignore_index=True)
    done_df.to_csv(OUTPUT_PATH, index=False)
    print(f"💾 Progress saved to {OUTPUT_PATH}")

print("\n✅ All zero-shot reviews generated and saved.")


🆕 Starting fresh: 489 papers.

🚀 Batch 0-19/489
   🔹 #Exploration: A Study of Count-Based Exploration for Deep Re...
   🔹 A Baseline for Detecting Misclassified and Out-of-Distributi...
   🔹 A Compare-Aggregate Model for Matching Text Sequences | Open...
   🔹 A Compositional Object-Based Approach to Learning Physical D...
   🔹 A Context-aware Attention Network for Interactive Question A...
   🔹 A Convolutional Encoder Model for Neural Machine Translation...
   🔹 A Deep Learning Approach for Joint Video Frame and Reward Pr...
   🔹 A Differentiable Physics Engine for Deep Learning in Robotic...
   🔹 A Joint Many-Task Model: Growing a Neural Network for Multip...
   🔹 A Learned Representation For Artistic Style | OpenReview...
   🔹 A Neural Knowledge Language Model | OpenReview...
   🔹 A Neural Stochastic Volatility Model | OpenReview...
   🔹 A STRUCTURED SELF-ATTENTIVE SENTENCE EMBEDDING | OpenReview...
   🔹 A Simple but Tough-to-Beat Baseline for Sentence Embeddings ...
   🔹 A Simple ye