### Data Uploading and Preprocessing

In [1]:
import os
import json
import pandas as pd

# ─── CONFIG ───────────────────────────────────────────────────────────
DATASET_ROOT = "/kaggle/input/is584dataset/dataset"  # adjust if needed
ASPECT_PATH  = os.path.join(DATASET_ROOT, "aspect_data", "review_with_aspect.jsonl")
CONFS        = [
    "ICLR_2017","ICLR_2018","ICLR_2019","ICLR_2020",
    "NIPS_2016","NIPS_2017","NIPS_2018","NIPS_2019"
]
# ────────────────────────────────────────────────────────────────────────

def extract_text_fields(doc):
    """Recursively collect all string values >50 chars from a nested JSON."""
    texts = []
    def recurse(o):
        if isinstance(o, str):
            if len(o.strip()) > 50:
                texts.append(o.strip())
        elif isinstance(o, list):
            for item in o:
                recurse(item)
        elif isinstance(o, dict):
            for v in o.values():
                recurse(v)
    recurse(doc)
    return "\n\n".join(texts) if texts else None

# 1) Load full paper texts
paper_records = []
for conf in CONFS:
    content_dir = os.path.join(DATASET_ROOT, conf, f"{conf}_content")
    if not os.path.isdir(content_dir):
        continue
    for fn in os.listdir(content_dir):
        if not fn.endswith("_content.json"):
            continue
        path = os.path.join(content_dir, fn)
        doc = json.load(open(path, "r"))
        sid = fn.replace("_content.json", "")
        text = extract_text_fields(doc)
        paper_records.append({"submission_id": sid, "paper_text": text})
paper_df = pd.DataFrame(paper_records)

# 2) Flatten aspect spans
asp_recs = []
with open(ASPECT_PATH, "r", encoding="utf-8") as f:
    for line in f:
        obj    = json.loads(line)
        sid    = obj["id"]
        revtxt = obj["text"]
        for st, ed, lbl in obj["labels"]:
            span = revtxt[st:ed].strip()
            if not span:
                continue
            asp_recs.append({
                "submission_id":   sid,
                "review_text":     revtxt,
                "aspect":          lbl,
                "aspect_sentence": span
            })
aspect_df = pd.DataFrame(asp_recs)

# 3) Merge papers + aspects, drop missing texts
df = aspect_df.merge(paper_df, on="submission_id", how="left")
df = df[df["paper_text"].notna()].reset_index(drop=True)

# 4) Filter to core aspects
core = ["clarity_positive","clarity_negative","soundness_positive","soundness_negative","motivation_positive","motivation_negative"]
df = df[df["aspect"].isin(core)]

# 5) Final fields
df = df[["paper_text","review_text","aspect","aspect_sentence"]]

# Quick sanity check
print("✔ df shape:", df.shape)
print(df["aspect"].value_counts().head())
print(df.head())

# ——— Sanitize text columns to remove/replace problematic chars ———
for col in ["paper_text", "review_text", "aspect_sentence"]:
    df[col] = df[col].apply(
        lambda x: x.encode("utf-8", "ignore").decode("utf-8") if isinstance(x, str) else x
    )

# Now save without error
df.to_csv("phase2_dataset.csv", index=False, encoding="utf-8")

✔ df shape: (71854, 4)
aspect
clarity_negative       15697
soundness_negative     15662
clarity_positive       13095
soundness_positive     12780
motivation_positive    11121
Name: count, dtype: int64
                                          paper_text  \
1  Deep neural networks (DNNs) have achieved impe...   
2  Deep neural networks (DNNs) have achieved impe...   
3  Deep neural networks (DNNs) have achieved impe...   
6  Deep neural networks (DNNs) have achieved impe...   
8  A long-term goal in artificial intelligence is...   

                                         review_text               aspect  \
1  This work studies the predictive uncertainty i...  motivation_positive   
2  This work studies the predictive uncertainty i...     clarity_positive   
3  This work studies the predictive uncertainty i...   soundness_negative   
6  This work studies the predictive uncertainty i...   soundness_negative   
8  Summary : This paper proposes a new approach t...  motivation_positive   


In [2]:
import os
import json
import pandas as pd

# ─── CONFIG ───────────────────────────────────────────────────────────
DATASET_ROOT = "/kaggle/input/is584dataset/dataset"  # adjust if needed
ASPECT_PATH  = os.path.join(DATASET_ROOT, "aspect_data", "review_with_aspect.jsonl")
CONFS        = [
    "ICLR_2017","ICLR_2018","ICLR_2019","ICLR_2020",
    "NIPS_2016","NIPS_2017","NIPS_2018","NIPS_2019"
]
CORE_ASPECTS = {
    "clarity_positive","clarity_negative",
    "soundness_positive","soundness_negative",
    "motivation_positive","motivation_negative"
}
# ────────────────────────────────────────────────────────────────────────

def extract_text_fields(doc):
    """Collect all string values ≥50 chars from nested JSON."""
    texts = []
    def recurse(o):
        if isinstance(o, str):
            if len(o.strip()) > 50:
                texts.append(o.strip())
        elif isinstance(o, dict):
            for v in o.values():
                recurse(v)
        elif isinstance(o, list):
            for item in o:
                recurse(item)
    recurse(doc)
    return "\n\n".join(texts) if texts else None

# 1) Load full paper texts
paper_records = []
for conf in CONFS:
    content_dir = os.path.join(DATASET_ROOT, conf, f"{conf}_content")
    if not os.path.isdir(content_dir):
        continue
    for fn in os.listdir(content_dir):
        if not fn.endswith("_content.json"):
            continue
        sid = fn.replace("_content.json", "")
        doc = json.load(open(os.path.join(content_dir, fn), "r"))
        text = extract_text_fields(doc)
        if text:
            paper_records.append({"submission_id": sid, "paper_text": text})
paper_df = pd.DataFrame(paper_records)
print(f"→ Loaded {len(paper_df)} papers")

# 2) Load aspect spans
asp_recs = []
with open(ASPECT_PATH, "r", encoding="utf-8") as f:
    for line in f:
        obj    = json.loads(line)
        sid    = obj["id"]
        revtxt = obj["text"]
        for st, ed, lbl in obj["labels"]:
            span = revtxt[st:ed].strip()
            if span:
                asp_recs.append({
                    "submission_id":   sid,
                    "review_text":     revtxt,
                    "aspect":          lbl,
                    "aspect_sentence": span
                })
aspect_df = pd.DataFrame(asp_recs)
print(f"→ Loaded {len(aspect_df)} aspect‐span entries")

# 3) Load all reviews, parse rating+confidence, pick top‐confidence one per paper
def parse_lead_int(s):
    """If s like '4: ...', return 4 else 0."""
    if not isinstance(s, str) or ":" not in s:
        return 0
    try:
        return int(s.split(":",1)[0])
    except:
        return 0

rev_recs = []
for conf in CONFS:
    rev_dir = os.path.join(DATASET_ROOT, conf, f"{conf}_review")
    if not os.path.isdir(rev_dir):
        continue
    for fn in os.listdir(rev_dir):
        if not fn.endswith("_review.json"):
            continue
        data = json.load(open(os.path.join(rev_dir, fn), "r"))
        root = data.get("root", data)
        sid  = root.get("id", fn.replace("_review.json",""))
        for rv in root.get("reviews", []):
            txt = rv.get("review","").strip()
            if not txt:
                continue
            rating     = parse_lead_int(rv.get("rating",""))
            confidence = parse_lead_int(rv.get("confidence",""))
            rev_recs.append({
                "submission_id": sid,
                "review":        txt,
                "rating":        rating,
                "confidence":    confidence
            })

rev_df = pd.DataFrame(rev_recs)
print(f"→ Loaded {len(rev_df)} raw review entries")

best_rev = (
    rev_df
    .sort_values(
        ["submission_id","confidence","rating"],
        ascending=[True, False, False]
    )
    .groupby("submission_id", as_index=False)
    .first()[["submission_id","review","confidence"]]
)
print(f"→ Selected {len(best_rev)} best‐confidence reviews")

# 4) Merge papers + aspects + chosen reviews
df = (
    aspect_df
    .merge(paper_df,  on="submission_id", how="left")
    .merge(best_rev,  on="submission_id", how="left")
    .dropna(subset=["paper_text","review_text"])
    .reset_index(drop=True)
)

# 5) Keep only core aspects
df = df[df["aspect"].isin(CORE_ASPECTS)]

# 6) Deduplicate per paper+aspect: pick the longest aspect_sentence
df["span_len"] = df["aspect_sentence"].str.len()
df = (
    df
    .sort_values(
        ["submission_id", "aspect", "span_len"],
        ascending=[True, True, False]
    )
    .drop_duplicates(["submission_id"], keep="first")
    .drop(columns="span_len")
)

# 7) Sanitize and save
for col in ["paper_text", "review", "aspect_sentence"]:
    df[col] = (df[col]
               .astype(str)
               .apply(lambda x: x.encode("utf-8","ignore")
                                .decode("utf-8")))
df.to_csv("phase2_dataset.csv", index=False, encoding="utf-8")
print(f"✔ Final dataset: {len(df)} rows")

→ Loaded 8850 papers
→ Loaded 148086 aspect‐span entries
→ Loaded 28122 raw review entries
→ Selected 8780 best‐confidence reviews
✔ Final dataset: 8704 rows


In [3]:
df.head(10)

Unnamed: 0,submission_id,review_text,aspect,aspect_sentence,paper_text,review,confidence
121061,ICLR_2017_1,This is a very interesting and fairly easy to ...,clarity_positive,"As a non-expert on this topic , it was easy to...",MAKING NEURAL PROGRAMMING ARCHITECTURES GENERA...,This paper improves significantly upon the ori...,5
31763,ICLR_2017_10,"* *Edit : Based on the discussion below , my m...",clarity_negative,There are also other typos throughout,Q-PROP: SAMPLE-EFFICIENT POLICY GRADIENT WITH ...,"**Edit: Based on the discussion below, my main...",5
135807,ICLR_2017_100,"In this paper , the authors use a separate int...",clarity_positive,The organization is generally very clear,INTROSPECTION:ACCELERATING NEURAL NETWORK TRAI...,EDIT: Updated score. See additional comment.\n...,5
122259,ICLR_2017_101,This was an interesting paper . The algorithm ...,clarity_positive,"The algorithm seems clear , the problem well-r...",The task of hyperparameter optimization is bec...,"This paper discusses Hyperband, an extension o...",5
419,ICLR_2017_102,The paper proposes a new memory access scheme ...,clarity_negative,The difference to the properties of normal NTM...,Recent work on neural Turing machines (NTMs) (...,*** Paper Summary ***\n\nThis paper formalizes...,4
59132,ICLR_2017_103,This paper points out that you can take an LST...,clarity_negative,"Unfortunately , this simple , effective and in...","Recurrent neural networks (RNNs), including ga...",This paper introduces a novel RNN architecture...,4
124269,ICLR_2017_104,The authors propose a recurrent neural network...,clarity_positive,In general the paper is well written and quite...,"In order to plan and act effectively, agent-ba...",[UPDATE]\nAfter going through the response fro...,5
66336,ICLR_2017_105,This paper explores ensemble optimisation in t...,clarity_positive,The paper is well written and accessible .,EPOPT: LEARNING ROBUST NEURAL NETWORK POLICIES...,This paper explores ensemble optimisation in t...,4
66731,ICLR_2017_106,In this paper a well known soft mixture of exp...,motivation_positive,This is clearly an interesting direction of fu...,Transferring knowledge from prior source tasks...,In this paper a well known soft mixture of exp...,4
77127,ICLR_2017_107,This paper proposes an approach to learning wo...,clarity_positive,The paper is clearly written,MULTI-VIEW RECURRENT NEURAL ACOUSTIC WORD EMBE...,This paper proposes an approach to learning wo...,4


In [4]:
df["aspect"].value_counts()

aspect
clarity_negative       6245
clarity_positive       1959
motivation_positive     219
motivation_negative     137
soundness_negative      111
soundness_positive       33
Name: count, dtype: int64

In [5]:
from sklearn.model_selection import train_test_split

# ─── Load the prepared dataset ──────────────────────────────────────────
df = pd.read_csv("/kaggle/working/phase2_dataset.csv")

# Display overall class distribution
print("Overall aspect distribution:")
print(df["aspect"].value_counts(), "\n")

# ─── 1) Split off final test set (20% of data) ─────────────────────────
# We stratify by 'aspect' to keep class balance across splits.
train_val, test = train_test_split(
    df,
    test_size=0.20,
    stratify=df["aspect"],
    random_state=42
)

print(f"Train+Val shape: {train_val.shape}")
print(f"Test shape:        {test.shape}\n")

# ─── 2) Split train_val into train (80% of original) and val (10% of original) ─────────────────────────
# Since train_val is 80% of original, using test_size=0.125 yields 10% of original for validation.
train, val = train_test_split(
    train_val,
    test_size=0.125,
    stratify=train_val["aspect"],
    random_state=42
)

print(f"Train shape: {train.shape}")
print(f"Val shape:   {val.shape}")
print(f"Test shape:  {test.shape}\n")

# ─── 3) Verify class distribution in each split ─────────────────────────
print("Class distribution in TRAIN set:")
print(train["aspect"].value_counts(), "\n")

print("Class distribution in VAL set:")
print(val["aspect"].value_counts(), "\n")

print("Class distribution in TEST set:")
print(test["aspect"].value_counts(), "\n")

# ─── 4) Save splits to CSV for modeling ────────────────────────────────
train.to_csv("train.csv", index=False)
val.to_csv("val.csv",     index=False)
test.to_csv("test.csv",   index=False)

print("Saved train.csv, val.csv, test.csv")

Overall aspect distribution:
aspect
clarity_negative       6245
clarity_positive       1959
motivation_positive     219
motivation_negative     137
soundness_negative      111
soundness_positive       33
Name: count, dtype: int64 

Train+Val shape: (6963, 7)
Test shape:        (1741, 7)

Train shape: (6092, 7)
Val shape:   (871, 7)
Test shape:  (1741, 7)

Class distribution in TRAIN set:
aspect
clarity_negative       4371
clarity_positive       1371
motivation_positive     153
motivation_negative      96
soundness_negative       78
soundness_positive       23
Name: count, dtype: int64 

Class distribution in VAL set:
aspect
clarity_negative       625
clarity_positive       196
motivation_positive     22
motivation_negative     14
soundness_negative      11
soundness_positive       3
Name: count, dtype: int64 

Class distribution in TEST set:
aspect
clarity_negative       1249
clarity_positive        392
motivation_positive      44
motivation_negative      27
soundness_negative       22

### Baseline Model - Zero shot with longllama

In [6]:
# 1) Imports & Installs
# ─────────────────────────────────────────────────────────────────────
!pip install -q wandb evaluate transformers peft bitsandbytes rouge_score bert_score
import os, random, pandas as pd, numpy as np, wandb
import torch
import evaluate

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    Trainer,
    TrainingArguments,
    DataCollatorForSeq2Seq,
    set_seed
)
from peft import LoraConfig, get_peft_model
from bitsandbytes import __version__ as bnb_version 

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━

2025-05-27 14:25:50.168902: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748355950.359617      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748355950.411935      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [7]:
wandb.login(key="0e48c15605abf65402208cd05becaa061bf0dfbf")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mesrasekerci[0m ([33mesrasekerci-metu-middle-east-technical-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [24]:
# 2) Configuration & Seeds
# ─────────────────────────────────────────────────────────────────────
PROJECT     = "is584-phase2"
MODEL_ID    = "syzymon/long_llama_3b_instruct"
TRAIN_FILE  = "train.csv"
VAL_FILE    = "val.csv"
TEST_FILE   = "test.csv"
OUTPUT_DIR  = "./phase2_outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

SEED        = 42
BATCH       = 8
MAX_IN      = 512
MAX_OUT     = 64
LORA_RANKS  = [8, 16]
EPOCHS      = 1
LR          = 2e-4
DEVICE      = "cuda"
set_seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

# 3) Load Data Splits
# ─────────────────────────────────────────────────────────────────────
df_train = pd.read_csv(TRAIN_FILE)
df_val   = pd.read_csv(VAL_FILE)
df_test  = pd.read_csv(TEST_FILE)

# 4) Initialize Tokenizer & Base Model (FP16, GPU)
# ─────────────────────────────────────────────────────────────────────
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
).eval()



In [25]:
df_train = df_train.dropna(subset=['review'])
df_val   = df_val.dropna(subset=['review'])
df_test  = df_test.dropna(subset=['review'])

# 5) Build Prompts & References
# ─────────────────────────────────────────────────────────────────────
def build_prompts(df):
    P, R = [], []
    for _, row in df.iterrows():
        p = row.paper_text.replace("\n"," ")[:2000]
        rv= row.review.replace("\n"," ")[:1000]
        asp = row.aspect
        prompt = (
            f"Paper Excerpt:\n{p}\n\n"
            f"Previous Review:\n{rv}\n\n"
            f"Aspect: {asp}\n\n"
            "Task: Generate exactly one new sentence focusing on this aspect."
        )
        P.append(prompt)
        R.append(row.aspect_sentence)
    return P, R

train_prompts, train_refs = build_prompts(df_train)
val_prompts,   val_refs   = build_prompts(df_val)
test_prompts,  test_refs  = build_prompts(df_test)

# 6) Tokenize for Trainer
# ─────────────────────────────────────────────────────────────────────
def tokenize_for_train(prompts, refs):
    enc = tokenizer(prompts, truncation=True, max_length=MAX_IN,
                    padding="max_length", return_tensors="pt")
    with tokenizer.as_target_tokenizer():
        labs = tokenizer(refs, truncation=True, max_length=MAX_OUT,
                         padding="max_length", return_tensors="pt")["input_ids"]
    enc["labels"] = labs
    return enc

train_ds = tokenize_for_train(train_prompts, train_refs)
val_ds   = tokenize_for_train(val_prompts,   val_refs)

# 7) Data Collator & Metric Computation
# ─────────────────────────────────────────────────────────────────────
collator = DataCollatorForSeq2Seq(tokenizer, model=base_model)

bleu  = evaluate.load("bleu")
rouge = evaluate.load("rouge")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    dec_p = tokenizer.batch_decode(preds, skip_special_tokens=True)
    dec_l = tokenizer.batch_decode(labels, skip_special_tokens=True)
    refs_wrapped = [[l] for l in dec_l]
    b = bleu.compute(predictions=dec_p, references=refs_wrapped)["bleu"]
    r = rouge.compute(predictions=dec_p, references=dec_l,
                      rouge_types=["rouge1","rouge2","rougeL"])
    return {
        "bleu": b,
        "rouge1": r["rouge1"].mid.fmeasure,
        "rouge2": r["rouge2"].mid.fmeasure,
        "rougeL": r["rougeL"].mid.fmeasure
    }



In [12]:
# 8) Zero-Shot Baseline (FP16 Generation + W&B + CSV)
# ─────────────────────────────────────────────────────────────────────
wandb.init(project=PROJECT, name="zero-shot", reinit=True)

gen_cfg = GenerationConfig(
    max_new_tokens=MAX_OUT,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1
)

from tqdm.auto import tqdm

def run_zero_shot(df, prompts, refs, split):
    preds = []
    # tqdm for inference progress
    for i in tqdm(range(0, len(prompts), BATCH), desc=f"{split} zero-shot"):
        batch = prompts[i : i + BATCH]
        enc = tokenizer(
            batch,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=MAX_IN
        ).to(DEVICE)
        outs = base_model.generate(**enc, generation_config=gen_cfg)
        for seq, mask in zip(outs, enc.attention_mask):
            L = mask.sum().item()
            preds.append(tokenizer.decode(seq[L:], skip_special_tokens=True))

    # Compute BLEU
    b = bleu.compute(predictions=preds, references=[[r] for r in refs])["bleu"]

    # Compute ROUGE and normalize
    raw = rouge.compute(predictions=preds, references=refs,
                        rouge_types=["rouge1","rouge2","rougeL"])
    def _fm(val):
        # if it’s a Score object, grab val.mid.fmeasure, else assume float
        return getattr(val, "mid", val).fmeasure if hasattr(val, "mid") else float(val)
    r1 = _fm(raw["rouge1"])
    r2 = _fm(raw["rouge2"])
    rL = _fm(raw["rougeL"])

    # Log to W&B
    wandb.log({
        f"{split}/zs_bleu": b,
        f"{split}/zs_r1":   r1,
        f"{split}/zs_r2":   r2,
        f"{split}/zs_rL":   rL
    })

    # Save predictions
    out = df.copy()
    out["pred_zs"] = preds
    out.to_csv(f"{OUTPUT_DIR}/{split}_zs.csv", index=False)

# Run with progress bars
run_zero_shot(df_val,  val_prompts,  val_refs,  "val")
run_zero_shot(df_test, test_prompts, test_refs, "test")
wandb.finish()

val zero-shot:   0%|          | 0/109 [00:00<?, ?it/s]

test zero-shot:   0%|          | 0/218 [00:00<?, ?it/s]

0,1
test/zs_bleu,▁
test/zs_r1,▁
test/zs_r2,▁
test/zs_rL,▁
val/zs_bleu,▁
val/zs_r1,▁
val/zs_r2,▁
val/zs_rL,▁

0,1
test/zs_bleu,0.00106
test/zs_r1,0.0965
test/zs_r2,0.00507
test/zs_rL,0.07692
val/zs_bleu,0.00139
val/zs_r1,0.09478
val/zs_r2,0.00514
val/zs_rL,0.07557
