In [None]:
# ============================================================
# MULTILINGUAL HARD NEGATIVE GENERATOR  (MULTIPLE NEGATIVES)
# ============================================================

!pip install -q transformers accelerate sentencepiece torch

import os
import json
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
import gc

# -------------------------
# CONFIG
# -------------------------
# Directory that process_lang() reads the per-language `mrtydi_{lang}.json` inputs from.
INPUT_DIR = "/content"
# Directory that process_lang() writes the `mrtydi_neg_{lang}.jsonl` outputs to.
OUTPUT_DIR = "/content/out_neg"
# Create the output directory up front; exist_ok makes re-running the cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Language codes to process; each one selects an input file and a fallback sentence.
LANGS = ["ar", "bn", "en", "fi", "id", "ja", "ko", "ru", "sw", "te", "th"]
# Upper bound on the number of rows loaded per language (passed to load_json_array).
MAX_ROWS = 100
# Hugging Face model id loaded below as the text generator.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

# Canned, language-specific sentences used as the negative when generation cannot
# provide one (row without query/positive, or every generation attempt rejected).
# Keys are the language codes listed in LANGS.
FALLBACK_NEGATIVES = {
    "ar": "هذه الإجابة غير صحيحة ولا تتوافق مع الحقائق المعروفة.",
    "bn": "এই উত্তরটি ভুল এবং প্রকৃত তথ্যের সাথে মেলে না।",
    "en": "This answer is incorrect and does not align with factual information.",
    "fi": "Tämä vastaus on virheellinen eikä vastaa faktoja.",
    "id": "Jawaban ini salah dan tidak sesuai dengan fakta yang benar.",
    "ja": "この回答は誤りであり、事実とは異なります。",
    "ko": "이 답변은 잘못되었으며 사실과 다릅니다.",
    "ru": "Этот ответ неверный и не соответствует фактам.",
    "sw": "Jibu hili si sahihi na halilingani na ukweli.",
    "te": "ఈ సమాధానం తప్పు మరియు వాస్తవాలకు అనుగుణంగా లేదు.",
    "th": "คำตอบนี้ไม่ถูกต้องและไม่ตรงกับข้อเท็จจริง."
}

# Load the tokenizer and causal LM once at module level; generate_negative() uses
# the resulting `tokenizer`, `model` and `device` globals.
print("🚀 Loading model...")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"📱 Using device: {device}")

# NOTE(review): trust_remote_code=True lets the loader run Python code shipped with
# the model repository — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Half precision on GPU, full precision on CPU. With a GPU, device_map="auto"
# leaves weight placement to the library; on CPU no device map is used.
# NOTE(review): the captured run output warns that `torch_dtype` is deprecated in
# favour of `dtype` in the installed transformers version.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
    trust_remote_code=True
)
# Without a device map the model has to be moved explicitly.
if device == "cpu":
    model = model.to(device)
print("✅ Model loaded successfully!\n")

# -------------------------
# LOAD JSON ARRAY
# -------------------------
def load_json_array(path, limit):
    """Load up to ``limit`` records from a JSON Lines file.

    Despite its name, this reads one JSON document per line (JSONL), not a
    single JSON array. Blank lines and lines that are not valid JSON are
    skipped silently so that one corrupt record does not abort the run.

    Args:
        path: Path of the UTF-8 encoded input file.
        limit: Maximum number of records to return. ``None`` means no limit;
            zero or a negative number yields an empty list.

    Returns:
        A list with the parsed records, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    rows = []
    if limit is not None and limit <= 0:
        return rows

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rows.append(json.loads(line))
            except ValueError:
                # json.JSONDecodeError is a ValueError. Catching only that keeps
                # KeyboardInterrupt and genuine bugs from being swallowed.
                continue
            if limit is not None and len(rows) >= limit:
                break
    return rows

# -------------------------
# QUALITY CHECK
# -------------------------
def is_bad(neg, pos, query):
    """Decide whether a generated negative should be rejected.

    A candidate is rejected when it is missing or not a string, shorter than
    10 characters, equal to the positive (case-insensitive), contains a
    positive that is longer than 15 characters, is a bare generic word,
    still carries the "wrong answer:" label, or repeats the query verbatim.

    Args:
        neg: Candidate negative text.
        pos: The correct answer / positive passage (``None`` is treated as "").
        query: The query text (``None`` is treated as "").

    Returns:
        True if the candidate must be discarded, False if it is usable.
    """
    if not neg or not isinstance(neg, str):
        return True

    neg_clean = neg.strip()
    pos_clean = (pos or "").strip()
    neg_lower = neg_clean.lower()
    pos_lower = pos_clean.lower()
    query_lower = (query or "").strip().lower()

    if len(neg_clean) < 10:
        return True
    if neg_lower == pos_lower:
        return True
    # Only treat containment as a copy when the positive is long enough to be
    # meaningful; short positives (a year, a name) may legitimately reappear.
    if len(pos_clean) > 15 and pos_lower in neg_lower:
        return True

    # Kept as a safeguard; with the 10-character minimum above these short
    # words are already rejected by the length check.
    generic = ("yes", "no", "true", "false", "maybe", "unknown")
    if neg_lower in generic:
        return True

    if "wrong answer:" in neg_lower:
        return True
    # An empty query is a substring of every string, so without this guard an
    # empty or whitespace-only query would reject every candidate.
    if query_lower and query_lower in neg_lower:
        return True

    return False

# -------------------------
# CLEAN TEXT
# -------------------------
def clean_generated_text(text, prompt):
    """Reduce raw model output to a single candidate sentence.

    Steps, in order: drop any verbatim copy of the prompt, keep only what
    follows the last occurrence of each known answer label, keep the first
    line, and remove one pair of surrounding double quotes.

    Args:
        text: Decoded model output.
        prompt: The prompt that was sent to the model.

    Returns:
        The cleaned candidate text (possibly an empty string).
    """
    cleaned = text
    if prompt in cleaned:
        cleaned = cleaned.replace(prompt, "").strip()

    # Labels are applied one after another, each on the result of the previous.
    for label in ("Wrong answer:", "Incorrect answer:", "Negative:", "Hard negative:"):
        if label in cleaned:
            _, _, tail = cleaned.rpartition(label)
            cleaned = tail.strip()

    first_line, _, _ = cleaned.partition("\n")
    cleaned = first_line.strip()

    wrapped_in_quotes = cleaned[:1] == '"' and cleaned[-1:] == '"'
    if wrapped_in_quotes:
        cleaned = cleaned[1:-1].strip()

    return cleaned


# ============================================================
# ✅ FIXED FUNCTION — NOW RETURNS 3 NEGATIVES
# ============================================================
def generate_negative(query, positive, lang, num_negatives=3, max_attempts=3):
    """Generate ``num_negatives`` hard-negative answers for a query.

    The model is sampled up to ``max_attempts`` times. Candidates that pass
    ``is_bad`` are accumulated (without duplicates) across attempts, so a good
    candidate from an early attempt is not lost when a later one fails. If
    fewer than ``num_negatives`` usable candidates were produced, the list is
    padded with the language's fallback sentence (suffixed " (2)", " (3)", ...
    to keep the entries distinct).

    Args:
        query: Query text.
        positive: Correct answer / positive passage.
        lang: Language code used to pick the fallback sentence; unknown codes
            fall back to English.
        num_negatives: Number of negatives to return.
        max_attempts: Maximum number of sampling rounds.

    Returns:
        A list of exactly ``num_negatives`` strings (empty if it is < 1).
    """
    # NOTE(review): the prompt is sent as raw text rather than through the
    # tokenizer's chat template — confirm that is intended for this model.
    collected = []
    # Passage text actually placed in the prompt; shortened after an OOM so a
    # retry does not simply repeat the same failing allocation.
    passage = positive

    for attempt in range(max_attempts):
        prompt = f"""You are generating HARD NEGATIVE answers for information retrieval.

Rules:
- MUST be factually wrong
- MUST sound fluent and natural
- MUST be relevant but incorrect
- MUST NOT copy the correct answer
- Output 1–2 sentences
- Same language as query

Query: {query}
Correct Answer: {passage}

Generate {num_negatives} different wrong answers:"""

        inputs = None
        output = None
        try:
            inputs = tokenizer(prompt, return_tensors="pt").to(device)
            prompt_len = inputs["input_ids"].shape[1]

            output = model.generate(
                **inputs,
                max_new_tokens=80,
                do_sample=True,
                top_p=0.90,
                top_k=50,
                temperature=0.85,
                num_return_sequences=num_negatives,
                pad_token_id=tokenizer.eos_token_id,
            )

            for sequence in output:
                # Decode only the newly generated tokens. Stripping the prompt
                # by string matching is brittle: if the decoded text does not
                # reproduce the prompt exactly, the prompt's own first line
                # would be returned as a "negative".
                text = tokenizer.decode(
                    sequence[prompt_len:], skip_special_tokens=True
                ).strip()
                neg = clean_generated_text(text, prompt)
                if neg in collected or is_bad(neg, positive, query):
                    continue
                collected.append(neg)

            if len(collected) >= num_negatives:
                return collected[:num_negatives]

        except Exception as e:
            # Best effort: report the problem and try again.
            print("Generation error:", e)
            if "out of memory" in str(e).lower():
                # Drop tensor references, return cached blocks to the driver
                # and halve the passage used in the next prompt.
                inputs = None
                output = None
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                passage = passage[: max(1, len(passage) // 2)]

    # Fallback: pad whatever was collected up to the requested count.
    fb = FALLBACK_NEGATIVES.get(lang, FALLBACK_NEGATIVES["en"])
    fillers = [fb] + [f"{fb} ({k})" for k in range(2, num_negatives + 1)]
    missing = max(0, num_negatives - len(collected))
    return collected[:max(0, num_negatives)] + fillers[:missing]


# -------------------------
# PROCESS LANGUAGE
# -------------------------
def process_lang(lang):
    """Generate negatives for one language and write them as JSON Lines.

    Reads up to ``MAX_ROWS`` rows from ``{INPUT_DIR}/mrtydi_{lang}.json``, adds
    a ``"negatives"`` list to every row and writes the rows to
    ``{OUTPUT_DIR}/mrtydi_neg_{lang}.jsonl``. Rows without a query or a
    positive get a single fallback sentence instead of generated negatives.
    A language whose input file is missing is skipped with a message so the
    remaining languages still run.

    Args:
        lang: Language code, e.g. ``"en"``.
    """
    infile = f"{INPUT_DIR}/mrtydi_{lang}.json"
    outfile = f"{OUTPUT_DIR}/mrtydi_neg_{lang}.jsonl"

    print(f"\nPROCESSING {lang.upper()}")
    if not os.path.exists(infile):
        print(f"[skip] no input found for {lang}: {infile}")
        return

    rows = load_json_array(infile, MAX_ROWS)
    # Same lookup rule as generate_negative(): unknown codes use English.
    fallback = FALLBACK_NEGATIVES.get(lang, FALLBACK_NEGATIVES["en"])

    with open(outfile, "w", encoding="utf-8") as out:
        for row in tqdm(rows):
            query = row.get("query", "")
            positive = row.get("positive_passage") or row.get("positive") or ""

            if query and positive:
                row["negatives"] = generate_negative(query, positive, lang, num_negatives=3)
            else:
                row["negatives"] = [fallback]

            out.write(json.dumps(row, ensure_ascii=False) + "\n")
            # Each row takes seconds to generate; flushing keeps finished rows
            # on disk if the session dies part-way through.
            out.flush()

    print("Saved:", outfile)

# -------------------------
# MAIN
# -------------------------
def main():
    """Run hard-negative generation for every language in LANGS, in order."""
    for language_code in LANGS:
        process_lang(language_code)


if __name__ == "__main__":
    main()


🚀 Loading model...
📱 Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

✅ Model loaded successfully!


PROCESSING AR


100%|██████████| 100/100 [07:54<00:00,  4.75s/it]


Saved: /content/out_neg/mrtydi_neg_ar.jsonl

PROCESSING BN


100%|██████████| 100/100 [10:26<00:00,  6.27s/it]


Saved: /content/out_neg/mrtydi_neg_bn.jsonl

PROCESSING EN


100%|██████████| 100/100 [14:46<00:00,  8.86s/it]


Saved: /content/out_neg/mrtydi_neg_en.jsonl

PROCESSING FI


100%|██████████| 100/100 [08:05<00:00,  4.86s/it]


Saved: /content/out_neg/mrtydi_neg_fi.jsonl

PROCESSING ID


100%|██████████| 100/100 [07:44<00:00,  4.64s/it]


Saved: /content/out_neg/mrtydi_neg_id.jsonl

PROCESSING JA


100%|██████████| 100/100 [08:11<00:00,  4.91s/it]


Saved: /content/out_neg/mrtydi_neg_ja.jsonl

PROCESSING KO


100%|██████████| 100/100 [07:42<00:00,  4.62s/it]


Saved: /content/out_neg/mrtydi_neg_ko.jsonl

PROCESSING RU


100%|██████████| 100/100 [08:16<00:00,  4.96s/it]


Saved: /content/out_neg/mrtydi_neg_ru.jsonl

PROCESSING SW


100%|██████████| 100/100 [08:21<00:00,  5.02s/it]


Saved: /content/out_neg/mrtydi_neg_sw.jsonl

PROCESSING TE


 86%|████████▌ | 86/100 [09:43<00:59,  4.28s/it]

Generation error: CUDA out of memory. Tried to allocate 6.70 GiB. GPU 0 has a total capacity of 14.74 GiB of which 750.12 MiB is free. Process 2575 has 14.01 GiB memory in use. Of the allocated memory 13.43 GiB is allocated by PyTorch, and 458.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Generation error: CUDA out of memory. Tried to allocate 6.70 GiB. GPU 0 has a total capacity of 14.74 GiB of which 888.12 MiB is free. Process 2575 has 13.87 GiB memory in use. Of the allocated memory 7.01 GiB is allocated by PyTorch, and 6.73 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https:

100%|██████████| 100/100 [11:09<00:00,  6.70s/it]


Saved: /content/out_neg/mrtydi_neg_te.jsonl

PROCESSING TH


100%|██████████| 100/100 [08:03<00:00,  4.84s/it]

Saved: /content/out_neg/mrtydi_neg_th.jsonl





In [None]:
# adversarial_filter_pipeline.py
# Install prerequisites if needed:
# pip install -q sentence-transformers jsonlines torch numpy tqdm
# ============================================================
# STEP 3 — ADVERSARIAL FILTERING WITH SCORES
# ============================================================

import os, json, math, sys
from tqdm import tqdm
import jsonlines
import numpy as np
from sentence_transformers import SentenceTransformer, util

# -------------------------
# CONFIG - EDIT AS NEEDED
# -------------------------
INPUT_DIR = "/content/out_neg"          # folder containing mrtydi_neg_<lang>.jsonl (generator output)
OUTPUT_DIR = "/content/final_filtered" # folder to save filtered outputs
os.makedirs(OUTPUT_DIR, exist_ok=True)

LANGS = ["ar","bn","en","fi","id","ja","ko","ru","sw","te","th"]
MAX_ROWS_PER_LANG = None   # None => all rows, or set e.g. 1000
EMBED_MODEL = "intfloat/multilingual-e5-base"

# Plain import instead of a walrus + __import__ expression: same module-level
# `torch` binding, but readable and visible to linters.
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Filtering / selection hyperparams
# Option A: relative threshold: keep negatives that have neg_sim >= pos_sim * SIM_SCORE_RATIO_THRESHOLD
use_relative_threshold = True
SIM_SCORE_RATIO_THRESHOLD = 0.80

# Option B: absolute threshold (if preferred)
# If both options are enabled, a negative is kept when it passes either one.
use_absolute_threshold = False
ABS_SIM_THRESHOLD = 0.20

# final selection
KEEP_TOP_K = 1    # keep top-K negatives per query (1 keeps highest-scoring only). Set >1 to keep more.
FALLBACK_KEEP_ONE = True  # if no neg passes threshold, keep the single highest-scoring negative anyway

# Cleaning heuristics
MIN_NEG_LENGTH = 8      # minimum characters for a candidate negative
MAX_NEG_TOKEN_OVERLAP_RATIO = 0.6  # if negative shares >60% tokens with positive => reject (too similar)
PROMPT_LEAK_MARKERS = ["wrong answer:", "correct answer:", "generate", "hard negative"]  # lowercase

# -------------------------
# Utilities
# -------------------------
def safe_load_json_or_jsonl(path, limit=None):
    """Read a file that is either a JSON array or JSON Lines.

    The file is first parsed as a single JSON document; if that yields a list
    it is returned (truncated to ``limit``). Otherwise the file is read line
    by line with the standard ``json`` module, skipping blank and unparsable
    lines.

    The line-by-line pass always starts from an empty list. Previously a
    reader that failed part-way left its partial rows in place and the
    fallback appended the whole file again, duplicating records.

    Args:
        path: Path of the UTF-8 encoded input file.
        limit: Maximum number of records to return; ``None`` or 0 means all.

    Returns:
        A list of parsed records; empty if the file does not exist.
    """
    if not os.path.exists(path):
        return []

    # Try JSON array
    try:
        with open(path, "r", encoding="utf-8") as f:
            obj = json.load(f)
    except ValueError:
        # Not one JSON document (e.g. JSONL): fall through to line parsing.
        obj = None
    if isinstance(obj, list):
        return obj[:limit] if limit else obj

    # JSONL: one document per line, tolerant of stray blank/corrupt lines.
    rows = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                rows.append(json.loads(line))
            except ValueError:
                continue
            if limit and len(rows) >= limit:
                break
    return rows

def is_prompt_leak(text):
    """Return True if ``text`` looks like leaked prompt text.

    Non-string values count as leaks. Strings are lower-cased and checked for
    any of the substrings in ``PROMPT_LEAK_MARKERS``.
    """
    if not isinstance(text, str):
        return True
    lowered = text.lower()
    return any(marker in lowered for marker in PROMPT_LEAK_MARKERS)

def token_overlap_ratio(a, b):
    """Fraction of ``b``'s distinct lower-cased words that also occur in ``a``.

    Words are obtained by whitespace splitting. Returns 0.0 when either
    argument is empty/falsy.
    """
    if not a or not b:
        return 0.0
    words_a, words_b = (set(str(value).lower().split()) for value in (a, b))
    shared = words_a.intersection(words_b)
    # Guard against a whitespace-only ``b`` producing an empty word set.
    return len(shared) / max(1, len(words_b))

def clean_negative_candidate(neg, positive):
    """Return True if ``neg`` is usable as a negative, False if it must be dropped.

    A candidate is dropped when it is empty or not a string, shorter than
    ``MIN_NEG_LENGTH`` after stripping, flagged by ``is_prompt_leak``, equal to
    the positive (case-insensitive), or when ``token_overlap_ratio`` with the
    positive exceeds ``MAX_NEG_TOKEN_OVERLAP_RATIO``.
    """
    if not neg or not isinstance(neg, str):
        return False

    candidate = neg.strip()
    if len(candidate) < MIN_NEG_LENGTH or is_prompt_leak(candidate):
        return False

    # The remaining checks compare against the positive; nothing to compare
    # when there is none.
    if not positive:
        return True

    if candidate.lower() == str(positive).strip().lower():
        return False

    return token_overlap_ratio(candidate, positive) <= MAX_NEG_TOKEN_OVERLAP_RATIO

# -------------------------
# Load embedder
# -------------------------
# Load the sentence-embedding model once at module level; filter_row_negatives()
# uses the resulting `embedder` global.
print("Loading embedder:", EMBED_MODEL, "-> device:", DEVICE)
embedder = SentenceTransformer(EMBED_MODEL)
# Move the model to the device selected in the config section.
embedder = embedder.to(DEVICE)

def cosine(a, b):
    """Return the cosine similarity of two embeddings as a Python number."""
    similarity = util.cos_sim(a, b)
    return similarity.item()

# -------------------------
# Core filter function
# -------------------------
def filter_row_negatives(query, positive, negatives_list):
    """Score and filter the candidate negatives of one row.

    Args:
        query: Query text.
        positive: Positive passage text.
        negatives_list: Candidate negatives. Strings are used directly; a list
            contributes its first element if that is a string; dicts and other
            lists are ignored.

    Returns:
        A list of ``(neg_text, score)`` tuples sorted by score (query/negative
        cosine similarity) in descending order, after cleaning, thresholding,
        the keep-one fallback and the top-K cut. Empty if no candidate
        survives cleaning.
    """
    # Unwrap and clean the candidates.
    usable = []
    for item in negatives_list:
        if isinstance(item, dict):
            continue
        if isinstance(item, list):
            if not item or not isinstance(item[0], str):
                continue
            item = item[0]
        if clean_negative_candidate(item, positive):
            usable.append(item)

    if not usable:
        return []

    # One batched encode call for query, positive and every candidate.
    # NOTE(review): E5-family models are documented to expect "query: " /
    # "passage: " input prefixes; none are added here — confirm intended.
    vectors = embedder.encode(
        [query, positive, *usable],
        convert_to_tensor=True,
        show_progress_bar=False,
    )
    query_vec, positive_vec = vectors[0], vectors[1]
    positive_score = cosine(query_vec, positive_vec)

    scored = sorted(
        ((text, float(cosine(query_vec, vec))) for text, vec in zip(usable, vectors[2:])),
        key=lambda pair: pair[1],
        reverse=True,
    )

    thresholds_disabled = not (use_relative_threshold or use_absolute_threshold)

    def passes(score):
        # With no threshold enabled every cleaned candidate is kept; otherwise
        # passing either enabled threshold is sufficient.
        if thresholds_disabled:
            return True
        if use_relative_threshold and score >= positive_score * SIM_SCORE_RATIO_THRESHOLD:
            return True
        if use_absolute_threshold and score >= ABS_SIM_THRESHOLD:
            return True
        return False

    kept = [pair for pair in scored if passes(pair[1])]

    # Nothing passed: optionally keep the single best-scoring candidate.
    if not kept and FALLBACK_KEEP_ONE:
        kept = [scored[0]]

    if KEEP_TOP_K and KEEP_TOP_K > 0:
        kept = kept[:KEEP_TOP_K]

    return kept

# -------------------------
# Process one language file
# -------------------------
def process_lang_file(lang):
    """Filter the generated negatives of one language file.

    Looks for ``mrtydi_neg_{lang}.jsonl`` in ``INPUT_DIR`` (falling back to
    ``mrtydi_neg_{lang}.json``), scores each row's negatives with
    ``filter_row_negatives`` and writes every row, extended with a
    ``"filtered_negatives_with_scores"`` list of ``{"neg", "score"}`` dicts, to
    ``{OUTPUT_DIR}/mrtydi_filtered_{lang}.jsonl``.

    Args:
        lang: Language code.

    Returns:
        The output path, or ``None`` if no input file was found.
    """
    # Prefer the .jsonl file; accept a .json file with the same stem otherwise.
    inpath = None
    for extension in ("jsonl", "json"):
        candidate = os.path.join(INPUT_DIR, f"mrtydi_neg_{lang}.{extension}")
        if os.path.exists(candidate):
            inpath = candidate
            break
    if inpath is None:
        print(f"[skip] no input found for {lang}")
        return

    rows = safe_load_json_or_jsonl(inpath, limit=MAX_ROWS_PER_LANG)
    outpath = os.path.join(OUTPUT_DIR, f"mrtydi_filtered_{lang}.jsonl")
    stats = {"total_rows": 0, "rows_with_filtered": 0, "total_negatives": 0, "kept_negatives": 0}

    with jsonlines.open(outpath, "w") as writer:
        for row in tqdm(rows, desc=f"Filtering {lang}", leave=False):
            stats["total_rows"] += 1
            query = (row.get("query") or row.get("query_text") or "").strip()
            positive = (row.get("positive_passage") or row.get("positive") or row.get("text") or "").strip()
            raw_negatives = row.get("negatives", [])

            # Normalise to a flat list of strings: plain strings are kept, a
            # nested list contributes its first string, everything else is dropped.
            if isinstance(raw_negatives, list):
                norm_negs = []
                for entry in raw_negatives:
                    if isinstance(entry, str):
                        norm_negs.append(entry)
                    elif isinstance(entry, list) and len(entry) and isinstance(entry[0], str):
                        norm_negs.append(entry[0])
            elif isinstance(raw_negatives, str):
                norm_negs = [raw_negatives]
            else:
                norm_negs = []

            stats["total_negatives"] += len(norm_negs)

            if query and positive and norm_negs:
                filtered = filter_row_negatives(query, positive, norm_negs)
                row["filtered_negatives_with_scores"] = [
                    {"neg": n, "score": float(s)} for n, s in filtered
                ]
                stats["rows_with_filtered"] += 1 if filtered else 0
                stats["kept_negatives"] += len(filtered)
            else:
                # Incomplete rows are still written, with an empty result list.
                row["filtered_negatives_with_scores"] = []

            writer.write(row)

    # Short per-language summary.
    print(f"Saved filtered -> {outpath} (rows: {stats['total_rows']} kept_negatives: {stats['kept_negatives']})")
    return outpath

# -------------------------
# Main
# -------------------------
def main():
    """Filter every language in LANGS; a failure in one does not stop the rest."""
    print("Adversarial filtering pipeline starting...")
    print("Embedder device:", DEVICE)
    for language_code in LANGS:
        try:
            process_lang_file(language_code)
        except Exception as error:
            # Report and move on to the next language.
            print(f"[error] {language_code}: {error}")


if __name__ == "__main__":
    main()


Loading embedder: intfloat/multilingual-e5-base -> device: cuda
Adversarial filtering pipeline starting...
Embedder device: cuda




Saved filtered -> /content/final_filtered/mrtydi_filtered_ar.jsonl (rows: 100 kept_negatives: 100)




Saved filtered -> /content/final_filtered/mrtydi_filtered_bn.jsonl (rows: 100 kept_negatives: 55)




Saved filtered -> /content/final_filtered/mrtydi_filtered_en.jsonl (rows: 66 kept_negatives: 66)




Saved filtered -> /content/final_filtered/mrtydi_filtered_fi.jsonl (rows: 100 kept_negatives: 100)




Saved filtered -> /content/final_filtered/mrtydi_filtered_id.jsonl (rows: 100 kept_negatives: 100)




Saved filtered -> /content/final_filtered/mrtydi_filtered_ja.jsonl (rows: 100 kept_negatives: 100)




Saved filtered -> /content/final_filtered/mrtydi_filtered_ko.jsonl (rows: 100 kept_negatives: 100)




Saved filtered -> /content/final_filtered/mrtydi_filtered_ru.jsonl (rows: 100 kept_negatives: 100)




Saved filtered -> /content/final_filtered/mrtydi_filtered_sw.jsonl (rows: 100 kept_negatives: 98)




Saved filtered -> /content/final_filtered/mrtydi_filtered_te.jsonl (rows: 100 kept_negatives: 100)


                                                               

Saved filtered -> /content/final_filtered/mrtydi_filtered_th.jsonl (rows: 100 kept_negatives: 100)




In [2]:
import argparse
import sys

def parse_args():
    """Parse the pipeline's command-line options.

    Unrecognised arguments are ignored rather than rejected, so the function
    also works when the process was started with extra arguments (as happens
    inside Jupyter).

    Returns:
        An ``argparse.Namespace`` with ``input_dir``, ``output_dir``, ``langs``,
        ``max_rows``, ``model_name``, ``keep_ratio``, ``top_k`` and ``device``.
    """
    option_specs = [
        ("--input_dir", {"type": str, "default": "/content"}),
        ("--output_dir", {"type": str, "default": "/content/out"}),
        ("--langs", {"nargs": "+", "default": ["ar", "en"]}),
        ("--max_rows", {"type": int, "default": 100}),
        ("--model_name", {"type": str, "default": "Qwen/Qwen2.5-3B-Instruct"}),
        ("--keep_ratio", {"type": float, "default": 0.8}),
        ("--top_k", {"type": int, "default": 1}),
        ("--device", {"type": str, "default": "cuda"}),
    ]

    cli = argparse.ArgumentParser()
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    # parse_known_args tolerates arguments this parser does not define.
    known, _extras = cli.parse_known_args()
    return known


In [7]:
%%writefile /content/run_filter.py
print("Filter script executed correctly!")


Writing /content/run_filter.py


In [8]:
!python3 /content/run_filter.py


Filter script executed correctly!


In [9]:
# select_top_negatives.py
import json, glob, os

# NOTE(review): the filtering step in this file writes mrtydi_filtered_{lang}.jsonl
# to "/content/final_filtered", while this step reads from "/content" — confirm
# the files are copied there, or point IN_DIR at the filter's output folder.
IN_DIR = "/content"   # folder with mrtydi_filtered_{lang}.jsonl
# Destination folder for the mrtydi_final_{lang}.jsonl files written below.
OUT_DIR = "/content/only1_neg"
# Create the destination up front; exist_ok makes re-running the cell harmless.
os.makedirs(OUT_DIR, exist_ok=True)
# Language codes to process; a language without an input file is skipped.
LANGS = ["ar","bn","en","fi","id","ja","ko","ru","sw","te","th"]

def pick_top(filtered_with_scores, keep_top=1):
    """Return the texts of the ``keep_top`` highest-scoring negatives.

    Args:
        filtered_with_scores: List of dicts with ``'neg'`` and ``'score'`` keys.
        keep_top: How many entries to keep.

    Returns:
        The ``'neg'`` values of the best entries, highest score first; an
        empty list if the input is empty or ``None``.
    """
    if not filtered_with_scores:
        return []
    ranked = list(filtered_with_scores)
    ranked.sort(key=lambda entry: entry['score'], reverse=True)
    best = ranked[:keep_top]
    return [entry['neg'] for entry in best]

# For every language: read the filtered file, replace each row's "negatives"
# with the single best-scoring filtered negative, and write the final file.
for lang in LANGS:
    inpath = os.path.join(IN_DIR, f"mrtydi_filtered_{lang}.jsonl")
    outpath = os.path.join(OUT_DIR, f"mrtydi_final_{lang}.jsonl")
    if not os.path.exists(inpath):
        print("skip", inpath)
        continue
    with open(inpath, 'r', encoding='utf-8') as fin, open(outpath, 'w', encoding='utf-8') as fout:
        for line in fin:
            # A stray blank line would make json.loads raise and abort the run.
            if not line.strip():
                continue
            j = json.loads(line)
            filtered = j.get('filtered_negatives_with_scores') or j.get('filtered_negatives') or []
            # if filtered has dicts with scores, pick top
            if filtered and isinstance(filtered[0], dict) and 'neg' in filtered[0]:
                negs = pick_top(filtered, keep_top=1)   # change keep_top=3 to keep 3
            else:
                # if just list of strings, pick first
                negs = filtered[:1]
            # Overwrites the generator's original candidate list.
            j['negatives'] = negs
            fout.write(json.dumps(j, ensure_ascii=False) + "\n")
    print("wrote", outpath)


wrote /content/only1_neg/mrtydi_final_ar.jsonl
wrote /content/only1_neg/mrtydi_final_bn.jsonl
wrote /content/only1_neg/mrtydi_final_en.jsonl
wrote /content/only1_neg/mrtydi_final_fi.jsonl
wrote /content/only1_neg/mrtydi_final_id.jsonl
wrote /content/only1_neg/mrtydi_final_ja.jsonl
wrote /content/only1_neg/mrtydi_final_ko.jsonl
wrote /content/only1_neg/mrtydi_final_ru.jsonl
wrote /content/only1_neg/mrtydi_final_sw.jsonl
wrote /content/only1_neg/mrtydi_final_te.jsonl
wrote /content/only1_neg/mrtydi_final_th.jsonl
