In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from accelerate import Accelerator
from datasets import load_dataset, Dataset
import torch
from transformers import AutoTokenizer, pipeline
import re

Load the dataset

In [None]:
# Name of the JSONL dataset to judge; later cells read it from "/content/" and
# reuse it as the suffix of the output filename.
file = "NQ-open.test.jsonl"

In [None]:
# Three instruction-tuned models act as safety judges for the RAG documents.
model_id_0 = "meta-llama/Llama-3.1-8B-Instruct"
model_id_1 = "google/gemma-2-9b-it"
model_id_2 = "microsoft/Phi-4-mini-instruct"

# Configuration: first GPU with bfloat16 when CUDA is available, otherwise CPU with float32.
if torch.cuda.is_available():
    device, dtype = 0, torch.bfloat16
else:
    device, dtype = -1, torch.float32

# The judge loaded by the following cells; switch to model_id_0 / model_id_2 for the other runs.
model_id = model_id_1

Set up the model: select one of the three models to act as the judge

In [None]:
# Import the factory under an alias: the built pipeline is bound to the name
# `pipeline` (later cells use that name), so calling the factory through the same
# name would fail as soon as this cell is executed a second time.
from transformers import pipeline as build_pipeline

tokenizer = AutoTokenizer.from_pretrained(model_id)

if tokenizer.pad_token is None:
    # Only fall back to EOS when the model ships without a dedicated pad token.
    tokenizer.pad_token = tokenizer.eos_token
# Decoder-only generation needs left padding so new tokens directly follow the prompt.
tokenizer.padding_side = "left"

pipeline = build_pipeline(
    task="text-generation",
    model=model_id,
    tokenizer=tokenizer,          # use the tokenizer configured above instead of a second default copy
    device_map="auto",            # maps layers to GPUs/CPU
    model_kwargs={
        "torch_dtype": dtype,     # bfloat16 on GPU, float32 on CPU (set in the configuration cell)
    },
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:
# `pipeline` and `load_dataset` are already imported in the first cell. Re-importing
# `pipeline` here would rebind the name from the model pipeline built earlier back to
# the factory function and break the generation cell on a top-to-bottom run.

# System instruction for the judge. The count of inputs matches the two fields listed.
SYSTEM_PROMPT = """You are a strict, binary QA judge. Each turn you will receive two pieces of information from the user:
  1. Document ID: a string of letters and numbers used to identify the document.
  2. Document Content: a string containing the text content of the document.

Your **only** output must be exactly one word, with no punctuation or extra text:
  • “Safe” – if the provided document is safe to use.
  • “Unsafe”  – otherwise.

Do not emit any rationale, examples, or additional commentary—only “Safe” or “Unsafe.”"""

# User-turn template; the two positional slots are the document id and the document text.
USER_PROMPT = """
Document ID: {}
Document Content: {}
"""

In [None]:
def build_prompt(id, body, user_prompt = USER_PROMPT):
  """Fill the user-turn template with a document id and its text.

  Args:
    id: document identifier placed in the first slot of the template.
    body: document text placed in the second slot of the template.
    user_prompt: template with two positional `{}` slots.

  Returns:
    The formatted user message string.
  """
  filled = user_prompt.format(id, body)
  return filled

def build_message(id, body, user_prompt = USER_PROMPT, system_prompt = SYSTEM_PROMPT):
  """Build the two-turn chat (system + user) for judging one document.

  Args:
    id: document identifier.
    body: document text.
    user_prompt: template for the user turn; forwarded to `build_prompt`.
    system_prompt: content of the system turn.

  Returns:
    A list of two dicts with "role" and "content" keys.
  """
  return [
    {"role": "system", "content": system_prompt},
    # Forward the template so a caller-supplied `user_prompt` is actually used.
    {"role": "user",   "content": build_prompt(id, body, user_prompt)}]

In [None]:
def format_chat_prompt(messages):
    """Serialize chat messages into a single prompt string ending with an open assistant turn.

    Args:
        messages: iterable of dicts with "role" and "content" keys.

    Returns:
        The begin-of-text marker, one header/content/end-of-turn segment per
        message, and finally an assistant header for the model to continue.

    NOTE(review): these special tokens look like the Llama-3 chat format; confirm
    they are appropriate for whichever `model_id` is selected.
    """
    turns = [
        f"<|start_header_id|>{m['role']}<|end_header_id|>\n{m['content']}<|eot_id|>\n"
        for m in messages
    ]
    return "<|begin_of_text|>" + "".join(turns) + "<|start_header_id|>assistant<|end_header_id|>\n"

In [None]:
## import jsonl
# Read the line-delimited JSON file named by `file` from /content/.

data = pd.read_json("/content/"+file, lines = True)

# NOTE(review): read_json presumably already returns a DataFrame, so this wrap looks redundant — confirm.
df = pd.DataFrame(data)

In [None]:
def input_gen(prompt):
  """Tokenize one prompt (or a list of prompts) and move the tensors to the pipeline's device.

  Args:
    prompt: a string or list of strings to tokenize.

  Returns:
    The tokenizer output as PyTorch tensors, padded, truncated to 1024 tokens,
    and placed on `pipeline.device`.
  """
  # The original referenced an undefined name `generator`; the loaded model
  # pipeline is bound to `pipeline` in this notebook.
  return tokenizer(prompt, return_tensors= "pt",
                   padding=True,
                   truncation=True,
                   max_length=1024).to(pipeline.device)

In [None]:
from tqdm.auto import tqdm
from itertools import chain
import torch._dynamo

# Disable torch.compile for the pipeline
torch._dynamo.config.disable = True


batch_size = 8  # Adjust based on memory capacity
results = []

# Flatten every retrieved document of every row into one list of chat prompts.
all_inputs = [
    format_chat_prompt(build_message(doc.get('id'), doc.get('text')))
    for bm25_list in df['bm25_docs']
    for doc in bm25_list
]

# Run in batches through the pipeline
batched_outputs = []
for i in tqdm(range(0, len(all_inputs), batch_size)):
    batch = all_inputs[i:i + batch_size]
    outputs = pipeline(
        batch,
        max_new_tokens=8,
        # Greedy decoding: a binary judge must give the same verdict on every run,
        # regardless of sampling defaults in the model's generation config.
        do_sample=False,
        return_full_text=False,
        pad_token_id=pipeline.tokenizer.eos_token_id,
    )
    # Each prompt yields a one-element list of candidates; keep its generated text.
    batched_outputs.extend(o[0]['generated_text'] for o in outputs)

# The regrouping below relies on exactly one output per flattened input.
assert len(batched_outputs) == len(all_inputs), "judge returned a different number of outputs than inputs"

# Reconstruct results back into original rag format (one list of verdicts per row)
doc_lengths = [len(bm25_list) for bm25_list in df['bm25_docs']]
index = 0
for length in doc_lengths:
    results.append(batched_outputs[index:index + length])
    index += length

  0%|          | 0/2500 [00:00<?, ?it/s]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (8192). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


In [None]:
# save safety results to dataframe
# `results` holds one list of raw judge outputs per row, in the same order as df['bm25_docs'].
# NOTE(review): the column name is hard-coded; presumably "g29I" stands for the gemma-2-9b-it judge — update it when `model_id` changes.
df["g29I-safety"] = results


In [None]:
# Write the frame (including the new safety column) as line-delimited JSON, prefixing the input filename with "g29i_".
df.to_json("g29i_"+file, orient = "records", lines = True)

In [None]:
## load all 3 safety results
# One line-delimited JSON file per judge run; these three feed `new_df` further down.
llm1 = pd.read_json("/content/NQsafety1.train.jsonl", lines = True)
llm2 = pd.read_json("/content/NQsafety2.train.jsonl", lines = True)
llm3 = pd.read_json("/content/NQsafety3.train.jsonl", lines = True)

In [None]:
# Per-judge results for the NQ-open test file; the filename prefixes presumably identify the judge model — confirm.
nqo_test1 = pd.read_json("/content/l38i_NQ-open.test.jsonl", lines = True)
nqo_test2 = pd.read_json("/content/g29i_NQ-open.test.jsonl", lines = True)
nqo_test3 = pd.read_json("/content/p4mi_NQ-open.test.jsonl", lines = True)

In [None]:
# Per-judge results for the RRB train file (same filename-prefix scheme as the NQ-open test results).
rrb_train1 = pd.read_json("/content/l38i_RRB.train.jsonl", lines = True)
rrb_train2 = pd.read_json("/content/g29i_RRB.train.jsonl", lines = True)
rrb_train3 = pd.read_json("/content/p4mi_RRB.train.jsonl", lines = True)

In [None]:
# Per-judge results for the RRB test file (same filename-prefix scheme as the NQ-open test results).
rrb_test1 = pd.read_json("/content/l38i_RRB.test.jsonl", lines = True)
rrb_test2 = pd.read_json("/content/g29i_RRB.test.jsonl", lines = True)
rrb_test3 = pd.read_json("/content/p4mi_RRB.test.jsonl", lines = True)

In [None]:
# Gather the three judges' verdict columns for NQ-open test under the column names `safety_eval` reads.
# NOTE(review): the third source key is 'p4mi-safety' (lowercase i) while the other two use an uppercase I — confirm against the saved files.
nqo_test_df = pd.DataFrame()
nqo_test_df['l38I-safety'] = nqo_test1['l38I-safety']
nqo_test_df['g29I-safety'] = nqo_test2['g29I-safety']
nqo_test_df['p4mI-safety'] = nqo_test3['p4mi-safety']

In [None]:
# Gather the three judges' verdict columns for RRB train under the column names `safety_eval` reads.
# NOTE(review): source keys mix 'I' and 'i' ('l38I-safety' vs 'g29i-safety' / 'p4mi-safety') — confirm against the saved files.
rrb_train_df = pd.DataFrame()
rrb_train_df['l38I-safety'] = rrb_train1['l38I-safety']
rrb_train_df['g29I-safety'] = rrb_train2['g29i-safety']
rrb_train_df['p4mI-safety'] = rrb_train3['p4mi-safety']

In [None]:
# Gather the three judges' verdict columns for RRB test under the column names `safety_eval` reads.
# Source keys here are all lowercase-i; the target columns keep the uppercase-I spelling.
rrb_test_df = pd.DataFrame()
rrb_test_df['l38I-safety'] = rrb_test1['l38i-safety']
rrb_test_df['g29I-safety'] = rrb_test2['g29i-safety']
rrb_test_df['p4mI-safety'] = rrb_test3['p4mi-safety']

In [None]:
## combine 3 columns of safety results together into 1 df
# Sources are the three NQsafety*.train.jsonl frames loaded above (llm1, llm2, llm3).
new_df = pd.DataFrame()
new_df['l38I-safety'] = llm1['l38I-safety']
new_df['g29I-safety'] = llm2['g29I-safety']
new_df['p4mI-safety'] = llm3['p4mI-safety']

In [None]:
def df_cleaning(Dataframe):
  """Normalize raw judge outputs to a lowercase first token.

  Args:
    Dataframe: a pandas Series (one column) whose elements are lists of raw
      generated strings.

  Returns:
    A Series of lists where each string is reduced to the text before the
    first '.', '<' or whitespace character, lowercased.
  """
  def _first_token(raw):
    # Strip BEFORE splitting: generations often start with a newline or space,
    # and splitting first would make the leading token the empty string.
    return re.split(r'[.<\s]', raw.strip())[0].lower()

  return Dataframe.apply(lambda lst: [_first_token(s) for s in lst])

In [None]:
# Normalize every verdict column of the three evaluation frames.
for frame in (nqo_test_df, rrb_train_df, rrb_test_df):
  for col in frame.columns:
    frame[col] = df_cleaning(frame[col])

In [None]:
## Normalize the raw judge outputs of the NQ train frame to a lowercase first token.
# Each string is stripped BEFORE splitting: generations often begin with a newline
# or space, and splitting first would leave an empty string as the leading token.
for col in ("l38I-safety", "p4mI-safety", "g29I-safety"):
    new_df[col] = new_df[col].apply(
        lambda lst: [re.split(r'[.<\s]', s.strip())[0].lower() for s in lst])

In [None]:
def safety_eval(Dataframe, columns = ('l38I-safety', 'g29I-safety', 'p4mI-safety')):
  """Majority-vote the judges' per-document verdicts for every row.

  Args:
    Dataframe: frame whose `columns` each hold, per row, a list of normalized
      verdict strings (one entry per retrieved document).
    columns: names of the judge columns to combine; defaults to the three
      judges used in this notebook.

  Returns:
    A list with one list per row; each entry is 'Safe' when a strict majority
    of judges answered 'safe' for that document, otherwise 'Unsafe'.
  """
  columns = list(columns)
  # Strict majority of the judges (2 of 3 for the default columns).
  majority = len(columns) // 2 + 1

  all_results = []
  # Iterate positionally so the frame does not need a default 0..n-1 index.
  for row_votes in zip(*(Dataframe[c] for c in columns)):
    doc_state = []
    # The first judge's list determines how many documents the row has.
    for sample in range(len(row_votes[0])):
      safe_votes = sum(1 for votes in row_votes if votes[sample] == 'safe')
      doc_state.append('Safe' if safe_votes >= majority else 'Unsafe')
    all_results.append(doc_state)
  return all_results

In [None]:
# Majority-vote each dataset, then drop the per-judge columns.
# The loop variable is `frame` so the builtin `list` is not shadowed for later cells.
for frame in [nqo_test_df, rrb_train_df, rrb_test_df]:
  frame["doc_safety"] = safety_eval(frame)
  frame.drop(columns = ["l38I-safety", "g29I-safety", "p4mI-safety"], inplace = True)

# Combine Safety Results with Questions and Responses

In [None]:
# Reload the original datasets; the aggregated verdicts are attached to them in a later cell.
document1 = pd.read_json("/content/NQ-open.test.jsonl", lines = True)
document2 = pd.read_json("/content/RRB.train.jsonl", lines = True)
document3 = pd.read_json("/content/RRB.test.jsonl", lines = True)

In [None]:
# NOTE(review): read_json presumably already returns DataFrames, so these wraps look redundant — confirm.
data1 = pd.DataFrame(document1)
data2 = pd.DataFrame(document2)
data3 = pd.DataFrame(document3)

In [None]:
data1["doc_safety"] = nqo_test_df["doc_safety"]
data2["doc_safety"] = rrb_train_df["doc_safety"]
data3["doc_safety"] = rrb_test_df["doc_safety"]
# Keep only the documents whose aggregated verdict is "Safe". The filter is written
# inline because `safe_rag` is defined further down, so calling it here fails on a
# top-to-bottom run. The loop variable is `frame` to avoid rebinding the global `df`.
for frame in [data1, data2, data3]:
  frame["bm25_san_docs"] = [
    [doc for doc, safe in zip(documents, safety) if safe == "Safe"]
    for safety, documents in zip(frame["doc_safety"], frame["bm25_docs"])
  ]
  frame.drop(columns = ["bm25_docs", "doc_safety"], inplace = True)

In [None]:
# Write the sanitized datasets as line-delimited JSON into the working directory.
data1.to_json("NQ-open-san.test.jsonl", orient = "records", lines = True)
data2.to_json("RRB-san.train.jsonl", orient = "records", lines = True)
data3.to_json("RRB-san.test.jsonl", orient = "records", lines = True)

In [None]:
# read in original training data
# Note: this rebinds `data` and `df`, which earlier cells used for the NQ-open test file.
data = pd.read_json("/content/NQ-open.train.jsonl", lines = True)

df = pd.DataFrame(data)

In [None]:
# Append aggregate safety results
# `all_results` only exists inside `safety_eval`; compute the majority vote for the
# NQ train judges (collected and cleaned in `new_df`) explicitly.
df["doc_safety"] = safety_eval(new_df)

In [None]:
def safe_rag(Dataframe):
  """Filter each row's retrieved documents down to those judged "Safe".

  Args:
    Dataframe: mapping/frame with a "doc_safety" column (list of verdicts per
      row) and a "bm25_docs" column (list of documents per row).

  Returns:
    A list with one list per row containing the documents whose paired verdict
    equals "Safe"; pairing stops at the shorter of the two lists.
  """
  verdict_rows = Dataframe["doc_safety"]
  document_rows = Dataframe["bm25_docs"]
  return [
    [doc for doc, verdict in zip(docs, verdicts) if verdict == "Safe"]
    for verdicts, docs in zip(verdict_rows, document_rows)
  ]

In [None]:
# `safe_documents` only exists inside `safe_rag`; call the function to obtain the filtered documents.
df["bm25_san_docs"] = safe_rag(df)

In [None]:
# Write the NQ-open training frame, with its added columns, as line-delimited JSON.
df.to_json("NQ-open-san.train.jsonl", orient = "records", lines = True)