In [None]:
"""Run inference with fine tuned bert-based model"""
import pandas as pd
import json
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm

# Checkpoint identifier handed to both `from_pretrained` calls below.
model_path = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"

# Run on the GPU whenever torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
# Load the classifier, move it to the chosen device and switch it to eval mode.
model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device).eval()

# Maps the stringified predicted class index to the label written to the CSV.
with open("label_mapping.json") as mapping_file:
    label_map = json.loads(mapping_file.read())

def predict_label(claim, abstract):
    """Classify one (claim, abstract) pair with the sequence-classification model.

    Args:
        claim: Claim text.
        abstract: Abstract text. A missing value (None/NaN) is treated as an
            empty string instead of raising.

    Returns:
        The entry of ``label_map`` keyed by the stringified argmax class index.

    Raises:
        KeyError: If the predicted class index has no entry in ``label_map``.
    """
    # The corpus may contain missing abstracts; encode them as empty text.
    if pd.isna(abstract):
        abstract = ""

    # Encode as a real text pair so the tokenizer inserts the separator tokens
    # itself and truncates the pair jointly. No padding is requested: a single
    # sequence gains nothing from being padded out to 512 tokens.
    # NOTE(review): the pair is passed as (claim, abstract), matching the
    # original concatenation order -- confirm this is the premise/hypothesis
    # order the checkpoint expects.
    inputs = tokenizer(
        str(claim),
        str(abstract),
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        logits = model(**inputs).logits
    prediction = torch.argmax(logits, dim=1).item()

    return label_map[str(prediction)]

# Pairs to label, plus the two tables that hold the claim and abstract texts.
predictions_df = pd.read_csv("./predictions_best.csv")
test_df = pd.read_parquet("test-00000-of-00001.parquet")
abstracts_df = pd.read_parquet("climatecheck_publications_corpus.parquet")

# id -> text lookups used inside the loop.
claim_map = test_df.set_index("claim_id")["claim"].to_dict()
abstract_map = abstracts_df.set_index("abstract_id")["abstract"].to_dict()

import time
start_time = time.time()

# One model call per (claim_id, abstract_id) row, in file order.
labels = []
progress = tqdm(predictions_df.iterrows(), total=len(predictions_df), desc="Predicting labels")
for _, pair in progress:
    claim_text = claim_map[pair["claim_id"]]
    abstract_text = abstract_map[pair["abstract_id"]]
    labels.append(predict_label(claim_text, abstract_text))

end_time = time.time()
time_delta_seconds = end_time - start_time
print(f"Time delta (seconds): {time_delta_seconds}")

# Persist the wall-clock timing of the labelling loop.
filename = "time_log_deberta.txt"
with open(filename, "w") as log_file:
    log_file.writelines([
        f"Start Time: {start_time}\n",
        f"End Time: {end_time}\n",
        f"Time Delta (seconds): {time_delta_seconds}\n",
    ])

# Attach the labels to the input pairs and write the result.
predictions_df["label"] = labels
predictions_df.to_csv("predictions_new.csv", index=False)
print("Saved predictions with labels to predictions_new.csv")


In [None]:
"""Run inference with Phi 4 model"""
import pandas as pd
import json
import torch
from tqdm import tqdm
from vllm import LLM, SamplingParams
import time

# Model handed to vLLM. The commented-out lines are alternative checkpoints
# (a mini instruct variant and a local path) that can be swapped in.
model_path = "microsoft/phi-4"
#model_path = "microsoft/Phi-4-mini-instruct"
#model_path = "./models/phi4-1e"
# Build the vLLM engine for the selected model.
llm = LLM(model=model_path)
# Limit each completion to 256 generated tokens.
# NOTE(review): no temperature or seed is passed, so decoding uses vLLM's
# defaults -- confirm whether non-deterministic sampling is intended here.
sampling_params = SamplingParams(max_tokens=256)

def build_prompt(claim, abstract):
    """Build the raw chat prompt asking the model for a fact-checking verdict.

    The prompt is assembled by hand with the ``<|im_start|>`` / ``<|im_sep|>`` /
    ``<|im_end|>`` markers: a system turn with the instructions, a user turn
    holding the claim and abstract, and an opened assistant turn.

    Args:
        claim: Claim text to verify.
        abstract: Abstract text to check the claim against.

    Returns:
        The full prompt string, ending with the opened assistant turn.
    """
    return (
        "<|im_start|>system<|im_sep|>You are a professional fact checker."
        # Leading space: adjacent literals are concatenated verbatim, so without
        # it the two sentences run together as "checker.You".
        " You get a claim and an abstract of a scientific paper. Assess if the claim is supported or refuted by the abstract! Return only your verdict! Either 'Supports', 'Refutes' or 'Not Enough Information'. <|im_end|>"
        f"<|im_start|>user<|im_sep|>The claim: {claim}\nThe abstract: {abstract}\nYour verdict: <|im_end|>"
        "<|im_start|>assistant<|im_sep|>"
    )

# NOTE(review): `df`, `abstracts`, `batch_size` and `device` do not appear to be
# referenced by the rest of this cell; the same parquet file is read again
# further down as `abstracts_df`. Confirm nothing else relies on these names
# before removing them.
df = pd.read_parquet("climatecheck_publications_corpus.parquet")

# All abstracts as plain strings, with missing values replaced by "".
abstracts = df["abstract"].fillna("").astype(str).tolist()
batch_size = 1

# The vLLM engine above is created without this handle.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def predict_label(pred):
    """Translate a raw model completion into one of the three verdict labels.

    Matching is case-insensitive and substring based. When several keywords
    occur, the precedence is "not enough" > "refute" > "support"; text with
    none of them yields "Not Enough Information". The raw completion is
    echoed to stdout first.
    """
    print(pred)
    lowered = pred.lower()
    # Highest-precedence keyword first, so each branch can return immediately.
    if "not enough" in lowered:
        return "Not Enough Information"
    if "refute" in lowered:
        return "Refutes"
    if "support" in lowered:
        return "Supports"
    return "Not Enough Information"

# Pairs to label, plus the two tables that hold the claim and abstract texts.
predictions_df = pd.read_csv("./predictions_best.csv")
test_df = pd.read_parquet("test-00000-of-00001.parquet")
abstracts_df = pd.read_parquet("climatecheck_publications_corpus.parquet")

# id -> text lookups used inside the loop.
claim_map = test_df.set_index("claim_id")["claim"].to_dict()
abstract_map = abstracts_df.set_index("abstract_id")["abstract"].to_dict()

all_labels = []
start_time = time.time()
# One generate call per row: the prompts are deliberately submitted one at a
# time, and the elapsed time of this loop is what gets logged below.
for _, row in tqdm(predictions_df.iterrows(), total=len(predictions_df), desc="Predicting labels"):
    claim = claim_map[row["claim_id"]]
    abstract = abstract_map[row["abstract_id"]]
    prompt = build_prompt(claim, abstract)
    output = llm.generate([prompt], sampling_params)
    # First (only) request, first completion of that request.
    label = predict_label(output[0].outputs[0].text.strip())
    all_labels.append(label)

end_time = time.time()
time_delta_seconds = end_time - start_time
print(f"Time delta (seconds): {time_delta_seconds}")
filename = "time_log_phi.txt"
with open(filename, "w") as f:
    f.write(f"Start Time: {start_time}\n")
    f.write(f"End Time: {end_time}\n")
    f.write(f"Time Delta (seconds): {time_delta_seconds}\n")

predictions_df["label"] = all_labels
f_name = "predictions_new_phi4_2.csv"
predictions_df.to_csv(f_name, index=False)
# f_name already carries the ".csv" extension; report the real path.
print(f"Saved predictions with labels to {f_name}")


In [None]:
"""Run inference with Qwen3 models"""
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import time

# Checkpoint to evaluate; the commented-out lines are alternatives to swap in.
#model_name = "Qwen/Qwen3-14B"
#model_name = "Qwen/Qwen3-8B"
model_name = "Qwen/Qwen3-1.7B"

# Left padding so that, in a padded batch, every prompt ends at the same
# position and generated tokens start right after it.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# With reasoning disabled, every prompt is prefixed with the "/no_think " tag.
reasoning = False
no_think_tag = "" if reasoning else "/no_think "

def build_prompt(claim, abstract):
    """Compose the user message asking for a verdict on a claim/abstract pair.

    The message starts with the module-level ``no_think_tag`` prefix, followed
    by the fixed instructions and then the claim and abstract, ending with
    "Your verdict:".
    """
    instructions = " ".join([
        "You are a professional fact checker.",
        "You get a claim and an abstract of a scientific paper.",
        "Assess if the claim is supported or refuted by the abstract!",
        "Return only your verdict! Either 'Supports', 'Refutes' or 'Not Enough Information'.",
    ])
    evidence = f"The claim: {claim}\nThe abstract: {abstract}\nYour verdict:"
    return no_think_tag + instructions + "\n\n" + evidence

def predict_label(pred):
    """Translate a raw model completion into one of the three verdict labels.

    Matching is case-insensitive and substring based. "not enough" is tested
    first because a "Not Enough Information" answer commonly also contains
    "support" or "refute" (e.g. "not enough information to support the
    claim"); testing those first would mislabel it. "refute" then takes
    precedence over "support", the same precedence the Phi-4 cell's parser
    applies.

    Args:
        pred: Decoded model output.

    Returns:
        "Supports", "Refutes" or "Not Enough Information"; the latter is also
        the fallback when no keyword is found.
    """
    pred = pred.lower()
    if "not enough" in pred:
        return "Not Enough Information"
    if "refute" in pred:
        return "Refutes"
    if "support" in pred:
        return "Supports"
    return "Not Enough Information"

# Pairs to label, plus the two tables that hold the claim and abstract texts.
predictions_df = pd.read_csv("./predictions_best.csv")
test_df = pd.read_parquet("test-00000-of-00001.parquet")
abstracts_df = pd.read_parquet("climatecheck_publications_corpus.parquet")

# id -> text lookups used inside the loop.
claim_map = test_df.set_index("claim_id")["claim"].to_dict()
abstract_map = abstracts_df.set_index("abstract_id")["abstract"].to_dict()

# Model ids contain "/", which would be treated as a directory separator when
# used in an output filename; flatten it.
safe_model_name = model_name.replace("/", "_")

batch_size = 1


def _label_batch(batch_prompts):
    """Generate one completion per prompt and return the parsed verdict labels.

    Any reasoning segment is dropped: only the text after the last
    ``</think>`` token (or the whole completion when there is none) is passed
    to ``predict_label``.
    """
    with torch.no_grad():
        inputs = tokenizer(batch_prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=512
        )

    think_end_id = tokenizer.convert_tokens_to_ids("</think>")
    # Every row of the padded batch has the same prompt length, so generated
    # tokens start at the same offset in each output sequence.
    prompt_len = inputs["input_ids"].shape[1]

    batch_labels = []
    for sequence in outputs:
        output_ids = sequence[prompt_len:].tolist()
        try:
            # Position just past the last "</think>" token.
            index = len(output_ids) - output_ids[::-1].index(think_end_id)
        except ValueError:
            index = 0
        content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
        batch_labels.append(predict_label(content))

    torch.cuda.empty_cache()
    return batch_labels


all_labels = []
prompts = []

#torch.cuda.empty_cache()

start_time = time.time()

for _, row in tqdm(predictions_df.iterrows(), total=len(predictions_df), desc="Preparing prompts"):
    claim = claim_map[row["claim_id"]]
    abstract = abstract_map[row["abstract_id"]]
    prompt = build_prompt(claim, abstract)
    messages = [{"role": "user", "content": prompt}]
    # The template stays in thinking mode; reasoning is switched off through
    # the no_think_tag prefix that build_prompt puts in front of the message.
    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=True
    )
    prompts.append(chat_text)

    # Flush as soon as a full batch has been collected.
    if len(prompts) == batch_size:
        all_labels.extend(_label_batch(prompts))
        prompts.clear()

# Trailing partial batch (only possible when batch_size > 1).
if prompts:
    all_labels.extend(_label_batch(prompts))
    prompts.clear()

end_time = time.time()
time_delta_seconds = end_time - start_time
print(f"Time delta (seconds): {time_delta_seconds}")
filename = f"{safe_model_name}_time_log.txt"
with open(filename, "w") as f:
    f.write(f"Start Time: {start_time}\n")
    f.write(f"End Time: {end_time}\n")
    f.write(f"Time Delta (seconds): {time_delta_seconds}\n")

predictions_df["label"] = all_labels
output_file = f"predictions_{safe_model_name}.csv"
predictions_df.to_csv(output_file, index=False)
print(f"Saved predictions with labels to {output_file}")
