# Setup

In [None]:
import os
# Set environment variables.
# NOTE(review): no environment variables are assigned in this cell as shown;
# the message below is printed unconditionally.
print("Environment variables set successfully!")

In [None]:
import os
import numpy as np
from collections import Counter
import random

from transformers import  AutoTokenizer
from datasets import load_dataset, Dataset
from utils import process_dataset, MODELS, apply_chat_template

# Model checkpoints to process, grouped by family. Larger variants are kept
# as comments so they can be switched back on by editing the lists.
# CURRENT_MODELS = ["Qwen/Qwen3-8B",  "meta-llama/Llama-3.1-8B-Instruct", "Qwen/Qwen3-14B"]
QWEN_MODEL_NAMES = [
    "Qwen/Qwen3-8B",
    # "Qwen/Qwen3-32B",
]
LLAMA_MODEL_NAMES = [
    "meta-llama/Llama-3.1-8B-Instruct",
    # "meta-llama/Llama-3.1-70B-Instruct",
]
# NOTE: this rebinds the MODELS name that is also imported from utils above.
MODELS = [*QWEN_MODEL_NAMES, *LLAMA_MODEL_NAMES]

# Tasks to process. Full list: ["task_elections", "task_sales", "task_sm"];
# only the elections task is currently enabled.
TASKS = ["task_elections"]
SPLITS = ["train"]

# Prepare Second Stage Data

In [None]:
# Task datasets to load and the splits read for each of them.
ds_names = ["task_elections", "task_sales", "task_sm"]
splits = ["train", "test"]

# datasets[ds_name][split] holds the rows read from data/<ds_name>/<split>.json.
datasets = {
    ds_name: {
        split: load_dataset("json", data_files=f"data/{ds_name}/{split}.json")["train"]
        for split in splits
    }
    for ds_name in ds_names
}

In [None]:
def get_rft(dataset):
    """Keep the prompt/completion of the candidate that won each record's vote.

    Every record must provide ``voter_votes`` (an iterable of votes, of which
    only the values 0 and 1 are counted) and may provide ``prompt`` and
    ``completion`` fields that are indexable by candidate index.

    Args:
        dataset: Iterable of mapping-like records.

    Returns:
        A list of dicts holding the winning candidate's ``prompt`` and
        ``completion`` (whichever of the two keys the record has), one per
        record with a strict majority. Records whose votes are tied are
        skipped.
    """
    rft_dataset = []
    for record in dataset:
        vote_count = Counter(record["voter_votes"])
        margin = vote_count[0] - vote_count[1]
        if margin == 0:
            # Tie: there is no winner. Skipping avoids appending an undefined
            # entry (first record) or a stale copy of the previous record's
            # entry (later records).
            continue
        winner = 0 if margin > 0 else 1
        rft_dataset.append(
            {k: v[winner] for k, v in record.items() if k in ("prompt", "completion")}
        )
    return rft_dataset

In [None]:
# Formatting instruction text that asks for the reply to be wrapped in
# <think> ... </think> tags.
XML_INSTRUCTION_THINK = "".join(
    (
        "You are a helpful assistant. ",
        "When you reply, enclose your thoughts inside <think> … </think>. Do not output anything else.",
    )
)

# Task Prompts
def process_task_elections(s, candidates, tokenizer):
    """Build the chat-formatted prompt for the elections task.

    Args:
        s: Record providing ``"candidate_webname"`` and ``"biography_text"``.
        candidates: Iterable of campaign-statement strings; each one is
            stripped and listed as a numbered option.
        tokenizer: Passed through unchanged to ``apply_chat_template``.

    Returns:
        The result of ``apply_chat_template(tokenizer, prompt_content)``.
    """
    candidate_webname = s["candidate_webname"]
    biography_text = s["biography_text"]

    candidates_content = "\n\n".join(
        f"## Option {number} (A or B): {statement.strip()}"
        for number, statement in enumerate(candidates, start=1)
    )

    # NOTE(review): the instruction sentences below are adjacent string
    # literals with no separating space, so they run together in the prompt
    # ("...statements.You are..."). Left byte-identical on purpose: confirm
    # whether this wording must match prompts used elsewhere before changing.
    prompt_content = (
        "# Instructions\n"
        "Carefully review the candidate profile and the candidate's campaign statements."
        "You are participating in an online election and you read the two campaign statements."
        "You will eventually choose one of them."
        "Considering your character, express your thoughts about the campaign statements.\n\n"
        f"# Candidate Name\n{candidate_webname}\n\n"
        f"# Candidate Bio\n{biography_text}\n\n"
        f"# Campaign Statements\n{candidates_content}\n\n"
        f"# Formatting Instructions\n{XML_INSTRUCTION_THINK}\n"
    )
    return apply_chat_template(tokenizer, prompt_content)

def process_task_sales(s, candidates, tokenizer):
    """Build the chat-formatted prompt for the sales task.

    Args:
        s: Record providing ``"categories"`` (an iterable of strings),
            ``"title"`` and ``"description"``.
        candidates: Iterable of sales-pitch strings; each one is stripped and
            listed as a numbered option.
        tokenizer: Passed through unchanged to ``apply_chat_template``.

    Returns:
        The result of ``apply_chat_template(tokenizer, prompt_content)``.
    """
    categories = s["categories"]
    title = s["title"]
    description = s["description"]

    candidates_content = "\n\n".join(
        f"## Option {number} (A or B): {pitch.strip()}"
        for number, pitch in enumerate(candidates, start=1)
    )
    # Joined outside the f-string: reusing the enclosing quote character
    # inside a replacement field is a SyntaxError before Python 3.12.
    categories_text = ", ".join(categories)

    # NOTE(review): most instruction sentences below are adjacent string
    # literals with no separating space, so they run together in the prompt.
    # Left byte-identical on purpose: confirm whether this wording must match
    # prompts used elsewhere before changing.
    prompt_content = (
        "# Instructions\n"
        "Carefully review the product information and the product's sales pitches."
        "You are shopping at an online store and come across the two sales pitches. "
        "You will eventually choose one of them."
        "Considering your character, express your thoughts about the sales pitches.\n\n"
        f"# Product Title\n{title}\n\n"
        f"# Product Categories\n{categories_text}\n\n"
        f"# Product Description\n{description}\n\n"
        f"# Sales Pitches\n{candidates_content}\n\n"
        f"# Formatting Instructions\n{XML_INSTRUCTION_THINK}\n"
    )
    return apply_chat_template(tokenizer, prompt_content)

def process_task_sm(s, candidates, tokenizer):
    """Build the chat-formatted prompt for the social-media task.

    Args:
        s: Record providing ``"article"``.
        candidates: Iterable of social-media post strings; each one is
            stripped and listed as a numbered option.
        tokenizer: Passed through unchanged to ``apply_chat_template``.

    Returns:
        The result of ``apply_chat_template(tokenizer, prompt_content)``.
    """
    news_article = s["article"]

    option_blocks = [
        f"## Option {number} (A or B): {post.strip()}"
        for number, post in enumerate(candidates, start=1)
    ]
    posts_section = "\n\n".join(option_blocks)

    # The pieces are concatenated exactly as written, with no separator.
    prompt_parts = [
        "# Instructions\n",
        "Carefully review the news article and the social media posts related to the article.",
        "You are scrolling through your social media feed and see the two posts. ",
        "You will eventually choose one of them.",
        "Considering your character, express your thoughts about the social media posts.\n\n",
        f"# Article\n{news_article}\n\n",
        f"# Social Media Posts\n{posts_section}\n\n",
        f"# Formatting Instructions\n{XML_INSTRUCTION_THINK}\n",
    ]
    prompt_content = "".join(prompt_parts)
    return apply_chat_template(tokenizer, prompt_content)

def process_candidates(s, candidates, tokenizer, task):
    """Dispatch to the prompt builder that matches ``task``.

    Args:
        s: Source record forwarded to the task-specific builder.
        candidates: Candidate texts forwarded to the task-specific builder.
        tokenizer: Tokenizer forwarded to the task-specific builder.
        task: One of ``"task_elections"``, ``"task_sales"`` or ``"task_sm"``.

    Returns:
        Whatever the selected ``process_task_*`` function returns.

    Raises:
        NotImplementedError: If ``task`` is not one of the supported names.
    """
    if task == "task_elections":
        return process_task_elections(s, candidates, tokenizer)
    if task == "task_sales":
        return process_task_sales(s, candidates, tokenizer)
    if task == "task_sm":
        return process_task_sm(s, candidates, tokenizer)
    # Unknown task name: fail loudly rather than build a wrong prompt.
    raise NotImplementedError(f"Dataset processing not implemented for: {task}")


def get_tfb(dataset, dataset_base, Tokenizer, task):
    """Pair each record's prompt with every non-empty voter thought.

    Args:
        dataset: Records providing ``"player_candidates"`` and
            ``"voter_thinks"``.
        dataset_base: Records indexed in step with ``dataset``; the entry at
            the same position is used to build the prompt.
        Tokenizer: Tokenizer forwarded to ``process_candidates``.
        task: Task name forwarded to ``process_candidates``.

    Returns:
        A list of ``{"prompt": ..., "completion": ...}`` dicts, where each
        completion is a stripped thought wrapped as ``<think> ... </think>``.
    """
    examples = []

    for position, record in enumerate(dataset):
        candidate_texts = record["player_candidates"]  # feature
        voter_thoughts = record["voter_thinks"]  # target

        base_record = dataset_base[position]
        prompt = process_candidates(base_record, candidate_texts, Tokenizer, task)

        # Falsy thoughts (e.g. None or "") are dropped; the rest share one prompt.
        examples.extend(
            {"prompt": prompt, "completion": f"<think> {thought.strip()} </think>"}
            for thought in voter_thoughts
            if thought
        )

    return examples

# In comparing these two options, what is a potential response a user can give?

In [None]:
# Build and write the second-stage training files (RFT and TFB) for every
# (task, split, model) combination.
# NOTE: ds_names and splits are reassigned here, but the loops below iterate
# over TASKS, SPLITS and MODELS instead.
ds_names = ["task_elections", "task_sales" , "task_sm"] 
splits = ["train" , "test"]

# datasets = {ds_name: {split: load_dataset("json", data_files=f"data/{ds_name}/{split}.json")['train'] for split in splits} for ds_name in ds_names}
# from utils2 import get_rft, get_tfb

results_path_root = "data"
# NOTE(review): ds_paths is initialized but never used in this cell.
ds_paths = {}

for task in TASKS:
    for split in SPLITS:
        for model_name in MODELS:
            # First-stage results are read from
            # <results_path_root>/<task>/<model_name>/<split>_step1.json.
            results_path  = os.path.join(results_path_root,  task, model_name,  f"{split}_step1.json")
            dataset = load_dataset("json", data_files=results_path)["train"]
            Tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            # Base records for the same task/split, taken from the `datasets`
            # mapping; get_tfb indexes them in step with `dataset`.
            dataset_base = datasets[task][split]

            ### RFT ###
            rft_ds_list = get_rft(dataset)
            # Repeat every RFT example three times, then shuffle in place.
            # NOTE(review): no random seed is set in this cell, so the order
            # is presumably different on each run - confirm if that matters.
            rft_ds_list = rft_ds_list * 3
            random.shuffle(rft_ds_list)
            rft_ds = Dataset.from_list(rft_ds_list)

            rft_path  = os.path.join(results_path_root,  task, model_name,  f"{split}_rft.json")
            # Remove any previous output before writing the new file.
            if os.path.exists(rft_path):
                os.remove(rft_path)
            rft_ds.to_json(rft_path)

            ### TFB ###
            tfb_ds = get_tfb(dataset, dataset_base, Tokenizer, task=task)
            random.shuffle(tfb_ds)
            # Keep at most as many TFB examples as there are (tripled) RFT
            # examples, then append the RFT examples after them.
            tfb_ds = tfb_ds[:len(rft_ds_list)] + rft_ds_list
            tfb_ds = Dataset.from_list(tfb_ds)

            tfb_path  = os.path.join(results_path_root,  task, model_name,  f"{split}_tfb.json")
            # Remove any previous output before writing the new file.
            if os.path.exists(tfb_path):
                os.remove(tfb_path)
            tfb_ds.to_json(tfb_path)