### Installation

In [None]:
%%capture
# Install Unsloth and vLLM. Colab is detected by checking whether any
# environment variable name contains "COLAB_".
import os
if "COLAB_" not in "".join(os.environ.keys()):
    # Not running in Colab: regular install.
    !pip install unsloth vllm
else:
    # [NOTE] Do the below ONLY in Colab! Everywhere else use [[pip install unsloth vllm]]
    # --no-deps installs only the two named packages and leaves the
    # already-installed dependencies untouched.
    !pip install --no-deps unsloth vllm

In [None]:
#@title Colab Extra Install { display-mode: "form" }
%%capture
# Same Colab detection as the install cell: look for "COLAB_" in the names of
# the environment variables.
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth vllm
else:
    !pip install --no-deps unsloth vllm
    # [NOTE] Do the below ONLY in Colab! Everywhere else use [[pip install unsloth vllm]]
    # Skip restarting message in Colab: drop every already-imported module whose
    # name contains "PIL" or "google" from sys.modules before reinstalling.
    import sys, re, requests; modules = list(sys.modules.keys())
    for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None
    # Pinned versions (xformers, trl) are installed without their dependencies.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer

    # vLLM requirements - vLLM breaks Colab due to reinstalling numpy
    # Download vLLM's common requirements file, remove the lines that start a
    # match for transformers / numpy / xformers, and install what is left.
    # NOTE(review): the file is fetched from the main branch, so the set of
    # installed packages can change between runs.
    f = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/heads/main/requirements/common.txt").content
    with open("vllm_requirements.txt", "wb") as file:
        file.write(re.sub(rb"(transformers|numpy|xformers)[^\n]{1,}\n", b"", f))
    !pip install -r vllm_requirements.txt

### Unsloth

In [None]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch

# The checkpoint loaded below is stored on Google Drive, so Drive has to be
# mounted before this cell can read it. Without this, the cell fails on a fresh
# runtime because the path does not exist yet.
from google.colab import drive
drive.mount('/content/drive')

max_seq_length = 5000 # Can increase for longer reasoning traces
lora_rank = 16

# Load saved base summarizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-lora-r-16", # Base roundup generation model
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = True,
    fast_inference = True, # vLLM-backed generation; original note: change to False if it causes issues with unsloth
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.7,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 03-20 18:48:42 [__init__.py:256] Automatically detected platform cuda.
==((====))==  Unsloth 2025.3.17: Fast Llama patching. Transformers: 4.49.0. vLLM: 0.8.1.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/phi-3.5-mini-instruct-bnb-4bit with actual GPU utilization = 69.2%
Unsloth: Your GPU has CUDA compute capability 8.0 with VRAM = 39.56 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 5000. Num Sequences = 320.
Unsloth: vLLM's KV Cache can use up to 24.84

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 03-20 18:49:14 [punica_selector.py:18] Using PunicaWrapperGPU.
INFO 03-20 18:49:15 [model_runner.py:1146] Model loading took 2.1981 GB and 4.404944 seconds
INFO 03-20 18:49:18 [worker.py:267] Memory profiling takes 2.98 seconds
INFO 03-20 18:49:18 [worker.py:267] the current vLLM instance can use total_gpu_memory (39.56GiB) x gpu_memory_utilization (0.69) = 27.37GiB
INFO 03-20 18:49:18 [worker.py:267] model weights take 2.20GiB; non_torch_memory takes 0.09GiB; PyTorch activation peak memory takes 0.54GiB; the rest of the memory reserved for KV Cache is 24.54GiB.
INFO 03-20 18:49:19 [executor_base.py:111] # cuda blocks: 4188, # CPU blocks: 1024
INFO 03-20 18:49:19 [executor_base.py:116] Maximum concurrency for 5000 tokens per request: 13.40x
INFO 03-20 18:49:23 [model_runner.py:1442] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If 

Capturing CUDA graph shapes: 100%|██████████| 43/43 [00:55<00:00,  1.28s/it]

INFO 03-20 18:50:18 [model_runner.py:1570] Graph capturing finished in 55 secs, took 1.02 GiB
INFO 03-20 18:50:18 [llm_engine.py:447] init engine (profile, create kv cache, warmup model) took 63.44 seconds



Unsloth 2025.3.17 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


### Data Preparation

In [None]:
# Mount Google Drive at /content/drive; the dataset and the model checkpoints
# used in this notebook are read from paths below that directory.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json
import pandas as pd
from collections import defaultdict

input_file = "/content/drive/Shareddrives/FYP 2024-2025/Phase-2/AllSides/allsides_clean_sentences_quotes.jsonl"

# Load records: the file holds one JSON object per line.
records = []
with open(input_file, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        # A blank line is not valid JSON and would make json.loads raise,
        # so skip it instead of aborting the whole load.
        if not line:
            continue
        records.append(json.loads(line))

# Convert to DataFrame
df = pd.DataFrame(records)
print("Size of df: ", df.shape)

Size of df:  (430, 7)


In [None]:
# Flatten every raw record into one row: the headline ("issue"), the left /
# right / center article text and the reference roundup.
# NOTE(review): "newsContent" and "roundup" are presumably lists of sentences;
# they are joined with single spaces. Confirm against the JSONL schema.
def _article_text(news, side):
    """Space-join the "newsContent" entries of one side; "" when the side is missing."""
    return ' '.join(s for s in news.get(side, {}).get("newsContent", ""))


df = pd.DataFrame(
    [
        [
            record["issue"],
            _article_text(record["news"], "left"),
            _article_text(record["news"], "right"),
            _article_text(record["news"], "center"),
            ' '.join(record["roundup"]),
        ]
        for _, record in df.iterrows()
    ],
    columns=["event", "left_article", "right_article", "center_article", "roundup"],
)


In [None]:
df

Unnamed: 0,event,left_article,right_article,center_article,roundup
0,DOJ Rejects McCabe's Appeal to Avoid Charges,WASHINGTON — The Justice Department rejected a...,"Former FBI Deputy Director Andrew McCabe, a fr...",WASHINGTON – Federal prosecutors recommended s...,The Department of Justice rejected former FBI ...
1,Thousands Walk Off GM Jobs as UAW Strike Starts,General Motors workers across the country walk...,The United Auto Workers union went on strike a...,DETROIT — President Donald Trump on Sunday urg...,After negotiations for a new contract fell thr...
2,Pressley To Push For Kavanaugh Impeachment Inq...,WASHINGTON — House Judiciary Committee Chairma...,"Rep. Ayanna Pressley, D-Mass., one of the four...","WASHINGTON – Rep. Ayanna Pressley, D-Mass., in...",Representative Ayanna Pressley (D-MA) is plann...
3,Federal Reserve Cuts Interest Rates for Second...,The Federal Reserve cut its benchmark lending ...,Stocks gained after the Federal Reserve on Wed...,The Federal Reserve has cut its benchmark inte...,The Federal Reserve reduced its benchmark lend...
4,Whistleblower Alleges Trump Had Inappropriate ...,The whistleblower complaint that has triggered...,President Trump on Thursday ripped reports tha...,House intelligence committee Chairman Adam Sch...,A government watchdog alleges that President D...
...,...,...,...,...,...
425,Treasury Says $600 Coronavirus Payments Start ...,Treasury Secretary Steven Mnuchin said Tuesday...,The U.S. Treasury Secretary Steve Mnuchin said...,Stimulus money could be coming to your account...,Treasury Secretary Steven Mnuchin said Tuesday...
426,Gallup Names Donald Trump and Michelle Obama a...,President Donald Trump is the most admired man...,Former President Obama's 12-year reign is over...,Story Highlights 18% name Trump as most admire...,President Donald Trump and former first lady M...
427,House Overrides Trump's Defense Bill Veto,The House voted Monday to reject President Tru...,The House of Representatives dealt a blow to P...,House Rejects Trump's Defense Bill Veto In Hig...,The U.S. House voted 322-87 Monday to override...
428,New Coronavirus Variant Emerges Abroad,Frontline NHS staff have been denied the Pfize...,The United Kingdom is set to approve a COVID-1...,Coronavirus: Cases of new variant appear world...,A new and possibly more contagious version of ...


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of `df`, then cut that hold-out in half to get the validation
# and test sets. The same seed is used for both splits.
SPLIT_SEED = 42
train_df, holdout_df = train_test_split(df, test_size=0.1, random_state=SPLIT_SEED)
val_df, test_df = train_test_split(holdout_df, test_size=0.5, random_state=SPLIT_SEED)
del holdout_df  # only needed as an intermediate

print(
    f"Train set size: {len(train_df)}",
    f"Validation set size: {len(val_df)}",
    f"Test set size: {len(test_df)}",
    sep="\n",
)


Train set size: 387
Validation set size: 21
Test set size: 22


In [None]:
from datasets import Dataset

# Convert each pandas split into a Dataset without carrying the pandas index
# over as a column, then drop the DataFrames.
train_dataset, test_dataset, val_dataset = (
    Dataset.from_pandas(frame, preserve_index=False)
    for frame in (train_df, test_df, val_df)
)
del train_df, test_df, val_df


In [None]:
train_dataset

Dataset({
    features: ['event', 'left_article', 'right_article', 'center_article', 'roundup'],
    num_rows: 387
})

In [None]:
# Alternative system prompt, kept for reference (it additionally asks for a comparison of the perspectives):
# system_message = "You are given an event headline and news articles from the left, right, and center perspectives. Your task is to summarize the event in three to four sentences, and mention how different the different perspectives covered the event, to do this, you can highlight what each perspective emphasized on. This summary must be factual and neutral without any bias."
system_message = "You are given an event headline and news articles from the left, right, and center perspectives. Your task is to summarize the main event in three to four sentences. This summary must be factual and neutral without any bias."

def format_data(data):
    """Turn a split with article columns into chat-style prompts.

    Every record gets a ``prompt`` column: a two-message conversation made of
    the module-level ``system_message`` and a user message that lists the
    headline, then the left, center and right articles, each under its own
    ``###`` header. ``roundup`` is kept; the four raw text columns are removed.

    Args:
        data: Object offering ``map`` and ``remove_columns`` whose records have
            the keys ``event``, ``left_article``, ``center_article``,
            ``right_article`` and ``roundup``.

    Returns:
        The result of ``remove_columns`` on the mapped data.
    """
    def _to_chat(example):
        user_content = (
            f"### Event Headline\n{example['event']}\n"
            + f"### Left Article\n{example['left_article']}\n"
            + f"### Center Article\n{example['center_article']}\n"
            + f"### Right Article\n{example['right_article']}"
        )
        return {
            'prompt': [
                {'role': 'system', 'content': system_message},
                {'role': 'user', 'content': user_content},
            ],
            'roundup': example['roundup'],
        }

    with_prompts = data.map(_to_chat)  # type: ignore
    return with_prompts.remove_columns(['event', 'left_article', 'right_article', 'center_article'])  # type: ignore

In [None]:
# Build the chat prompts for all three splits (train, then test, then validation).
train_dataset, test_dataset, val_dataset = (
    format_data(split) for split in (train_dataset, test_dataset, val_dataset)
)

Map:   0%|          | 0/387 [00:00<?, ? examples/s]

Map:   0%|          | 0/22 [00:00<?, ? examples/s]

Map:   0%|          | 0/21 [00:00<?, ? examples/s]

In [None]:
# Reward Functions
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
content_pres_path = "/content/drive/Shareddrives/FYP 2024-2025/Phase-2/Content-Preservation/modernBERT_stsb_finetuned"
polarity_min_path = "/content/drive/Shareddrives/FYP 2024-2025/Phase-2/Polarity-Minimization/modernBERT_polaritydet_finetuned"


def _load_reward_scorer(checkpoint_dir):
    """Load the tokenizer and sequence-classification model stored in
    ``checkpoint_dir`` and move the model to ``device``."""
    scorer_tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    scorer_model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    scorer_model.to(device)
    return scorer_tokenizer, scorer_model


content_pres_tokenizer, content_pres_model = _load_reward_scorer(content_pres_path)
polarity_min_tokenizer, polarity_min_model = _load_reward_scorer(polarity_min_path)

# Both scorers are only used for inference, so switch them to eval mode.
content_pres_model.eval()
polarity_min_model.eval()


ModernBertForSequenceClassification(
  (model): ModernBertModel(
    (embeddings): ModernBertEmbeddings(
      (tok_embeddings): Embedding(50368, 768, padding_idx=50283)
      (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (layers): ModuleList(
      (0): ModernBertEncoderLayer(
        (attn_norm): Identity()
        (attn): ModernBertAttention(
          (Wqkv): Linear(in_features=768, out_features=2304, bias=False)
          (rotary_emb): ModernBertRotaryEmbedding()
          (Wo): Linear(in_features=768, out_features=768, bias=False)
          (out_drop): Identity()
        )
        (mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): ModernBertMLP(
          (Wi): Linear(in_features=768, out_features=2304, bias=False)
          (act): GELUActivation()
          (drop): Dropout(p=0.0, inplace=False)
          (Wo): Linear(in_features=1152, out_features=768, bias=False)
        )
      

In [None]:
def polarity_reward_func(prompts, completions, roundup, **kwargs) -> list[float]:
    """
    Calculate the polarity-minimization reward for each completion.

    The reward is ``1 - p``, where ``p`` is the class-1 probability that
    ``get_polarity_score`` returns for the generated roundup. A roundup that
    the polarity model scores low therefore earns a reward close to 1.

    Args:
        prompts: Chat prompts, one per completion. Not used for scoring.
        completions: Generated conversations; the generated text is read from
            ``completion[0]['content']``.
        roundup: Reference roundups. Not used for scoring.
        **kwargs: Extra keyword arguments are accepted and ignored.

    Returns:
        One reward per entry of ``completions``, in the same order.
    """
    # NOTE(review): the earlier docstring and comments described a different
    # reward -- how close the roundup's polarity is to the mean polarity of the
    # three source articles -- but the value that was actually returned never
    # depended on the articles. The returned value is unchanged here; only the
    # article scoring whose result was thrown away (three extra forward passes
    # per completion) has been removed. Confirm which definition is intended.
    rewards = []

    for completion in completions:
        # Extract the generated roundup
        gen_roundup = completion[0]['content']

        # Score the roundup; no gradients are needed for the reward model.
        with torch.no_grad():
            roundup_polarity = get_polarity_score(polarity_min_model, gen_roundup)

        # Invert so that a lower polarity score means a higher reward.
        rewards.append(1 - roundup_polarity)

    return rewards

def _extract_articles(prompt_content):
    """Return the (left, center, right) article texts of a formatted user prompt."""
    import re  # local import: `re` is not imported at the top of this cell

    # Split only on the four header lines the prompt is built from. Splitting
    # on every "###" would cut an article apart whenever its own text contains
    # "###", and shift the sections that follow it.
    sections = re.split(
        r"^### (?:Event Headline|Left Article|Center Article|Right Article)[ \t]*$",
        prompt_content,
        flags=re.MULTILINE,
    )
    # sections[0] is whatever precedes the first header, sections[1] the
    # headline; the three articles follow. A prompt with fewer sections raises
    # IndexError, as the previous parsing did.
    return sections[2].strip(), sections[3].strip(), sections[4].strip()


def content_preservation_reward_func(prompts, completions, roundup, **kwargs) -> list[float]:
    """
    Calculate content preservation reward.

    For every completion, the generated roundup is compared with the left,
    center and right articles taken from the last message of its prompt. Each
    similarity score is divided by 5.0 and the three values are averaged.

    Args:
        prompts: Chat prompts; the articles are parsed from
            ``prompt[-1]['content']``.
        completions: Generated conversations; the generated text is read from
            ``completion[0]['content']``.
        roundup: Reference roundups. Not used for scoring.
        **kwargs: Extra keyword arguments are accepted and ignored.

    Returns:
        One reward per prompt/completion pair. The values are not clamped, so
        they lie in [0, 1] only if the similarity model stays within 0-5.

    Raises:
        IndexError: If a prompt does not contain the expected section headers.
    """
    rewards = []

    for prompt, completion in zip(prompts, completions):
        # Parse the articles from the prompt content
        left_article, center_article, right_article = _extract_articles(prompt[-1]['content'])

        # Extract the generated roundup
        gen_roundup = completion[0]['content']

        # Calculate similarity scores (scaled by the 0-5 range of the scorer)
        with torch.no_grad():
            left_sim = get_similarity_score(content_pres_model, gen_roundup, left_article) / 5.0
            center_sim = get_similarity_score(content_pres_model, gen_roundup, center_article) / 5.0
            right_sim = get_similarity_score(content_pres_model, gen_roundup, right_article) / 5.0

        # Calculate average similarity (higher is better)
        rewards.append((left_sim + center_sim + right_sim) / 3)

    return rewards

# Helper functions for the reward models
def get_polarity_score(model, text, tokenizer=None):
    """
    Get polarity score from the polarity model.

    Args:
        model: Sequence-classification model. It is called with the tokenized
            text and must return an object with a ``logits`` tensor.
        text: Text to score.
        tokenizer: Tokenizer that belongs to ``model``. Defaults to the
            module-level ``polarity_min_tokenizer``.

    Returns:
        Softmax probability of class index 1 for the input, as a float
        (described in this notebook as the conservative class).
    """
    # The classifier must be fed ids from its own vocabulary. The module-level
    # name `tokenizer` is the tokenizer of the generation model, so it must not
    # be used here.
    if tokenizer is None:
        tokenizer = polarity_min_tokenizer
    # NOTE(review): inputs are truncated to 768 tokens, so long articles are
    # only partially scored -- confirm this limit is intended.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=768)
    # Follow the model's device instead of assuming a GPU is present.
    inputs = inputs.to(model.device)
    outputs = model(**inputs)
    del inputs
    probabilities = torch.softmax(outputs.logits, dim=-1)
    return probabilities[0, 1].item()

def get_similarity_score(model, text1, text2, tokenizer=None):
    """
    Get similarity score from the similarity model.

    Args:
        model: Sequence-classification model. It is called with the tokenized
            text pair and must return an object with a ``logits`` tensor.
        text1: First text of the pair.
        text2: Second text of the pair.
        tokenizer: Tokenizer that belongs to ``model``. Defaults to the
            module-level ``content_pres_tokenizer``.

    Returns:
        The first logit of the model output as a float. Callers treat it as a
        0-5 similarity score; the value is not clamped to that range.
    """
    # The scorer must be fed ids from its own vocabulary. The module-level name
    # `tokenizer` is the tokenizer of the generation model, so it must not be
    # used here.
    if tokenizer is None:
        tokenizer = content_pres_tokenizer
    # NOTE(review): the pair is truncated to 768 tokens in total, so long
    # articles are only partially compared -- confirm this limit is intended.
    inputs = tokenizer(text1, text2, return_tensors="pt", truncation=True, max_length=768)
    # Follow the model's device instead of assuming a GPU is present.
    inputs = inputs.to(model.device)
    outputs = model(**inputs)
    del inputs
    return outputs.logits[0].item()

<a name="Train"></a>
### Train the model

Now set up GRPO Trainer and all configurations!

In [None]:
from trl import GRPOConfig, GRPOTrainer

# Prompt and completion have to fit together into the max_seq_length tokens the
# model was loaded with. The previous values (5000 + 256) added up to more than
# that, so the prompt budget is now whatever is left after reserving room for
# the completion.
max_completion_length = 256  # Adjusted for 3-4 sentence summaries
max_prompt_length = max_seq_length - max_completion_length

# training for 1 epoch: one complete run of the model
training_args = GRPOConfig(
    use_vllm = True,
    learning_rate = 5e-6,
    adam_beta1 = 0.9,
    adam_beta2 = 0.99,
    weight_decay = 0.1,
    warmup_ratio = 0.1,
    lr_scheduler_type = "cosine",
    optim = "paged_adamw_8bit",
    logging_steps = 1,
    bf16 = is_bfloat16_supported(),
    fp16 = not is_bfloat16_supported(),
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 4,  # Increased for smoother training
    num_generations = 4,
    max_prompt_length = max_prompt_length,  # long news articles; leaves room for the completion
    max_completion_length = max_completion_length,
    num_train_epochs=1,
    # max_steps=60,
    # save_steps = 250,
    max_grad_norm = 0.1,
    report_to = "none",
    output_dir = "outputs/neutral_roundup_model"
)

In [None]:
# Both reward functions are passed to the trainer; the training log reports
# each one in its own column next to the combined reward.
reward_functions = [
    polarity_reward_func,
    content_preservation_reward_func,
]

# Only the training split is used; evaluation during training stays disabled
# (pass eval_dataset = val_dataset to turn it on).
trainer = GRPOTrainer(
    model = model,
    processing_class = tokenizer,
    reward_funcs = reward_functions,
    args = training_args,
    train_dataset = train_dataset,
)
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 387 | Num Epochs = 1 | Total steps = 96
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 17,301,504/4,000,000,000 (0.43% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,reward,reward_std,completion_length,kl,rewards / polarity_reward_func,rewards / content_preservation_reward_func
1,0.0,1.516079,0.06454,209.375,0.0,0.9519,0.564179
2,0.0,1.530125,0.070046,194.5625,0.0,0.952443,0.577683
3,0.0003,1.482027,0.052191,202.5625,0.00872,0.9545,0.527527
4,0.0002,1.533092,0.048772,175.9375,0.00499,0.939178,0.593913
5,0.0004,1.491464,0.047249,208.6875,0.010016,0.946904,0.54456
6,0.0007,1.552894,0.047764,205.0,0.017233,0.954812,0.598083
7,0.0003,1.554484,0.038595,183.3125,0.006664,0.948877,0.605607
8,0.0005,1.556058,0.059033,212.375,0.011705,0.95166,0.604399
9,0.0003,1.528792,0.072077,179.25,0.007955,0.947383,0.58141
10,0.0007,1.522261,0.079183,184.75,0.017857,0.955082,0.567179


TrainOutput(global_step=96, training_loss=0.0005056499713838036, metrics={'train_runtime': 4087.4089, 'train_samples_per_second': 0.095, 'train_steps_per_second': 0.023, 'total_flos': 0.0, 'train_loss': 0.0005056499713838036})

Now that the LoRA adapter has been trained with GRPO, save it first:

In [None]:
model.save_lora("grpo_saved_lora")

### Inference

Now we load the LoRA and test:

In [None]:
print(test_dataset['prompt'][3][1]['content'])

### Event Headline
December 2017 Jobs Report
### Left Article
The U.S. economy added 148,000 jobs in December after a year of steady hiring, missing expectations for a larger last-minute surge, the government reported Friday. The unemployment rate stayed at 4.1 percent last month, the lowest point since 2000. Wages continued their slow climb, rising by 9 cents to $26.63. That’s a 2.5 percent increase since December 2016 (and still below pre-recession levels). President Trump's first year in the White House brought healthy growth and 2.1 million new jobs, a slight drop from 2.2 million positions created during Obama's last year in office, government estimates showed Friday. The average number of jobs employers added each month in 2017 was 173,000, compared to the previous year’s 187,000. One dark spot was retail, which lost 20,000 jobs in December, according to the Bureau of Labor Statistics. About 67,000 positions vanished from stores in 2017, a sharp reversal from the 203,000 jobs tha

In [None]:
from vllm import SamplingParams

# Render the chat messages of test example 3 as a single prompt string.
text = tokenizer.apply_chat_template(
    test_dataset['prompt'][3],
    tokenize = False,
    add_generation_prompt = True,
)

sampling_params = SamplingParams(temperature = 0.8, top_p = 0.95, max_tokens = 1024)

# Generate with the saved GRPO adapter and keep the text of the first candidate.
grpo_adapter = model.load_lora("grpo_saved_lora")
generations = model.fast_generate(
    text,
    sampling_params = sampling_params,
    lora_request = grpo_adapter,
)
output = generations[0].outputs[0].text

output

Processed prompts: 100%|██████████| 1/1 [00:03<00:00,  3.22s/it, est. speed input: 911.24 toks/s, output: 67.66 toks/s]


" In December 2017, the U.S. economy added 148,000 jobs, with the unemployment rate remaining steady at 4.1%. Average hourly wages increased by 9 cents, resulting in a 2.5% increase over 2017. While the number of jobs added was lower than predicted, there are six million open positions in the U.S. and 6.6 million people unemployed, indicating potential for continued strong hiring in 2018. Employers are finding it challenging to fill positions due to competitive labor markets, retiring baby boomers, and drug abuse issues, leading to increased wages to attract workers. The event of December 2017's Jobs Report highlighted the tight labor market, with a low unemployment rate and high demand for workers. Despite some regional struggles, overall, the labor market appears poised for continued growth in 2018.<|end|>"

In [None]:
print(test_dataset['prompt'][19][1]['content'])

### Event Headline
President Trump Criticizes Social Media Companies for "Censorship" and "Discrimination" Against the Right
### Left Article
“Let everybody participate, good and bad, and we will all just have to figure it out,” the president tweeted Saturday morning. President Donald Trump used Twitter to tear into social media companies Saturday morning, claiming they were “totally discriminating against Republican/Conservative voices.” But he does have a solution. Sort of. And it involves both sides. Trump tweeted that “too many voices are being destroyed, some good and some bad, and that cannot be allowed to happen.” So, he added: “Let everybody participate, good & bad, and we will all just have to figure it out!” Check out his tweets here: Social Media is totally discriminating against Republican/Conservative voices. Speaking loudly and clearly for the Trump Administration, we won’t let that happen. They are closing down the opinions of many people on the RIGHT, while at the same 

In [None]:
from vllm import SamplingParams

# Render the chat messages of test example 19 as a single prompt string.
text = tokenizer.apply_chat_template(
    test_dataset['prompt'][19],
    tokenize = False,
    add_generation_prompt = True,
)

sampling_params = SamplingParams(temperature = 0.8, top_p = 0.95, max_tokens = 1024)

# Generate with the saved GRPO adapter and keep the text of the first candidate.
grpo_adapter = model.load_lora("grpo_saved_lora")
generations = model.fast_generate(
    text,
    sampling_params = sampling_params,
    lora_request = grpo_adapter,
)
output = generations[0].outputs[0].text

output

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.80s/it, est. speed input: 887.80 toks/s, output: 67.55 toks/s]


' President Donald Trump criticized social media companies for allegedly discriminating against conservative voices, claiming they are "totally discriminating against Republican/Conservative voices" and accused them of "closing down the opinions of many people on the RIGHT." He warned against censorship, which he considers "a very dangerous thing & absolutely impossible to police," while acknowledging that he himself sometimes consumes content from what he views as "fake" media outlets like CNN and MSNBC. Trump\'s comments came amidst several high-profile removals of conservative figures from social media platforms, with concerns that such actions constitute a form of "shadowbanning" that suppresses right-leaning viewpoints. The president did not specify any particular actions by the government to address these concerns, leaving the remedy to the issue of perceived bias on social media platforms unclear.<|end|>'

In [None]:
print(test_dataset['prompt'][1][1]['content'])

### Event Headline
Latest on Berlin Attack
### Left Article
BERLIN — For a Germany that likes to see itself as meticulous, the slip-up was startling: Hours after the authorities said they had grabbed a suspect in the deadly truck rampage at a Christmas market in Berlin, they acknowledged they may have detained the wrong man and began a desperate search for the actual driver. The decision on Tuesday to release the suspect and the Islamic State’s claim of responsibility for the bloody attack inflicted a damaging blow to Chancellor Angela Merkel’s government. On a chaotic day of grief and uncertainty, after 12 people were killed and dozens injured on Monday, leaders of the country’s rising far-right Alternative for Germany party assailed the chancellor in blunt, visceral terms, describing the victims as “Merkel’s dead.” The attack, which saw a driver steer a speeding tractor-trailer through crowds of shoppers at a popular Christmas market in central Berlin, is already reshaping what promi

In [None]:
from vllm import SamplingParams

# Render the chat messages of test example 1 as a single prompt string.
text = tokenizer.apply_chat_template(
    test_dataset['prompt'][1],
    tokenize = False,
    add_generation_prompt = True,
)

sampling_params = SamplingParams(temperature = 0.8, top_p = 0.95, max_tokens = 1024)

# Generate with the saved GRPO adapter and keep the text of the first candidate.
grpo_adapter = model.load_lora("grpo_saved_lora")
generations = model.fast_generate(
    text,
    sampling_params = sampling_params,
    lora_request = grpo_adapter,
)
output = generations[0].outputs[0].text

output

Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.90s/it, est. speed input: 1048.23 toks/s, output: 66.30 toks/s]


' A truck attack at a Berlin Christmas market killed 12 people and injured over 50, with the Islamic State claiming responsibility. German authorities admitted the initial suspect may not have been the actual perpetrator, as no forensic evidence linked the Pakistani citizen in custody to the crime. The method of attack mirrored those used by ISIS in other attacks. The uncertainty surrounding the true identity of the attacker and the potential threat it poses has increased pressure on Chancellor Angela Merkel, who faces criticism over her decision to accept large numbers of refugees.<|end|>'

In [None]:
print(test_dataset['prompt'][0][1]['content'])

### Event Headline
Statue of Liberty poem comment prompts criticism, misleading coverage
### Left Article
Trump’s Acting Director of Citizenship and Immigration Services, Ken Cuccinelli, attempts to rewrite the meaning of the Emma Lazarus poem on the Statue of Liberty, “Give me your tired, your poor, Your huddled masses yearning to breathe free” into something less welcoming. Lawrence O’Donnell explains.
### Center Article
Trump official revises Statue of Liberty poem to defend migrant rule change 14 August 2019 A top US immigration official has revised a quote inscribed on the Statue of Liberty in defence of a new policy that denies food aid to legal migrants. The head of Citizenship and Immigration Services tweaked the passage: "Give me your tired, your poor, your huddled masses yearning to breathe free". The official added the words "who can stand on their own two feet and who will not become a public charge". He later said the poem had referred to "people coming from Europe". Ken C

In [None]:
from vllm import SamplingParams

# Render the chat messages of test example 0 as a single prompt string.
text = tokenizer.apply_chat_template(
    test_dataset['prompt'][0],
    tokenize = False,
    add_generation_prompt = True,
)

sampling_params = SamplingParams(temperature = 0.8, top_p = 0.95, max_tokens = 1024)

# Generate with the saved GRPO adapter and keep the text of the first candidate.
grpo_adapter = model.load_lora("grpo_saved_lora")
generations = model.fast_generate(
    text,
    sampling_params = sampling_params,
    lora_request = grpo_adapter,
)
output = generations[0].outputs[0].text

output

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.91s/it, est. speed input: 807.06 toks/s, output: 68.37 toks/s]


' The Acting Director of Citizenship and Immigration Services, Ken Cuccinelli, has revised the Emma Lazarus poem on the Statue of Liberty to reflect a more self-sufficient immigration ethos, as part of the Trump administration\'s new "public charge" rule. This rule, set to take effect on October 15, will deny green cards to immigrants who are likely to rely on public benefits like Medicaid and housing vouchers. Critics argue it will primarily affect low-income immigrants, while Cuccinelli and the administration maintain the policy is not targeted at any particular group. The controversy has sparked discussions about the true spirit of America\'s immigration laws and the ideal of welcoming those made poor by oppressive regimes, with some expressing concerns that the changes undermine the Statue of Liberty\'s welcoming message.<|end|>'

### Inference on Test

In [None]:
# Generate a roundup for every test record and store it next to the reference
# roundup in an Excel sheet.
# NOTE: `sampling_params` is not defined in this cell; it is the object created
# in the single-example inference cells.

# The adapter is the same for every prompt, so load it once instead of calling
# load_lora on each iteration.
grpo_lora = model.load_lora("grpo_saved_lora")

model_outputs, target_roundups = [], []
for record in test_dataset:
  text = tokenizer.apply_chat_template(record['prompt'], tokenize = False, add_generation_prompt = True)
  output = model.fast_generate(
    text,
    sampling_params = sampling_params,
    lora_request = grpo_lora,
  )[0].outputs[0].text
  # Keep only the text before the end-of-turn marker.
  output = output.split('<|end|>')[0]
  # Put the summary on one line. Line breaks become single spaces: deleting
  # them outright glued the words on either side of a newline together.
  output = " ".join(output.split())
  target_roundups.append(record['roundup'])
  model_outputs.append(output)

results_df = pd.DataFrame({'model_output': model_outputs, 'target_roundup': target_roundups})
results_df.to_excel('/content/drive/Shareddrives/FYP 2024-2025/Phase-2/model-evaluation/grpo_phi_3.5_apr_4.xlsx', index=False)

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.76s/it, est. speed input: 850.98 toks/s, output: 67.38 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.61s/it, est. speed input: 764.82 toks/s, output: 69.49 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:03<00:00,  3.80s/it, est. speed input: 715.13 toks/s, output: 69.46 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.43s/it, est. speed input: 1208.59 toks/s, output: 66.69 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.77s/it, est. speed input: 600.89 toks/s, output: 70.69 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:04<00:00,  4.44s/it, est. speed input: 715.83 toks/s, output: 68.86 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:03<00:00,  3.29s/it, est. speed input: 812.15 toks/s, output: 69.07 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.40s/it, est. speed input: 774.49 toks/s, output: 70.03 toks/s]
Processed prompts: 100%|██████████| 1/1

### Save models

In [None]:
model.save_lora('/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch-lora')

In [None]:
model.save_pretrained('/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch')
tokenizer.save_pretrained('/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch')

('/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch/tokenizer_config.json',
 '/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch/special_tokens_map.json',
 '/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch/tokenizer.model',
 '/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch/added_tokens.json',
 '/content/drive/Shareddrives/FYP 2024-2025/Phase-2/SummaryGen/models/phi-3.5-grpo-1-epoch/tokenizer.json')