In [None]:
!pip install transformers
!pip install wandb
!pip install trl
!pip install pandas
!pip install datasets
!pip install accelerate
!pip install tyro
!pip install nltk -U

Collecting wandb
  Downloading wandb-0.16.5-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.42-py3-none-any.whl (195 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.43.0-py2.py3-none-any.whl (264 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->w

In [None]:

import torch
from tqdm import tqdm
import pandas as pd
import wandb
import os

tqdm.pandas()

from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler


In [None]:

config = PPOConfig(
    model_name    = "gpt2",
    learning_rate = 1.41e-5,
    ## log_with      = "wandb",
)

sent_kwargs = {
         "return_all_scores": True,
         "function_to_apply": "none",
         "batch_size": 16
}


In [None]:

## wandb.init()

wandb.init(mode="disabled")
os.environ['WANDB_DISABLED'] = 'true'



## Loading NEWS BIAS dataset

This dataset encompasses multiple dimensions of biases in news media, such as political inclinations, hate speech, toxicity, sexism, ageism, and more, establishing its distinctiveness in the realm of similar datasets. It's noteworthy that the dataset explicitly refrains from including any personally identifiable information (PII).

In [None]:
import pandas as pd

def truncate_and_save_dataset(original_file_path, truncated_file_path, percentage=0.1):
    # Load the original dataset
    df = pd.read_csv(original_file_path)

    # Keep only specified columns
    df = df[['text','label']]

    # Calculate the number of rows to keep
    nrows_to_keep = int(len(df) * percentage)

    # Truncate the dataset
    truncated_df = df.sample(n=nrows_to_keep, random_state=1)

    # Save the truncated dataset to a new CSV file
    truncated_df.to_csv(truncated_file_path, index=False)
    print(f"Truncated dataset saved to {truncated_file_path}")

# Specify your original and new file paths
train_original_file_path = 'train_dataset.csv'
train_truncated_file_path = 'trunc_train_dataset.csv'

test_original_file_path = 'test_dataset.csv'
test_truncated_file_path = 'trunc_test_dataset.csv'

# Truncate and save the datasets
truncate_and_save_dataset(train_original_file_path, train_truncated_file_path, percentage=0.1)
truncate_and_save_dataset(test_original_file_path, test_truncated_file_path, percentage=0.1)


Truncated dataset saved to trunc_train_dataset.csv
Truncated dataset saved to trunc_test_dataset.csv


In [None]:
ds = load_dataset('csv', data_files='trunc_train_dataset.csv', split='train')

Generating train split: 0 examples [00:00, ? examples/s]


## Visualize details of dataset


In [None]:

ds


Dataset({
    features: ['text', 'label'],
    num_rows: 293994
})

In [None]:

ds[15:18]


{'text': ['Trudeau performed beyond expectations??!  The bar must have been set very low.  The Intellectual Midget has been an embarassment to Canadians',
  'wondering why no one answers my tweets ',
  'Progress has been made DESPITE the resistance of many (white) Americans. It will not take everyone because not everyone will join in getting rid of racism since benefits are derived from certain groups of people being denied rights and privileges. For example, many rural whites work in the prison industry. They have a material stake in maintaining the disproportionate  incarceration rates of non-whites in general and Blacks in particular. \\n"All lives matter" is a truism and therefore pointless as has been aptly pointed out by many above. \\nYou are entirely incorrect that EVERYONE will be necessary to effectuate change. It\'s never been EVERYONE and change will come despite not EVERYONE participating toward that goal. Certain types of white people have and will continue to hinder such

In [None]:

from datasets import ClassLabel
import random
import pandas as pd
from IPython.display import display, HTML


In [None]:

def show_random_elements(dataset, num_examples=20):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."

    picks = []

    for _ in range( num_examples ):

        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)

    df = pd.DataFrame( dataset[picks] )        ## indexing 10 picks

    print(df)
    print(dataset.features.items())

    for column, typ in dataset.features.items():
        print(column)
        print(typ)
        print(ClassLabel)
        ## The isinstance() function returns True if the specified object
        ## is of the specified type, otherwise False
        if isinstance(typ, ClassLabel):
            print("Hello")
            df[column] = df[column].transform(lambda i: typ.names[i])
            ## print(typ.names[i])

    display(HTML(df.to_html()))


In [None]:

show_random_elements(ds)


                                                 text            label
0   Apology \n\nA message from my talk page:\nI ca...    Highly Biased
1   Did you read his book, "Shaking Hands with the...    Highly Biased
2   Alex Kaloi played as a freshman and started fo...          Neutral
3   @DerrickWayneJr because no one was on ! i was ...  Slightly Biased
4   Facebook revealed on Sept. 6 that it had found...  Slightly Biased
5   @MisKoChai am I doing this right!?!? I luuvvv ...          Neutral
6   I know from my research that the wind industry...    Highly Biased
7   theDonald must be a  Democrat, then?\nA Liar, ...    Highly Biased
8   Trudeau should have clarified:  the national i...          Neutral
9   On Mr. Lisi's letter  (that IS the subject, ri...    Highly Biased
10  @thatglamchick ok.... tx  it's so worth the wa...          Neutral
11  @FuchsiaStiletto nah, i didnt get no easter eg...  Slightly Biased
12  watching dieguito right now, baby fabian is st...          Neutral
13    

Unnamed: 0,text,label
0,"Apology \n\nA message from my talk page:\nI can see now. I was placing my personal feelings on a golden pedestal and making them the driver of my outbursts. Hell, i gnored the note of civility on the top of this very talk page. I know my actions were unwarranted, uncalled for and extremely immature. Lashing out won'y solve anything. This could be the insomnia talking, but i can see bright as day i'm in the wrong. And i regret my actions.\n\nI apology, Hodson, for my truly unacceptable conduct. I allowed the hurt i felt months ago come back as bitter anger and made me blind to the real problem. I only hope, that, though you only see me as some 17-year old, you could see me as a 17-year old who wants to try and do all he can for Wikipedia. Will we be able to wipe the slate clean and move past this despicable part in our lives, and maybe emerge from this as collaborators, not squabblers. 12:51 16 January 2012 (UTC)",Highly Biased
1,"Did you read his book, ""Shaking Hands with the Devil""? Dallaire tried everything in his power to get the UN's attention, but he was turned down at every stop. Even getting paper clips was a bureaucratic nightmare for him and his staff, let alone getting deployment of peacekeeping forces and strategic support. Dallaire should not be blamed for the United Nations' total ineptitude. Their actions (or lack thereof) during the Rwanda genocide were shameful and a total travesty.",Highly Biased
2,Alex Kaloi played as a freshman and started for Leleihua !,Neutral
3,@DerrickWayneJr because no one was on ! i was A L O N E !,Slightly Biased
4,"Facebook revealed on Sept. 6 that it had found 470 pages and profiles linked to the Russian troll farm known as the Internet Research Agency.\nFacebook on Monday turned over more than 3,000 of the Russia-linked advertisements from its site over to the Senate and House intelligence committees.\n\nThe ads and accounts were created to push false information and lies in order to amplify divisive political issues across the political spectrum, including gun rights, gay rights issues and the Black Lives Matter movement.",Slightly Biased
5,@MisKoChai am I doing this right!?!? I luuvvv u ms 22 year old,Neutral
6,"I know from my research that the wind industry's mortality studies have been non scientific since 1985. So how do you even know where to start? Do you want to reduce turbine bat mortality down from 200-300 per year or down from 20 per year as the industry would like you to believe? You also hit the root of this entire problem....""The wind industry voluntarily studies and mitigates for wildlife impacts"". The fact is they have no accountability and what they do is Voluntary. That is why thousands of eagles and millions of birds being killed by this industry are not accounted for. The Denver Eagle repository has received over 33,000 eagle carcasses since 1997 and the origin/source of all the eagles is an Interior Department top secret. Yet when this facility was opened the FWS admitted that wind turbines were a primary source of their dead eagles. Explain to all readers how any mitigation for impacts can ever be fair and honest when fake research has been hiding the truth.",Highly Biased
7,"theDonald must be a Democrat, then?\nA Liar, Deceiver, and a Sex Fiend too?",Highly Biased
8,"Trudeau should have clarified: the national interest of Canada, not Ukraine.",Neutral
9,"On Mr. Lisi's letter (that IS the subject, right?) we have a good example, as we went ""down this slippery slope of denaming"" a few years ago. In 2002 Jeff Grayson's name was taken off ""Grayson Hall"" and it became the safe ""McKenzie Hall."" Seems this very wealthy financier and UO fundraiser had given the UO around a million dollars which had been used to remodel this previous law school building. \n\nHowever, it was then reported that he'd defrauded his clients and the money he'd given the UO had been from his profits from that nefarious scheme. This was a huge embarrassment to the UO and even worse, it was reported the the victims wanted their money back and the UO had to return much of the money they'd already spent remodeling. OUCH! Thus the denaming, which seemed to bother no one.\n\nIn light of our current Deady controversy, should we instead have kept the name Grayson but put up signage explaining his ""sins"" as a warning to our business students not to rip off clients?",Highly Biased


In [None]:

tokenizer           = AutoTokenizer.from_pretrained(config.model_name)
tokenizer.pad_token = tokenizer.eos_token


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def tokenize(sample):
    encoding = tokenizer.encode_plus(
        sample["text"],
        add_special_tokens=True,
        max_length=1024,
        truncation=True,
        padding='max_length',
        return_tensors='pt'
    )

    sample["input_ids"] = encoding["input_ids"].squeeze()[:20]  # Further truncate to 20 tokens for your specific use case
    sample["query"] = tokenizer.decode(sample["input_ids"].tolist())

    return sample
ds = ds.map(tokenize, batched=False)
ds

Map:   0%|          | 0/293994 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'label', 'input_ids', 'query'],
    num_rows: 293994
})

In [None]:

ds[15:18]


{'text': ['Trudeau performed beyond expectations??!  The bar must have been set very low.  The Intellectual Midget has been an embarassment to Canadians',
  'wondering why no one answers my tweets ',
  'Progress has been made DESPITE the resistance of many (white) Americans. It will not take everyone because not everyone will join in getting rid of racism since benefits are derived from certain groups of people being denied rights and privileges. For example, many rural whites work in the prison industry. They have a material stake in maintaining the disproportionate  incarceration rates of non-whites in general and Blacks in particular. \\n"All lives matter" is a truism and therefore pointless as has been aptly pointed out by many above. \\nYou are entirely incorrect that EVERYONE will be necessary to effectuate change. It\'s never been EVERYONE and change will come despite not EVERYONE participating toward that goal. Certain types of white people have and will continue to hinder such



    
## Now this for actual RLHF  



In [None]:
from datasets import Dataset, load_dataset, DatasetDict
from transformers import AutoTokenizer
import pandas as pd

# This is the function for building the dataset from our CSV file which can be used for RLHF
def build_dataset_from_csv(config, train_csv_file_path, test_csv_file_path):
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    tokenizer.pad_token = tokenizer.eos_token

    # Load CSV files into Pandas DataFrames
    train_df = pd.read_csv(train_csv_file_path)
    test_df = pd.read_csv(test_csv_file_path)

    # Convert DataFrames into Hugging Face Dataset format
    train_ds = Dataset.from_pandas(train_df)
    test_ds = Dataset.from_pandas(test_df)

    # Combine into DatasetDict
    dataset = DatasetDict({
        'train': train_ds,
        'test': test_ds
    })

    # Define tokenize function
    def tokenize(example):
        result = tokenizer(example['text'], truncation=True, padding='max_length', max_length=512)
        return result

    # Apply tokenize function
    dataset = dataset.map(tokenize, batched=True)

    # Set format for PyTorch
    dataset.set_format(type="torch", columns=['input_ids', 'attention_mask', 'label'])

    return dataset

In [None]:

dataset = build_dataset_from_csv(config, '/content/trunc_train_dataset.csv','/content/trunc_test_dataset.csv')


Map:   0%|          | 0/293994 [00:00<?, ? examples/s]

Map:   0%|          | 0/73498 [00:00<?, ? examples/s]

In [None]:

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])



## Load pre-trained GPT2 language models
We load the GPT2 model with a value head and the tokenizer. We load the model twice; the first model is optimized while the second model serves as a reference to calculate the KL-divergence from the starting point. This serves as an additional reward signal in the PPO training to make sure the optimized model does not deviate too much from the original language model.


In [None]:

model     = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)

tokenizer = AutoTokenizer.from_pretrained(config.model_name)

tokenizer.pad_token = tokenizer.eos_token


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/548M [00:00<?, ?B/s]

In [None]:

ppo_trainer = PPOTrainer(
                 config,
                 model,
                 ref_model,
                 tokenizer,
                 dataset=train_dataset,
                 data_collator=collator
)



## Load UnBIAS-classifier (Reward Function)

We load a UnBIAS-classifier fine-tuned on the News Bias dataset.


In [None]:

device = ppo_trainer.accelerator.device
device


device(type='cpu')

In [None]:

if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"  # to avoid a `pipeline` bug

device

'cpu'

In [None]:

sentiment_pipe = pipeline("text-classification", model="newsmediabias/UnBIAS-classifier", device=device)


config.json:   0%|          | 0.00/921 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]


The model outputs are the logits for the negative and positive class. We will use the logits for positive class as a reward signal for the language model.


In [None]:

text = "this movie is not good"

sentiment_pipe(text, **sent_kwargs)


[[{'label': 'Highly Biased', 'score': -2.8032827377319336},
  {'label': 'Slightly Biased', 'score': 0.7118046879768372},
  {'label': 'Neutral', 'score': 1.310873031616211}]]

In [None]:
text = "this movie is horrible"

sentiment_pipe(text, **sent_kwargs)

[[{'label': 'Highly Biased', 'score': 0.46684542298316956},
  {'label': 'Slightly Biased', 'score': 1.1993681192398071},
  {'label': 'Neutral', 'score': -2.3412296772003174}]]

In [None]:

text = "this movie is damn horrible"
sentiment_pipe(text, **sent_kwargs)


[[{'label': 'Highly Biased', 'score': 2.442782402038574},
  {'label': 'Slightly Biased', 'score': 0.17955872416496277},
  {'label': 'Neutral', 'score': -3.0951590538024902}]]



## Generation settings

For the response generation we just use sampling and make sure top-k and nucleus sampling are turned off as well as a minimal length.


In [None]:

gen_kwargs = {
         "min_length":   -1,
         "top_k":       0.0,
         "top_p":       1.0,
         "do_sample":  True,
         "pad_token_id": tokenizer.eos_token_id
}



## Optimize model

### Training loop

The training loop consists of the following main steps:

* Get the query and responses from the policy network (GPT-2)
* Get sentiments for query/responses from UnBias
* Optimize policy with PPO using the (query, response, reward) triplet


In [None]:

output_min_length     = 4
output_max_length     = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)


In [None]:

generation_kwargs = {
    "min_length":     -1,
    "top_k":         0.0,
    "top_p":         1.0,
    "do_sample":    True,
    "pad_token_id": tokenizer.eos_token_id,
}


In [None]:

## ppo_trainer.config.steps = 100    ## 20,000
ppo_trainer.config.steps


20000

In [None]:

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]

    print(query_tensors)
    print(len(query_tensors))
    if epoch == 1:
        break


1it [00:00,  1.91it/s]

[tensor([31369,    71,  6081, 30357,  5145,  1521,   389,   345,   477, 42023,
         1909,    30,   845,  1178,  4130,  6851,   220, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,

1it [00:01,  1.44s/it]

[tensor([   31,    74,  2442,    24,  2078, 42254,    13,  6416,  1143,   326,
          618,   257,  1545,   531,   326,  1165,   986,   887,   339,   338,
          655,  2045,   866,   269, 10277,   339,   338,  9087,   866,   262,
         4831,   220, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,




In [None]:
for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]
    print(epoch)
    print(batch)
    print('*********************')
    print('*********************')
    print('*********************')
    print('*********************')
    #### Get response from gpt2
    response_tensors = []
    for query in query_tensors:
        gen_len                             = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response                            = ppo_trainer.generate(query, **generation_kwargs)
        response_tensors.append( response.squeeze()[-gen_len:] )
    batch["response"] = [ tokenizer.decode(r.squeeze()) for r in response_tensors ]
    print(batch)
    if epoch == 1:
        break

0it [00:00, ?it/s]

0


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


{'label': ['Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Highly Biased', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Highly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Highly Biased', 'Highly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Highly Biased', 'Neutral', 'Neutral'

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='le

{'label': ['Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Highly Biased', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Highly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Highly Biased', 'Highly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Highly Biased', 'Neutral', 'Neutral'

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


{'label': ['Neutral', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Highly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Highly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Highly Biased', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Highly 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='le

{'label': ['Neutral', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Highly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Highly Biased', 'Slightly Biased', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Highly Biased', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Slightly Biased', 'Neutral', 'Slightly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Highly Biased', 'Neutral', 'Slightly Biased', 'Neutral', 'Highly Biased', 'Highly 




In [None]:

batch.keys()


dict_keys(['label', 'input_ids', 'attention_mask', 'response'])


#### Compute sentiment score


In [None]:

batch["attention_mask"]


[tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0

In [None]:

batch["label"]


['Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Highly Biased',
 'Slightly Biased',
 'Highly Biased',
 'Highly Biased',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Highly Biased',
 'Neutral',
 'Neutral',
 'Slightly Biased',
 'Highly Biased',
 'Slightly Biased',
 'Neutral',
 'Neutral',
 'Highly Biased',
 'Slightly Biased',
 'Slightly Biased',
 'Slightly Biased',
 'Neutral',
 'Neutral',
 'Slightly Biased',
 'Neutral',
 'Neutral',
 'Neutral',
 'Slightly Biased',
 'Slightly Biased',
 'Slightly Biased',
 'Slightly Biased',
 'Neutral',
 'Neutral',
 'Highly Biased',
 'Highly Biased',
 'Neutral',
 'Neutral',
 'Neutral',
 'Slightly Biased',
 'Neutral',
 'Slightly Biased',
 'Neutral',
 'Neutral',
 'Neutral',
 'Highly Biased',
 'Highly Biased',
 'Slightly Biased',
 'Slightly Biased',
 'Neutral',
 'Neutral',
 'Neutral',
 'Highly Biased',
 'Neutral',
 'Highly Biased',
 'Highly Biased',
 'Highly Biased',
 'Neutral',
 'Slightly Biased',
 'Highly

In [None]:

texts = [ q + r for q, r in zip(batch["attention_mask"], batch["response"]) ]


TypeError: unsupported operand type(s) for +: 'Tensor' and 'str'

In [None]:

texts


['This movie is poorly conceived---Monster Hunter is supposed to',
 'i liked this movie, even though it was released in',
 "The first noticeable problem about Tonto: he was fuelling the characters' personalities at a young age",
 'When I spotted that Noah Wyle had made an early cameo appearance at',
 "Modern viewers know this is a critic's I think",
 'Evidently lots of good plot holes, with a',
 'There are some redeeming qualities that make Paterno work in a',
 'This is a very strange film which can be very',
 'good lord! For the first time, even',
 'This is because a compass and compass (another form of sound cancellation) is used to',
 'You know, it wasn\'t half bad, but it had it all."<|endoftext|>',
 'This is part one of the five sections of the film. There are several humorous remarks that border',
 'I mean of all the obscure French and Irish myths that',
 'Okay first of all - I wanted to see the full length',
 'Not that many films have truly had a "Stalker" moment," but Seidl doe

In [None]:

pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
pipe_outputs


[[{'label': 'NEGATIVE', 'score': 2.2946932315826416},
  {'label': 'POSITIVE', 'score': -2.7317020893096924}],
 [{'label': 'NEGATIVE', 'score': -1.925742506980896},
  {'label': 'POSITIVE', 'score': 2.2019805908203125}],
 [{'label': 'NEGATIVE', 'score': -0.18878592550754547},
  {'label': 'POSITIVE', 'score': -0.011356303468346596}],
 [{'label': 'NEGATIVE', 'score': -0.01333677675575018},
  {'label': 'POSITIVE', 'score': -0.11825768649578094}],
 [{'label': 'NEGATIVE', 'score': -1.9098631143569946},
  {'label': 'POSITIVE', 'score': 2.153944730758667}],
 [{'label': 'NEGATIVE', 'score': -1.0707851648330688},
  {'label': 'POSITIVE', 'score': 1.1191564798355103}],
 [{'label': 'NEGATIVE', 'score': -1.4140279293060303},
  {'label': 'POSITIVE', 'score': 1.6278727054595947}],
 [{'label': 'NEGATIVE', 'score': -1.9853274822235107},
  {'label': 'POSITIVE', 'score': 2.2685697078704834}],
 [{'label': 'NEGATIVE', 'score': -1.6584359407424927},
  {'label': 'POSITIVE', 'score': 1.8835939168930054}],
 [{'l

In [None]:

rewards = [ torch.tensor(output[1]["score"]) for output in pipe_outputs]
rewards


[tensor(-2.7317),
 tensor(2.2020),
 tensor(-0.0114),
 tensor(-0.1183),
 tensor(2.1539),
 tensor(1.1192),
 tensor(1.6279),
 tensor(2.2686),
 tensor(1.8836),
 tensor(-0.2499),
 tensor(1.4789),
 tensor(1.3212),
 tensor(0.2753),
 tensor(1.0822),
 tensor(0.8769),
 tensor(1.6423),
 tensor(0.7947),
 tensor(1.6409),
 tensor(2.6443),
 tensor(-0.7279),
 tensor(2.3880),
 tensor(-1.0589),
 tensor(2.3231),
 tensor(-1.1874),
 tensor(2.7740),
 tensor(-0.5302),
 tensor(2.6618),
 tensor(-0.1971),
 tensor(-0.1593),
 tensor(1.9095),
 tensor(-0.0538),
 tensor(1.9543),
 tensor(-1.1134),
 tensor(0.5438),
 tensor(-1.5954),
 tensor(0.1579),
 tensor(-1.8283),
 tensor(1.7351),
 tensor(-0.1821),
 tensor(-1.6691),
 tensor(-1.7416),
 tensor(0.7545),
 tensor(-1.8997),
 tensor(1.0599),
 tensor(-2.6420),
 tensor(0.3579),
 tensor(1.1189),
 tensor(-0.8248),
 tensor(2.8067),
 tensor(-2.7147),
 tensor(-2.6384),
 tensor(-2.2516),
 tensor(0.0565),
 tensor(1.4536),
 tensor(1.7251),
 tensor(2.6113),
 tensor(-0.4088),
 tensor

In [None]:

len(rewards)


128

In [None]:

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]
    print(epoch)

    #### Get response from gpt2
    response_tensors = []
    for query in query_tensors:
        gen_len                             = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response                            = ppo_trainer.generate(query, **generation_kwargs)
        response_tensors.append( response.squeeze()[-gen_len:] )
    batch["response"] = [ tokenizer.decode(r.squeeze()) for r in response_tensors ]

    #### Compute sentiment score
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    rewards = [ torch.tensor(output[1]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(
                     query_tensors,
                     response_tensors,
                     rewards
    )
    ppo_trainer.log_stats(stats, batch, rewards)



0it [00:00, ?it/s]

0


1it [00:09,  9.49s/it]

1


2it [00:19,  9.86s/it]

2


3it [00:29, 10.01s/it]

3


4it [00:40, 10.37s/it]

4


5it [00:51, 10.43s/it]

5


6it [01:01, 10.32s/it]

6


7it [01:11, 10.33s/it]

7


8it [01:22, 10.52s/it]

8


9it [01:33, 10.49s/it]

9


10it [01:43, 10.38s/it]

10


11it [01:53, 10.32s/it]

11


12it [02:03, 10.34s/it]

12


13it [02:14, 10.40s/it]

13


14it [02:25, 10.53s/it]

14


15it [02:35, 10.51s/it]

15


16it [02:46, 10.56s/it]

16


17it [02:56, 10.41s/it]

17


18it [03:06, 10.38s/it]

18


19it [03:17, 10.52s/it]

19


20it [03:28, 10.63s/it]

20


21it [03:39, 10.64s/it]

21


22it [03:49, 10.61s/it]

22


23it [03:59, 10.49s/it]

23


24it [04:10, 10.52s/it]

24


25it [04:20, 10.36s/it]

25


26it [04:31, 10.53s/it]

26


27it [04:41, 10.46s/it]

27


28it [04:51, 10.36s/it]

28


29it [05:01, 10.25s/it]

29


30it [05:12, 10.46s/it]

30


31it [05:23, 10.48s/it]

31


32it [05:33, 10.50s/it]

32


33it [05:43, 10.35s/it]

33


34it [05:54, 10.33s/it]

34


35it [06:04, 10.41s/it]

35


36it [06:14, 10.36s/it]

36


37it [06:25, 10.34s/it]

37


38it [06:35, 10.25s/it]

38


39it [06:45, 10.32s/it]

39


40it [06:55, 10.30s/it]

40


41it [07:06, 10.38s/it]

41


42it [07:16, 10.33s/it]

42


43it [07:27, 10.42s/it]

43


44it [07:37, 10.37s/it]

44


45it [07:47, 10.36s/it]

45


46it [07:58, 10.29s/it]

46


47it [08:07, 10.17s/it]

47


48it [08:18, 10.26s/it]

48


49it [08:29, 10.45s/it]

49


50it [08:39, 10.42s/it]

50


51it [08:49, 10.32s/it]

51


52it [09:00, 10.43s/it]

52


53it [09:10, 10.22s/it]

53


54it [09:20, 10.26s/it]

54


55it [09:30, 10.32s/it]

55


56it [09:41, 10.39s/it]

56


57it [09:52, 10.45s/it]

57


58it [10:01, 10.25s/it]

58


59it [10:12, 10.28s/it]

59


60it [10:21, 10.11s/it]

60


61it [10:32, 10.28s/it]

61


62it [10:43, 10.46s/it]

62


63it [10:53, 10.45s/it]

63


64it [11:04, 10.42s/it]

64


65it [11:14, 10.49s/it]

65


66it [11:25, 10.49s/it]

66


67it [11:35, 10.40s/it]

67


68it [11:45, 10.32s/it]

68


69it [11:55, 10.27s/it]

69


70it [12:06, 10.24s/it]

70


71it [12:16, 10.29s/it]

71


72it [12:26, 10.36s/it]

72


73it [12:37, 10.46s/it]

73


74it [12:47, 10.34s/it]

74


75it [12:58, 10.37s/it]

75


76it [13:08, 10.35s/it]

76


77it [13:18, 10.37s/it]

77


78it [13:29, 10.42s/it]

78


79it [13:39, 10.45s/it]

79


80it [13:50, 10.44s/it]

80


81it [14:01, 10.56s/it]

81


82it [14:11, 10.51s/it]

82


83it [14:21, 10.33s/it]

83


84it [14:31, 10.35s/it]

84


85it [14:42, 10.36s/it]

85


86it [14:53, 10.54s/it]

86


87it [15:03, 10.41s/it]

87


88it [15:13, 10.36s/it]

88


89it [15:24, 10.45s/it]

89


90it [15:35, 10.59s/it]

90


91it [15:45, 10.48s/it]

91


92it [15:56, 10.68s/it]

92


93it [16:07, 10.73s/it]

93


94it [16:17, 10.59s/it]

94


95it [16:28, 10.53s/it]

95


96it [16:38, 10.44s/it]

96


97it [16:48, 10.38s/it]

97


98it [16:58, 10.33s/it]

98


99it [17:09, 10.50s/it]

99


100it [17:20, 10.54s/it]

100


101it [17:30, 10.54s/it]

101


102it [17:41, 10.63s/it]

102


103it [17:52, 10.65s/it]

103


104it [18:02, 10.56s/it]

104


105it [18:13, 10.56s/it]

105


106it [18:23, 10.51s/it]

106


107it [18:34, 10.49s/it]

107


108it [18:44, 10.59s/it]

108


109it [18:55, 10.46s/it]

109


110it [19:05, 10.45s/it]

110


111it [19:16, 10.66s/it]

111


112it [19:26, 10.56s/it]

112


113it [19:37, 10.62s/it]

113


114it [19:48, 10.57s/it]

114


115it [19:58, 10.54s/it]

115


116it [20:08, 10.43s/it]

116


117it [20:19, 10.42s/it]

117


118it [20:29, 10.26s/it]

118


119it [20:39, 10.20s/it]

119


120it [20:50, 10.41s/it]

120


121it [21:00, 10.41s/it]

121


122it [21:10, 10.26s/it]

122


123it [21:20, 10.28s/it]

123


124it [21:30, 10.26s/it]

124


125it [21:41, 10.25s/it]

125


126it [21:51, 10.34s/it]

126


127it [22:02, 10.36s/it]

127


128it [22:13, 10.51s/it]

128


129it [22:23, 10.53s/it]

129


130it [22:33, 10.37s/it]

130


131it [22:43, 10.19s/it]

131


132it [22:53, 10.31s/it]

132


133it [23:04, 10.31s/it]

133


134it [23:14, 10.42s/it]

134


135it [23:25, 10.35s/it]

135


136it [23:35, 10.47s/it]

136


137it [23:46, 10.45s/it]

137


138it [23:56, 10.30s/it]

138


139it [24:06, 10.36s/it]

139


140it [24:17, 10.42s/it]

140


141it [24:27, 10.47s/it]

141


142it [24:38, 10.42s/it]

142


143it [24:48, 10.50s/it]

143


144it [24:59, 10.59s/it]

144


145it [25:09, 10.34s/it]

145


146it [25:20, 10.48s/it]

146


147it [25:30, 10.56s/it]

147


148it [25:41, 10.45s/it]

148


149it [25:51, 10.47s/it]

149


150it [26:01, 10.38s/it]

150


151it [26:12, 10.36s/it]

151


152it [26:23, 10.51s/it]

152


153it [26:32, 10.27s/it]

153


154it [26:43, 10.32s/it]

154


155it [26:53, 10.30s/it]

155


156it [27:03, 10.37s/it]

156


157it [27:14, 10.55s/it]

157


158it [27:25, 10.54s/it]

158


159it [27:35, 10.51s/it]

159


160it [27:46, 10.55s/it]

160


161it [27:56, 10.48s/it]

161


162it [28:07, 10.43s/it]

162


163it [28:17, 10.45s/it]

163


164it [28:28, 10.62s/it]

164


165it [28:39, 10.64s/it]

165


166it [28:49, 10.59s/it]

166


167it [29:00, 10.56s/it]

167


168it [29:10, 10.48s/it]

168


169it [29:21, 10.51s/it]

169


170it [29:31, 10.42s/it]

170


171it [29:41, 10.30s/it]

171


172it [29:52, 10.48s/it]

172


173it [30:03, 10.55s/it]

173


174it [30:13, 10.38s/it]

174


175it [30:23, 10.41s/it]

175


176it [30:33, 10.37s/it]

176


177it [30:44, 10.47s/it]

177


178it [30:54, 10.34s/it]

178


179it [31:05, 10.47s/it]

179


180it [31:15, 10.37s/it]

180


181it [31:26, 10.59s/it]

181


182it [31:37, 10.59s/it]

182


183it [31:47, 10.54s/it]

183


184it [31:57, 10.36s/it]

184


185it [32:07, 10.36s/it]

185


186it [32:18, 10.31s/it]

186


187it [32:28, 10.39s/it]

187


188it [32:39, 10.50s/it]

188


189it [32:49, 10.40s/it]

189


190it [32:59, 10.38s/it]

190


191it [33:10, 10.39s/it]

191


192it [33:20, 10.29s/it]

192


193it [33:30, 10.32s/it]

193


194it [33:40, 10.42s/it]


In [None]:

torch.cuda.get_device_name(0)


'NVIDIA A30'


One can observe how the model starts to generate more positive outputs after a few optimisation steps.

Note: Investigating the KL-divergence will probably show that at this point the model has not converged to the target KL-divergence, yet. To get there would require longer training or starting with a higher initial coefficient.



Let's inspect some examples from the IMDB dataset. We can use model_ref to compare the tuned model model against the model before optimisation.


In [None]:

#### get a batch from the dataset
bs                 = 16
game_data          = dict()


In [None]:

game_data


{}

In [None]:

dataset.set_format("pandas")


In [None]:

df_batch           = dataset[:].sample(bs)
df_batch


Unnamed: 0,review,label,input_ids,query
9325,Well I guess I know the answer to that questio...,0,"[5779, 314]",Well I
23943,"This is an excellent, fast paced thriller by W...",1,"[1212, 318, 281, 6275, 11, 3049]","This is an excellent, fast"
10309,"Now, I flicked onto this just out of curiosity...",0,"[3844, 11, 314, 781]","Now, I fl"
14702,We tend to forget that the master/slave contex...,1,"[1135, 4327, 284, 6044, 326, 262]",We tend to forget that the
4466,"The proverb ""Never judge a book by it's cover""...",0,"[464, 36950]",The proverb
8982,I've never understood the appeal of Garbo. She...,0,"[40, 1053, 1239, 7247]",I've never understood
14943,"Hugh (Ed Harris) is a hotshot, bachelor senato...",1,"[39, 6724, 357, 7407, 10026]",Hugh (Ed Harris
16515,This particular Joe McDoakes short subject was...,1,"[1212, 1948, 5689, 1982]",This particular Joe Mc
13573,Sisters In Law is made by the same directors o...,1,"[50, 6223, 554, 3854, 318, 925, 416]",Sisters In Law is made by
16473,I was very fond of this film. It kept me guess...,1,"[40, 373, 845, 16245, 286]",I was very fond of


In [None]:

game_data["query"] = df_batch["query"].tolist()
query_tensors      = df_batch["input_ids"].tolist()


In [None]:

response_tensors_ref, response_tensors = [], []


In [None]:

#### get response from gpt2 and gpt2_ref
for i in range(bs):
    gen_len = output_length_sampler()

    output  = ref_model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors_ref.append(output)


    output = model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors.append(output)


In [None]:

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"]  = [tokenizer.decode(response_tensors[i]) for i in range(bs)]


In [None]:

#### sentiment analysis of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]




In [None]:

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]


In [None]:

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results


Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,Well I,don't know why they,"love THIS, wonderful film",-1.327288,2.887235
1,"This is an excellent, fast","-paced ride, with big action scenes",moving story with a nice idea of contemporary,2.836497,2.924836
2,"Now, I fl",inched. It was so wrong,"ocked to this hilarious show, and",-1.44626,2.764766
3,We tend to forget that the,picture itself was wish,"movie is thrilling,",-0.736429,2.090948
4,The proverb,on the Désir de Rathmeister's installation wa...,keeps a very pleasant note...well written.(19...,0.27414,2.469272
5,I've never understood,more by this point in your,this wonderful script & still encourage,1.372031,2.755003
6,Hugh (Ed Harris,", ""The Hurt Locker"") helps",) is marvelous and fictional. It's,0.467258,2.578309
7,This particular Joe Mc,Avoy came into her film as if,"Govern shone, with his Voyager light and",1.32904,2.701365
8,Sisters In Law is made by,Donna Hawley and Lauren Ridge. The two stars ...,a group of filmmakers who gave Lawrence Sim's...,1.828336,2.65118
9,I was very fond of,Korea. I love all of their eccentricities. My,"her throughout you really, now very fond of. She",2.317214,2.652236


In [None]:

print("mean:")
display(df_results[["rewards (before)", "rewards (after)"]].mean())
print()
print("median:")
display(df_results[["rewards (before)", "rewards (after)"]].median())



mean:


rewards (before)    0.187238
rewards (after)     2.513685
dtype: float64


median:


rewards (before)    0.283274
rewards (after)     2.651708
dtype: float64

In [None]:

## model.save_pretrained(    "gpt2-imdb-pos-v2", push_to_hub=True)
## tokenizer.save_pretrained("gpt2-imdb-pos-v2", push_to_hub=True)

model.save_pretrained(    "gpt2-imdb-pos-v2", push_to_hub=False)
tokenizer.save_pretrained("gpt2-imdb-pos-v2", push_to_hub=False)



('gpt2-imdb-pos-v2/tokenizer_config.json',
 'gpt2-imdb-pos-v2/special_tokens_map.json',
 'gpt2-imdb-pos-v2/vocab.json',
 'gpt2-imdb-pos-v2/merges.txt',
 'gpt2-imdb-pos-v2/added_tokens.json',
 'gpt2-imdb-pos-v2/tokenizer.json')