In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are re-imported
# before each cell execution (useful while editing local .py helpers).
%load_ext autoreload
%autoreload 2

In [2]:
%pip install transformers trl wandb

Collecting trl
  Downloading trl-0.8.2-py3-none-any.whl (263 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m263.7/263.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb
  Downloading wandb-0.16.6-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate (from trl)
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets (from trl)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.3-py3-none-any.whl (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.0/102.0 kB[0m [31m11.0 MB/

In [3]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

In [8]:
# PPO settings: which checkpoint to tune, the optimiser step size, and the tracker to log to.
config = PPOConfig(
    log_with="wandb",
    learning_rate=1.41e-5,
    model_name="Zlovoblachko/test_L1_sent_generator",
)

# Keyword arguments forwarded to every `sentiment_pipe(...)` call below: return the score
# of every label, apply no post-processing function to them, classify 16 texts at a time.
sent_kwargs = dict(return_all_scores=True, function_to_apply="none", batch_size=16)

In [4]:
import wandb

# Start a Weights & Biases run; on first use this prompts interactively for an API key
# (see the recorded output below). `config.log_with="wandb"` relies on W&B being available.
wandb.init()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [5]:
from huggingface_hub import notebook_login

# Interactive Hugging Face Hub login widget; the later `save_pretrained(..., push_to_hub=True)`
# calls upload to the Hub and presumably need these credentials — TODO confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
def build_dataset(
    config,
    dataset_name="imdb",
    input_min_text_length=2,
    input_max_text_length=8,
    drop_columns=("Simultaneously",),
    rename_map=None,
    text_column="review",
):
    """
    Build the prompt dataset used for PPO training.

    Loads the ``train`` split of ``dataset_name``, normalises its column names, and adds
    two columns to every row: ``input_ids`` (the text tokenised and truncated to a
    sampled length) and ``query`` (those token ids decoded back to text).

    Args:
        config:
            Object exposing ``model_name``; the tokenizer is loaded from that checkpoint.
        dataset_name (`str`):
            The name of the dataset to be loaded with `load_dataset`.
        input_min_text_length (`int`):
            Lower bound passed to `LengthSampler` for the prompt length in tokens.
        input_max_text_length (`int`):
            Upper bound passed to `LengthSampler` for the prompt length in tokens.
        drop_columns (`str` or iterable of `str`):
            Columns removed right after loading. Pass an empty tuple to keep everything.
        rename_map (`dict`, *optional*):
            ``{old_name: new_name}`` mapping applied after dropping columns. ``None``
            selects the mapping this notebook uses for ``Zlovoblachko/L1_real_aug_sents``.
            NOTE(review): those column names look like a data row that was read as the
            header — confirm against the dataset.
        text_column (`str`):
            Column (after renaming) holding the text that prompts are cut from.

    Returns:
        The mapped dataset object with its format set to ``"torch"``. This is the dataset
        itself, not a ``DataLoader``.
    """
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    tokenizer.pad_token = tokenizer.eos_token

    if rename_map is None:
        # Defaults reproduce the original hard-coded behaviour of this notebook.
        rename_map = {"Simultaneously demand is ever soaring.": "review", "Synonyms": "label"}
    if isinstance(drop_columns, str):
        # A bare string would otherwise be split into characters by list().
        drop_columns = [drop_columns]

    ds = load_dataset(dataset_name, split="train")
    if drop_columns:
        ds = ds.remove_columns(list(drop_columns))
    if rename_map:
        ds = ds.rename_columns(dict(rename_map))

    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        # A fresh prompt length is sampled for every row.
        sample["input_ids"] = tokenizer.encode(sample[text_column])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)
    ds.set_format(type="torch")
    return ds

In [10]:
# Build the PPO prompt dataset from the L1 sentence corpus (fetched via `load_dataset`).
dataset = build_dataset(config, "Zlovoblachko/L1_real_aug_sents")


def collator(data):
    """Turn a list of per-sample dicts into one dict of lists.

    The keys are taken from the first sample; every sample must provide all of them.
    Values are gathered as plain Python lists (no stacking or padding).
    """
    first_sample = data[0]
    return {field: [sample[field] for sample in data] for field in first_sample}

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/476 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/423k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/3330 [00:00<?, ? examples/s]

In [11]:
# Two independent loads of the same checkpoint: `model` is handed to PPOTrainer as the
# policy to optimise, `ref_model` as its reference copy.
model, ref_model = (
    AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name) for _ in range(2)
)
tokenizer = AutoTokenizer.from_pretrained(config.model_name)

# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]



In [12]:
# Wire policy, reference model, tokenizer and prompt dataset into a single PPO trainer;
# `collator` batches samples as dicts of lists.
ppo_trainer = PPOTrainer(
    config=config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=dataset,
    data_collator=collator,
)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

### Word form transmission

In [None]:
# Reward model for the "Word form transmission" section.
# NOTE(review): the next cell rebuilds this same pipeline with an explicit `device`,
# so this cell looks redundant — confirm before removing.
sentiment_pipe = pipeline("text-classification", model="Zlovoblachko/L1-classifier-WFT")

In [None]:
# Pick the device for the reward classifier: in a single-process run use GPU index 0 when
# CUDA is available (CPU otherwise); with several processes use the accelerator's device.
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"
else:
    device = ppo_trainer.accelerator.device

sentiment_pipe = pipeline(
    task="text-classification",
    model="Zlovoblachko/L1-classifier-WFT",
    device=device,
)

In [None]:
# Length (in new tokens) of each response is drawn per query from this sampler.
output_min_length = 5
output_max_length = 25
output_length_sampler = LengthSampler(output_min_length, output_max_length)


# Sampling-based generation; the pad token is set to EOS, matching the tokenizer setup.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


epochs = 5
for epoch in tqdm(range(epochs), "epoch: "):
    for batch in tqdm(ppo_trainer.dataloader):
        query_tensors = batch["input_ids"]

        # 1) Generate one continuation per prompt.
        response_tensors = []
        for query in query_tensors:
            gen_len = output_length_sampler()
            generation_kwargs["max_new_tokens"] = gen_len
            response = ppo_trainer.generate(query, **generation_kwargs)
            # Strip the prompt by its actual length. Slicing with [-gen_len:] leaks prompt
            # tokens into the response whenever generation stops (EOS) before gen_len
            # new tokens have been produced.
            response_tensors.append(response.squeeze(0)[query.shape[0]:])
        batch["response"] = [tokenizer.decode(r) for r in response_tensors]

        # 2) Score prompt + continuation with the classifier.
        texts = [q + r for q, r in zip(batch["query"], batch["response"])]
        pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
        # Reward = raw score of the entry at index 1 of each result
        # (presumably the target error class — TODO confirm the classifier's label order).
        rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

        # 3) PPO update and logging.
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
        ppo_trainer.log_stats(stats, batch, rewards)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/26 [00:00<?, ?it/s][A
  4%|▍         | 1/26 [00:21<08:49, 21.16s/it][A
  8%|▊         | 2/26 [00:40<08:04, 20.21s/it][A
 12%|█▏        | 3/26 [00:58<07:19, 19.12s/it][A
 15%|█▌        | 4/26 [01:16<06:53, 18.79s/it][A
 19%|█▉        | 5/26 [01:34<06:28, 18.48s/it][A
 23%|██▎       | 6/26 [01:52<06:03, 18.20s/it][A

 31%|███       | 8/26 [02:29<05:33, 18.53s/it][A
 35%|███▍      | 9/26 [02:48<05:15, 18.58s/it][A
 38%|███▊      | 10/26 [03:07<04:56, 18.55s/it][A
 42%|████▏     | 11/26 [03:26<04:40, 18.73s/it][A
 46%|████▌     | 12/26 [03:44<04:18, 18.47s/it][A
 50%|█████     | 13/26 [04:02<04:01, 18.57s/it][A
 54%|█████▍    | 14/26 [04:22<03:45, 18.83s/it][A
 58%|█████▊    | 15/26 [04:41<03:27, 18.85s/it][A
 62%|██████▏   | 16/26 [04:58<03:04, 18.49s/it][A
 65%|██████▌   | 17/26 [05:17<02:46, 18.46s/it][A
 69%|██████▉   | 18/26 [05:35<02:27, 18.40s/it][A
 73%|███████▎  | 19/26 [05:54<02:10, 18.58s/it][A
 7

In [None]:
#### generation settings for the comparison
# Defined here because this cell is the first to use `gen_kwargs`; without it a fresh
# kernel run fails with NameError (the name is otherwise only assigned in later sections).
gen_kwargs = {"min_length": -1, "top_k": 0.0, "top_p": 1.0, "do_sample": True, "pad_token_id": tokenizer.eos_token_id}

#### get a batch from the dataset
bs = 16
game_data = dict()
# with_format() returns a pandas-formatted view instead of mutating `dataset` in place
df_batch = dataset.with_format("pandas")[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from the reference model and the PPO-trained model
for i in range(bs):
    gen_len = output_length_sampler()
    query = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
    prompt_len = query.shape[1]
    # Strip the prompt by its actual length: [-gen_len:] would pull prompt tokens into
    # the response whenever generation ends before gen_len new tokens.
    output = ref_model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors_ref.append(output)
    output = model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### classifier score of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,Recruitment,reached pre-school level in only similar age ...,data is deeply the chemicals which are an ess...,-1.832326,1.508754
1,"According to this graph,",the amount of male workers in different count...,it shows that by 2030 the number of,-1.855791,-0.626631
2,"Overall,","all metals in Kyoto, Tokyo, Japan were transp...",it is obvious that the unemployment rate is r...,-1.568462,1.65713
3,Clinical breast,or developing stages help protect us from unn...,"opening more than 40 percent, a lot of male a...",-0.952443,1.001286
4,The authors,answer this many questions in various categor...,argue that there is an opportunity to reduce ...,-1.638218,0.531674
5,Glad,imd team dominateds,ies can be iterally,0.846196,-0.229262
6,"In any way, since then",everybody has different and more physical 3-y...,it has not any influence on public health. It...,-2.101387,0.25466
7,Language is the best way of,"getting skilled musicians, musicians and infl...","getting healthier and healthier for all, and ...",-2.072001,-0.902893
8,"However, drug seizures saw a",slight decrease in 2014 from 1990 to 2012 and...,major increase and market rates were plummeti...,-1.206117,1.72295
9,The grand jury investigation at Philadelphia,"'s Zucco mass were doomed, and a",County level was not so positive during the p...,1.13344,1.152621


In [None]:
# Compare classifier scores before and after PPO on the sampled batch.
reward_frame = df_results[["rewards (before)", "rewards (after)"]]

print("mean:")
display(reward_frame.mean())
print("\nmedian:")
display(reward_frame.median())

mean:


rewards (before)   -1.342496
rewards (after)     0.704455
dtype: float64


median:


rewards (before)   -1.734269
rewards (after)     0.959319
dtype: float64

In [None]:
# Save the tuned policy and its tokenizer under one name and push both to the Hub.
hub_repo = "WFT_L1_sent_generator"
model.save_pretrained(hub_repo, push_to_hub=True)
tokenizer.save_pretrained(hub_repo, push_to_hub=True)

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

('WFT_L1_sent_generator/tokenizer_config.json',
 'WFT_L1_sent_generator/special_tokens_map.json',
 'WFT_L1_sent_generator/vocab.json',
 'WFT_L1_sent_generator/merges.txt',
 'WFT_L1_sent_generator/added_tokens.json',
 'WFT_L1_sent_generator/tokenizer.json')

### Tense semantics

In [None]:
# Pick the device for the reward classifier: in a single-process run use GPU index 0 when
# CUDA is available (CPU otherwise); with several processes use the accelerator's device.
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"
else:
    device = ppo_trainer.accelerator.device

sentiment_pipe = pipeline(
    task="text-classification",
    model="Zlovoblachko/L1-classifier-TenSem",
    device=device,
)

In [None]:
# Generation settings for the before/after comparison cell (passed to
# `ref_model.generate` / `model.generate` together with `max_new_tokens`).
gen_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# NOTE(review): `ppo_trainer` / `model` are created once near the top of the notebook, so
# unless the kernel is restarted between sections this loop keeps training the policy
# already tuned by the previous section — confirm that is intended.

# Length (in new tokens) of each response is drawn per query from this sampler.
output_min_length = 5
output_max_length = 25
output_length_sampler = LengthSampler(output_min_length, output_max_length)


# Sampling-based generation; the pad token is set to EOS, matching the tokenizer setup.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


epochs = 5
for epoch in tqdm(range(epochs), "epoch: "):
    for batch in tqdm(ppo_trainer.dataloader):
        query_tensors = batch["input_ids"]

        # 1) Generate one continuation per prompt.
        response_tensors = []
        for query in query_tensors:
            gen_len = output_length_sampler()
            generation_kwargs["max_new_tokens"] = gen_len
            response = ppo_trainer.generate(query, **generation_kwargs)
            # Strip the prompt by its actual length. Slicing with [-gen_len:] leaks prompt
            # tokens into the response whenever generation stops (EOS) before gen_len
            # new tokens have been produced.
            response_tensors.append(response.squeeze(0)[query.shape[0]:])
        batch["response"] = [tokenizer.decode(r) for r in response_tensors]

        # 2) Score prompt + continuation with the classifier.
        texts = [q + r for q, r in zip(batch["query"], batch["response"])]
        pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
        # Reward = raw score of the entry at index 1 of each result
        # (presumably the target error class — TODO confirm the classifier's label order).
        rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

        # 3) PPO update and logging.
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
        ppo_trainer.log_stats(stats, batch, rewards)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  4%|▍         | 1/26 [00:20<08:26, 20.26s/it][A
  8%|▊         | 2/26 [00:37<07:29, 18.72s/it][A
 12%|█▏        | 3/26 [00:56<07:11, 18.74s/it][A
 15%|█▌        | 4/26 [01:14<06:47, 18.53s/it][A
 19%|█▉        | 5/26 [01:34<06:40, 19.09s/it][A
 23%|██▎       | 6/26 [01:53<06:18, 18.92s/it][A
 27%|██▋       | 7/26 [02:13<06:03, 19.13s/it][A
 31%|███       | 8/26 [02:31<05:40, 18.91s/it][A
 35%|███▍      | 9/26 [02:51<05:26, 19.22s/it][A

 42%|████▏     | 11/26 [03:34<05:08, 20.57s/it][A
 46%|████▌     | 12/26 [03:51<04:32, 19.46s/it][A
 50%|█████     | 13/26 [04:19<04:45, 21.96s/it][A
 54%|█████▍    | 14/26 [04:38<04:13, 21.15s/it][A
 58%|█████▊    | 15/26 [04:57<03:44, 20.40s/it][A
 62%|██████▏   | 16/26 [05:16<03:20, 20.10s/it][A
 65%|██████▌   | 17/26 [05:35<02:56, 19.66s/it][A
 69%|██████▉   | 18/26 [05:54<02:36, 19.50s/it][A
 73%|███████▎  | 19/26 [06:13<02:16, 19.46s/it][A
 77%|███████▋  | 20/26 [06:32<01:56, 19.39s/

In [None]:
#### get a batch from the dataset
bs = 16
game_data = dict()
# with_format() returns a pandas-formatted view instead of mutating `dataset` in place
df_batch = dataset.with_format("pandas")[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from the reference model and the PPO-trained model
for i in range(bs):
    gen_len = output_length_sampler()
    query = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
    prompt_len = query.shape[1]
    # Strip the prompt by its actual length: [-gen_len:] would pull prompt tokens into
    # the response whenever generation ends before gen_len new tokens.
    output = ref_model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors_ref.append(output)
    output = model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### classifier score of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,"At the same time,",children not from school can have not underst...,in Italy the trend was slightly abandon in 20...,-0.785985,1.823884
1,In modern world our life is,simiently one of the main problems that must ...,less stable and in a different temps in 2014 ...,-1.751107,1.762096
2,Some people are confident that,sciences which can help to a conclusion,globalization will improve the statistic in 2015,-2.140757,0.99475
3,Here is an imaginary tale,of international pirate parties and why the p...,"in Japan in 2000, in 2030 and in 2050 years t...",-1.037005,1.452268
4,Public college tuition has jumped 33,percent to £65 millions. Also tuition is only...,",1 percent in 2014 and 2015, while the number ...",-0.230313,1.508732
5,A variety,of shops provide the reverse course. I would ...,of health issues have developed from 2012 to ...,-1.70656,1.580844
6,"In Germany, China and the UK","have two percent increase at %125? i.e., Japa...",both have thousands of people in 1998 in 1979...,1.694651,1.80409
7,Also people have different taste in,"different languages, some travelling by road ...","2012 than 1995, in comparison with UK except ...",-0.960485,1.831349
8,By 2002 mobile calls achieved,about 60% growth. These were up to 15,the highest proportionly in Africa. While in ...,1.043935,1.669698
9,UCI categories are categories of age,", faculty and what kinds of people support eac...",in 1940 or later between 1940 and 1970. In 19...,-0.720919,1.721218


In [None]:
# Compare classifier scores before and after PPO on the sampled batch.
reward_frame = df_results[["rewards (before)", "rewards (after)"]]

print("mean:")
display(reward_frame.mean())
print("\nmedian:")
display(reward_frame.median())

mean:


rewards (before)   -0.230737
rewards (after)     1.555031
dtype: float64


median:


rewards (before)   -0.655856
rewards (after)     1.685598
dtype: float64

In [None]:
# Save the tuned policy and its tokenizer under one name and push both to the Hub.
hub_repo = "TenSem_L1_sent_generator"
model.save_pretrained(hub_repo, push_to_hub=True)
tokenizer.save_pretrained(hub_repo, push_to_hub=True)

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

('TenSem_L1_sent_generator/tokenizer_config.json',
 'TenSem_L1_sent_generator/special_tokens_map.json',
 'TenSem_L1_sent_generator/vocab.json',
 'TenSem_L1_sent_generator/merges.txt',
 'TenSem_L1_sent_generator/added_tokens.json',
 'TenSem_L1_sent_generator/tokenizer.json')

### Synonyms

In [None]:
# Pick the device for the reward classifier: in a single-process run use GPU index 0 when
# CUDA is available (CPU otherwise); with several processes use the accelerator's device.
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"
else:
    device = ppo_trainer.accelerator.device

sentiment_pipe = pipeline(
    task="text-classification",
    model="Zlovoblachko/L1-classifier-Synonyms",
    device=device,
)

config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [None]:
# Generation settings for the before/after comparison cell (passed to
# `ref_model.generate` / `model.generate` together with `max_new_tokens`).
gen_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# NOTE(review): `ppo_trainer` / `model` are created once near the top of the notebook, so
# unless the kernel is restarted between sections this loop keeps training the policy
# already tuned by the previous sections — confirm that is intended.

# Length (in new tokens) of each response is drawn per query from this sampler.
output_min_length = 5
output_max_length = 25
output_length_sampler = LengthSampler(output_min_length, output_max_length)


# Sampling-based generation; the pad token is set to EOS, matching the tokenizer setup.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


epochs = 5
for epoch in tqdm(range(epochs), "epoch: "):
    for batch in tqdm(ppo_trainer.dataloader):
        query_tensors = batch["input_ids"]

        # 1) Generate one continuation per prompt.
        response_tensors = []
        for query in query_tensors:
            gen_len = output_length_sampler()
            generation_kwargs["max_new_tokens"] = gen_len
            response = ppo_trainer.generate(query, **generation_kwargs)
            # Strip the prompt by its actual length. Slicing with [-gen_len:] leaks prompt
            # tokens into the response whenever generation stops (EOS) before gen_len
            # new tokens have been produced.
            response_tensors.append(response.squeeze(0)[query.shape[0]:])
        batch["response"] = [tokenizer.decode(r) for r in response_tensors]

        # 2) Score prompt + continuation with the classifier.
        texts = [q + r for q, r in zip(batch["query"], batch["response"])]
        pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
        # Reward = raw score of the entry at index 1 of each result
        # (presumably the target error class — TODO confirm the classifier's label order).
        rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

        # 3) PPO update and logging.
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
        ppo_trainer.log_stats(stats, batch, rewards)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  4%|▍         | 1/26 [00:20<08:41, 20.85s/it][A
  8%|▊         | 2/26 [00:40<08:03, 20.13s/it][A
 12%|█▏        | 3/26 [00:59<07:34, 19.77s/it][A
 15%|█▌        | 4/26 [01:18<07:04, 19.31s/it][A
 19%|█▉        | 5/26 [01:38<06:48, 19.45s/it][A
 23%|██▎       | 6/26 [01:58<06:32, 19.63s/it][A
 27%|██▋       | 7/26 [02:17<06:09, 19.44s/it][A
 31%|███       | 8/26 [02:34<05:39, 18.88s/it][A
 35%|███▍      | 9/26 [02:52<05:14, 18.50s/it][A

 42%|████▏     | 11/26 [03:31<04:47, 19.18s/it][A
 46%|████▌     | 12/26 [03:51<04:29, 19.28s/it][A
 50%|█████     | 13/26 [04:11<04:14, 19.57s/it][A
 54%|█████▍    | 14/26 [04:30<03:52, 19.36s/it][A
 58%|█████▊    | 15/26 [04:50<03:34, 19.48s/it][A
 62%|██████▏   | 16/26 [05:09<03:14, 19.45s/it][A
 65%|██████▌   | 17/26 [05:29<02:56, 19.65s/it][A
 69%|██████▉   | 18/26 [05:49<02:37, 19.65s/it][A
 73%|███████▎  | 19/26 [06:07<02:14, 19.25s/it][A
 77%|███████▋  | 20/26 [06:26<01:54, 19.13s/

In [None]:
#### get a batch from the dataset
bs = 16
game_data = dict()
# with_format() returns a pandas-formatted view instead of mutating `dataset` in place
df_batch = dataset.with_format("pandas")[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from the reference model and the PPO-trained model
for i in range(bs):
    gen_len = output_length_sampler()
    query = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
    prompt_len = query.shape[1]
    # Strip the prompt by its actual length: [-gen_len:] would pull prompt tokens into
    # the response whenever generation ends before gen_len new tokens.
    output = ref_model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors_ref.append(output)
    output = model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### classifier score of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,Some crews promote a person to,create a new creations. The idea of design,a sport because of the trust in the fans,0.673985,0.732858
1,By 2006 sales were $4,billion and had popularity among a stands lik...,millions more than state goods for free (more,-1.036358,-0.928639
2,So why is texting growing in,such way become more popular than social calls,"all the time and so often, people",-0.01428,0.51444
3,A possible,solution to this problem is not to create an ...,"reason is the government, as one can notice, ...",0.887826,0.683454
4,"All in all,",the EU spends a lot. Worldwide,I had problems with avatars.,0.061614,-0.39659
5,They begin,"with in the 11th point, but the ones in the b...",an aades to reach results. The students will ...,-0.462678,0.592117
6,This was,a time of great finality - it was suddenly,a quick lesson and was given because mankind ...,-0.204435,0.692657
7,This country had some similu,ent small population while its changes in outl...,ulliham which are air travel. it remained acti...,-0.494473,-0.536687
8,"Moreover, it makes us overlook",impossible or goes too far.,the fact that some people think,0.934987,0.920209
9,Statistica data of,town is growing percent of people from the ag...,young people is also relatively really contra...,-0.960907,-0.398864


In [None]:
# Compare classifier scores before and after PPO on the sampled batch.
reward_frame = df_results[["rewards (before)", "rewards (after)"]]

print("mean:")
display(reward_frame.mean())
print("\nmedian:")
display(reward_frame.median())

mean:


rewards (before)   -0.187076
rewards (after)     0.082977
dtype: float64


median:


rewards (before)   -0.333556
rewards (after)     0.284369
dtype: float64

In [None]:
# Save the tuned policy and its tokenizer under one name and push both to the Hub.
hub_repo = "Synonyms_L1_sent_generator"
model.save_pretrained(hub_repo, push_to_hub=True)
tokenizer.save_pretrained(hub_repo, push_to_hub=True)

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

('Synonyms_L1_sent_generator/tokenizer_config.json',
 'Synonyms_L1_sent_generator/special_tokens_map.json',
 'Synonyms_L1_sent_generator/vocab.json',
 'Synonyms_L1_sent_generator/merges.txt',
 'Synonyms_L1_sent_generator/added_tokens.json',
 'Synonyms_L1_sent_generator/tokenizer.json')

### Copying expression

In [None]:
# Pick the device for the reward classifier: in a single-process run use GPU index 0 when
# CUDA is available (CPU otherwise); with several processes use the accelerator's device.
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"
else:
    device = ppo_trainer.accelerator.device

sentiment_pipe = pipeline(
    task="text-classification",
    model="Zlovoblachko/L1-classifier-CopExp",
    device=device,
)

config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [None]:
# Generation settings for the before/after comparison cell (passed to
# `ref_model.generate` / `model.generate` together with `max_new_tokens`).
gen_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# NOTE(review): `ppo_trainer` / `model` are created once near the top of the notebook, so
# unless the kernel is restarted between sections this loop keeps training the policy
# already tuned by the previous sections — confirm that is intended.

# Length (in new tokens) of each response is drawn per query from this sampler.
output_min_length = 5
output_max_length = 25
output_length_sampler = LengthSampler(output_min_length, output_max_length)


# Sampling-based generation; the pad token is set to EOS, matching the tokenizer setup.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


epochs = 5
for epoch in tqdm(range(epochs), "epoch: "):
    for batch in tqdm(ppo_trainer.dataloader):
        query_tensors = batch["input_ids"]

        # 1) Generate one continuation per prompt.
        response_tensors = []
        for query in query_tensors:
            gen_len = output_length_sampler()
            generation_kwargs["max_new_tokens"] = gen_len
            response = ppo_trainer.generate(query, **generation_kwargs)
            # Strip the prompt by its actual length. Slicing with [-gen_len:] leaks prompt
            # tokens into the response whenever generation stops (EOS) before gen_len
            # new tokens have been produced.
            response_tensors.append(response.squeeze(0)[query.shape[0]:])
        batch["response"] = [tokenizer.decode(r) for r in response_tensors]

        # 2) Score prompt + continuation with the classifier.
        texts = [q + r for q, r in zip(batch["query"], batch["response"])]
        pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
        # Reward = raw score of the entry at index 1 of each result
        # (presumably the target error class — TODO confirm the classifier's label order).
        rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

        # 3) PPO update and logging.
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
        ppo_trainer.log_stats(stats, batch, rewards)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  4%|▍         | 1/26 [00:19<08:03, 19.33s/it][A
  8%|▊         | 2/26 [00:38<07:39, 19.14s/it][A
 12%|█▏        | 3/26 [00:57<07:20, 19.17s/it][A
 15%|█▌        | 4/26 [01:16<07:02, 19.20s/it][A
 19%|█▉        | 5/26 [01:35<06:37, 18.95s/it][A
 23%|██▎       | 6/26 [01:55<06:24, 19.25s/it][A
 27%|██▋       | 7/26 [02:13<06:00, 18.95s/it][A
 31%|███       | 8/26 [02:34<05:51, 19.52s/it][A
 35%|███▍      | 9/26 [02:53<05:29, 19.38s/it][A

 42%|████▏     | 11/26 [03:32<04:52, 19.48s/it][A
 46%|████▌     | 12/26 [03:52<04:33, 19.51s/it][A
 50%|█████     | 13/26 [04:12<04:16, 19.75s/it][A
 54%|█████▍    | 14/26 [04:30<03:52, 19.36s/it][A
 58%|█████▊    | 15/26 [04:48<03:26, 18.81s/it][A
 62%|██████▏   | 16/26 [05:06<03:07, 18.72s/it][A
 65%|██████▌   | 17/26 [05:26<02:50, 18.92s/it][A
 69%|██████▉   | 18/26 [05:45<02:32, 19.04s/it][A
 73%|███████▎  | 19/26 [06:04<02:13, 19.02s/it][A
 77%|███████▋  | 20/26 [06:23<01:54, 19.09s/

In [None]:
#### get a batch from the dataset
bs = 16
game_data = dict()
# with_format() returns a pandas-formatted view instead of mutating `dataset` in place
df_batch = dataset.with_format("pandas")[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from the reference model and the PPO-trained model
for i in range(bs):
    gen_len = output_length_sampler()
    query = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
    prompt_len = query.shape[1]
    # Strip the prompt by its actual length: [-gen_len:] would pull prompt tokens into
    # the response whenever generation ends before gen_len new tokens.
    output = ref_model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors_ref.append(output)
    output = model.generate(query, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[prompt_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### classifier score of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,This concentration continued,to decline in two years and amounted to 14% i...,to be stable very last stable from 17% to 25 ...,0.16507,1.109349
1,The express who live in glass houses,"with electricity only rises from 60,1 to 90,3...","are not people of all ages, but the retiremen...",0.94689,1.08721
2,The arena of relationships is intolerant,... so it is necessary to take measures to red...,to everyone's eyes and the event is happening...,-0.40944,1.137671
3,"One should not forget, that there",is a real way to gain chann,are all kinds of people who give the,0.725937,1.158632
4,I don�,�t know exactly why. I am not unready to reall...,"�t want to do this thing. I imagine, because t...",0.681704,0.97314
5,They can also give use in,"family, hobby or services, eat of modern gadg...",nervous space muscles that we pick. Mainly th...,0.674888,0.660787
6,The given chart compares the,information about European Union situation in 12,populations of this group aged category outside,-0.589846,-0.383683
7,In Germany in both,sexes is kind and universities achieve someth...,"genders people need also help, for example, t...",0.00077,0.846695
8,"Simultaneously, the datas",il sent by East Asia in 2010 show a significan...,endant of every EU people will take part in th...,-1.826002,-0.134454
9,Obesity,is the main cause of overweight,is a main issue of people,-1.240512,0.112863


In [None]:
# Compare classifier scores before and after PPO on the sampled batch.
reward_frame = df_results[["rewards (before)", "rewards (after)"]]

print("mean:")
display(reward_frame.mean())
print("\nmedian:")
display(reward_frame.median())

mean:


rewards (before)   -0.131474
rewards (after)     0.764359
dtype: float64


median:


rewards (before)    0.082920
rewards (after)     0.972577
dtype: float64

In [None]:
# Save the tuned policy and its tokenizer under one name and push both to the Hub.
hub_repo = "CopExp_L1_sent_generator"
model.save_pretrained(hub_repo, push_to_hub=True)
tokenizer.save_pretrained(hub_repo, push_to_hub=True)

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

('CopExp_L1_sent_generator/tokenizer_config.json',
 'CopExp_L1_sent_generator/special_tokens_map.json',
 'CopExp_L1_sent_generator/vocab.json',
 'CopExp_L1_sent_generator/merges.txt',
 'CopExp_L1_sent_generator/added_tokens.json',
 'CopExp_L1_sent_generator/tokenizer.json')

### Transliteration

In [13]:
# Pick the device for the reward classifier: in a single-process run use GPU index 0 when
# CUDA is available (CPU otherwise); with several processes use the accelerator's device.
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"
else:
    device = ppo_trainer.accelerator.device

sentiment_pipe = pipeline(
    task="text-classification",
    model="Zlovoblachko/L1-classifier-Transliteration",
    device=device,
)

config.json:   0%|          | 0.00/764 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [14]:
# Generation settings for the before/after comparison cell; `max_new_tokens`
# is supplied separately at each generate() call there.
gen_kwargs = {
    "min_length": -1,
    # NOTE(review): top_k=0.0 / top_p=1.0 look intended to leave sampling unfiltered —
    # confirm against the installed transformers version.
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    # The EOS token id doubles as the padding id.
    "pad_token_id": tokenizer.eos_token_id,
}

In [15]:
# PPO fine-tuning loop: for every batch, sample one continuation per query,
# score query + continuation with `sentiment_pipe`, and run one PPO step.
# The continuation length budget is drawn per query from this sampler.
output_min_length = 5
output_max_length = 25
output_length_sampler = LengthSampler(output_min_length, output_max_length)


# Sampling settings (same values as `gen_kwargs`). The per-query token budget is
# passed at the call site rather than written into this dict, so the dict is
# never mutated while the loop runs.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


epochs = 5
for epoch in tqdm(range(epochs), "epoch: "):
    for batch in tqdm(ppo_trainer.dataloader):
        query_tensors = batch["input_ids"]

        #### sample one continuation per query
        response_tensors = []
        for query in query_tensors:
            gen_len = output_length_sampler()
            response = ppo_trainer.generate(query, max_new_tokens=gen_len, **generation_kwargs)
            # The generated sequence is prompt + continuation. Cut the prompt off by
            # its own length instead of keeping the last `gen_len` tokens: when EOS is
            # sampled early, fewer than `gen_len` new tokens exist and a `[-gen_len:]`
            # slice would pull trailing prompt tokens into the response.
            response_tensors.append(response.squeeze(0)[query.shape[0]:])
        batch["response"] = [tokenizer.decode(r) for r in response_tensors]

        #### score query + continuation with the classifier
        texts = [q + r for q, r in zip(batch["query"], batch["response"])]
        pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
        # The reward is the score of the classifier's second label (index 1).
        # NOTE(review): assumes index 1 is the class being optimised for — confirm
        # against the classifier's label order.
        rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

        #### PPO update and logging
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
        ppo_trainer.log_stats(stats, batch, rewards)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  4%|▍         | 1/26 [00:18<07:53, 18.95s/it][A
  8%|▊         | 2/26 [00:36<07:16, 18.17s/it][A
 12%|█▏        | 3/26 [00:59<07:43, 20.15s/it][A
 15%|█▌        | 4/26 [01:16<07:00, 19.13s/it][A
 19%|█▉        | 5/26 [01:34<06:34, 18.78s/it][A
 23%|██▎       | 6/26 [01:53<06:12, 18.62s/it][A
 27%|██▋       | 7/26 [02:12<05:58, 18.85s/it][A
 31%|███       | 8/26 [02:29<05:26, 18.15s/it][A
 35%|███▍      | 9/26 [02:45<04:58, 17.55s/it][A

 42%|████▏     | 11/26 [03:21<04:28, 17.90s/it][A
 46%|████▌     | 12/26 [03:39<04:10, 17.91s/it][A
 50%|█████     | 13/26 [03:57<03:52, 17.90s/it][A
 54%|█████▍    | 14/26 [04:14<03:32, 17.71s/it][A
 58%|█████▊    | 15/26 [04:33<03:17, 17.95s/it][A
 62%|██████▏   | 16/26 [04:51<02:59, 18.00s/it][A
 65%|██████▌   | 17/26 [05:10<02:44, 18.24s/it][A
 69%|██████▉   | 18/26 [05:27<02:24, 18.12s/it][A
 73%|███████▎  | 19/26 [05:45<02:05, 17.87s/it][A
 77%|███████▋  | 20/26 [06:02<01:47, 17.85s/

In [16]:
#### get a batch from the dataset
# Draw `bs` random rows and keep both the query text and its token ids.
bs = 16
game_data = dict()
dataset.set_format("pandas")
df_batch = dataset[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get a response from the reference model ("before") and the tuned model ("after")
for i in range(bs):
    gen_len = output_length_sampler()
    # Build the (1, query_len) prompt once and reuse it for both models.
    query_batch = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
    query_len = query_batch.shape[1]

    # generate() output is prompt + continuation. Strip the prompt by its length:
    # a `[-gen_len:]` slice would leak prompt tokens into the response whenever
    # EOS ends generation before `gen_len` new tokens were produced.
    output = ref_model.generate(query_batch, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[query_len:]
    response_tensors_ref.append(output)

    output = model.generate(query_batch, max_new_tokens=gen_len, **gen_kwargs).squeeze(0)[query_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response) for response in response_tensors_ref]
game_data["response (after)"] = [tokenizer.decode(response) for response in response_tensors]

#### classifier score of query/response pairs before/after
# The reward is the score of the classifier's second label (index 1).
# NOTE(review): assumes index 1 is the class being optimised for — confirm.
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,Some crews promote a,fashion among people who are interested to be...,"lot of cigarettes with electronic gadgets, wh...",-0.522784,0.740069
1,By 2006 sales were $,10 billion for the was in Italy then $,"3 try piece, enjoy by femur",-1.88324,1.228231
2,So why is,there a problem of spreading newborn amounts of,this explanation why rails do not remain unp,-1.719967,-1.363717
3,A possible way to,try this approach can be to create some kind ...,behave is as aurop style. But not studly pl,-0.776999,2.240878
4,All in,"all, everything is about loving person",all imminently the use results,-1.409599,-2.011396
5,They begin from 10%,to 20%. The amount of children less than thei...,to 15% in Isci (Ebutsk and Benik,-2.024133,1.670215
6,This was using fabric,ine and there are definitely more economical w...,ae that scarn surveces robbers and threat,1.810877,2.293106
7,This country had,to become like a part of the normalisation of...,important influence eorfnes or socol afscistd,-1.069164,2.20051
8,"Moreover, it makes",sense of sharing high-quality,us consolciven that,-1.064636,0.685943
9,Statistica data of all,the years shows an increasing amount of child...,kindergard a garantfull learning that taim,-0.815087,2.354837


In [17]:
# Mean and median of the "before" and "after" reward columns of `df_results`,
# shown one statistic at a time.
reward_scores = df_results[["rewards (before)", "rewards (after)"]]

print("mean:")
display(reward_scores.mean())
print()
print("median:")
display(reward_scores.median())

mean:


rewards (before)   -1.253039
rewards (after)     1.222469
dtype: float64


median:


rewards (before)   -1.455198
rewards (after)     1.668306
dtype: float64

In [18]:
# Save the model and its tokenizer under the same name; push_to_hub=True
# additionally requests an upload of each artifact.
# The tokenizer call stays last so its return value remains the cell output.
transliteration_repo_name = "Transliteration_L1_sent_generator"
model.save_pretrained(transliteration_repo_name, push_to_hub=True)
tokenizer.save_pretrained(transliteration_repo_name, push_to_hub=True)

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

('Transliteration_L1_sent_generator/tokenizer_config.json',
 'Transliteration_L1_sent_generator/special_tokens_map.json',
 'Transliteration_L1_sent_generator/vocab.json',
 'Transliteration_L1_sent_generator/merges.txt',
 'Transliteration_L1_sent_generator/added_tokens.json',
 'Transliteration_L1_sent_generator/tokenizer.json')