In [2]:
!nvidia-smi

Sat Jun 10 10:22:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install gdown
!pip install torch
!pip install transformers
!pip install datasets
!pip install nvidia-ml-py3

In [37]:
import gdown

gdown.download(
    "https://drive.google.com/file/d/136GXNgC-f4GbEmX8o7MVMjT_f0Fj7voe/view?usp=sharing", 
    "reward_model_1440.pth", 
    quiet=False
)

Downloading...
From: https://drive.google.com/file/d/136GXNgC-f4GbEmX8o7MVMjT_f0Fj7voe/view?usp=sharing
To: /content/reward_model_1440.pth
76.0kB [00:00, 11.4MB/s]


'reward_model_1440.pth'

In [4]:
from transformers import LEDModel, LEDTokenizer
from datasets import load_dataset
import torch
from torch import nn, optim
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm

In [None]:
dataset = load_dataset("aeromaki/arxiv_noised_small", "comparisons")

In [7]:
tokenizer = LEDTokenizer.from_pretrained("allenai/led-large-16384-arxiv")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

In [8]:
def generate_global_attention_mask(tokenizer, input_ids):
    mask = torch.zeros_like(input_ids)
    mask[((input_ids == tokenizer.bos_token_id) | (input_ids == tokenizer.eos_token_id)).nonzero(as_tuple=True)] = 1
    return mask

In [9]:
class RewardModel(nn.Module):
    def __init__(self, model="allenai/led-large-16384-arxiv", head_layer_size=32):
        super(RewardModel, self).__init__()
        self.led_encoder = LEDModel.from_pretrained(model).get_encoder()
        self._encoder_output_size = self.led_encoder.layernorm_embedding.weight.shape[0]
        self.head = nn.Sequential(
            nn.Linear(self._encoder_output_size, head_layer_size, bias=False),
            nn.ReLU(),
            nn.Linear(head_layer_size, 1, bias=False)
        )

    def forward(self, input_ids, global_attention_mask):
        hidden_state = self.led_encoder(input_ids, global_attention_mask=global_attention_mask).last_hidden_state
        output = hidden_state.view(hidden_state.size(0), -1, hidden_state.size(-1))[:, -1, :]
        output = self.head(output)
        return output.squeeze()

In [11]:
test = RewardModel().to("cuda")
test.load_state_dict(torch.load("reward_model_1440.pth"))

Downloading pytorch_model.bin:   0%|          | 0.00/1.84G [00:00<?, ?B/s]

Some weights of the model checkpoint at allenai/led-large-16384-arxiv were not used when initializing LEDModel: ['final_logits_bias', 'lm_head.weight']
- This IS expected if you are initializing LEDModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LEDModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [23]:
class Criterion():
    def __init__(self):
        self.logsig = nn.LogSigmoid()
    def loss(self, output):
        return -self.logsig(output[0] - output[1])

In [24]:
optimizer = optim.SGD(test.parameters(), lr=0.001)
criterion = Criterion()
scaler = GradScaler()

In [25]:
import nvidia_smi

nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)

In [None]:
i = 0
for d in tqdm(dataset["train"]):
    i += 1
    if i < 1000:
        continue
    if i > 1440:
        break
    merged_1 = d["article"] + " TL;DR: " + d["abstract"]
    merged_0 = d["article"] + " TL:DR: " + d["noised"]
    
    put = tokenizer.batch_encode_plus([merged_1, merged_0], return_tensors="pt", padding=True).input_ids.to("cuda")
    att = generate_global_attention_mask(tokenizer, put).to("cuda")
    
    optimizer.zero_grad()
    
    with autocast():
        res = test(put, att)
        loss = criterion.loss(res)
        scaler.scale(loss).backward()

        del put
        del att
        del res
        
        scaler.step(optimizer)
        scaler.update()

    if i % 1000 == 0:
        torch.save(test.state_dict(), f"./reward_model_{i}.pth")
    if i % 30 == 0:
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        print(f"epoch: {i} / loss: {loss} / GPU: {100 * (1 - info.free / info.total)}% used")
    del loss

  9%|▉         | 1017/11376 [00:31<11:24, 15.14it/s] 

epoch: 1020 / loss: 0.022796630859375 / GPU: 85.40824890136719% used


  9%|▉         | 1050/11376 [01:18<3:49:22,  1.33s/it]

epoch: 1050 / loss: 0.0187530517578125 / GPU: 85.43754577636719% used


  9%|▉         | 1080/11376 [01:54<4:16:29,  1.49s/it]

epoch: 1080 / loss: 0.0218505859375 / GPU: 85.43754577636719% used


 10%|▉         | 1110/11376 [02:31<3:15:59,  1.15s/it]

epoch: 1110 / loss: 0.019378662109375 / GPU: 86.02348327636719% used


 10%|█         | 1140/11376 [03:12<3:54:00,  1.37s/it]

epoch: 1140 / loss: 0.0171966552734375 / GPU: 86.02348327636719% used


 10%|█         | 1170/11376 [03:52<3:44:59,  1.32s/it]

epoch: 1170 / loss: 0.0166015625 / GPU: 86.02348327636719% used


 11%|█         | 1200/11376 [04:36<4:57:57,  1.76s/it]

epoch: 1200 / loss: 0.01666259765625 / GPU: 86.02348327636719% used


 11%|█         | 1230/11376 [05:22<4:17:34,  1.52s/it]

epoch: 1230 / loss: 0.016998291015625 / GPU: 86.02348327636719% used


 11%|█         | 1260/11376 [06:01<3:30:08,  1.25s/it]

epoch: 1260 / loss: 0.01560211181640625 / GPU: 86.02348327636719% used


 11%|█▏        | 1290/11376 [06:49<4:07:07,  1.47s/it]

epoch: 1290 / loss: 0.0179443359375 / GPU: 86.02348327636719% used


 12%|█▏        | 1320/11376 [07:26<3:05:50,  1.11s/it]

epoch: 1320 / loss: 0.69140625 / GPU: 86.02348327636719% used


 12%|█▏        | 1350/11376 [08:06<4:12:32,  1.51s/it]

epoch: 1350 / loss: 0.693359375 / GPU: 86.02348327636719% used


 12%|█▏        | 1380/11376 [08:54<4:35:54,  1.66s/it]

epoch: 1380 / loss: 0.693359375 / GPU: 86.02348327636719% used


 12%|█▏        | 1410/11376 [09:37<4:00:53,  1.45s/it]

epoch: 1410 / loss: 0.01300811767578125 / GPU: 86.02348327636719% used


 13%|█▎        | 1440/11376 [10:14<1:10:42,  2.34it/s]

epoch: 1440 / loss: 0.0164794921875 / GPU: 86.02348327636719% used





In [None]:
gc.collect()
torch.cuda.empty_cache()

In [None]:
info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
print(f"GPU: {100 * (1 - info.free / info.total)}% used")

In [None]:
del put
del att
del res
del loss

In [None]:
torch.save(test.state_dict(), f"./reward_model_{i-1}.pth")

In [None]:
import pickle
with open("../paperRaccoon/lab/_FRIDGE/_aug/str/arxiv_validation_aug_str.pickle", "rb") as f:
    validation = pickle.load(f)

In [6]:
dataset = load_dataset("openai/summarize_from_feedback", "comparisons")

Downloading and preparing dataset summarize_from_feedback/comparisons to /root/.cache/huggingface/datasets/openai___summarize_from_feedback/comparisons/0.0.0/483f970ceb55b926b0a087ef4f678ab1b089bc8174a107a452c6152e88af7ff0...


Downloading data files:   0%|          | 0/23 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/29.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/9.11M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/55.2M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/46.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.2M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.32M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/8.72M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/15.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/10.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.56M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.69M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/29.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.6M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/15.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/18.1M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/9.64M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.19M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.18M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.00M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/23 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset summarize_from_feedback downloaded and prepared to /root/.cache/huggingface/datasets/openai___summarize_from_feedback/comparisons/0.0.0/483f970ceb55b926b0a087ef4f678ab1b089bc8174a107a452c6152e88af7ff0. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [34]:
def redditpreprocessor(row):
  orig = row["info"]
  text = "TITLE: " + orig["title"] + "POST: " + orig["post"]

  merged_1 = text + " TL;DR: " + row["summaries"][row["choice"]]["text"]
  merged_0 = text + " TL;DR: " + row["summaries"][1-row["choice"]]["text"]

  return [merged_1, merged_0]

In [35]:
i = 0
test.eval()
for d in tqdm(dataset["validation"]):
    i += 1

    data = redditpreprocessor(d)
    
    put = tokenizer.batch_encode_plus(d, return_tensors="pt", padding=True).input_ids.to("cuda")
    att = generate_global_attention_mask(tokenizer, put).to("cuda")
    
    with autocast():
        res = test(put, att)
        loss = criterion.loss(res)

        del put
        del att
        del res
        
        print(f"epoch: {i} / loss: {loss} / GPU: {100 * (1 - info.free / info.total)}% used")
    del loss

  0%|          | 0/86086 [00:02<?, ?it/s]


OutOfMemoryError: ignored