In [None]:
import wandb
import torch
import os
import torch.nn as nn
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Model
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from dataclasses import dataclass
from tqdm import tqdm
from datasets import load_dataset

In [None]:
# Mount Google Drive at /content/drive. The checkpoint paths used later in this
# notebook all live under /content/drive/MyDrive/, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def getwandbrun(cfgs):
  """Log in to Weights & Biases and start the run used for metric logging.

  Args:
    cfgs: configuration object. Its `WANDBAPI_KEY` attribute is used to log in;
      its other public, non-callable attributes are recorded as the run config.

  Returns:
    The value returned by `wandb.init`.
  """
  wandb.login(key=cfgs.WANDBAPI_KEY)

  # `vars(cfgs)` only contains attributes stored on the instance. Settings that
  # are declared as plain class attributes are invisible to it (the config would
  # be logged as {}), so collect class-level values first and let instance
  # values override them.
  settings = {}
  for klass in reversed(type(cfgs).__mro__):
    settings.update(vars(klass))
  settings.update(getattr(cfgs, "__dict__", {}))

  # Credentials must never be uploaded as part of the run config.
  secret_markers = ("KEY", "TOKEN", "SECRET", "PASSWORD")
  run_config = {
      name: value
      for name, value in settings.items()
      if not name.startswith("_")
      and not callable(value)
      and not isinstance(value, (property, classmethod, staticmethod))
      and not any(marker in name.upper() for marker in secret_markers)
  }

  run = wandb.init(
      entity="ajheshbasnet-kpriet",
      project="RLVR",
      name = "rlvr-runs",
      config=run_config,
  )
  return run

In [None]:
@dataclass
class configs:
  """Hyperparameters and run settings read by the other cells via `cfg`."""

  # ---- model / data ----
  MAX_SEQ_LEN: int = 512
  MODEL_NAME: str = "gpt2"
  DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"

  # ---- supervised fine-tuning ----
  # These names are read by the dataloader, optimizer and training cells but were
  # never declared, so a fresh kernel failed with AttributeError.
  # 12 reproduces the recorded run (116722 training rows -> 9727 batches per epoch).
  SFT_TRAIN_BATCH_SIZE: int = 12
  # TODO(review): the values below are placeholders; the recorded run does not
  # reveal them. Replace with the settings actually used.
  SFT_VALID_BATCH_SIZE: int = 12
  SFT_LEARNING_RATE: float = 1e-5
  SFT_EPOCHS: int = 2  # the recorded run had entered a second epoch, so >= 2
  GRADIENT_ACCUM_STEPS: int = 4
  EVAL_EVERY_STEP: int = 2000

  # ---- secrets ----
  # Read from the environment instead of committing the key to the notebook; the
  # original blank placeholder is kept as the fallback. Deliberately left
  # un-annotated so it stays a class attribute and never appears in `vars(cfg)`.
  WANDBAPI_KEY = os.environ.get("WANDB_API_KEY", " ")

cfg = configs()

In [None]:
# Load the tokenizer that belongs to cfg.MODEL_NAME.
tokenizer = GPT2Tokenizer.from_pretrained(cfg.MODEL_NAME)
# Pad on the left and reuse the EOS token as the pad token.
# Note: SFT_Dataset below builds its own padding by appending EOS ids, so
# `padding_side` only matters for calls that ask the tokenizer itself to pad.
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]



merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Load the pretrained language-model-head weights for cfg.MODEL_NAME and move them to cfg.DEVICE.
model = GPT2LMHeadModel.from_pretrained(cfg.MODEL_NAME).to(cfg.DEVICE)

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Load the TL;DR summarisation dataset; it comes back as a DatasetDict with
# train/test/valid splits, each holding 'prompt' and 'label' columns.
dataset = load_dataset("CarperAI/openai_summarize_tldr")

README.md:   0%|          | 0.00/532 [00:00<?, ?B/s]

data/train-00000-of-00001-e8c59e5cf7bce1(…):   0%|          | 0.00/111M [00:00<?, ?B/s]

data/test-00000-of-00001-59ffb27399371ea(…):   0%|          | 0.00/6.23M [00:00<?, ?B/s]

data/valid-00000-of-00001-0e33e6bd86e3ed(…):   0%|          | 0.00/6.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/116722 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6553 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/6447 [00:00<?, ? examples/s]

In [None]:
# Inspect the available splits, their columns and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'label'],
        num_rows: 116722
    })
    test: Dataset({
        features: ['prompt', 'label'],
        num_rows: 6553
    })
    valid: Dataset({
        features: ['prompt', 'label'],
        num_rows: 6447
    })
})

In [None]:
# Bind each split to the variable that carries its name. The validation and test
# splits were previously crossed over, which meant the held-out test split was
# the one consumed for validation during training.
train_dataset = dataset['train']
valid_dataset = dataset['valid']
test_dataset = dataset['test']

In [None]:
# Peek at the first training prompt. Indexing the row first avoids pulling the
# entire 'prompt' column into memory just to read one string.
train_dataset[0]['prompt']

"SUBREDDIT: r/relationships\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mother has been 

In [None]:
def format_dataset(datasets):
  """Add a `prompt_label` entry: the prompt and the label joined by one space.

  The mapping that is passed in is updated in place and also returned.
  """
  joined_text = "{} {}".format(datasets['prompt'], datasets['label'])
  datasets['prompt_label'] = joined_text
  return datasets

In [None]:
# Add the combined `prompt_label` column (see format_dataset) to the splits used for SFT.
SFT_train_dataset = train_dataset.map(format_dataset)
SFT_valid_dataset = valid_dataset.map(format_dataset)

Map:   0%|          | 0/116722 [00:00<?, ? examples/s]

Map:   0%|          | 0/6553 [00:00<?, ? examples/s]

In [None]:
# Confirm the new `prompt_label` column is present.
SFT_train_dataset

Dataset({
    features: ['prompt', 'label', 'prompt_label'],
    num_rows: 116722
})

In [None]:
# Id of the EOS token; SFT_Dataset uses it both as the terminator and as the padding id.
tokenizer.eos_token_id

50256

In [None]:
# Sequence length that every SFT example is truncated / padded to.
cfg.MAX_SEQ_LEN

512

In [None]:
class SFT_Dataset(Dataset):
    """Next-token-prediction pairs built from the `prompt_label` text column.

    Each item is tokenised on access, cut down to its last `max_seq_len` tokens,
    terminated with exactly one EOS token and then right-padded with further EOS
    ids to a fixed length. Padding positions are replaced by -100 in the targets.

    Args:
        ds: object indexable by ``'prompt_label'`` that yields a sequence of strings.
        tokenizer: callable tokenizer exposing ``eos_token_id``. Defaults to the
            notebook-level ``tokenizer``.
        max_seq_len: length of the returned tensors. Defaults to
            ``cfg.MAX_SEQ_LEN``, looked up each time an item is fetched.
    """

    def __init__(self, ds, tokenizer=None, max_seq_len=None):
        self.prompt_label = ds['prompt_label']
        # The `tokenizer` parameter shadows the notebook global of the same name,
        # so the fallback has to be fetched through globals().
        self.tokenizer = globals()['tokenizer'] if tokenizer is None else tokenizer
        self.max_seq_len = max_seq_len

    def __len__(self):
        return len(self.prompt_label)

    def __getitem__(self, idx):
        """Return ``{"input_ids": x, "target_ids": y}``, each of length `max_seq_len`.

        ``y`` is the sequence shifted left by one token relative to ``x``.

        Raises:
            ValueError: if the effective `max_seq_len` is smaller than 1.
        """
        limit = cfg.MAX_SEQ_LEN if self.max_seq_len is None else self.max_seq_len
        if limit < 1:
            raise ValueError(f"max_seq_len must be >= 1, got {limit}")
        eos_id = self.tokenizer.eos_token_id

        # Tokenize without automatic special tokens. `.long()` is a no-op for the
        # usual integer ids and guards against empty text coming back as float.
        token_ids = self.tokenizer(
            self.prompt_label[idx],
            return_tensors="pt",
            add_special_tokens=False
        )["input_ids"][0].long()

        # Left truncation: keep the tail so the end of the text survives.
        # For sequences that are already short enough this slice keeps everything.
        token_ids = token_ids[-limit:]
        real_len = token_ids.numel()

        # One genuine EOS plus however many padding EOS ids are needed to reach
        # limit + 1 entries (always at least the one genuine EOS).
        fill = torch.full((limit + 1 - real_len,), eos_id, dtype=torch.long)
        ids = torch.cat((token_ids, fill))

        # Index `real_len` holds the genuine EOS and stays a target; everything
        # after it is padding and is masked out.
        targets = ids.clone()
        targets[real_len + 1:] = -100

        return {
            "input_ids": ids[:-1],
            "target_ids": targets[1:]
        }


In [None]:
# Wrap the formatted splits and batch them for the SFT loop.
train_ds = SFT_Dataset(SFT_train_dataset)
valid_ds = SFT_Dataset(SFT_valid_dataset)
train_dataloader = DataLoader(train_ds, batch_size=cfg.SFT_TRAIN_BATCH_SIZE, shuffle=True)
# Validation is not shuffled: the reported validation loss is a mean of per-batch
# means, so a fixed batch composition keeps successive evaluations comparable.
valid_dataloader = DataLoader(valid_ds, batch_size=cfg.SFT_VALID_BATCH_SIZE, shuffle=False)

In [None]:
# Adam over every model parameter, with the learning rate taken from the config.
SFT_OPTIMIZER = torch.optim.Adam(model.parameters(), lr=cfg.SFT_LEARNING_RATE)
# Target positions equal to -100 (the padding mask written by SFT_Dataset) are skipped by the loss.
criterion = nn.CrossEntropyLoss(ignore_index=-100)

In [None]:
# Read a previously saved checkpoint onto cfg.DEVICE.
# NOTE(review): this reads from .../MyDrive/grpo/ while the training cell below
# writes to .../MyDrive/sft-optimizer/ - confirm this is the intended source.
# The cell raises if the file does not exist, so a first-time run must skip it.
checkpointer = torch.load("/content/drive/MyDrive/grpo/checkpoint_epoch_2.pth", map_location=cfg.DEVICE)

In [None]:
# Restore optimizer and model state from the loaded checkpoint.
# NOTE(review): checkpoints written by this notebook also carry 'scaler_state_dict'
# and 'epoch'; neither is restored here, and the training loop always restarts
# its epoch count from 0 - confirm that is intended when resuming.
SFT_OPTIMIZER.load_state_dict(checkpointer['optimizer_state_dict'])
model.load_state_dict(checkpointer['model_state_dict'])

<All keys matched successfully>

In [None]:
# Start the Weights & Biases run; the training loop logs its metrics through `runs`.
runs = getwandbrun(cfg)

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: [wandb.login()] Using explicit session credentials for https://api.wandb.ai.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33majheshbasnet[0m ([33majheshbasnet-kpriet[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
import os
from tqdm import tqdm
import torch
from torch.amp import autocast, GradScaler

# ------------------- Performance Boost (Safe) -------------------
# NOTE(review): both switches are cuDNN settings written through two different
# spellings (`allow_tf32` and `fp32_precision`); confirm they are relevant for
# this model and that the installed torch accepts mixing the two.
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.conv.fp32_precision = 'tf32'

# ------------------- AMP setup -------------------
scaler = GradScaler(device="cuda")


def _apply_optimizer_step():
    """Consume the accumulated gradients: unscale, clip to norm 1.0, step, reset.

    Shared by the in-loop update and the end-of-epoch flush so the two code paths
    cannot drift apart.
    """
    scaler.unscale_(SFT_OPTIMIZER)
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

    scaler.step(SFT_OPTIMIZER)
    scaler.update()

    # Optional scheduler (only if defined)
    if 'scheduler' in globals():
        scheduler.step()

    SFT_OPTIMIZER.zero_grad(set_to_none=True)


# ------------------- Training Loop -------------------
# Micro-batches seen across ALL epochs. Initialised once, outside the epoch loop,
# so the logged "global-rollouts" axis keeps increasing instead of restarting at
# every epoch.
global_rollouts = 0

for epoch in range(cfg.SFT_EPOCHS):

    epoch_training_loss = 0
    training_step = 0

    # Loss total and micro-batch count of the accumulation window in progress.
    window_loss = 0.0
    window_size = 0

    SFT_OPTIMIZER.zero_grad(set_to_none=True)
    model.train()

    for step, train_batch in enumerate(tqdm(train_dataloader, total=len(train_dataloader))):

        inputs = train_batch['input_ids'].to(cfg.DEVICE)
        outputs = train_batch['target_ids'].to(cfg.DEVICE)

        # ---------------- Forward + Loss (AMP) ----------------
        with autocast(device_type="cuda", dtype=torch.float16):
            output_logits = model(inputs).logits
            loss = criterion(
                output_logits.reshape(-1, output_logits.size(-1)),
                outputs.reshape(-1),
            )

        # Un-divided loss of this micro-batch, used for all bookkeeping.
        batch_loss = loss.item()

        # ---------------- Backward ----------------
        # Divide so the gradients summed over a full window equal their mean.
        scaler.scale(loss / cfg.GRADIENT_ACCUM_STEPS).backward()

        epoch_training_loss += batch_loss
        window_loss += batch_loss
        window_size += 1
        training_step += 1
        global_rollouts += 1

        # ---------------- Gradient Accumulation Step ----------------
        if window_size == cfg.GRADIENT_ACCUM_STEPS:
            _apply_optimizer_step()

            runs.log({
                # Mean over the whole window rather than only its last micro-batch.
                "accum-training-loss": window_loss / window_size,
                "global-rollouts": global_rollouts,
                "epochs": epoch + 1
            })

            window_loss = 0.0
            window_size = 0

        # ---------------- Validation ----------------
        if global_rollouts % cfg.EVAL_EVERY_STEP == 0:
            model.eval()
            with torch.no_grad():

                epoch_valid_loss = 0
                valid_step = 0

                for valid_batch in valid_dataloader:

                    valid_inputs = valid_batch['input_ids'].to(cfg.DEVICE)
                    valid_targets = valid_batch['target_ids'].to(cfg.DEVICE)

                    with autocast(device_type="cuda", dtype=torch.float16):
                        valid_logits = model(valid_inputs).logits
                        valid_loss = criterion(
                            valid_logits.reshape(-1, valid_logits.size(-1)),
                            valid_targets.reshape(-1),
                        )

                    epoch_valid_loss += valid_loss.item()
                    valid_step += 1

                epoch_valid_loss = epoch_valid_loss / valid_step

                runs.log({
                    "training-loss": epoch_training_loss / training_step,
                    "valid-loss": epoch_valid_loss,
                })

            # Gradients accumulated so far are untouched by evaluation.
            model.train()

    # ---------------- Handle Final Partial Accumulation ----------------
    # A non-empty window means some gradients have not been applied yet. Checking
    # the window counter (instead of `step`) also stays valid for an empty loader.
    if window_size > 0:
        _apply_optimizer_step()

    # ---------------- Checkpoint ----------------
    checkpoint = {
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': SFT_OPTIMIZER.state_dict(),
        'scaler_state_dict': scaler.state_dict(),
        # max(..., 1) avoids a division by zero when no batch was processed.
        'loss': epoch_training_loss / max(training_step, 1),
    }

    save_dir = "/content/drive/MyDrive/sft-optimizer"
    os.makedirs(save_dir, exist_ok=True)

    filename = f"checkpoint_epoch_{epoch+1}.pth"
    torch.save(checkpoint, os.path.join(save_dir, filename))


  self.setter(val)
100%|██████████| 9727/9727 [1:48:48<00:00,  1.49it/s]
 29%|██▉       | 2799/9727 [30:16<1:14:56,  1.54it/s]


KeyboardInterrupt: 

In [None]:
# ---------------- Checkpoint ----------------
# Manual save that rebuilds the same checkpoint dict as the training cell, using
# whatever `epoch`, `epoch_training_loss`, `training_step`, `scaler`, `model` and
# `SFT_OPTIMIZER` the kernel currently holds.
# NOTE(review): if this is run after interrupting an epoch part-way through, the
# file is still named and tagged `epoch + 1` as though that epoch had finished -
# confirm this is what a later resume expects.
checkpoint = {
    'epoch': epoch + 1,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': SFT_OPTIMIZER.state_dict(),
    'scaler_state_dict': scaler.state_dict(),
    # Mean micro-batch loss over the batches processed so far in this epoch.
    'loss': epoch_training_loss / training_step,
}

save_dir = "/content/drive/MyDrive/sft-optimizer"
os.makedirs(save_dir, exist_ok=True)

filename = f"checkpoint_epoch_{epoch+1}.pth"
torch.save(checkpoint, os.path.join(save_dir, filename))

In [None]:
# NOTE(review): `rl_dataset_valid` is not defined in any cell visible above, so
# this fails on a fresh kernel - confirm where it is supposed to come from.
p = rl_dataset_valid['prompt'][2]

# Tokenise once and take both tensors from the same encoding.
encoded_prompt = tokenizer(p, return_tensors = 'pt')
ids = encoded_prompt['input_ids'].to(cfg.DEVICE)
attn = encoded_prompt['attention_mask'].to(cfg.DEVICE)

In [None]:
# The training loop leaves the model in train mode; switch to eval so dropout is
# not active while sampling.
model.eval()

# Pass the attention mask and an explicit pad id. Both were missing, which is
# what triggered the generation warnings; EOS is the id generate() was already
# falling back to, so the padding behaviour itself is unchanged.
o_ids = model.generate(
    ids,
    attention_mask = attn,
    pad_token_id = tokenizer.eos_token_id,
    max_new_tokens = 500,
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:
# `o_ids` is a batch holding one sequence: select the sequence first, then decode
# it. Decoding the whole batch and indexing the result afterwards only works when
# `decode` returns a list; if it returns a string, `[0]` yields a single character.
print(tokenizer.decode(o_ids[0]))

SUBREDDIT: r/relationships
TITLE: Me [00 M/F] with my ___ [00 M/F] duration, short-description
POST: So my boyfriend and I met online about a year and a half ago. For half a year we skyped everyday and got to know each other. He flew out to meet my family before I decided to move in with him in a different state. Things went really well. We fought a lot in the beginning of our relationship but we grew a lot together and fell deeply in love. However, while we became too comfortable with each other, our financial situation has been declining in a bad way. We decided to call some family for a little extra help. My boyfriend spoke to his grandmother because they're really close. Her reaction was a bit different than expected, she told us if we wanted her help, my boyfriend would have to go back to his home state and prove himself in a sense. And that left me with no choice but to do the same thing and go back to my hometown. Now I feel like we were just hitting the peak of our relationship