In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import nvidia_smi

def get_gpu_allocated_mem():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    return info.free/info.total

get_gpu_allocated_mem()

0.9901298522949219

In [3]:
import pickle as pkl
import itertools
from pathlib import Path
from typing import NamedTuple, OrderedDict
import numpy as np
import torch
from ict_2 import ICT
from tqdm.notebook import tqdm, trange
from copy import deepcopy
import os
from data_loader import DataLoader
import random

current_dir = Path(os.path.join(os.path.abspath(".")))

cv_split = pkl.load(
    open(current_dir / "data_biclfs" / "cross_validation_splits.pkl", "rb")
)
training_data = pkl.load(
    open(current_dir / "data_biclfs" / "training_data_templated.pkl", "rb")
)
verbalizers = ["no", "yes"]

fold_0 = cv_split[0]

task2verbalizers = {task: verbalizers for task in fold_0["train"]}
task2template_examples = {task: training_data[task] for task in fold_0["train"]}

In [4]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

In [5]:
# Training data creation
bsz = 8
# Create dataloader
data_loader = DataLoader(
    tokenizer=tokenizer,
    task_format="clm",
    task2verbalizers=task2verbalizers,
    example_delimiter=" ",
    device="cuda",
)
# Prepare the training examples
epoch_train_examples = []  # List of training batches
for task in tqdm(task2template_examples, desc="Preparing training examples."):
    # Loop by task so batches only contain examples of the same task
    training_examples = []
    examples = task2template_examples[task]
    for query_example in examples:
        # Remove the random selection of templates
        input_text = data_loader.prepare_input(
            task,
            query_example,
            examples,
            5,
            True,
            template=None,
        )
        # NOTE: Current implementation filters out completely the
        # examples that exceed model's maximum token length, will
        # probably need to change this so that we truncate rather than
        # filter out the example entirely
        convert_label_to_string = lambda x: "no" if x == 0 else "yes"
        if (
            len(tokenizer(input_text)["input_ids"]) <= tokenizer.model_max_length
        ):
            training_examples.append(
                (task, input_text, convert_label_to_string(query_example["<label>"]))
            )
    # Divide model_examples into batches, so that each batch has all examples from the same task
    # Shuffle within batch examples
    random.shuffle(training_examples)
    for idx in range(0, len(training_examples), bsz):
        epoch_train_examples.append(training_examples[idx : idx + bsz])

Preparing training examples.:   0%|          | 0/18 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1033 > 1024). Running this sequence through the model will result in indexing errors


In [6]:
epoch_train_examples[0][0]

('group3_2019SemEval5TweetEvalHate',
 '@user @user @user Expose the Corruption #DeclassifyFISA #MAGA #AmericaFirst #KAG #BuildThatWall  Is this tweet offensive towards women or immigrants? no #Buildthatwall #Deportthemall @user More Needless Deaths at the Hands of an Illegal Alien, and an Oregon Ballot Initiative  Is there hate speech towards women or immigrants in the tweet? yes @user OK BITCH WELL ILL LEFT SWIPE YOU AGAIN HOE BYEEEEE ‼️  Does the tweet contain hate speech towards women or immigrants? ',
 'yes')

In [6]:
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("gpt2-medium")
model.to("cuda");

In [7]:
# Split weights in two groups, one with weight decay and the other not.
no_decay = ["bias", "layer_norm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=1e-6)

In [8]:
from torch.nn.functional import cross_entropy

In [9]:
# small training loop
for epoch in range(1):
    for batch in tqdm(epoch_train_examples):
        optimizer.zero_grad()
        input_dict = tokenizer(
            [example[1] for example in batch],
            padding=True,
            return_tensors="pt",).to("cuda")
        labels = torch.tensor(tokenizer.convert_tokens_to_ids([example[2] for example in batch]), dtype=torch.int64).to("cuda")
        outputs = model(**input_dict)
        logits_last_token = outputs.logits[:, -1, :]
        loss = cross_entropy(logits_last_token, labels)
        loss.backward()
        optimizer.step()
        print(get_gpu_allocated_mem())

  0%|          | 0/298 [00:00<?, ?it/s]

0.9081672668457031
0.8373664855957031
0.7695198059082031
0.7695198059082031
0.7668830871582031
0.7668830871582031
0.7668830871582031
0.33883132934570315
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.3018684387207031
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0.28341140747070315
0