In [None]:
!pip install -q peft transformers datasets

In [None]:
!pip uninstall -y pyarrow
!pip install pyarrow

Found existing installation: pyarrow 18.0.0
Uninstalling pyarrow-18.0.0:
  Successfully uninstalled pyarrow-18.0.0
Collecting pyarrow
  Using cached pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Using cached pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.0 MB)
Installing collected packages: pyarrow
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.10.1 requires pyarrow<18.0.0a0,>=14.0.0, but you have pyarrow 18.0.0 which is incompatible.
pylibcudf-cu12 24.10.1 requires pyarrow<18.0.0a0,>=14.0.0, but you have pyarrow 18.0.0 which is incompatible.[0m[31m
[0mSuccessfully installed pyarrow-18.0.0


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from tqdm import tqdm

# --- Run configuration --------------------------------------------------------
# Prefer the GPU, but fall back to CPU so the notebook still runs (slowly)
# instead of failing at the first `.to(device)` on a machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed torch's global RNG so the shuffled training batches are repeatable.
seed = 42
torch.manual_seed(seed)

model_name_or_path = 'bigscience/bloomz-560m'
tokenizer_name_or_path = 'bigscience/bloomz-560m'

# Prompt-tuning adapter: 8 virtual tokens, initialised from the text below
# (PromptTuningInit.TEXT). The tokenizer used for that initialisation is named
# by `tokenizer_name_or_path`, which was previously defined but never used.
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    prompt_tuning_init_text='Classify if the tweet is a complaint or not:',
    tokenizer_name_or_path=tokenizer_name_or_path,
)

# Dataset configuration and a file-system-safe checkpoint name ("/" -> "_").
dataset_name = 'twitter_complaints'
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)

# Column names: `text_column` is the raw tweet, `label_column` the class name as text.
text_column = "Tweet text"
label_column = 'text_label'

# Training hyper-parameters.
max_length = 64   # every tokenized example is padded/truncated to this many tokens
lr = 3e-2
num_epochs = 50
batch_size = 8


In [None]:
# Fetch the 'ought/raft' dataset, selecting the configuration named by `dataset_name`.
dataset = load_dataset(path="ought/raft", name=dataset_name)

# Show the first training record as the cell output.
dataset["train"][0]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


{'Tweet text': '@HMRCcustomers No this is my first job', 'ID': 0, 'Label': 2}

In [None]:
# Readable class names: underscores in the `Label` feature names become spaces.
classes = [name.replace("_", " ") for name in dataset["train"].features["Label"].names]


def _attach_text_label(batch):
    """Return a `text_label` column holding the class name for each `Label` id in the batch."""
    return {"text_label": [classes[label_id] for label_id in batch["Label"]]}


# Add the textual label to every split, then show the first training record again.
dataset = dataset.map(_attach_text_label, batched=True, num_proc=1)
dataset["train"][0]

{'Tweet text': '@HMRCcustomers No this is my first job',
 'ID': 0,
 'Label': 2,
 'text_label': 'no complaint'}

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Fall back to the EOS id for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Token count of each class name; the largest one is reported.
label_token_counts = [len(tokenizer(class_label)["input_ids"]) for class_label in classes]
target_max_length = max(label_token_counts)
print(target_max_length)

3


In [None]:
def preprocess_function(examples):
    """Tokenize a batch into fixed-length causal-LM training examples.

    Each example becomes ``"<text_column> : <tweet> Label : "`` followed by the
    tokenized label and one ``tokenizer.pad_token_id`` as end marker. Sequences
    are left-padded to ``max_length`` and then cut to the first ``max_length``
    tokens.

    Args:
        examples: batched mapping with the ``text_column`` and ``label_column``
            columns (lists of equal length).

    Returns:
        The tokenizer output for the prompts, where ``input_ids``,
        ``attention_mask`` and the added ``labels`` hold one tensor per example.
        Label positions covering the prompt and the padding are ``-100``.
    """
    prompts = [f"{text_column} : {tweet} Label : " for tweet in examples[text_column]]
    answers = [str(label) for label in examples[label_column]]
    model_inputs = tokenizer(prompts)
    labels = tokenizer(answers)

    for idx in range(len(examples[text_column])):
        prompt_ids = model_inputs["input_ids"][idx]
        # The label tokens are terminated with the pad id.
        answer_ids = labels["input_ids"][idx] + [tokenizer.pad_token_id]

        full_ids = prompt_ids + answer_ids
        full_mask = [1] * len(full_ids)
        # Only the answer part contributes labels; the prompt is masked with -100.
        full_labels = [-100] * len(prompt_ids) + answer_ids

        # Left padding; a non-positive count yields an empty list, so over-long
        # sequences are left unpadded and only truncated below.
        pad_count = max_length - len(full_ids)
        padded_ids = [tokenizer.pad_token_id] * pad_count + full_ids
        padded_mask = [0] * pad_count + full_mask
        padded_labels = [-100] * pad_count + full_labels

        model_inputs["input_ids"][idx] = torch.tensor(padded_ids[:max_length])
        model_inputs["attention_mask"][idx] = torch.tensor(padded_mask[:max_length])
        labels["input_ids"][idx] = torch.tensor(padded_labels[:max_length])

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [None]:
# Tokenize every split. The original columns are removed so that only the
# fields returned by `preprocess_function` remain.
raw_columns = dataset["train"].column_names
processed_datasets = dataset.map(
    preprocess_function,
    desc="Running tokenizer on dataset",
    remove_columns=raw_columns,
    batched=True,
    num_proc=1,
    load_from_cache_file=False,  # recompute instead of reusing a cached result
)

Running tokenizer on dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/3399 [00:00<?, ? examples/s]

In [None]:
train_dataset = processed_datasets["train"]
# NOTE(review): in the recorded training log the eval loss climbs while the
# train loss falls; check whether the `test` split carries real labels — TODO confirm.
eval_dataset = processed_datasets["test"]

# Settings shared by both loaders.
loader_kwargs = dict(
    collate_fn=default_data_collator,
    batch_size=batch_size,
    pin_memory=True,
)

# Only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
eval_dataloader = DataLoader(eval_dataset, **loader_kwargs)

In [None]:
# Load the base causal LM and wrap it with the prompt-tuning adapter config.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)

# `print_trainable_parameters()` writes the summary itself and returns None;
# wrapping it in print() only added a stray "None" line to the cell output.
model.print_trainable_parameters()

trainable params: 8,192 || all params: 559,222,784 || trainable%: 0.0015
None


In [None]:
# AdamW over the model's parameters with the configured learning rate.
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

# The schedule spans every optimizer step of the run (batches per epoch x epochs)
# and uses zero warm-up steps.
total_training_steps = len(train_dataloader) * num_epochs
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)

In [None]:
model = model.to(device)

for epoch in range(num_epochs):
    # ---- training pass: one optimizer and scheduler step per batch ----
    model.train()
    total_loss = 0
    for batch in tqdm(train_dataloader):
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    # ---- evaluation pass: accumulate the loss and decode greedy predictions ----
    model.eval()
    eval_loss = 0
    eval_preds = []
    with torch.no_grad():
        for batch in tqdm(eval_dataloader):
            batch = {name: tensor.to(device) for name, tensor in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            eval_loss += loss.detach().float()
            predicted_ids = torch.argmax(outputs.logits, -1).detach().cpu().numpy()
            eval_preds.extend(tokenizer.batch_decode(predicted_ids, skip_special_tokens=True))

    # Per-epoch means and the matching perplexities. The names are kept because
    # the `{name=}` specifiers below print them verbatim.
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)
    eval_epoch_loss = eval_loss / len(eval_dataloader)
    eval_ppl = torch.exp(eval_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

100%|██████████| 7/7 [00:04<00:00,  1.43it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=0: train_ppl=tensor(1.3732e+12, device='cuda:0') train_epoch_loss=tensor(27.9482, device='cuda:0') eval_ppl=tensor(3402.5791, device='cuda:0') eval_epoch_loss=tensor(8.1323, device='cuda:0')


100%|██████████| 7/7 [00:03<00:00,  2.30it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=1: train_ppl=tensor(2110.2556, device='cuda:0') train_epoch_loss=tensor(7.6546, device='cuda:0') eval_ppl=tensor(2798.1658, device='cuda:0') eval_epoch_loss=tensor(7.9367, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=2: train_ppl=tensor(415.2189, device='cuda:0') train_epoch_loss=tensor(6.0288, device='cuda:0') eval_ppl=tensor(4911.8354, device='cuda:0') eval_epoch_loss=tensor(8.4994, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.49it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=3: train_ppl=tensor(248.2330, device='cuda:0') train_epoch_loss=tensor(5.5144, device='cuda:0') eval_ppl=tensor(8678.3223, device='cuda:0') eval_epoch_loss=tensor(9.0686, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=4: train_ppl=tensor(184.0959, device='cuda:0') train_epoch_loss=tensor(5.2155, device='cuda:0') eval_ppl=tensor(10735.7529, device='cuda:0') eval_epoch_loss=tensor(9.2813, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=5: train_ppl=tensor(128.0800, device='cuda:0') train_epoch_loss=tensor(4.8527, device='cuda:0') eval_ppl=tensor(13294.8936, device='cuda:0') eval_epoch_loss=tensor(9.4951, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:39<00:00,  4.29it/s]


epoch=6: train_ppl=tensor(110.9115, device='cuda:0') train_epoch_loss=tensor(4.7087, device='cuda:0') eval_ppl=tensor(27953.1445, device='cuda:0') eval_epoch_loss=tensor(10.2383, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.46it/s]
100%|██████████| 425/425 [01:39<00:00,  4.28it/s]


epoch=7: train_ppl=tensor(82.4548, device='cuda:0') train_epoch_loss=tensor(4.4123, device='cuda:0') eval_ppl=tensor(28383.8418, device='cuda:0') eval_epoch_loss=tensor(10.2536, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.32it/s]


epoch=8: train_ppl=tensor(69.0298, device='cuda:0') train_epoch_loss=tensor(4.2345, device='cuda:0') eval_ppl=tensor(31124.0625, device='cuda:0') eval_epoch_loss=tensor(10.3457, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.32it/s]


epoch=9: train_ppl=tensor(59.4837, device='cuda:0') train_epoch_loss=tensor(4.0857, device='cuda:0') eval_ppl=tensor(42453.1992, device='cuda:0') eval_epoch_loss=tensor(10.6562, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=10: train_ppl=tensor(52.8376, device='cuda:0') train_epoch_loss=tensor(3.9672, device='cuda:0') eval_ppl=tensor(42010.2539, device='cuda:0') eval_epoch_loss=tensor(10.6457, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=11: train_ppl=tensor(38.9068, device='cuda:0') train_epoch_loss=tensor(3.6612, device='cuda:0') eval_ppl=tensor(48029.4062, device='cuda:0') eval_epoch_loss=tensor(10.7796, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=12: train_ppl=tensor(30.5552, device='cuda:0') train_epoch_loss=tensor(3.4195, device='cuda:0') eval_ppl=tensor(68640.1484, device='cuda:0') eval_epoch_loss=tensor(11.1366, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=13: train_ppl=tensor(24.4166, device='cuda:0') train_epoch_loss=tensor(3.1953, device='cuda:0') eval_ppl=tensor(113551.5078, device='cuda:0') eval_epoch_loss=tensor(11.6400, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=14: train_ppl=tensor(19.0863, device='cuda:0') train_epoch_loss=tensor(2.9490, device='cuda:0') eval_ppl=tensor(161986.5938, device='cuda:0') eval_epoch_loss=tensor(11.9953, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=15: train_ppl=tensor(15.5049, device='cuda:0') train_epoch_loss=tensor(2.7412, device='cuda:0') eval_ppl=tensor(105544.3906, device='cuda:0') eval_epoch_loss=tensor(11.5669, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=16: train_ppl=tensor(11.1424, device='cuda:0') train_epoch_loss=tensor(2.4108, device='cuda:0') eval_ppl=tensor(199751.0781, device='cuda:0') eval_epoch_loss=tensor(12.2048, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=17: train_ppl=tensor(7.4636, device='cuda:0') train_epoch_loss=tensor(2.0100, device='cuda:0') eval_ppl=tensor(378719.4688, device='cuda:0') eval_epoch_loss=tensor(12.8446, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=18: train_ppl=tensor(5.9914, device='cuda:0') train_epoch_loss=tensor(1.7903, device='cuda:0') eval_ppl=tensor(443254.2188, device='cuda:0') eval_epoch_loss=tensor(13.0019, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=19: train_ppl=tensor(4.7542, device='cuda:0') train_epoch_loss=tensor(1.5590, device='cuda:0') eval_ppl=tensor(599521.0625, device='cuda:0') eval_epoch_loss=tensor(13.3039, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.32it/s]


epoch=20: train_ppl=tensor(4.6777, device='cuda:0') train_epoch_loss=tensor(1.5428, device='cuda:0') eval_ppl=tensor(224092.3906, device='cuda:0') eval_epoch_loss=tensor(12.3198, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.44it/s]
100%|██████████| 425/425 [01:39<00:00,  4.28it/s]


epoch=21: train_ppl=tensor(3.9125, device='cuda:0') train_epoch_loss=tensor(1.3642, device='cuda:0') eval_ppl=tensor(161445.2656, device='cuda:0') eval_epoch_loss=tensor(11.9919, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.37it/s]
100%|██████████| 425/425 [01:38<00:00,  4.31it/s]


epoch=22: train_ppl=tensor(3.1868, device='cuda:0') train_epoch_loss=tensor(1.1590, device='cuda:0') eval_ppl=tensor(592735.9375, device='cuda:0') eval_epoch_loss=tensor(13.2925, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=23: train_ppl=tensor(2.6055, device='cuda:0') train_epoch_loss=tensor(0.9576, device='cuda:0') eval_ppl=tensor(397145.6250, device='cuda:0') eval_epoch_loss=tensor(12.8921, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=24: train_ppl=tensor(2.3612, device='cuda:0') train_epoch_loss=tensor(0.8592, device='cuda:0') eval_ppl=tensor(844555.2500, device='cuda:0') eval_epoch_loss=tensor(13.6466, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=25: train_ppl=tensor(2.1882, device='cuda:0') train_epoch_loss=tensor(0.7831, device='cuda:0') eval_ppl=tensor(613018.8125, device='cuda:0') eval_epoch_loss=tensor(13.3262, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.34it/s]


epoch=26: train_ppl=tensor(1.8412, device='cuda:0') train_epoch_loss=tensor(0.6104, device='cuda:0') eval_ppl=tensor(1063957.7500, device='cuda:0') eval_epoch_loss=tensor(13.8775, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=27: train_ppl=tensor(1.7026, device='cuda:0') train_epoch_loss=tensor(0.5322, device='cuda:0') eval_ppl=tensor(1280779.8750, device='cuda:0') eval_epoch_loss=tensor(14.0630, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=28: train_ppl=tensor(1.5728, device='cuda:0') train_epoch_loss=tensor(0.4529, device='cuda:0') eval_ppl=tensor(1533256.8750, device='cuda:0') eval_epoch_loss=tensor(14.2429, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=29: train_ppl=tensor(1.4603, device='cuda:0') train_epoch_loss=tensor(0.3786, device='cuda:0') eval_ppl=tensor(3328410.5000, device='cuda:0') eval_epoch_loss=tensor(15.0180, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=30: train_ppl=tensor(1.4788, device='cuda:0') train_epoch_loss=tensor(0.3912, device='cuda:0') eval_ppl=tensor(2591343., device='cuda:0') eval_epoch_loss=tensor(14.7677, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.34it/s]


epoch=31: train_ppl=tensor(1.4099, device='cuda:0') train_epoch_loss=tensor(0.3435, device='cuda:0') eval_ppl=tensor(2884988.5000, device='cuda:0') eval_epoch_loss=tensor(14.8750, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=32: train_ppl=tensor(1.3759, device='cuda:0') train_epoch_loss=tensor(0.3191, device='cuda:0') eval_ppl=tensor(4071105.7500, device='cuda:0') eval_epoch_loss=tensor(15.2194, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=33: train_ppl=tensor(1.3399, device='cuda:0') train_epoch_loss=tensor(0.2926, device='cuda:0') eval_ppl=tensor(3922001.5000, device='cuda:0') eval_epoch_loss=tensor(15.1821, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=34: train_ppl=tensor(1.5076, device='cuda:0') train_epoch_loss=tensor(0.4105, device='cuda:0') eval_ppl=tensor(3097568.2500, device='cuda:0') eval_epoch_loss=tensor(14.9461, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.46it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=35: train_ppl=tensor(1.6647, device='cuda:0') train_epoch_loss=tensor(0.5097, device='cuda:0') eval_ppl=tensor(1544031.6250, device='cuda:0') eval_epoch_loss=tensor(14.2499, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=36: train_ppl=tensor(1.5115, device='cuda:0') train_epoch_loss=tensor(0.4131, device='cuda:0') eval_ppl=tensor(1086939.1250, device='cuda:0') eval_epoch_loss=tensor(13.8989, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.34it/s]


epoch=37: train_ppl=tensor(1.4888, device='cuda:0') train_epoch_loss=tensor(0.3980, device='cuda:0') eval_ppl=tensor(1409663., device='cuda:0') eval_epoch_loss=tensor(14.1589, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:37<00:00,  4.35it/s]


epoch=38: train_ppl=tensor(1.3815, device='cuda:0') train_epoch_loss=tensor(0.3232, device='cuda:0') eval_ppl=tensor(1449814.1250, device='cuda:0') eval_epoch_loss=tensor(14.1869, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:37<00:00,  4.34it/s]


epoch=39: train_ppl=tensor(1.3178, device='cuda:0') train_epoch_loss=tensor(0.2759, device='cuda:0') eval_ppl=tensor(2068494.7500, device='cuda:0') eval_epoch_loss=tensor(14.5423, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=40: train_ppl=tensor(1.2857, device='cuda:0') train_epoch_loss=tensor(0.2513, device='cuda:0') eval_ppl=tensor(3409129., device='cuda:0') eval_epoch_loss=tensor(15.0420, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=41: train_ppl=tensor(1.2630, device='cuda:0') train_epoch_loss=tensor(0.2335, device='cuda:0') eval_ppl=tensor(3709952.7500, device='cuda:0') eval_epoch_loss=tensor(15.1265, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=42: train_ppl=tensor(1.2846, device='cuda:0') train_epoch_loss=tensor(0.2505, device='cuda:0') eval_ppl=tensor(4010309.7500, device='cuda:0') eval_epoch_loss=tensor(15.2044, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=43: train_ppl=tensor(1.2623, device='cuda:0') train_epoch_loss=tensor(0.2329, device='cuda:0') eval_ppl=tensor(3640236.7500, device='cuda:0') eval_epoch_loss=tensor(15.1076, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=44: train_ppl=tensor(1.2723, device='cuda:0') train_epoch_loss=tensor(0.2409, device='cuda:0') eval_ppl=tensor(3970288.5000, device='cuda:0') eval_epoch_loss=tensor(15.1943, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=45: train_ppl=tensor(1.2713, device='cuda:0') train_epoch_loss=tensor(0.2400, device='cuda:0') eval_ppl=tensor(4051550., device='cuda:0') eval_epoch_loss=tensor(15.2146, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=46: train_ppl=tensor(1.2548, device='cuda:0') train_epoch_loss=tensor(0.2270, device='cuda:0') eval_ppl=tensor(4614658.5000, device='cuda:0') eval_epoch_loss=tensor(15.3447, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=47: train_ppl=tensor(1.2520, device='cuda:0') train_epoch_loss=tensor(0.2247, device='cuda:0') eval_ppl=tensor(4566554.5000, device='cuda:0') eval_epoch_loss=tensor(15.3343, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]


epoch=48: train_ppl=tensor(1.2454, device='cuda:0') train_epoch_loss=tensor(0.2194, device='cuda:0') eval_ppl=tensor(4432684., device='cuda:0') eval_epoch_loss=tensor(15.3045, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.47it/s]
100%|██████████| 425/425 [01:38<00:00,  4.33it/s]

epoch=49: train_ppl=tensor(1.2349, device='cuda:0') train_epoch_loss=tensor(0.2110, device='cuda:0') eval_ppl=tensor(4498810., device='cuda:0') eval_epoch_loss=tensor(15.3193, device='cuda:0')





In [None]:
from huggingface_hub import notebook_login

# Show the interactive Hugging Face login widget in the notebook.
# Presumably the stored credential is what the later upload authenticates with — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Hub repository that receives the trained prompt-tuning adapter.
peft_model_id = 'hang1n/bloomz-560m_PROMPT_TUNING_CAUSAL_LM'

# Pass the variable instead of repeating the repository literal, and use
# `token=True` (the locally stored login token) in place of the deprecated
# `use_auth_token=True`.
model.push_to_hub(peft_model_id, token=True)



adapter_model.safetensors:   0%|          | 0.00/32.9k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/hang1n/bloomz-560m_PROMPT_TUNING_CAUSAL_LM/commit/cec0f6cf69dab7099270f1876e02e6be6709ac00', commit_message='Upload model', commit_description='', oid='cec0f6cf69dab7099270f1876e02e6be6709ac00', pr_url=None, repo_url=RepoUrl('https://huggingface.co/hang1n/bloomz-560m_PROMPT_TUNING_CAUSAL_LM', endpoint='https://huggingface.co', repo_type='model', repo_id='hang1n/bloomz-560m_PROMPT_TUNING_CAUSAL_LM'), pr_revision=None, pr_num=None)

In [None]:
from peft import PeftModel, PeftConfig

# Adapter repository to load back for inference.
peft_model_id = 'hang1n/bloomz-560m_PROMPT_TUNING_CAUSAL_LM'

# The adapter config records which base model it belongs to; load that base
# model first, then wrap it with the adapter. Note that `model` is rebound
# here, replacing the model object that was trained above.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)

adapter_config.json:   0%|          | 0.00/510 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/32.9k [00:00<?, ?B/s]

In [None]:
# Build the prompt in the same "<text_column> : <tweet> Label : " layout that
# preprocess_function uses, and tokenize it into PyTorch tensors.
sample_tweet = "@nationalgridus I have no water and the bill is current and paid. Can you do something about this?"
prompt = f"{text_column} : {sample_tweet} Label : "
inputs = tokenizer(prompt, return_tensors="pt")

In [None]:
model.to(device)
with torch.no_grad():
    # Move the tokenized prompt to the same device as the model.
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=10,
        # preprocess_function appends `tokenizer.pad_token_id` after every label
        # as the end marker, so stop on that id rather than the hard-coded 3.
        eos_token_id=tokenizer.pad_token_id,
    )
    # Decode prompt + generated continuation, dropping special tokens.
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

['Tweet text : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : complaint']
