In [9]:
!pip install -q datasets transformers scikit-learn wandb bitsandbytes accelerate>=0.26.0

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [1]:
import numpy as np
import os
import transformers
import itertools
import pandas as pd
import math
from transformers import GPTNeoXForCausalLM, AutoTokenizer
from transformers import (
    set_seed,
)
from transformers import DataCollatorForLanguageModeling,DataCollatorWithPadding
from transformers import AutoModelForCausalLM
from sklearn.metrics import accuracy_score
import wandb
import pickle
import string
from datasets import Dataset, DatasetDict, load_dataset
import torch
import torch.nn.functional as F
import logging
import numpy as np
import string
import time



In [2]:
SEED = 42
PRE_TRAINING_CHECKPOINT = 'step143000'

MODEL_SIZE = '70m'
MODEL_NAME = "srinathmkce/indoml_100k_llama_updated_dataset_epoch2"
set_seed(SEED)
device = torch.device("cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
logger = logging.getLogger(__name__)
tokenizer.pad_token = tokenizer.eos_token

In [3]:
def generate_non_random_strings(seed):

    sequences = []
    seq= []
    alllines = ''
    rng = np.random.default_rng(seed)
    sequences = ["SYS: Hello, I am the customer support bot. What can I do for you? USR: Hello robot. I ordered a pot several days ago but I can't track it. SYS: Could you verify your full name? USR: Patrick Schug SYS: Verify your order number please. USR: It's 843-58572-7002. SYS: You can track your package with your tracking number, which is AGZIM5T6KL. Are you happy about my answer? USR: All good. See you. SYS: Have a nice day! Bye."]
    tseq = ["SYS: Hello, I am the customer support bot. What can I do for you? USR: Hi. I ordered a mobile phone several days ago but I can't track it. SYS: May I have your full name? USR: James Salim. SYS: Verify your phone number please. USR: 980.322.8737 is my number. SYS: Track your order using your tracking number, 0UOKFRS1GA. Anything else? USR: No more questions. See you. SYS: Bye."]
    dataset = Dataset.from_dict(
        {
            "text": sequences,
        }
    )
    test_dataset = Dataset.from_dict(
        {
            "text": tseq
        }
    )
    datasets = DatasetDict(
        {
            "train": dataset,
            "test": test_dataset
        }
    )
    datasets.set_format("torch")
    return datasets

In [4]:
def tokenize_string(tokenizer,dataset):
    def encode(example: dict):
        sequences = example["text"]
        return tokenizer(sequences,truncation=True)

    return dataset.map(
        encode,
        batched=True,
    )

In [5]:
dataset = generate_non_random_strings(seed=42)
encoded_dataset = tokenize_string(tokenizer, dataset)
training_dataset = encoded_dataset.remove_columns(["text"])
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
train_loader = torch.utils.data.DataLoader(training_dataset["train"], shuffle=True, batch_size=1, collate_fn=data_collator)
test_loader = torch.utils.data.DataLoader(training_dataset["test"], shuffle=True, batch_size=1, collate_fn=data_collator)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, load_in_4bit=True)
# model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, load_in_4bit=True, device_map="auto")

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
model.to(device)
model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.002)

You shouldn't move a model that is dispatched using accelerate hooks.


In [7]:
from torch.profiler import profile, record_function, ProfilerActivity

In [8]:
for epoch in range(1):
  with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],with_flops=True) as prof:
    for idx, batch in enumerate(train_loader):
      batch = batch.to(device)

      inputs = {'input_ids': batch['input_ids'],'attention_mask': batch['attention_mask'],'labels': batch['labels']}
      #print(inputs)
      outputs = model(**inputs) # output = loss, logits, past_key_values
      print("Number of model parameters that are used for training")
      print(sum(p.numel() for p in model.parameters()))
      #print(outputs)
      loss = outputs.loss
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
  print(prof.key_averages().table(sort_by="flops",row_limit=10))
  print("GFLOPs during training") #GigaFLOPs
  print(sum(k.flops for k in prof.key_averages())/1e9)


STAGE:2024-11-02 09:16:40 1341:1341 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


Number of model parameters that are used for training
4540600320


STAGE:2024-11-02 09:16:41 1341:1341 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2024-11-02 09:16:41 1341:1341 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  Total MFLOPs  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::mm         1.41%      10.375ms        15.06%     111.063ms     246.259us     187.598ms        35.66%     206.006ms     456.776us           451   3821524.746  
                                              aten::bmm         1.28%       9.452ms        10.57%      77.941ms     403.839us       1.919ms         0.3

In [9]:
model.eval()
start = time.time()
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],with_flops=True) as prof:
    for idx, batch in enumerate(test_loader):
      batch = batch.to(device)
      inputs = {'input_ids': batch['input_ids'],'attention_mask': batch['attention_mask'],'labels': batch['labels']}
      #print(inputs)
      outputs = model(**inputs) # output = loss, logits, past_key_values
print("Inference time :"+str(time.time()-start))
#print(prof.key_averages().table(sort_by="flops",row_limit=10))
print("GFLOPs during testing") #GigaFLOPs
print(sum(k.flops for k in prof.key_averages())/1e9)

STAGE:2024-11-02 09:18:27 1341:1341 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2024-11-02 09:18:27 1341:1341 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2024-11-02 09:18:27 1341:1341 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


Inference time :1.7808001041412354
GFLOPs during testing
1702.927932434
