In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import transformers
import torch
from torch.utils.data import DataLoader, Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name =  "tiiuae/falcon-40b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto", trust_remote_code=True)

Loading checkpoint shards: 100%|██████████| 9/9 [01:13<00:00,  8.16s/it]


In [3]:
from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [4]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [5]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 16711680 || all params: 20935688192 || trainable%: 0.07982388659373477


# Data

In [6]:
from datasets import load_dataset

train_data = load_dataset("json", data_files="../Dataset/data_train.json")
test_data = load_dataset("json", data_files="../Dataset/data_test.json")
# data = load_dataset("truthful_qa", "generation")

In [7]:
train_data

DatasetDict({
    train: Dataset({
        features: ['index', 'question', 'answer'],
        num_rows: 177
    })
})

In [8]:
test_data['train']

Dataset({
    features: ['index', 'question', 'answer'],
    num_rows: 43
})

In [9]:
train_dataset = train_data['train'].map(lambda x: {"input_text": x['question']  + "\n" + x['answer']})
tokenizer.pad_token = tokenizer.eos_token

In [10]:
train_encodings = tokenizer(train_dataset['input_text'], truncation=True, padding=True, max_length=256, return_tensors='pt')

In [11]:
class TextDataset(Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = item["input_ids"].clone()
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

In [12]:
train_dataset = TextDataset(train_encodings)

In [13]:
def generate(index):

  example_text = test_data['train'][index]['question']
  correct_answer = test_data['train'][index]['answer']
  tokenizer.pad_token = tokenizer.eos_token

  print("Question:")
  print(example_text)

  encoding = tokenizer(example_text, return_tensors="pt").to("cuda:0")
  output = model.generate(input_ids=encoding.input_ids, attention_mask=encoding.attention_mask, max_new_tokens=100, do_sample=True, temperature=0.000001, eos_token_id=tokenizer.eos_token_id, top_k = 0)
  print("Answer:")
  print(tokenizer.decode(output[0], skip_special_tokens=True))

  print("Best Answer:")
  print(correct_answer)

  print()

In [14]:
index = 0
test_data['train'][index]['question']

'Which mode of transport is advisable for traveling during traffic hours?'

In [15]:
generate(index)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Question:
Which mode of transport is advisable for traveling during traffic hours?
Answer:
Which mode of transport is advisable for traveling during traffic hours?
I am planning to travel from Delhi to Gurgaon during traffic hours. Which mode of transport is advisable for traveling during traffic hours?
I am planning to travel from Delhi to Gurgaon during traffic hours. Which mode of transport is advisable for traveling during traffic hours?
Hi,
I think the best mode of transport is metro. It is fast and comfortable.
You can also take a bus.
You can also take a taxi.
You can also take a auto.
You
Best Answer:
The Delhi Metro is a reliable option for traveling during rush hours, as it's not affected by road traffic. Auto-rickshaws and taxis might get stuck in traffic.



In [16]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_dataset,
    # eval_dataset=val_dataset,
    args=transformers.TrainingArguments(
        num_train_epochs=40,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=4,
        warmup_ratio=0.05,
        # max_steps=100,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs-40b",
        optim="paged_adamw_8bit",
        lr_scheduler_type='cosine',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,2.0662
2,2.0586
3,2.0819
4,2.0966
5,2.0398
6,1.9175
7,2.0733
8,1.9915
9,1.9727
10,1.8673


TrainOutput(global_step=120, training_loss=0.6290167532861233, metrics={'train_runtime': 2643.6812, 'train_samples_per_second': 2.678, 'train_steps_per_second': 0.045, 'total_flos': 6.760415470288896e+16, 'train_loss': 0.6290167532861233, 'epoch': 40.0})

In [17]:
model.save_pretrained("./outputs-40b")

In [18]:

generate(index)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Question:
Which mode of transport is advisable for traveling during traffic hours?




Answer:
Which mode of transport is advisable for traveling during traffic hours?
When traveling during traffic hours, it's advisable to opt for the Delhi Metro. It is a reliable and comfortable choice as it operates independently of road traffic. Auto-rickshaws and taxis may experience delays due to congestion.
Best Answer:
The Delhi Metro is a reliable option for traveling during rush hours, as it's not affected by road traffic. Auto-rickshaws and taxis might get stuck in traffic.

