# Install

In [1]:
# https://stackoverflow.com/questions/56081324/why-are-google-colab-shell-commands-not-working
import locale
_FORCED_ENCODING = "UTF-8"


def getpreferredencoding(do_setlocale: bool = True) -> str:
    """Report UTF-8 as the preferred encoding, whatever the system locale says.

    ``do_setlocale`` is accepted only so the signature matches the function
    this replaces; its value is ignored.
    """
    return _FORCED_ENCODING


# Monkey-patch the stdlib lookup so later callers of
# locale.getpreferredencoding() receive "UTF-8".
locale.getpreferredencoding = getpreferredencoding

In [3]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U einops
!pip install -q -U safetensors
!pip install -q -U torch
!pip install -q -U xformers
!pip install -q -U datasets
!pip install -q -U scipy
!pip install -q -U jupyter ipywidgets

# Import

In [5]:
import tqdm as notebook_tqdm
from datasets import load_dataset
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, AutoModel
import transformers

# Load Quantized Model

In [6]:
# Sharded Falcon-7B-Instruct checkpoint published by vilsonrodrigues.
model_id = "vilsonrodrigues/falcon-7b-instruct-sharded"

# 4-bit NF4 quantization with double quantization enabled; the compute dtype
# handed to bitsandbytes is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Pad with the EOS token.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},  # map the whole model (root module "") to device 0
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

Some weights of FalconForCausalLM were not initialized from the model checkpoint at vilsonrodrigues/falcon-7b-instruct-sharded and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing, then hand the quantized model to PEFT's
# k-bit training preparation helper. The returned model replaces `model`.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [6]:
def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Iterates ``model.named_parameters()`` and sums ``numel()`` over all
    parameters and over those with ``requires_grad`` set, then prints both
    counts and the trainable percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs where ``param`` has ``numel()`` and
            ``requires_grad``.

    Returns:
        None. The summary is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model with no parameters would otherwise raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [7]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank-16 adapters (alpha 32, dropout 0.05) attached
# only to the modules named "query_key_value"; bias terms are left alone.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapters and report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 4718592 || all params: 3613463424 || trainable%: 0.13058363808693696


# Prepare Data

In [8]:
# Load the training examples from a local JSON file (path is relative to the
# notebook's working directory), then display the resulting dataset summary.
data = load_dataset("json", data_files="../data/train.json")
data

Found cached dataset json (/home/elsatch/.cache/huggingface/datasets/json/default-be29fcba34db1804/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
100%|██████████| 1/1 [00:00<00:00, 238.83it/s]


DatasetDict({
    train: Dataset({
        features: ['prompt', 'completion'],
        num_rows: 252
    })
})

In [9]:
# Pad with the EOS token.
tokenizer.pad_token = tokenizer.eos_token

# prompt_template = "### Instruction: {prompt}\n### Response:"

# Fixed seed so the shuffled example order is the same on every run
# (an unseeded shuffle makes the run non-reproducible).
SHUFFLE_SEED = 42

# Each training text is the prompt and its completion joined by a newline.
train_dataset = data['train'].shuffle(seed=SHUFFLE_SEED).map(lambda x: {"input_text": x['prompt']  + "\n" + x['completion']})

# Tokenize the datasets: truncate to at most 256 tokens, pad the batch, and
# return PyTorch tensors.
train_encodings = tokenizer(train_dataset['input_text'], truncation=True, padding=True, max_length=256, return_tensors='pt')

                                                   

In [10]:
# Peek at one raw example (row 62) to show the prompt/completion format.
data['train'][62]

{'prompt': 'Verify if the country column consists of values USA, Canada and Mexico',
 'completion': "expect_column_values_to_be_in_set(column='country', value_set=['USA', 'Canada', 'Mexico'])"}

In [11]:
class TextDataset(Dataset):
    """Map-style dataset over a tokenizer encoding.

    ``encodings`` is a mapping from field name (it must include
    ``"input_ids"``) to an indexable batch of per-example values. Each item is
    a dict with one tensor per field plus ``"labels"``, a copy of
    ``"input_ids"``.
    """

    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {}
        for key, val in self.encodings.items():
            value = val[idx]
            if isinstance(value, torch.Tensor):
                # torch.tensor(<tensor>) emits a copy-construct UserWarning;
                # clone().detach() produces the same independent copy without it.
                item[key] = value.clone().detach()
            else:
                item[key] = torch.tensor(value)
        # Labels start as a separate copy of the input ids.
        item["labels"] = item["input_ids"].clone()
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])


In [12]:
# Convert the encodings to PyTorch datasets
# NOTE: this rebinds `train_dataset`; the name previously held the shuffled
# Hugging Face dataset and now holds the TextDataset wrapper.
train_dataset = TextDataset(train_encodings)

# Example Before Fine Tuning

In [13]:
def generate(index, max_new_tokens=100):
    """Print the model's answer for one training example next to the reference.

    Relies on the notebook-level ``data``, ``tokenizer`` and ``model`` objects.

    Args:
        index: Row index into ``data['train']``.
        max_new_tokens: Upper bound on the number of newly generated tokens
            (defaults to 100, the value that was previously hard-coded).

    Returns:
        None. The question, the decoded generation and the reference
        completion are printed.
    """
    example = data['train'][index]
    example_text = example['prompt']
    correct_answer = example['completion']

    print("Question:")
    print(example_text)

    # Send the inputs to wherever the model lives instead of assuming "cuda:0".
    encoding = tokenizer(example_text, return_tensors="pt").to(model.device)

    # Plain greedy decoding. The earlier do_sample=True / temperature=1e-6 /
    # top_k=0 combination only approximated greedy search by dividing the
    # logits by a tiny temperature. pad_token_id is passed explicitly to avoid
    # the "Setting `pad_token_id` to `eos_token_id`" notice on every call.
    output = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    print("Answer:")
    print(tokenizer.decode(output[0], skip_special_tokens=True))

    print("Best Answer:")
    print(correct_answer)

    print()

In [14]:
%%time
# Baseline: generate for example 0 before any fine-tuning; the cell magic
# above times the whole cell.
generate(0)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Question:
Ensure that MIT University graduate proportion of unique values is within 90% and 100%.
Answer:
Ensure that MIT University graduate proportion of unique values is within 90% and 100%.
The proportion of unique values in the MIT University graduate population is 90%.
Best Answer:
expect_column_proportion_of_unique_values_to_be_between(column='graduate', min_value=0.9, max_value=1.0, condition_parser='pandas',  row_condition='University==MIT')

CPU times: user 1.28 s, sys: 225 ms, total: 1.51 s
Wall time: 1.66 s


# Training

In [None]:
# Training hyperparameters, grouped by concern.
# NOTE(review): fp16 is enabled here while the quantization config uses
# bfloat16 as its compute dtype — confirm the mismatch is intended.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    # schedule
    num_train_epochs=25,
    # max_steps=100,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    # optimisation
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type='cosine',
    warmup_ratio=0.05,
    # precision / logging
    fp16=True,
    logging_steps=1,
)

# Collator in causal-LM mode (mlm=False).
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # eval_dataset=val_dataset,
    data_collator=causal_lm_collator,
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  1%|          | 1/100 [00:23<38:51, 23.55s/it]

{'loss': 2.4287, 'learning_rate': 4e-05, 'epoch': 0.25}


  2%|▏         | 2/100 [00:47<38:22, 23.49s/it]

{'loss': 2.4137, 'learning_rate': 8e-05, 'epoch': 0.5}


  3%|▎         | 3/100 [01:10<38:19, 23.70s/it]

{'loss': 2.5372, 'learning_rate': 0.00012, 'epoch': 0.75}


  4%|▍         | 4/100 [01:33<37:07, 23.20s/it]

{'loss': 2.4222, 'learning_rate': 0.00016, 'epoch': 1.0}


  5%|▌         | 5/100 [01:57<36:59, 23.37s/it]

{'loss': 2.411, 'learning_rate': 0.0002, 'epoch': 1.25}


  6%|▌         | 6/100 [02:20<36:52, 23.53s/it]

{'loss': 2.377, 'learning_rate': 0.00019994532573409262, 'epoch': 1.5}


  7%|▋         | 7/100 [02:44<36:43, 23.70s/it]

{'loss': 2.3131, 'learning_rate': 0.00019978136272187747, 'epoch': 1.75}


  8%|▊         | 8/100 [03:07<35:52, 23.40s/it]

{'loss': 2.2442, 'learning_rate': 0.00019950829025450114, 'epoch': 2.0}


  9%|▉         | 9/100 [03:31<35:44, 23.57s/it]

{'loss': 2.0984, 'learning_rate': 0.00019912640693269752, 'epoch': 2.25}


 10%|█         | 10/100 [03:55<35:27, 23.64s/it]

{'loss': 2.1169, 'learning_rate': 0.00019863613034027224, 'epoch': 2.5}


 11%|█         | 11/100 [04:18<34:42, 23.40s/it]

{'loss': 2.0074, 'learning_rate': 0.00019803799658748094, 'epoch': 2.75}


 12%|█▏        | 12/100 [04:40<33:55, 23.13s/it]

{'loss': 1.8132, 'learning_rate': 0.0001973326597248006, 'epoch': 3.0}


 13%|█▎        | 13/100 [05:04<33:48, 23.32s/it]

{'loss': 1.771, 'learning_rate': 0.00019652089102773488, 'epoch': 3.25}


 14%|█▍        | 14/100 [05:28<33:37, 23.46s/it]

{'loss': 1.7453, 'learning_rate': 0.00019560357815343577, 'epoch': 3.5}


 15%|█▌        | 15/100 [05:52<33:20, 23.54s/it]

{'loss': 1.549, 'learning_rate': 0.00019458172417006347, 'epoch': 3.75}


 16%|█▌        | 16/100 [06:14<32:30, 23.22s/it]

{'loss': 1.4838, 'learning_rate': 0.0001934564464599461, 'epoch': 4.0}


 17%|█▋        | 17/100 [06:38<32:25, 23.44s/it]

{'loss': 1.4643, 'learning_rate': 0.00019222897549773848, 'epoch': 4.25}


 18%|█▊        | 18/100 [07:02<32:11, 23.56s/it]

{'loss': 1.4205, 'learning_rate': 0.00019090065350491626, 'epoch': 4.5}


 19%|█▉        | 19/100 [07:26<31:54, 23.63s/it]

{'loss': 1.3318, 'learning_rate': 0.00018947293298207635, 'epoch': 4.75}


 20%|██        | 20/100 [07:48<31:08, 23.36s/it]

{'loss': 1.323, 'learning_rate': 0.0001879473751206489, 'epoch': 5.0}


 21%|██        | 21/100 [08:12<30:57, 23.51s/it]

{'loss': 1.2654, 'learning_rate': 0.00018632564809575742, 'epoch': 5.25}


 22%|██▏       | 22/100 [08:36<30:35, 23.53s/it]

{'loss': 1.222, 'learning_rate': 0.00018460952524209355, 'epoch': 5.5}


 23%|██▎       | 23/100 [08:59<30:15, 23.58s/it]

{'loss': 1.0757, 'learning_rate': 0.00018280088311480201, 'epoch': 5.75}


 24%|██▍       | 24/100 [09:22<29:25, 23.23s/it]

{'loss': 1.1068, 'learning_rate': 0.00018090169943749476, 'epoch': 6.0}


 25%|██▌       | 25/100 [09:46<29:14, 23.39s/it]

{'loss': 1.0431, 'learning_rate': 0.00017891405093963938, 'epoch': 6.25}


 26%|██▌       | 26/100 [10:09<28:56, 23.46s/it]

{'loss': 0.9331, 'learning_rate': 0.00017684011108568592, 'epoch': 6.5}


 27%|██▋       | 27/100 [10:33<28:35, 23.50s/it]

{'loss': 0.9251, 'learning_rate': 0.0001746821476984154, 'epoch': 6.75}


 28%|██▊       | 28/100 [10:55<27:42, 23.09s/it]

{'loss': 0.9159, 'learning_rate': 0.00017244252047910892, 'epoch': 7.0}


 29%|██▉       | 29/100 [11:17<26:51, 22.70s/it]

{'loss': 0.8648, 'learning_rate': 0.00017012367842724887, 'epoch': 7.25}


 30%|███       | 30/100 [11:37<25:30, 21.87s/it]

{'loss': 0.8179, 'learning_rate': 0.00016772815716257412, 'epoch': 7.5}


 31%|███       | 31/100 [11:57<24:30, 21.31s/it]

{'loss': 0.7792, 'learning_rate': 0.00016525857615241687, 'epoch': 7.75}


 32%|███▏      | 32/100 [12:16<23:21, 20.61s/it]

{'loss': 0.7926, 'learning_rate': 0.0001627176358473537, 'epoch': 8.0}


 33%|███▎      | 33/100 [12:36<22:48, 20.43s/it]

{'loss': 0.793, 'learning_rate': 0.00016010811472830252, 'epoch': 8.25}


 34%|███▍      | 34/100 [12:56<22:19, 20.30s/it]

{'loss': 0.7615, 'learning_rate': 0.00015743286626829437, 'epoch': 8.5}


 35%|███▌      | 35/100 [13:16<21:53, 20.21s/it]

{'loss': 0.6181, 'learning_rate': 0.00015469481581224272, 'epoch': 8.75}


 36%|███▌      | 36/100 [13:35<21:09, 19.84s/it]

{'loss': 0.7163, 'learning_rate': 0.00015189695737812152, 'epoch': 9.0}


 37%|███▋      | 37/100 [13:55<20:52, 19.89s/it]

{'loss': 0.6399, 'learning_rate': 0.00014904235038305083, 'epoch': 9.25}


 38%|███▊      | 38/100 [14:15<20:35, 19.92s/it]

{'loss': 0.6515, 'learning_rate': 0.0001461341162978688, 'epoch': 9.5}


 39%|███▉      | 39/100 [14:35<20:16, 19.95s/it]

{'loss': 0.636, 'learning_rate': 0.00014317543523384928, 'epoch': 9.75}


 40%|████      | 40/100 [14:54<19:38, 19.65s/it]

{'loss': 0.6415, 'learning_rate': 0.00014016954246529696, 'epoch': 10.0}


 41%|████      | 41/100 [15:14<19:25, 19.76s/it]

{'loss': 0.552, 'learning_rate': 0.00013711972489182208, 'epoch': 10.25}


 42%|████▏     | 42/100 [15:34<19:10, 19.83s/it]

{'loss': 0.6299, 'learning_rate': 0.00013402931744416433, 'epoch': 10.5}


 43%|████▎     | 43/100 [15:54<18:53, 19.89s/it]

{'loss': 0.6034, 'learning_rate': 0.00013090169943749476, 'epoch': 10.75}


 44%|████▍     | 44/100 [16:13<18:17, 19.61s/it]

{'loss': 0.5542, 'learning_rate': 0.00012774029087618446, 'epoch': 11.0}


 45%|████▌     | 45/100 [16:33<18:05, 19.73s/it]

{'loss': 0.5739, 'learning_rate': 0.00012454854871407994, 'epoch': 11.25}


 46%|████▌     | 46/100 [16:53<17:49, 19.81s/it]

{'loss': 0.4888, 'learning_rate': 0.0001213299630743747, 'epoch': 11.5}


 47%|████▋     | 47/100 [17:13<17:33, 19.87s/it]

{'loss': 0.5352, 'learning_rate': 0.000118088053433211, 'epoch': 11.75}


 48%|████▊     | 48/100 [17:32<16:58, 19.60s/it]

{'loss': 0.5229, 'learning_rate': 0.0001148263647711842, 'epoch': 12.0}


 49%|████▉     | 49/100 [17:52<16:45, 19.72s/it]

{'loss': 0.4731, 'learning_rate': 0.00011154846369695863, 'epoch': 12.25}


 50%|█████     | 50/100 [18:12<16:30, 19.81s/it]

{'loss': 0.4999, 'learning_rate': 0.00010825793454723325, 'epoch': 12.5}


 51%|█████     | 51/100 [18:32<16:13, 19.87s/it]

{'loss': 0.4795, 'learning_rate': 0.00010495837546732224, 'epoch': 12.75}


 52%|█████▏    | 52/100 [18:51<15:40, 19.59s/it]

{'loss': 0.4864, 'learning_rate': 0.00010165339447663587, 'epoch': 13.0}


 53%|█████▎    | 53/100 [19:11<15:26, 19.72s/it]

{'loss': 0.4706, 'learning_rate': 9.834660552336415e-05, 'epoch': 13.25}


 54%|█████▍    | 54/100 [19:31<15:11, 19.81s/it]

{'loss': 0.4465, 'learning_rate': 9.504162453267777e-05, 'epoch': 13.5}


 55%|█████▌    | 55/100 [19:51<14:53, 19.87s/it]

{'loss': 0.428, 'learning_rate': 9.174206545276677e-05, 'epoch': 13.75}


 56%|█████▌    | 56/100 [20:10<14:22, 19.59s/it]

{'loss': 0.4517, 'learning_rate': 8.845153630304139e-05, 'epoch': 14.0}


 57%|█████▋    | 57/100 [20:30<14:07, 19.72s/it]

{'loss': 0.4093, 'learning_rate': 8.517363522881579e-05, 'epoch': 14.25}


 58%|█████▊    | 58/100 [20:50<13:51, 19.80s/it]

{'loss': 0.398, 'learning_rate': 8.191194656678904e-05, 'epoch': 14.5}


 59%|█████▉    | 59/100 [21:10<13:34, 19.87s/it]

{'loss': 0.4334, 'learning_rate': 7.867003692562534e-05, 'epoch': 14.75}


 60%|██████    | 60/100 [21:29<13:03, 19.59s/it]

{'loss': 0.4248, 'learning_rate': 7.54514512859201e-05, 'epoch': 15.0}


 61%|██████    | 61/100 [21:49<12:48, 19.72s/it]

{'loss': 0.3775, 'learning_rate': 7.225970912381556e-05, 'epoch': 15.25}


 62%|██████▏   | 62/100 [22:09<12:32, 19.81s/it]

{'loss': 0.3891, 'learning_rate': 6.909830056250527e-05, 'epoch': 15.5}


 63%|██████▎   | 63/100 [22:29<12:15, 19.87s/it]

{'loss': 0.3734, 'learning_rate': 6.59706825558357e-05, 'epoch': 15.75}


 64%|██████▍   | 64/100 [22:48<11:45, 19.59s/it]

{'loss': 0.4115, 'learning_rate': 6.28802751081779e-05, 'epoch': 16.0}


 65%|██████▌   | 65/100 [23:08<11:30, 19.72s/it]

{'loss': 0.3839, 'learning_rate': 5.983045753470308e-05, 'epoch': 16.25}


 66%|██████▌   | 66/100 [23:28<11:13, 19.80s/it]

{'loss': 0.3759, 'learning_rate': 5.6824564766150726e-05, 'epoch': 16.5}


 67%|██████▋   | 67/100 [23:48<10:55, 19.87s/it]

{'loss': 0.3345, 'learning_rate': 5.386588370213124e-05, 'epoch': 16.75}


 68%|██████▊   | 68/100 [24:07<10:26, 19.59s/it]

{'loss': 0.3635, 'learning_rate': 5.095764961694922e-05, 'epoch': 17.0}


 69%|██████▉   | 69/100 [24:27<10:11, 19.72s/it]

{'loss': 0.3418, 'learning_rate': 4.810304262187852e-05, 'epoch': 17.25}


 70%|███████   | 70/100 [24:47<09:54, 19.80s/it]

{'loss': 0.3149, 'learning_rate': 4.530518418775733e-05, 'epoch': 17.5}


 71%|███████   | 71/100 [25:07<09:36, 19.87s/it]

{'loss': 0.3657, 'learning_rate': 4.256713373170564e-05, 'epoch': 17.75}


 72%|███████▏  | 72/100 [25:26<09:08, 19.59s/it]

{'loss': 0.3471, 'learning_rate': 3.9891885271697496e-05, 'epoch': 18.0}


 73%|███████▎  | 73/100 [25:46<08:52, 19.72s/it]

{'loss': 0.3577, 'learning_rate': 3.7282364152646297e-05, 'epoch': 18.25}


 74%|███████▍  | 74/100 [26:06<08:34, 19.81s/it]

{'loss': 0.2933, 'learning_rate': 3.4741423847583134e-05, 'epoch': 18.5}


 75%|███████▌  | 75/100 [26:26<08:16, 19.87s/it]

{'loss': 0.3235, 'learning_rate': 3.227184283742591e-05, 'epoch': 18.75}


 76%|███████▌  | 76/100 [26:45<07:50, 19.59s/it]

{'loss': 0.3109, 'learning_rate': 2.9876321572751144e-05, 'epoch': 19.0}


 77%|███████▋  | 77/100 [27:05<07:33, 19.72s/it]

{'loss': 0.337, 'learning_rate': 2.7557479520891104e-05, 'epoch': 19.25}


 78%|███████▊  | 78/100 [27:25<07:15, 19.80s/it]

{'loss': 0.2684, 'learning_rate': 2.5317852301584643e-05, 'epoch': 19.5}


 79%|███████▉  | 79/100 [27:45<06:57, 19.86s/it]

{'loss': 0.3231, 'learning_rate': 2.315988891431412e-05, 'epoch': 19.75}


 80%|████████  | 80/100 [28:04<06:31, 19.59s/it]

{'loss': 0.3063, 'learning_rate': 2.1085949060360654e-05, 'epoch': 20.0}


 81%|████████  | 81/100 [28:24<06:14, 19.72s/it]

{'loss': 0.3174, 'learning_rate': 1.9098300562505266e-05, 'epoch': 20.25}


 82%|████████▏ | 82/100 [28:44<05:56, 19.80s/it]

{'loss': 0.3035, 'learning_rate': 1.7199116885197995e-05, 'epoch': 20.5}


 83%|████████▎ | 83/100 [29:04<05:37, 19.87s/it]

{'loss': 0.2919, 'learning_rate': 1.5390474757906446e-05, 'epoch': 20.75}


 84%|████████▍ | 84/100 [29:22<05:13, 19.59s/it]

{'loss': 0.2932, 'learning_rate': 1.3674351904242611e-05, 'epoch': 21.0}


 85%|████████▌ | 85/100 [29:42<04:55, 19.72s/it]

{'loss': 0.2789, 'learning_rate': 1.2052624879351104e-05, 'epoch': 21.25}


 86%|████████▌ | 86/100 [30:02<04:37, 19.80s/it]

{'loss': 0.2953, 'learning_rate': 1.0527067017923654e-05, 'epoch': 21.5}


 87%|████████▋ | 87/100 [30:23<04:18, 19.87s/it]

{'loss': 0.2885, 'learning_rate': 9.09934649508375e-06, 'epoch': 21.75}


 88%|████████▊ | 88/100 [30:41<03:55, 19.59s/it]

{'loss': 0.2992, 'learning_rate': 7.771024502261526e-06, 'epoch': 22.0}


 89%|████████▉ | 89/100 [31:01<03:36, 19.72s/it]

{'loss': 0.2644, 'learning_rate': 6.543553540053926e-06, 'epoch': 22.25}


 90%|█████████ | 90/100 [31:21<03:18, 19.80s/it]

{'loss': 0.2935, 'learning_rate': 5.418275829936537e-06, 'epoch': 22.5}


 91%|█████████ | 91/100 [31:41<02:58, 19.87s/it]

{'loss': 0.3022, 'learning_rate': 4.3964218465642355e-06, 'epoch': 22.75}


 92%|█████████▏| 92/100 [32:00<02:36, 19.59s/it]

{'loss': 0.2829, 'learning_rate': 3.4791089722651436e-06, 'epoch': 23.0}


 93%|█████████▎| 93/100 [32:20<02:18, 19.72s/it]

{'loss': 0.2958, 'learning_rate': 2.667340275199426e-06, 'epoch': 23.25}


 94%|█████████▍| 94/100 [32:40<01:58, 19.80s/it]

{'loss': 0.2751, 'learning_rate': 1.9620034125190644e-06, 'epoch': 23.5}


 95%|█████████▌| 95/100 [33:00<01:39, 19.87s/it]

{'loss': 0.2838, 'learning_rate': 1.3638696597277679e-06, 'epoch': 23.75}


 96%|█████████▌| 96/100 [33:19<01:18, 19.59s/it]

{'loss': 0.2745, 'learning_rate': 8.735930673024806e-07, 'epoch': 24.0}


 97%|█████████▋| 97/100 [33:39<00:59, 19.72s/it]

{'loss': 0.2862, 'learning_rate': 4.917097454988584e-07, 'epoch': 24.25}


 98%|█████████▊| 98/100 [33:59<00:39, 19.80s/it]

{'loss': 0.2802, 'learning_rate': 2.1863727812254653e-07, 'epoch': 24.5}


 99%|█████████▉| 99/100 [34:19<00:19, 19.87s/it]

{'loss': 0.2775, 'learning_rate': 5.467426590739511e-08, 'epoch': 24.75}


100%|██████████| 100/100 [34:38<00:00, 20.79s/it]

{'loss': 0.2867, 'learning_rate': 0.0, 'epoch': 25.0}
{'train_runtime': 2078.9108, 'train_samples_per_second': 3.03, 'train_steps_per_second': 0.048, 'train_loss': 0.7900266298651695, 'epoch': 25.0}





TrainOutput(global_step=100, training_loss=0.7900266298651695, metrics={'train_runtime': 2078.9108, 'train_samples_per_second': 3.03, 'train_steps_per_second': 0.048, 'train_loss': 0.7900266298651695, 'epoch': 25.0})

# Example After Fine Tuning

In [21]:
# Re-enable `use_cache` (it was set to False before training) and switch the
# model to evaluation mode.
model.config.use_cache = True
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): FalconForCausalLM(
      (transformer): FalconModel(
        (word_embeddings): Embedding(65024, 4544)
        (h): ModuleList(
          (0-31): 32 x FalconDecoderLayer(
            (self_attention): FalconAttention(
              (maybe_rotary): FalconRotaryEmbedding()
              (query_key_value): Linear4bit(
                in_features=4544, out_features=4672, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4544, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4672, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (dense): Linear4bit(in_

In [1]:
#from huggingface_hub import notebook_login
#notebook_login()

In [None]:
#model.push_to_hub("DioulaD/falcon-7b-instruct-qlora-ge-dq-v2", use_auth_token=True)

In [None]:
#tokenizer.push_to_hub("DioulaD/falcon-7b-instruct-qlora-ge-dq-v2")

In [None]:
# model.config.push_to_hub("DioulaD/falcon-7b-instruct-qlora-ge-dq-v2")

In [23]:
# Save the tokenizer and the fine-tuned PEFT model side by side in one local directory.
pt_save_directory = "./content/falcon-7b-instruct-qlora-ge-dq-v2"
tokenizer.save_pretrained(pt_save_directory)
# `from_pt` is a loading option of `from_pretrained`; it has no meaning when
# saving, so it is no longer passed.
model.save_pretrained(pt_save_directory)

In [24]:
# Save into the same directory used for the tokenizer and adapter. The path
# used to start with ",/" (a typo for "./"), which wrote the model under a
# directory literally named ",".
trainer.save_model(pt_save_directory)

# Inference

In [15]:
from peft import PeftModel, PeftConfig

In [17]:
# PEFT_MODEL = "DioulaD/falcon-7b-instruct-qlora-ge-dq-v2"

# Local directory holding the saved adapter.
PEFT_MODEL = './content/falcon-7b-instruct-qlora-ge-dq-v2'

# Same 4-bit NF4 quantization settings as used for training.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# The adapter config records which base checkpoint it was trained on.
config = PeftConfig.from_pretrained(PEFT_MODEL)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned LoRA weights to the freshly loaded base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)
prompt = "Verify that for rows where the category is Electronics, the userType column vaues are in the set: member, guest"

# The model was placed with device_map="auto", so follow the model's device
# rather than assuming "cuda:0".
encoding = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.inference_mode():
    out = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        max_new_tokens=100, do_sample=True, temperature=0.3,
        eos_token_id=tokenizer.eos_token_id,
        # Explicit pad token avoids the "Setting `pad_token_id` to
        # `eos_token_id`" notice; generate() falls back to this value anyway.
        pad_token_id=tokenizer.eos_token_id,
        top_k=0
    )

response = tokenizer.decode(out[0], skip_special_tokens=True)
print(response)


Loading checkpoint shards: 100%|██████████| 15/15 [00:04<00:00,  3.19it/s]
Some weights of FalconForCausalLM were not initialized from the model checkpoint at vilsonrodrigues/falcon-7b-instruct-sharded and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Verify that for rows where the category is Electronics, the userType column vaues are in the set: member, guest, and admin.
expect_column_values_to_be_in_set(column='userType', value_set=['member', 'guest', 'admin'], condition_parser='pandas', row_condition='category==Electronics')
