### Imports

In [1]:
import os
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model
from random import randint

### Loading falcon-40b instruct with quantization

In [3]:
model_id = "tiiuae/falcon-40b-instruct"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Falcon requires you to allow remote code execution. This is because the model uses a new architecture that is not part of transformers yet.
# The code is provided by the model authors in the repo.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, quantization_config=bnb_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

In [4]:
# Set the Falcon tokenizer
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# %pip install -q -U git+https://github.com/huggingface/peft.git@e2b8e3260d3eeb736edf21a2424e89fe3ecf429d

### Converting to peft and applying low rank adaptation

In [5]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [6]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [8]:


config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=[
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
        ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 55541760 || all params: 20974518272 || trainable%: 0.2648058910327664


### Loading HF dataset

In [28]:
from datasets import load_dataset

# Load dataset from the hub
dataset = load_dataset("shrinath-suresh/so_5k_with_short_answer_cleaned")
dataset = dataset["train"].train_test_split(test_size=0.1)


Found cached dataset json (/home/ubuntu/.cache/huggingface/datasets/shrinath-suresh___json/shrinath-suresh--so_5k_with_short_answer_cleaned-5b2593d73c747107/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/1 [00:00<?, ?it/s]

In [29]:
print(f"Train dataset size: {len(dataset['train'])}")
print(f"Test dataset size: {len(dataset['test'])}")

Train dataset size: 4178
Test dataset size: 465


### Setting Prompt

In [30]:
# custom instruct prompt start
prompt_template = f"As a pytorch expert engineer answer the question based on context. Question: \n{{question}}\n Context: \n{{context}}\n---\Answer:\n{{answer}}{{eos_token}}"

# template dataset to add prompt to each sample
def template_dataset(sample):
    sample["text"] = prompt_template.format(question=sample["instruction"],
                                            context=sample["input"],
                                            answer=sample["output"],
                                            eos_token=tokenizer.eos_token)
    return sample


# apply prompt template per sample
train_dataset = dataset["train"].map(template_dataset, remove_columns=list(dataset["train"].features))

print(train_dataset[randint(0, len(dataset))]["text"])

Map:   0%|          | 0/4178 [00:00<?, ? examples/s]

As a pytorch expert engineer answer the question based on context. Question: 
why there are different output between model.forward(input) and model(input)
I'm using pytorch to build a simple model like VGG16,and I have overloaded the function forward in my model.

I found everyone tends to use model(input) to get the output rather than model.forward(input), and I am interested in the difference between them. I try to input the same data, but the result is different. I'm confused.

I have output the layer_weight before I input data, the weight not be changed, and I know when we using model(input) it using __call__ function, and this function will call model.forward.

   vgg = VGG()
   vgg.double()
   for layer in vgg.modules():
      if isinstance(layer,torch.nn.Linear):
         print(layer.weight)
   print("   use model.forward(input)     ")
   result = vgg.forward(array)

   for layer in vgg.modules():
     if isinstance(layer,torch.nn.Linear):
       print(layer.weight) 
   print(" 

In [31]:
# apply prompt template per sample
test_dataset = dataset["test"].map(template_dataset, remove_columns=list(dataset["test"].features))

Map:   0%|          | 0/465 [00:00<?, ? examples/s]

In [32]:
 # tokenize and chunk dataset
lm_train_dataset = train_dataset.map(
    lambda sample: tokenizer(sample["text"]), batched=True, batch_size=2, remove_columns=list(train_dataset.features)
)


lm_test_dataset = test_dataset.map(
    lambda sample: tokenizer(sample["text"]), batched=True, remove_columns=list(test_dataset.features)
)

# Print total number of samples
print(f"Total number of train samples: {len(lm_train_dataset)}")

Map:   0%|          | 0/4178 [00:00<?, ? examples/s]

Map:   0%|          | 0/465 [00:00<?, ? examples/s]

Total number of train samples: 4178


### Setting training parameters

In [33]:
# We set num_train_epochs=1 simply to run a demonstration

trainer = transformers.Trainer(
    model=model,
    train_dataset=lm_train_dataset,
    eval_dataset=lm_test_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        logging_dir=os.getcwd(),
        logging_steps=2,
        num_train_epochs=1,
        learning_rate=2e-4,
        bf16=True,
        save_strategy = "epoch",
        output_dir="outputs",
        report_to="wandb",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

### Start Training

In [None]:
# Start training
trainer.train()

Step,Training Loss
2,1.4888
4,1.5504
6,1.3359
8,1.6101
10,1.3266
12,1.2323
14,1.3092
16,1.2415
18,1.0571
20,1.0472


### Run Evaluation

In [39]:
trainer.evaluate()

{'eval_loss': 1.0922362804412842,
 'eval_runtime': 843.8434,
 'eval_samples_per_second': 0.551,
 'eval_steps_per_second': 0.276,
 'epoch': 1.0}

### Inference - Sample from test set

In [67]:
sample = dataset["test"][randint(0, len(test_dataset))]

In [68]:
sample

{'instruction': "pytorch tensorboard summarywriter.add_video() produces bad videos\nI’m trying to create a video by generating a sequence of 500 matplotlib plots, converting each to a numpy array, stacking them and then passing them to a SummaryWriter()'s add_video(). When I do this, the colorbar is converted from colored to black &amp; white, and only a small number (~3-4) of the matplotlib plots are repeated. I confirmed that my numpy arrays are correct by using them to recreate a matplotlib figure.\n\nMy input tensor has shape (B,C,T,H,W), dtype np.uint8, and values between [0, 255].\n\nMinimal working example below. To be clear, the code runs without any errors. My problem is that the resulting video is wrong.\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch\nfrom torch.utils.tensorboard import SummaryWriter\n\n\ntensorboard_writer = SummaryWriter()\nprint(tensorboard_writer.get_logdir())\n\n\ndef fig2data(fig):\n\n    # draw the renderer\n    fig.canvas.draw()\

In [69]:
test_prompt_template = f"As a pytorch expert engineer answer the question based on context. Question: \n{{question}}\n Context: \n{{context}}\n---\Answer:\n"


In [71]:
test_sample = test_prompt_template.format(question=sample["instruction"], context=sample["input"])


In [72]:
input_ids = tokenizer(test_sample, return_tensors="pt").input_ids

In [73]:
tokens_for_summary = 64
output_tokens = input_ids.shape[1] + tokens_for_summary

outputs = model.generate(inputs=input_ids, do_sample=True, max_length=output_tokens)
gen_text = tokenizer.batch_decode(outputs)[0]
print(gen_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


As a pytorch expert engineer answer the question based on context. Question: 
pytorch tensorboard summarywriter.add_video() produces bad videos
I’m trying to create a video by generating a sequence of 500 matplotlib plots, converting each to a numpy array, stacking them and then passing them to a SummaryWriter()'s add_video(). When I do this, the colorbar is converted from colored to black &amp; white, and only a small number (~3-4) of the matplotlib plots are repeated. I confirmed that my numpy arrays are correct by using them to recreate a matplotlib figure.

My input tensor has shape (B,C,T,H,W), dtype np.uint8, and values between [0, 255].

Minimal working example below. To be clear, the code runs without any errors. My problem is that the resulting video is wrong.

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter


tensorboard_writer = SummaryWriter()
print(tensorboard_writer.get_logdir())


def fig2data(fig):

    #

### Inference - Generic pytorch question

In [42]:
test_prompt_template = f"As a pytorch expert engineer answer the question. Question: \n{{question}}\n---\Answer:\n"

In [52]:
test_sample = test_prompt_template.format(question="")
test_sample


'As a pytorch expert engineer answer the question. Question: \nCode for creating custom dataset?\n---\\Answer:\n'

In [53]:
input_ids = tokenizer(test_sample, return_tensors="pt").input_ids

In [55]:
tokens_for_summary = 64
output_tokens = input_ids.shape[1] + tokens_for_summary

outputs = model.generate(inputs=input_ids, do_sample=True, max_length=output_tokens)
gen_text = tokenizer.batch_decode(outputs)[0]
print(gen_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


As a pytorch expert engineer answer the question. Question: 
Code for creating custom dataset?
---\Answer:
Do you have any specific use case that I can help you with? Depending on what you want to achieve, the answer can be different or more general.


Here's an example code that creates a simple MNIST custom dataset using PyTorch:

```python
import torchvision.datasets as datasets
import torchvision.transforms as transforms

image_batch_sz = 10   # Batch size
image_transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor()
])
class MNISTCustomDataset(datasets.Dataset):
     def __init__(self):
         self.data = datasets.MNIST(root=f'./path/to/MNIST/dataset',
                                     train=True,
                                     transform=transforms.ToTensor(),
                                     download=True)
         self.transform = image_transform
         self.dataSetIndexToLabelId = {i: label for label, i in enumerate(self.data