# Fast Fine-tuning a Llama-2 7B model for python code generation using Unsloth

### Install the libraries

In [None]:
%%capture
!pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
!pip install flash-attn
!pip install transformers datasets

In [None]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


## Loading the libraries

In [None]:
from unsloth import FastLlamaModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging
from peft import LoraConfig, PeftModel

from datasets import load_dataset
from random import randrange

We shall run `ldconfig /usr/lib64-nvidia` to try to fix it.


## Set model and training parameters

In [None]:
# General parameters

# The model that you want to train from unsloth
model_name = "unsloth/llama-2-7b"
# The instruction dataset to use
dataset_name = "iamtarun/python_code_instructions_18k_alpaca"
#dataset_name = "HuggingFaceH4/CodeAlpaca_20K"
# Dataset split
dataset_split= "train"
# Adapter name
adapter_name = "adapter-unsloth-llama-2-7b-py-coder"
# Huggingface repository
adapter_repo="edumunozsala/"+adapter_name
# Fine-tuned model name
new_model = "unsloth-llama-2-7B-python-coder"
hf_model_repo = "edumunozsala/"+new_model

# Model Parameters
max_seq_length = 2048
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# LoRA Parameters
r = 16
target_modules = ["gate_proj", "up_proj", "down_proj"]
#target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj",],
lora_alpha = 16

# Training parameters
learning_rate = 2e-4
weight_decay = 0.01
#Evaluation
evaluation_strategy="no"
eval_steps= 50

# if training in epochs
#num_train_epochs=2
#save_strategy="epoch"

# if training in steps
max_steps = 1500
save_strategy="steps"
save_steps=500

logging_steps=100
warmup_steps = 10
warmup_ratio=0.01
batch_size = 4
gradient_accumulation_steps = 4
lr_scheduler_type = "linear"
optimizer = "adamw_8bit"
use_gradient_checkpointing = True
random_state = 42

## Load the model and generate the adapter

In [None]:
# Check if bfloat16 is supported
HAS_BFLOAT16 = torch.cuda.is_bf16_supported()
# Load the Llama-2 model
model, tokenizer = FastLlamaModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth: Fast Llama patching release 2024.1
   \\   /|    GPU: Tesla V100-SXM2-16GB. Max memory: 15.773 GB
O^O/ \_/ \    CUDA capability = 7.0. Xformers = 0.0.22.post7. FA = False.
\        /    Pytorch version: 2.1.0+cu121. CUDA Toolkit = 12.1
 "-____-"     bfloat16 = FALSE. Platform = Linux



config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

In [None]:
# Adapt the model for QLoRA training
model = FastLlamaModel.get_peft_model(
    model,
    r = r,
    #target_modules = target_modules,
    target_modules = target_modules,
    lora_alpha = lora_alpha,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = use_gradient_checkpointing,
    random_state = random_state,
    max_seq_length = max_seq_length,
)

Unsloth cannot patch Attention layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth cannot patch O projection layer with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2024.1 patched 32 layers with 0 QKV layers, 0 O layers and 32 MLP layers.


In [None]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (rotary_emb): LlamaRotaryEmbedding()
            )
            (mlp): LlamaMLP(
              (gate_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=11008, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default):

## Load the dataset with the instruction set

In [None]:
# Load dataset from the hub
dataset = load_dataset(dataset_name, split=dataset_split)
# Show dataset size
print(f"dataset size: {len(dataset)}")
# Show an example
print(dataset[randrange(len(dataset))])


Downloading readme:   0%|          | 0.00/905 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/18612 [00:00<?, ? examples/s]

dataset size: 18612
{'instruction': 'De-duplicate the given list using a dictionary in Python?', 'input': '[10, 20, 30, 10, 40, 20]', 'output': 'input_list = [10, 20, 30, 10, 40, 20]\nunique_list = list(dict.fromkeys(input_list))\nprint(unique_list)', 'prompt': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nDe-duplicate the given list using a dictionary in Python?\n\n### Input:\n[10, 20, 30, 10, 40, 20]\n\n### Output:\ninput_list = [10, 20, 30, 10, 40, 20]\nunique_list = list(dict.fromkeys(input_list))\nprint(unique_list)'}


In [None]:
# Check the dataset structure
dataset

Dataset({
    features: ['instruction', 'input', 'output', 'prompt'],
    num_rows: 18612
})

In [None]:
# Show a random example
print(dataset[randrange(len(dataset))])

{'instruction': 'Write a Python program that takes a list of numbers and creates a new list of even numbers only.', 'input': 'lst = [2, 3, 5, 6, 7, 8, 10, 11]', 'output': 'def get_even_numbers(lst):\n    even_lst = []\n    for num in lst:\n        if num % 2 == 0:\n            even_lst.append(num)\n    return even_lst', 'prompt': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWrite a Python program that takes a list of numbers and creates a new list of even numbers only.\n\n### Input:\nlst = [2, 3, 5, 6, 7, 8, 10, 11]\n\n### Output:\ndef get_even_numbers(lst):\n    even_lst = []\n    for num in lst:\n        if num % 2 == 0:\n            even_lst.append(num)\n    return even_lst'}


To fine-tune our model, we need to convert our structured examples into a collection of tasks described via instructions. We define a formatting_function that takes a sample and returns a string with our instruction format.

In [None]:
# Create the formating prompt
instruction_prompt = """### Instruction:
Use the Task below and the Input given to write the Response, which is a programming code that can solve the following Task:

### Task:
{}

### Input:
{}

### Response:
{}
"""

In [None]:
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = instruction_prompt.format(instruction, input, output)
        texts.append(text)
    return { "text" : texts, }

In [None]:
# Show a formatted instruction
print(formatting_prompts_func(dataset[randrange(len(dataset))]))


{'text': ['### Instruction:\nUse the Task below and the Input given to write the Response, which is a programming code that can solve the following Task:\n\n### Task:\nW\n\n### Input:\nl\n\n### Response:\nd\n', '### Instruction:\nUse the Task below and the Input given to write the Response, which is a programming code that can solve the following Task:\n\n### Task:\nr\n\n### Input:\ns\n\n### Response:\ne\n', '### Instruction:\nUse the Task below and the Input given to write the Response, which is a programming code that can solve the following Task:\n\n### Task:\ni\n\n### Input:\nt\n\n### Response:\nf\n', '### Instruction:\nUse the Task below and the Input given to write the Response, which is a programming code that can solve the following Task:\n\n### Task:\nt\n\n### Input:\n \n\n### Response:\n \n', '### Instruction:\nUse the Task below and the Input given to write the Response, which is a programming code that can solve the following Task:\n\n### Task:\ne\n\n### Input:\n=\n\n### Re

In [None]:
# Transforme the dataset into a instruction
dataset = dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/18612 [00:00<?, ? examples/s]

In [None]:
dataset

Dataset({
    features: ['instruction', 'input', 'output', 'prompt', 'text'],
    num_rows: 18612
})

## Create the Trainer

Set the training parameters

In [None]:
training_arguments = TrainingArguments(
        output_dir= adapter_name,
        evaluation_strategy=evaluation_strategy,
        eval_steps=eval_steps,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        #num_train_epochs=num_train_epochs, # Training in epochs
        max_steps= max_steps, # Training in steps
        warmup_steps= warmup_steps,
        warmup_ratio= warmup_ratio,
        learning_rate=learning_rate,
        optim= optimizer,
        save_strategy= save_strategy,
        save_steps=save_steps,
        logging_steps=logging_steps,
        fp16 = not HAS_BFLOAT16,
        bf16 = HAS_BFLOAT16,
        weight_decay = weight_decay,
        lr_scheduler_type = lr_scheduler_type,
        seed = random_state,
)

Define the Trainer

In [None]:
# Set the logging properties
logging.set_verbosity_info()

trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    args = training_arguments,
)

Map:   0%|          | 0/18612 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend


In [None]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla V100-SXM2-16GB. Max memory = 15.773 GB.
3.949 GB of memory reserved.


## Run the training job

In [None]:
trainer_stats = trainer.train()

***** Running training *****
  Num examples = 18,612
  Num Epochs = 2
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 1,500
  Number of trainable parameters = 23,199,744
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Unsloth: `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`


Step,Training Loss
100,0.6282
200,0.5077
300,0.5113
400,0.4945
500,0.4976
600,0.507
700,0.5018
800,0.4964
900,0.4812
1000,0.4916


Saving model checkpoint to adapter-unsloth-llama-2-7b-py-coder/checkpoint-500
tokenizer config file saved in adapter-unsloth-llama-2-7b-py-coder/checkpoint-500/tokenizer_config.json
Special tokens file saved in adapter-unsloth-llama-2-7b-py-coder/checkpoint-500/special_tokens_map.json
Saving model checkpoint to adapter-unsloth-llama-2-7b-py-coder/checkpoint-1000
tokenizer config file saved in adapter-unsloth-llama-2-7b-py-coder/checkpoint-1000/tokenizer_config.json
Special tokens file saved in adapter-unsloth-llama-2-7b-py-coder/checkpoint-1000/special_tokens_map.json
Saving model checkpoint to adapter-unsloth-llama-2-7b-py-coder/checkpoint-1500
tokenizer config file saved in adapter-unsloth-llama-2-7b-py-coder/checkpoint-1500/tokenizer_config.json
Special tokens file saved in adapter-unsloth-llama-2-7b-py-coder/checkpoint-1500/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)




In [None]:
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
!nvidia-smi

6822.212 seconds used for training.
113.7 minutes used for training.
Tue Jan  2 10:48:53 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0              38W / 300W |  10092MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+

## Copy the model and training output to Google Drive

Save the model locally

In [None]:
# save model in local
trainer.save_model()

Saving model checkpoint to adapter-unsloth-llama-2-7b-py-coder
tokenizer config file saved in adapter-unsloth-llama-2-7b-py-coder/tokenizer_config.json
Special tokens file saved in adapter-unsloth-llama-2-7b-py-coder/special_tokens_map.json


In [None]:
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
adapter_name, adapter_repo

('adapter-unsloth-llama-2-7b-py-coder',
 'edumunozsala/adapter-unsloth-llama-2-7b-py-coder')

In [None]:
!cp ./adapter-unsloth-llama-2-7b-py-coder/*.* /content/drive/MyDrive/Projects/unsloth-llama-2-7B-python-coder/adapter

In [None]:
!cp -r ./adapter-unsloth-llama-2-7b-py-coder/runs /content/drive/MyDrive/Projects/unsloth-llama-2-7B-python-coder/adapter

## Connect to Huggingface Hub to save the adapter/model

You can log in to Hugging Face Hub interactively

In [None]:
from huggingface_hub import notebook_login
# Log in to HF Hub
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Or you can provide .env file containing the Hugging Face token

In [None]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

# Load the enviroment variables
load_dotenv()
# Login to the Hugging Face Hub
login(token=os.getenv("HF_HUB_TOKEN"))

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# push merged model to the hub
trainer.push_to_hub(adapter_repo)
tokenizer.push_to_hub(adapter_repo)

Saving model checkpoint to adapter-unsloth-llama-2-7b-py-coder
tokenizer config file saved in adapter-unsloth-llama-2-7b-py-coder/tokenizer_config.json
Special tokens file saved in adapter-unsloth-llama-2-7b-py-coder/special_tokens_map.json


Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/92.8M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.60k [00:00<?, ?B/s]

events.out.tfevents.1704185656.fb9072b15930.325.0:   0%|          | 0.00/7.42k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer config file saved in /tmp/tmp_0wz5kkb/tokenizer_config.json
Special tokens file saved in /tmp/tmp_0wz5kkb/special_tokens_map.json
Uploading the following files to edumunozsala/unsloth-llama-2-7B-python-coder: special_tokens_map.json,tokenizer.model,tokenizer_config.json,tokenizer.json


CommitInfo(commit_url='https://huggingface.co/edumunozsala/unsloth-llama-2-7B-python-coder/commit/b5d3495008715da01eabdf5a4ea6b83d2a3e59d2', commit_message='Upload tokenizer', commit_description='', oid='b5d3495008715da01eabdf5a4ea6b83d2a3e59d2', pr_url=None, pr_revision=None, pr_num=None)

## Merge the base model and the adapter and upload it to the Hub

This piece of code is an extraction of a notebook by Benjamine Marie. It's the naive approach for simplicity.


In [None]:
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments
)

In [None]:
model_name

'unsloth/llama-2-7b'

The next cell recreate the variables with the adapter and model names

In [None]:
adapter_name = "edumunozsala/adapter-unsloth-llama-2-7b-py-coder" #"kaitchup/Llama-2-7B-oasstguanaco-adapter"
local_path= "/content/drive/MyDrive/Projects/unsloth-llama-2-7B-python-coder/"
hf_model_repo = "edumunozsala/unsloth-llama-2-7B-python-coder"

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_name)
model = PeftModel.from_pretrained(model, adapter_name)
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, add_eos_token=True, use_fast=True)
model = model.merge_and_unload()
#model.save_pretrained("./naive_merge/", safe_serialization=True)
#model.save_pretrained(os.path.join(local_path,"naive_merge"), safe_serialization=True)
# Save the model to the hub
# push merged model to the hub
model.push_to_hub(hf_model_repo)
tokenizer.push_to_hub(hf_model_repo)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/589 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/92.8M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model-00001-of-00006.safetensors:   0%|          | 0.00/4.84G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/2.68G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/edumunozsala/unsloth-llama-2-7B-python-coder/commit/8853f4baab0501f75a84c7d377f9b5164ec114fc', commit_message='Upload tokenizer', commit_description='', oid='8853f4baab0501f75a84c7d377f9b5164ec114fc', pr_url=None, pr_revision=None, pr_num=None)

## Load the model from the HF Hub and test it

In [None]:
del model
torch.cuda.empty_cache()
gc.collect()

248

Finally we download the created model from the hub and test it to make sure it works fine!

In [None]:
# Fine-tuned model name
new_model = "unsloth-llama-2-7B-python-coder"
# Huggingface repository
hf_model_repo="edumunozsala/"+new_model
print(hf_model_repo)

edumunozsala/unsloth-llama-2-7B-python-coder


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer,LlamaForCausalLM

# Load the entire model on the GPU 0
device_map = {"": 0}

# Get the tokenizer
tokenizer = AutoTokenizer.from_pretrained(hf_model_repo)
# Load the model
model = AutoModelForCausalLM.from_pretrained(hf_model_repo, load_in_4bit=True, torch_dtype=torch.float16,
                                             device_map=device_map)
# Create an instruction
instruction="Write a Python function to display the first and last elements of a list."
input=""

prompt = f"""### Instruction:
Use the Task below and the Input given to write the Response, which is a programming code that can solve the Task.

### Task:
{instruction}

### Input:
{input}

### Response:
"""
# Tokenize the input
input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
# Run the model to infere an output
outputs = model.generate(input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9,temperature=0.5)

# Print the result
print(f"Prompt:\n{prompt}\n")
print(f"Generated instruction:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]}")


tokenizer_config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/671 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.84G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/2.68G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]



Prompt:
### Instruction:
Use the Task below and the Input given to write the Response, which is a programming code that can solve the Task.

### Task:
Write a Python function to display the first and last elements of a list.

### Input:


### Response:


Generated instruction:
def first_last_elements(list):
    return (list[0], list[-1])

print(first_last_elements([1, 2, 3, 4, 5]))

# Output: (1, 5)

# Output: (2, 4)

# Output: (3, 3)

# Output: (4, 2)

# Output: (5, 1)
