In [None]:
!pip install -q  torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig
from trl import SFTTrainer

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m117.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m79.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m82.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Dataset
# Training split of the instruction dataset used for supervised fine-tuning.
data_name = "mlabonne/guanaco-llama2-1k"
training_data = load_dataset(data_name, split="train")

# Model and tokenizer names
base_model_name = "NousResearch/Llama-2-7b-chat-hf"
refined_model = "llama-2-7b-mlabonne-enhanced"  # directory the trained adapter is saved to; rename freely

# Tokenizer
# `trust_remote_code=True` was removed: the checkpoint resolves to the built-in
# LlamaTokenizerFast, so no repository-supplied Python needs to run, and leaving
# the flag on would allow a Hub repo to execute arbitrary code at load time.
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# The tokenizer is given the EOS token as its padding token.
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

# Quantization Config
# 4-bit NF4 weights, float16 compute dtype, nested (double) quantization disabled.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Model
# Load the base model quantized per `quant_config`; the {"": 0} device map
# places the entire model on device 0.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
# Generation cache is switched off for training.
base_model.config.use_cache = False
# NOTE(review): pretraining_tp=1 presumably selects the regular (non tensor-parallel
# emulating) linear-layer path — confirm against the Llama config documentation.
base_model.config.pretraining_tp = 1

Downloading readme:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/967k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

(…)at-hf/resolve/main/tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

(…)2-7b-chat-hf/resolve/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

(…)b-chat-hf/resolve/main/added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

(…)-hf/resolve/main/special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

(…)ma-2-7b-chat-hf/resolve/main/config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

(…)esolve/main/model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

(…)t-hf/resolve/main/generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]

In [None]:
# LoRA Config
# Adapter hyperparameters: rank 8, alpha 16, 10% dropout, no bias parameters,
# configured for a causal language-modelling task.
peft_parameters = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias="none",
    task_type="CAUSAL_LM"
)

# Training Params
train_params = TrainingArguments(
    output_dir="./results_modified",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,  # -1: run length is governed by num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,
    # The plain "constant" schedule has no warmup phase, so `warmup_ratio` above
    # was silently ignored. "constant_with_warmup" ramps the learning rate up
    # over the warmup steps and then holds it at `learning_rate`.
    lr_scheduler_type="constant_with_warmup",
    report_to="tensorboard"
)

# Trainer
# Supervised fine-tuning on the dataset's "text" column; passing `peft_config`
# makes the trainer wrap `base_model` with the LoRA adapter defined above.
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    peft_config=peft_parameters,
    dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)

# Training
fine_tuning.train()

# Save Model
# Writes the trained model's weights/config into the `refined_model` directory.
fine_tuning.model.save_pretrained(refined_model)



Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,1.3455
50,1.6171
75,1.207
100,1.44
125,1.1736
150,1.3618
175,1.1686
200,1.4587
225,1.1517
250,1.5274




In [None]:
# Dump the fine-tuned model's configuration to a JSON file in the working directory.
trained_config = fine_tuning.model.config
trained_config.to_json_file('./config.json')

In [None]:
# Generate Text
query = "How do I use the OpenAI API?"

# Wrap the in-memory fine-tuned model in a text-generation pipeline.
# - `torch_dtype` / `device_map` were dropped: they are only forwarded to
#   `from_pretrained` when `model` is a name or path, so they had no effect on
#   this already-instantiated model.
# - `max_new_tokens` replaces `max_length`: `max_length` counts the prompt
#   tokens too, so longer prompts left little or no room for the answer.
text_gen = pipeline(
    "text-generation",
    model=fine_tuning.model,
    tokenizer=llama_tokenizer,
    max_new_tokens=200,
)

# The instruction is wrapped in the [INST] ... [/INST] chat markers.
output = text_gen(f"<s>[INST] {query} [/INST]")
print(output[0]['generated_text'])

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'O

<s>[INST] How do I use the OpenAI API? [/INST] The OpenAI API is a RESTful API that allows you to interact with the OpenAI model. It is built on top of the OpenAI SDK, which provides a Python interface to the OpenAI model.

To use the OpenAI API, you will need to first install the OpenAI SDK. You can do this by running the following command:
```
pip install openai-sdk
```
Once you have installed the OpenAI SDK, you can use the OpenAI API by sending HTTP requests to the OpenAI API endpoint. The OpenAI API endpoint is located at `https://api.openai.org/`.

Here is an example of how to use the OpenAI API to perform a simple task:
```
import openai

# Set the API endpoint
api_endpoint = "https://api.openai.org"

# Set the


In [None]:
# Generate Text
query = "What is OpenAI and who made it?"

# Reuse the `text_gen` pipeline created by the preceding generation cell rather
# than constructing an identical pipeline again for every new query.
output = text_gen(f"<s>[INST] {query} [/INST]")
print(output[0]['generated_text'])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausal

<s>[INST] What is OpenAI and who made it? [/INST] OpenAI is an independent non-profit research organization that was founded in 2015 by Elon Musk, Sam Altman, and Greg Brockman. The organization is dedicated to developing and promoting artificial intelligence (AI) that is safe, transparent, and beneficial to humanity.

OpenAI is a research organization that focuses on developing and improving AI systems, including machine learning, natural language processing, and computer vision. The organization also works on developing new AI technologies, such as generative models and reinforcement learning algorithms.

OpenAI is funded by a number of organizations, including the Open Philanthropy Project, the Future of Life Institute, and the Machine Intelligence Research Institute. The organization also receives funding from a number of private donors and companies, including Google, Microsoft, and Facebook.

Overall,


In [None]:
# Ask a comparison question through the existing `text_gen` pipeline.
query = "Which company was founded first, OpenAI or Google? Explain with specific years."
prompt = f"<s>[INST] {query} [/INST]"
output = text_gen(prompt)
first_result = output[0]
print(first_result['generated_text'])

<s>[INST] Which company was founded first, OpenAI or Google? Explain with specific years. [/INST] Google was founded first in 1998, while OpenAI was founded in 2015.

Google was founded by Larry Page and Sergey Brin, while OpenAI was founded by Elon Musk, Sam Altman, and Greg Brockman.

Google is a multinational technology company that specializes in Internet-related services and products, including search, cloud computing, and advertising technologies. OpenAI is a non-profit artificial intelligence research organization that aims to develop and promote safe and beneficial AI.

Google has been around for over 20 years, while OpenAI has only been around for 6 years. However, OpenAI has made significant progress in the field of AI research and development, and has gained a lot of attention and support from the AI community.


In [None]:
# Korean-language prompt sent through the existing `text_gen` pipeline.
query = "구글과 삼성 중에서 어느 회사가 먼저 설립되었지? 답변은 한국어로 번역해서 말해."
prompt = f"<s>[INST] {query} [/INST]"
output = text_gen(prompt)
first_result = output[0]
print(first_result['generated_text'])



<s>[INST] 구글과 삼성 중에서 어느 회사가 먼저 설립되었지? 답변은 한국어로 번역해서 말해. [/INST] 삼성과 구글 중에서 삼성이 먼저 설립되었습니다.

삼성은 1938년 삼성전자(Samsung Electronics)로 창립되었으며, 1958년 삼성전자(Samsung Electronics)와 삼
