<a href="https://colab.research.google.com/github/drago467/AIVN/blob/master/Project%20Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U datasets
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U loralib
!pip install -q -U einops
!pip install -q -U googletrans==3.1.0a0

In [67]:
import json
import os
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers

In [4]:
from googletrans import Translator
from pprint import pprint
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [48]:
MODEL_NAME = "vilm/vinallama-7b-chat"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map='auto',
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [49]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [50]:
prompt = """
<|im_start|>system
Bạn là một trợ lý AI hữu ích. Hãy trả lời người dùng một cách chính xác.
<|im_end|>
<|im_start|>user
Viết hàm python vẽ scatterplot
<|im_end|>
<|im_start|>assistant
""".strip()

In [51]:
%%time
from IPython.display import Javascript
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 500})'''))

device = 'cuda' if torch.cuda.is_available() else 'cpu'
encoding = tokenizer(prompt, return_tensors="pt").to(device)
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

<IPython.core.display.Javascript object>

<|im_start|> system
Bạn là một trợ lý AI hữu ích. Hãy trả lời người dùng một cách chính xác.
 
<|im_start|> user
Viết hàm python vẽ scatterplot
 
<|im_start|> assistant
Dưới đây là một hàm Python vẽ biểu đồ phân tán:

```python
def plot scatter (x, y):
# Vẽ biểu đồ phân tán
plt.scatter (x, y)
# Đặt tiêu đề và nhãn trục
plt.title ('Biểu đồ phân tán')
plt.xlabel ('Giá trị x')
plt.ylabel ('Giá trị y')
# Hiển thị biểu đồ
plt.show()
```

Hàm này có hai tham số: `x` và `y`, đại diện cho các giá trị x và y cho biểu đồ phân tán. Hàm đầu tiên vẽ biểu đồ phân tán bằng cách sử dụng `plt.scatter (x, y) '. Sau đó, nó đặt tiêu đề và nhãn trục bằng cách sử dụng `plt.title ('Biểu đồ phân tán ') \tvà \tplt.xlabel ('Giá trị x ') \t
CPU times: user 17.6 s, sys: 291 ms, total: 17.9 s
Wall time: 18.1 s


In [55]:
data = load_dataset('alespalla/chatbot_instruction_prompts')

In [56]:
data

DatasetDict({
    train: Dataset({
        features: ['response', 'prompt'],
        num_rows: 258042
    })
    test: Dataset({
        features: ['response', 'prompt'],
        num_rows: 64511
    })
})

In [57]:
type(data)

datasets.dataset_dict.DatasetDict

In [58]:
import pandas as pd
pd.DataFrame(data['train'])

Unnamed: 0,response,prompt
0,. Determine the cause of the clog. Clogs in ki...,What’s the best way to fix my kitchen drain?\n...
1,I have broken something by accident in the pas...,I'm just going to go to the store and whatever...
2,Bonjour,"Translate the phrase ""Good Morning"" to French"
3,Samurai warriors played an incredibly importan...,What contribution did Samurai warriors make to...
4,I think both you and your friend will feel bet...,"Yeah, maybe you're right."
...,...,...
258037,You need to talk to their customer service but...,It's my first time. Can I get a refund if I re...
258038,"If that's the case, then I think the best thin...",I met him and he told me he's feeling down.
258039,Police do shoot white people but that's beside...,Police don't shoot white people so that proves...
258040,You should be willing to help someone out if t...,I need the money and I think it's disgusting a...


In [59]:
def generate_prompt(data_point):
  translator = Translator()
  vn_prompt = translator.translate(data_point['prompt'], src='en', dest='vi').text
  vn_response = translator.translate(data_point['response'], src='en', dest='vi').text

  return f"""
<|im_start|>system
Bạn là một trợ lý AI hữu ích. Hãy trả lời người dùng một cách chính xác.
<|im_end|>
<|im_start|>user
{vn_prompt}
<|im_end|>
<|im_start|>assistant
{vn_response}
""".strip()

In [60]:
def generate_and_tokenize_prompt(data_point):
  full_prompt = generate_prompt(data_point)
  tokenized_full_prompt = tokenizer(
      full_prompt,
      padding=True,
      truncation=True
  )
  return tokenized_full_prompt

In [61]:
data = data['train'].shard(num_shards=50, index=0).filter(
    lambda sample: sample['response'] != '' and sample['prompt'] != ''
).shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/5161 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [69]:
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir="experiments",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)
trainer = transformers.Trainer(
    model = model,
    train_dataset = data,
    args = training_args,
    data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()

ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details

In [None]:
%%time
device = 'cuda' if torch.cuda.is_available() else 'cpu'

prompt = """
<|im_start|>system
Bạn là một trợ lý AI hữu ích. Hãy trả lời người dùng một cách chính xác.
<|im_end|>
<|im_start|>user
Mô tả về thành phố Đà
<|im_end|>
<|im_start|>assistant
""".strip()

encoding = tokenizer(prompt, return_tensors="pt").to(device)
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
