##  Finetuning, Create Datasets

## 初始環境設定

In [None]:
import os
from pathlib import Path
# Current user's home directory, and the per-user binary directory beneath it
# (presumably where user-level pip installs place their command-line scripts).
HOME = str(Path.home())
Add_Binarry_Path=HOME+'/.local/bin'
# Append the directory to PATH only when it is missing, so re-running this cell
# does not keep growing PATH with duplicate entries. os.pathsep is used instead
# of a hard-coded ':' and a missing PATH variable no longer raises KeyError.
_current_path = os.environ.get('PATH', '')
if Add_Binarry_Path not in _current_path.split(os.pathsep):
    os.environ['PATH'] = (
        _current_path + os.pathsep + Add_Binarry_Path
        if _current_path
        else Add_Binarry_Path
    )
# Remember the directory the notebook was started from (no shell call needed).
current_foldr = os.getcwd()
# Bare expression: the notebook displays the value as the cell's output.
current_foldr

## 確認CUDA版本, 以及是否能使用GPU
若無GPU, 請點選右側->已連線->變更執行階段類型->T4 GPU

In [None]:
# Run the nvidia-smi command-line tool in a shell and show its output.
!nvidia-smi
import torch
# Print the installed PyTorch version.
print(torch.__version__)
# Whether PyTorch can use CUDA; as the last expression it is shown as the cell's output.
torch.cuda.is_available()

## 安裝套件

In [None]:
pip install datasets -q

### LOAD LIBRARY

In [None]:
import json
from datasets import load_dataset

### Loading dataset

In [None]:
# Open the dataset in streaming mode so records are iterated lazily.
dataset = load_dataset(
    "lavita/ChatDoctor-HealthCareMagic-100k",
    split="train",
    streaming=True,
)

# Collect the instruction / input / output fields of every record.
# `limit` is compared with the list length AFTER each append, so the value 0
# is never matched and the loop runs until the stream is exhausted.
limit = 0
extracted_data = []
for example in dataset:
    extracted_example = {
        key: example[key] for key in ("instruction", "input", "output")
    }
    extracted_data.append(extracted_example)
    if limit == len(extracted_data):
        break

# Name of the JSON file to write
json_filename = "data.json"

# Dump the collected records as indented JSON
with open(json_filename, "w") as json_file:
    json.dump(extracted_data, json_file, indent=4)

print(f"數據已提取並保存為 {json_filename}")

### 建立本地端資料內容

In [None]:
# Read the full extracted data file into a DataFrame and display summary statistics
import pandas as pd
df = pd.read_json("data.json")
df.describe()

### 取出前一千筆, 並儲存為本地端資料

In [None]:
# Keep the first 1,000 rows and write them to a local file as a JSON list of records
dataset_df_1k = df.iloc[:1000]
dataset_df_1k.to_json("data_1k.json", orient="records")

## Finetuning, Alpaca LoRA Training
https://huggingface.co/docs/transformers/main/peft

In [None]:
# Clone the alpaca-lora repository into a local folder named alpaca-lora_training_v1.
!git clone https://github.com/c00cjz00/alpaca-lora.git alpaca-lora_training_v1

In [None]:
%cd alpaca-lora_training_v1

In [None]:
!pip install cohere gdown kaleido langchain openai pyngrok pypdf python-dotenv sentence-transformers tiktoken -q
!pip install accelerate bitsandbytes hf_transfer huggingface_hub optimum transformers -q 
!pip install appdirs black black[jupyter] datasets fire loralib sentencepiece gradio -q
!pip install git+https://github.com/huggingface/peft.git -q

### 訓練開始

In [None]:
%cd alpaca-lora_training_v1
!cp ../data_1k.json .
!wandb offline
!python3 finetune.py \
    --base_model 'openlm-research/open_llama_3b_v2' \
    --data_path './data_1k.json' \
    --output_dir './data_1k' \
    --batch_size 16 \
    --micro_batch_size 16 \
    --num_epochs 2 \
    --learning_rate 1e-4 \
    --cutoff_len 512 \
    --val_set_size 500 \
    --lora_r 8 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --lora_target_modules '[q_proj,v_proj]' \
    --train_on_inputs \
    --group_by_length

### 確認結果

In [None]:
from typing import Optional, Any

import torch

from transformers.utils import is_accelerate_available, is_bitsandbytes_available
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    pipeline,
)

from peft import PeftModel

# Prompt template with two placeholders, {instruction} and {input}; the text
# ends right after the "### Response:" header so the model continues from there.
ALPACA_TEMPLATE = "\n\n".join(
    [
        "Below is an instruction that describes a task, paired with an input that provides "
        "further context. Write a response that appropriately completes the request.",
        "### Instruction:\n{instruction}",
        "### Input:\n{input}",
        "### Response:\n",
    ]
)


def load_adapted_hf_generation_pipeline(
    base_model_name,
    lora_model_name,
    temperature: float = 0.7,
    top_p: float = 1.,
    max_tokens: int = 512,
    batch_size: int = 16,
    device: str = "cuda",
    load_in_8bit: bool = True,
    generation_kwargs: Optional[dict] = None,
):
    """
    Load a huggingface model & adapt with PEFT.
    Borrowed from https://github.com/tloen/alpaca-lora/blob/main/generate.py

    Args:
        base_model_name: Name or path of the base causal LM; also used to load
            the tokenizer.
        lora_model_name: Name or path of the PEFT adapter applied on top of
            the base model.
        temperature: Sampling temperature placed in the generation config.
        top_p: Nucleus-sampling threshold placed in the generation config.
        max_tokens: Value used as ``max_new_tokens`` in the generation config.
        batch_size: Batch size handed to the returned pipeline.
        device: ``"cuda"``, ``"mps"``, or any other value, which is passed
            through as the single entry of ``device_map``.
        load_in_8bit: Forwarded to the base-model loader on the CUDA path
            only. When False the model is additionally cast with ``.half()``.
        generation_kwargs: Extra keyword arguments for ``GenerationConfig``.

    Returns:
        A ``text-generation`` pipeline wrapping the adapted model.

    Raises:
        ValueError: On the CUDA path, when ``accelerate`` is unavailable, or
            when ``load_in_8bit`` is set and ``bitsandbytes`` is unavailable.
    """

    if device == "cuda":
        if not is_accelerate_available():
            raise ValueError("Install `accelerate`")
        # 8-bit loading is only requested in the CUDA branch below, so the
        # bitsandbytes requirement is only enforced here; other devices must
        # not fail just because `load_in_8bit` keeps its default of True.
        if load_in_8bit and not is_bitsandbytes_available():
            raise ValueError("Install `bitsandbytes`")

    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    task = "text-generation"

    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            load_in_8bit=load_in_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(
            model,
            lora_model_name,
            torch_dtype=torch.float16,
        )
    elif device == "mps":
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_model_name,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_model_name,
            device_map={"": device},
        )

    # unwind broken decapoda-research config: force the special-token ids on
    # the model config (and the pad id on the tokenizer as well).
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_in_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()

    generation_kwargs = generation_kwargs if generation_kwargs is not None else {}
    config = GenerationConfig(
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_p=top_p,
        **generation_kwargs,
    )
    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        # Honor the caller's batch_size (previously hard-coded to 16, which
        # silently ignored the function parameter).
        batch_size=batch_size,
        generation_config=config,
        framework="pt",
    )

    return pipe

In [None]:
# Build the generation pipeline from the base model and the adapter stored in ./data_1k
pipe = load_adapted_hf_generation_pipeline(
    "openlm-research/open_llama_3b_v2",
    "./data_1k",
)
# Fill the template's two placeholders with a sample instruction and input
prompt_template = ALPACA_TEMPLATE.format_map(
    {
        "instruction": "請回答以下數學問題",
        "input": "小明有14顆糖果，他送給小紅5顆，還給小王4顆，請問他現在手中還剩幾顆糖果?",
    }
)
# Print the generated text of the first returned result
print(pipe(prompt_template)[0]['generated_text'])

