# Low-Rank Adaption (LoRA)
This Notebook introduces how to apply low-rank adaptation (LoRA) to your model of choice using [Parameter-Efficient Fine-Tuning (PEFT) library developed by Hugging Face](https://huggingface.co/docs/peft/index). 


### Learning Objectives
1. Apply LoRA to a model
1. Fine-tune on your provided dataset
1. Save your model
1. Conduct inference using the fine-tuned model

In [None]:
!pip show ipykernel



In [1]:
!pip install peft==0.4.0

/bin/bash: /home/kookmin/chaewon/LLM-Document_Summarizer/.venv/bin/pip: /home/kookmin/chaewon/LLM_document_summary/.venv/bin/python3: bad interpreter: No such file or directory


In [3]:
!pip install accelerate

/bin/bash: /home/kookmin/chaewon/LLM-Document_Summarizer/.venv/bin/pip: /home/kookmin/chaewon/LLM_document_summary/.venv/bin/python3: bad interpreter: No such file or directory


In [4]:
mkdir cache

mkdir: cannot create directory ‘cache’: File exists


We will re-use the same dataset and model from the demo notebook.

In [5]:
mkdir offload

mkdir: cannot create directory ‘offload’: File exists


In [6]:
mkdir working

mkdir: cannot create directory ‘working’: File exists


In [7]:
!pip install datasets transformers

/bin/bash: /home/kookmin/chaewon/LLM-Document_Summarizer/.venv/bin/pip: /home/kookmin/chaewon/LLM_document_summary/.venv/bin/python3: bad interpreter: No such file or directory


In [None]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "Bllossom/llama-3.2-Korean-Bllossom-3B"
foundation_model = AutoModelForCausalLM.from_pretrained(model_name, token=os.environ['TOKEN_1'], device_map='cuda', torch_dtype=torch.float32)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=os.environ['TOKEN_1'], device_map='auto', torch_dtype=torch.float32)

data = load_dataset("Abirate/english_quotes", cache_dir="./working/cache"+"/datasets")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
train_sample = data["train"].select(range(50))
display(train_sample) 

ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install 'accelerate>=0.26.0'`

In [4]:
# TODO
import peft
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=1,  # dimension of the updated matrices
    lora_alpha=4,  # parameter for scaling
    target_modules=[
        "q_proj",
        # "up_proj",
        # "o_proj",
        # "k_proj",
        # "down_proj",
        # "gate_proj",
        # "v_proj"
    ],
    lora_dropout=0.1,  # dropout probability for layers
    bias="none",
    task_type="CAUSAL_LM",
)

In [3]:
# TODO
peft_model = get_peft_model(foundation_model, lora_config)
print(peft_model.print_trainable_parameters())

trainable params: 172,032 || all params: 3,212,921,856 || trainable%: 0.00535437859089966
None


In [3]:
# TODO
import transformers
from transformers import TrainingArguments, Trainer
import os

tokenizer.pad_token = tokenizer.eos_token

output_directory = os.path.join("./cache/working", "peft_lab_outputs")
training_args = TrainingArguments(
    report_to="none",
    output_dir=output_directory,
    auto_find_batch_size=True,
    learning_rate= 3e-2, # Higher learning rate than full fine-tuning.
    num_train_epochs=5,
    # no_cuda=True
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
trainer.train()

NameError: name 'tokenizer' is not defined

In [2]:
import time
import os

time_now = time.time()

peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")

trainer.model.save_pretrained(peft_model_path)

NameError: name 'output_directory' is not defined

In [2]:
import torch
from transformers import BitsAndBytesConfig 
config = BitsAndBytesConfig(
            load_in_4bit=True,
            # bnb_4bit_quant_type="nf4",
            # bnb_4bit_use_double_quant=True,
            # bnb_4bit_compute_dtype=torch.bfloat16
        )

In [3]:
# TODO
import os
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

BASE_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        device_map="cuda",  # 두 번째 GPU로 할당
        quantization_config=config,
        token="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"
    )
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, device_map="cuda", token="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")
tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})  # pad_token 설정


Loading checkpoint shards: 100%|██████████| 2/2 [00:08<00:00,  4.24s/it]


0

In [4]:
loaded_model = PeftModel.from_pretrained(model, "/workspace/1129backup/LLM-Document_Summarizer/results/checkpoint-27534", 
                                        is_trainable=False)

In [3]:
tokenizer("<|eot_id|>")

{'input_ids': [128000, 128009], 'attention_mask': [1, 1]}

In [5]:
from transformers import StoppingCriteria, StoppingCriteriaList

# 사용자 정의 StoppingCriteria
class StopOnKeyword(StoppingCriteria):
    def __init__(self, stop_words, tokenizer, max_words=500):
        self.stop_words = stop_words
        self.tokenizer = tokenizer
        self.words = 0
        self.max = max_words

    def __call__(self, input_ids, scores):
        generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        self.words += 1
        if self.words > self.max:
            return any(word in generated_text[-1] for word in self.stop_words)
        return False


In [10]:
import os
import re
import torch
from transformers import StoppingCriteriaList

# CUDA 환경 설정 초기화
torch.cuda.empty_cache()
os.environ['CUDA_LAUNCH_BLOCKING'] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# 모델 장치 출력
print(f"Model device: {loaded_model.device}")

# 원본 텍스트
inputs_raw = """둘째, 중장기재무관리계획 작성, 정상화 대책 이행계획 제출 등 공공기관 스스로 작성하고 정부가 이를 평가한다고 했지만, MB정부 당시 가장 부채가 많은 한국토지주택공사(LH)를 상으로 추진했던 사업조정의 실패, 구분회계 도입의 실효성을 보건데, 대책이라고 보기에는 매우 부족함. 마지막으로 공운위 산하에 민관합동으로 ‘정상화협의회’를 구성한다고 했는데, 이 또한 기존의 폐쇄적인 공운위 운영방식에 다름 아니며, 현재 정부의 공공기관 정상화 진행 방식인 ‘불통과 배제’ 방식과 다름 아닌 획일적인 지침에 따른 수직적인 통제방식임. 잘못된 공공기관 정상화 억지 대책과 졸속개혁은 더 큰 부실과 국민피해만 남길 것임. 국회예산정책처 발제에 한 토론 다음으로 국회예산정책처 조영철 사업평가국장의 발제는 그동안 제기되었던 공공기관 재무건전성 악화에 한 정확한 진단, 지배구조 등 관리체계의 문제점에 해 정확히 지적하였음. 한편 전체 공공기관 부채 현황에 해 밝혔지만, 2012년 부채 상위 10개 공기업의 부채 규모가 424조원으로 전체 공공기관의 86% 수준임을 감안하면 304개 전체 공공기관 전반에 걸친 문제로 파악하는 것은 논란의 여지가 있기 때문에 한정할 필요가 있음."""

summary_len = len(inputs_raw) // 100 * 10
prompt = f"""
    MAKE SURE THAT YOU SUMMARIZE THE FOLLOWING TEXT TO A MAXIMUM OF {summary_len} TOKENS. THE SUMMARY CAN BE SHORTER if all essential information is included, ensuring the following rules:

    1. **Summary Quality:**
    - The summarized text should have no spelling errors or typos.
    - Avoid repeating similar content. If multiple sentences convey similar ideas, output only one concise sentence to represent them.
    - The text should be logically structured and divided into appropriate paragraphs to maintain readability.

    2. **Key Information:**
    - Ensure that the summary includes key points such as the causes of debt, government policies, and the need for improved debt management systems.

    3. **Prevent Duplication:**
    - Do not generate sentences that repeat or convey the same idea as other sentences within the summarized text.
    - If a sentence shares a similar meaning with another, only include the most concise and representative one. The rest must be omitted.
    - The summary does not need to reach a specific target length, as long as all essential information is included without duplication.

    4. **Example of a Good Summary:**
    - "The analysis highlights the need to distinguish between debt caused by price regulations and other factors, emphasizing government policy impacts and the necessity for better debt management."

    5. **Avoid This Type of Summary:**
    - "Debt is caused by many things. Government policies are involved. Management is needed." (Too vague and lacks detail)

    Ensure the final summarized text adheres to these rules and retains its readability and logical structure.
    """

inputs_raw = f"""<|begin_of_text|><|start_header_id|>user: <|end_header_id|>{prompt}
{inputs_raw}<|eot_id|><|start_header_id|>assistant: <|end_header_id|>
"""

# 종료 조건 설정
stop_words = ["."]  # 종료를 트리거하는 키워드
stopping_criteria = StoppingCriteriaList([StopOnKeyword(stop_words, tokenizer, summary_len)])

# 입력 토큰화
inputs = tokenizer(inputs_raw, return_tensors="pt").to('cuda')

# 출력 생성
outputs = loaded_model.generate(
    input_ids=inputs["input_ids"], 
    attention_mask=inputs["attention_mask"],
    max_new_tokens=len(inputs_raw) // 100 * 15,
    eos_token_id=128009,
    temperature=0.4,
    no_repeat_ngram_size=7,  # 반복을 방지
    # repetition_penalty = 1.2,
    stopping_criteria=stopping_criteria,  # 사용자 정의 종료 조건
    # early_stopping=True,
    do_sample=True
)

# 불필요한 특수 문자 제거 및 포맷팅
decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
filtered_output = decoded_output.replace(inputs_raw, '').strip()

# 한자 및 일본어 제거 함수
def remove_non_korean(text):
    """
    한자(중국어) 및 일본어를 제거하는 함수.
    유니코드 범위:
    - 한자: \u4E00-\u9FFF
    - 일본어(히라가나): \u3040-\u309F
    - 일본어(가타카나): \u30A0-\u30FF
    """
    return re.sub(r'[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF]+', '', text)

# 불필요한 문자 제거 및 최종 출력
final_output = remove_non_korean(filtered_output)
print(final_output)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Model device: cuda:0


<|begin_of_text|>국회예산정책처의 발제는 공공기관 재무건강 악화에 대한 정확한 진단과 관리체계의 문제점이 지적되었다. 2012년 부채상위 10개 공기업 부채 규모가 425조원으로 전체 공공기업의 86% 수준이며 304개 전체 공공기업 전반에 걸친 문제라고 파악하는 것은 논란이 있다.


In [11]:
print(summary_len)
print(len(inputs_raw))
print(len(final_output))
print(len(inputs_raw) // 100 * 10)

50
2359
165
230


In [18]:
!export CUDA_LAUNCH_BLOCKING=1

UsageError: Cell magic `%%` not found.
