In [1]:
# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia -y

In [2]:
# !pip3 install  -U bitsandbytes==0.42.0
# !pip3 install  -U peft==0.8.2
# !pip3 install  -U trl==0.7.10
# !pip3 install  -U accelerate==0.27.1
# !pip3 install  -U datasets==2.17.0
# !pip3 install  -U transformers==4.38.1

In [3]:
# pip install huggingface_hub

In [4]:
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

# GPU 메모리 초기화
torch.cuda.empty_cache()

# 모델 ID
model_id = "google/gemma-7b"

# GPU 메모리 관리 설정
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'  # 환경 변수 설정
torch.cuda.empty_cache()  # GPU 메모리 해제

# 모델 양자화 설정: 4비트 양자화를 사용하여 모델을 더 작은 메모리로 로드함
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# 사전 학습된 토크나이저 불러오기
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 사전 학습된 모델 불러오기
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"  # 모델을 GPU에 로드
)

# 메모리 절약 방법: 모델 일부만 메모리에 유지, 나머지는 필요할 때 다시 계산
model.gradient_checkpointing_enable()

# 모델을 4비트 양자화에 맞게 훈련 준비
model = prepare_model_for_kbit_training(model)

# 텍스트 입력 생성 및 모델로 추론하기
input_text = "Machine Learning에 대한 시를 써주세요."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

# 모델 추론 수행
outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

2024-10-26 18:02:51.054316: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 18:02:51.076989: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<bos>Machine Learning에 대한 시를 써주세요.

<h1>머신러닝의 


In [5]:
# CUDA 사용 가능 여부 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [6]:
from peft import LoraConfig

lora_config = LoraConfig(      # lora configuration
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [7]:
import pandas as pd
from datasets import Dataset
# 데이터셋 불러오기
# 원본 train.csv에서 문제와 정답 페어를 한행에 저장한 train_1by1.csv를 사용
data = pd.read_csv('/home/ssrlab/gm/자연언어처리/data/train_1by1.csv') 
hf_dataset = Dataset.from_pandas(data)
hf_dataset

Dataset({
    features: ['질문', '답변'],
    num_rows: 6440
})

In [8]:
import wandb
secret_wandb = "13eb6f7ea549c767608607003c8f2f086efd8e14"
wandb.login(key = secret_wandb)
run = wandb.init(
    project='Fine tuning gemma singleQ', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33myun010515[0m ([33myun010515-chungbuk-university[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ssrlab/.netrc


In [9]:
new_model = "Fine tuning gemma singleQ"

In [10]:
print(hf_dataset[0])  # 데이터셋의 첫 번째 샘플 출력

{'질문': '면진장치가 뭐야?', '답변': '면진장치란 지반에서 오는 진동 에너지를 흡수하여 건물에 주는 진동을 줄여주는 진동 격리장치입니다.'}


In [11]:
import transformers
from trl import SFTTrainer,DataCollatorForCompletionOnlyLM

# CUDA 디버깅 활성화
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

def formatting_func(example): # example을 받아서 text로 변환
    output_texts =[]
    for i in range(len(example['질문'])):
        text = f"### Question: {example['질문'][i]}\n ### Answer: {example['답변'][i]}"
        output_texts.append(text)
    return output_texts
    
response_template = " ### Answer:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

trainer = SFTTrainer(  # Supervised Fine-tuning Trainer
    model=model,
    train_dataset=hf_dataset,
    args=transformers.TrainingArguments( 
        output_dir=new_model,
        per_device_train_batch_size=2, # 배치 크기 2인데 GPU 사용량 때매 1로 줄임
        gradient_accumulation_steps=2, # 누적 단계 원래 4인데 GPU 사용량 때매 2로 줄임
        #warmup_steps=2,
        warmup_ratio=0.03,
        num_train_epochs=10,
        #max_steps=10,
        learning_rate=2e-4,
        fp16=True, # GPU 메모리 절약에 도움이 되지만, 일부 GPU 환경에서는 불안정할 수 있음. False로 바꾸기
        logging_steps=1,
        optim="paged_adamw_8bit",
        report_to="wandb",
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
    data_collator=collator,
)

# 캐시를 사용하지 않도록 설정
model.config.use_cache = True

trainer.train()



Map:   0%|          | 0/6440 [00:00<?, ? examples/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
1,1.8156
2,1.587
3,1.6665
4,1.7583
5,1.9446
6,1.6692
7,1.8707
8,1.5873
9,1.6605
10,1.8535


RuntimeError: CUDA error: an illegal memory access was encountered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [15]:
text = "### Question: 방청 페인트의 종류에는 어떤 것들이 있는지 알고 계신가요? 또한, 원목사이딩을 사용하는 것에 어떤 단점이 있을까요? \n ### Answer:"
inputs = tokenizer(text, return_tensors="pt").to(device)

In [16]:
outputs = model.generate(**inputs, max_new_tokens=300)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



KeyboardInterrupt: 

In [17]:
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

('Fine tuning gemma singleQ/tokenizer_config.json',
 'Fine tuning gemma singleQ/special_tokens_map.json',
 'Fine tuning gemma singleQ/tokenizer.json')

In [18]:
HUGGINGFACE_AUTH_TOKEN = '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'
MODEL_SAVE_HUB_PATH = f'Coldbrew9/Fine tuning gemma singleQ'
trainer.model.push_to_hub(
			MODEL_SAVE_HUB_PATH, 
			use_temp_dir=True, 
			use_auth_token=HUGGINGFACE_AUTH_TOKEN
)
trainer.tokenizer.push_to_hub(
			MODEL_SAVE_HUB_PATH, 
			use_temp_dir=True, 
			use_auth_token=HUGGINGFACE_AUTH_TOKEN
)



HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'Coldbrew9/Fine tuning gemma singleQ'.