In [1]:
import pandas as pd

# Read the instruction/response pairs, drop rows where either field is missing,
# and cast both text columns to str in a single chained expression.
df = (
    pd.read_csv('/data/visi2/Dataset/instruction_tuning_dataset.csv')
    .dropna(subset=['Instruction', 'Output'])
    .astype({'Instruction': str, 'Output': str})
)

### Instruction tuning (8B)

In [None]:
import pandas as pd
from datasets import Dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import logging

# Root logger reports ERROR and above only.
logging.basicConfig(level=logging.ERROR)

# Read the instruction/response pairs, keep only rows where both fields are
# present, and cast the two text columns to str.
df = (
    pd.read_csv('/data/visi2/Dataset/instruction_tuning_dataset.csv')
    .dropna(subset=['Instruction', 'Output'])
    .astype({'Instruction': str, 'Output': str})
)

# Wrap the two text columns in a Hugging Face Dataset for the trainer.
train_dataset = Dataset.from_pandas(df[['Instruction', 'Output']])

def generate_prompt(example):
    """Format a batch of examples into training prompts.

    Args:
        example: Mapping with parallel sequences under the keys
            ``'Instruction'`` and ``'Output'``.

    Returns:
        A list with one ``"### Instruction: ...\\n\\n### Response: ..."`` string
        per entry of ``example['Instruction']``.
    """
    instructions = example['Instruction']
    responses = example['Output']
    return [
        f"### Instruction: {instructions[pos]}\n\n### Response: {responses[pos]}"
        for pos in range(len(instructions))
    ]

# Checkpoint that is loaded and fine-tuned in this cell.
BASE_MODEL = "MLP-KTLim/llama-3-Korean-Bllossom-8B"

# 4-bit NF4 quantisation with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# LoRA settings (rank 16, alpha 32, dropout 0.1) applied to the listed
# projection modules of the causal LM.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=[
        "q_proj", "o_proj", "k_proj", "v_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Tokenizer for the base checkpoint, padding on the right.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True)
tokenizer.padding_side = 'right'

# Quantised base model, placed automatically across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)

# Supervised fine-tuning of the quantised base model with LoRA adapters.
# NOTE(review): the `tokenizer` configured earlier is not passed here, so the
# trainer presumably instantiates its own from the model name — confirm.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    max_seq_length=512,
    args=TrainingArguments(
        output_dir="outputs",
        num_train_epochs=10,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,  # effective batch of 16 sequences per device
        optim="paged_adamw_8bit",
        # 0.1 is a fraction of the total training steps, so it belongs in
        # `warmup_ratio`; `warmup_steps` expects an integer step count and a
        # value of 0.1 there yields effectively no warmup.
        warmup_ratio=0.1,
        learning_rate=1e-4,
        fp16=True,
        logging_steps=100,
        push_to_hub=False,
        report_to='none',
    ),
    peft_config=lora_config,
    formatting_func=generate_prompt,
)

trainer.train()

# Persist the trained LoRA adapter. The merge step that follows loads it from
# "lora_adapter_it"; without this save a fresh run has nothing to load.
trainer.model.save_pretrained("lora_adapter_it")

# Load the base checkpoint in float16, attach the adapter stored under
# "lora_adapter_it", fold the adapter weights into the base model, and write
# the merged model to disk.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, device_map='auto', torch_dtype=torch.float16
    ),
    "lora_adapter_it",
    device_map='auto',
    torch_dtype=torch.float16,
).merge_and_unload()
model.save_pretrained('Bllossom-8B')

# Reload the merged checkpoint on GPU 0 in float16, the dtype it was loaded and
# saved with. Without an explicit dtype, transformers versions that default to
# float32 upcast the weights and roughly double the memory footprint.
finetune_model = AutoModelForCausalLM.from_pretrained(
    "./Bllossom-8B",
    device_map={"": 0},
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True)

pipe_finetuned = pipeline("text-generation", model=finetune_model, tokenizer=tokenizer, batch_size=8)

# Single smoke-test prompt, formatted exactly like the training prompts but
# with the response left open for the model to complete.
prompt = "무지개다리 그럼 거의 첫째 날은 거의 다 잡힌 거네요 이 발화의 OCEAN 점수는 어떻게 될까요?"
formatted_prompt = f"### Instruction: {prompt}\n\n### Response:"

outputs = pipe_finetuned(
    formatted_prompt,
    # Explicit generation budget, matching the value used in evaluate_bleu, so
    # the output length does not depend on the checkpoint's generation config.
    max_new_tokens=512,
    do_sample=True,
    temperature=0.2,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
    add_special_tokens=True
)

# The pipeline output starts with the prompt; strip it, then cut at the next
# "###" marker in case the model starts a new section.
generated_text = outputs[0]["generated_text"]
response = generated_text[len(formatted_prompt):].strip()
response = response.split('###')[0].strip()

print(f"Instruction: {prompt}")
print(f"Response: {response}")

def calculate_bleu(reference, candidate):
    """Return the smoothed sentence-level BLEU of ``candidate`` against ``reference``.

    Args:
        reference: Reference text as a string, or an already tokenized sequence.
        candidate: Hypothesis text as a string, or an already tokenized sequence.

    Returns:
        The BLEU score computed by ``sentence_bleu`` with smoothing method 1.
    """
    # sentence_bleu operates on token sequences. Given raw strings it iterates
    # them character by character, so strings are split on whitespace first;
    # pre-tokenized input is passed through unchanged.
    reference_tokens = reference.split() if isinstance(reference, str) else list(reference)
    candidate_tokens = candidate.split() if isinstance(candidate, str) else list(candidate)

    smooth = SmoothingFunction().method1
    return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smooth)

def evaluate_bleu(dataframe, batch_size=8):
    """Generate a response for every row and report the mean sentence-level BLEU.

    Args:
        dataframe: Frame with an ``'Instruction'`` column (prompt text) and an
            ``'Output'`` column (reference response).
        batch_size: Currently unused; rows are generated one at a time. Kept so
            existing callers that pass it keep working.

    Returns:
        The average BLEU score over all rows as a fraction (the printed value
        is scaled by 100). Returns 0.0 for an empty frame.
    """
    # Guard first: with no rows the average below would divide by zero.
    if len(dataframe) == 0:
        print("No rows to evaluate; returning a BLEU score of 0.0")
        return 0.0

    bleu_scores = []

    for _, row in tqdm(dataframe.iterrows(), total=len(dataframe), desc="Evaluating BLEU scores"):
        prompt = row['Instruction']
        reference_output = row['Output']

        # Same prompt template as used for training, with the response left open.
        formatted_prompt = f"### Instruction: {prompt}\n\n### Response:"

        # NOTE(review): sampling is enabled, so scores can vary between runs.
        outputs = pipe_finetuned(
            formatted_prompt,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            repetition_penalty=1.2,
            add_special_tokens=True
        )

        # Drop the echoed prompt, then cut at the next "###" marker.
        generated_text = outputs[0]["generated_text"]
        generated_response = generated_text[len(formatted_prompt):].strip().split('###')[0].strip()

        bleu_score = calculate_bleu(reference_output, generated_response)
        bleu_scores.append(bleu_score)

    average_bleu_score = sum(bleu_scores) / len(bleu_scores)
    print(f"Average BLEU Score: {average_bleu_score * 100:.2f}")
    return average_bleu_score

# NOTE(review): `df` is the same frame the training dataset was built from, so
# this reports a score on training examples rather than on held-out data.
evaluate_bleu(df)