In [1]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [2]:
# !pip install wandb -Uq

In [3]:
# import wandb

# wandb.login()

In [4]:
# wandb.init(project='cl')

In [2]:
import torch
from transformers import  AutoTokenizer, PreTrainedTokenizerFast, AdamW, AutoModelForCausalLM, BitsAndBytesConfig,HfArgumentParser, get_scheduler, set_seed

import pandas as pd
import numpy as np

from torch import nn
from torch.utils.data import Dataset, Subset
from torch.utils.data import DataLoader
from torch import cuda
from torch.optim import AdamW, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler

from tqdm import tqdm

from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import bitsandbytes as bnb
import os
import random

import numpy as np
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
config = {'mode_ID':"microsoft/Phi-3-mini-4k-instruct",
          'seed': 1 ,
          'max_seq_len' : 4096,
          'epochs': 3,
          'lr': 2e-4,
          'batch': 4,
          'lora_r':8,
          'lora_alpha':32,
          'target_module':["q_proj", "up_proj", "o_proj", "k_proj", "down_proj","gate_proj", "v_proj"],
          'lora_dropout':0.05,
          'lora_tasktype' :'CAUSAL_LM',
          'lora_bias' : 'none',
          'optimizer': 'paged_adamw_8bit',
          'scheduler':'cosine'}

## Model 초기화

In [4]:
from peft import (
    get_peft_config,  # PEFT 설정을 가져오기 위한 함수
    get_peft_model,  # PEFT 모델을 가져오기 위한 함수
    get_peft_model_state_dict,  # PEFT 모델 상태 사전을 가져오기 위한 함수
    set_peft_model_state_dict,  # PEFT 모델 상태 사전을 설정하기 위한 함수
    LoraConfig,  # LoRA 모델 구성을 정의하는 클래스
    PeftType,  # PEFT 모델의 타입을 정의
    PrefixTuningConfig,  # PrefixTuning 모델 구성을 정의하는 클래스
    PromptEncoderConfig,  # PromptEncoder 모델 구성을 정의하는 클래스
    PeftModel,  # PEFT 모델을 정의하는 클래스
    PeftConfig,  # PEFT 모델의 구성을 정의하는 클래스
)

# PEFT 모델의 타입 설정 (LoRA로 설정)
peft_type = PeftType.LORA

# LoRA 모델을 위한 설정
peft_config = LoraConfig(
    r=config['lora_r'],  # LoRA 모델의 r 값
    lora_alpha=config['lora_alpha'],  # LoRA 모델의 alpha 값
    target_modules=config['target_module'],  # LoRA 모델의 타겟 모듈 리스트
    lora_dropout=config['lora_dropout'],  # LoRA 모델의 드롭아웃 비율
    bias=config['lora_bias'],  # LoRA 모델의 편향 설정
    task_type=config['lora_tasktype']  # LoRA 모델의 태스크 유형
)

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16
# )


In [5]:
# AutoTokenizer를 사용하여 토크나이저 생성
tokenizer = AutoTokenizer.from_pretrained(config['mode_ID'], trust_remote_code=True, eos_token='</s>')
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
	config['mode_ID'],
	device_map="cuda",
	torch_dtype=torch.float16,
	trust_remote_code=True, 
	use_cache=False,
    # attn_implementation='flash_attention_2'
	# quantization_config=bnb_config,
)

model.gradient_checkpointing_enable() # 모델에서 그래디언트 체크포인팅 활성화 (메모리 효율 향상)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:10<00:00,  5.38s/it]


In [6]:
print(f'Phi3 크기 : {model.num_parameters()/1000**2:.1f}M개의 파라미터')

Phi3 크기 : 3821.1M개의 파라미터


In [7]:
from peft import prepare_model_for_kbit_training # peft 라이브러리에서 k 비트 학습 준비 함수 임포트

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}") # CUDA 사용 가능 여부 확인

model = prepare_model_for_kbit_training(model)# k 비트 학습을 위해 모델 준비 - prepare_model_for_kbit_training 함수 사용
model = get_peft_model(model, peft_config) # PEFT 적용 
model = model.to(device) # 모델을 학습 장치 (GPU 등)로 이동
model.print_trainable_parameters()# 훈련 가능한 파라미터 출력 

Using device: cuda
trainable params: 4,456,448 || all params: 3,825,536,000 || trainable%: 0.1165


In [8]:
def make_prompt(user_request, answer):
    
    conversation = [ {'role': 'user', 'content': user_request},
                  {'role': 'assistant', 'content': answer}]
    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    return prompt

In [9]:
# train 만들기

import pandas as pd
import json
with open('./data/pqaa_train_set.json','r') as f:
    train_data = json.load(f)
    
# 데이터프레임에 넣을 리스트 초기화
rows = []

# 딕셔너리를 순회하며 데이터프레임용 리스트 생성
for num, details in train_data.items():
    contexts_with_labels = '\n'.join([f"({label}) {context}" for label, context in zip(details['LABELS'], details['CONTEXTS'])])
    input = 'Question:\n' + details['QUESTION'] + '\nPlease give me the answer in formats: yes or no' + '\n' + 'Context:\n' + contexts_with_labels
    row = {
        'input' : input,
        'final_decision': details['final_decision']
    }
    rows.append(row)

# 데이터프레임 생성
df = pd.DataFrame(rows)

no_df = df[df['final_decision'] == 'no'].sample(n=400, random_state=42)

# 'yes'인 값 10000개 추출
yes_df = df[df['final_decision'] == 'yes'].sample(n=400, random_state=42)

# 두 데이터 프레임 합치기
combined_df_train = pd.concat([no_df, yes_df])

print(combined_df_train)

                                                    input final_decision
133561  Question:\nDoes aspirin increase bleeding comp...             no
123846  Question:\nAre measures of socioeconomic posit...             no
143951  Question:\nDoes dialysis within 24 hours of tr...             no
79644   Question:\nIs mild renal pelvic dilatation pre...             no
108150  Question:\nDoes acute blood pressure reduction...             no
...                                                   ...            ...
47899   Question:\nDoes inhibition of poly ( ADP-ribos...            yes
191520  Question:\nDoes tumor necrosis factor prevent ...            yes
30724   Question:\nDoes status of hepatic DNA methylom...            yes
79443   Question:\nIs the lower pole of the earlobe an...            yes
97016   Question:\nDo values anglo-american and mexica...            yes

[800 rows x 2 columns]


In [10]:
# valid 만들기
del_li = combined_df_train['input'].to_list()
df = df[~df['input'].isin(del_li)]

no_df = df[df['final_decision'] == 'no'].sample(n=50, random_state=42)

yes_df = df[df['final_decision'] == 'yes'].sample(n=50, random_state=42)

# 두 데이터 프레임 합치기
combined_df_valid = pd.concat([no_df, yes_df])

print(combined_df_valid)

                                                    input final_decision
50831   Question:\nIs vincristine-induced neuropathy i...             no
108941  Question:\nDoes topical nutlin-3a decrease pho...             no
189150  Question:\nDo racial differences in withdrawal...             no
135688  Question:\nRegional differences in home food a...             no
99209   Question:\nDoes pantoprazole affect performanc...             no
...                                                   ...            ...
18397   Question:\nIs cycling performance decrement gr...            yes
104660  Question:\nDoes knockdown of a proliferation-i...            yes
129365  Question:\nDo elevated angiogenin levels in th...            yes
193784  Question:\nDoes fat-specific transgenic expres...            yes
130858  Question:\nIs reduced muscle strength the majo...            yes

[100 rows x 2 columns]


In [11]:
# test 만들기

with open('./data/pqaa_dev_set.json','r') as f:
    test_data = json.load(f)

# 데이터프레임에 넣을 리스트 초기화
rows = []

# 딕셔너리를 순회하며 데이터프레임용 리스트 생성
for num, details in test_data.items():
    contexts_with_labels = '\n'.join([f"({label}) {context}" for label, context in zip(details['LABELS'], details['CONTEXTS'])])
    input = 'Question:\n' + details['QUESTION'] + '\nPlease give me the answer in formats: yes or no' + '\n' + 'Context:\n' + contexts_with_labels
    row = {
        'input' : input,
        'final_decision': details['final_decision']
    }
    rows.append(row)

# 데이터프레임 생성
df = pd.DataFrame(rows)

no_df = df[df['final_decision'] == 'no'].sample(n=50, random_state=42)

# 'yes'인 값 10000개 추출
yes_df = df[df['final_decision'] == 'yes'].sample(n=50, random_state=42)

# 두 데이터 프레임 합치기
combined_df_test = pd.concat([no_df, yes_df])

print(combined_df_test)

                                                   input final_decision
2774   Question:\nDoes trimetazidine modify blood lev...             no
4083   Question:\nDo aDAMTS-5 deficient mice develop ...             no
10063  Question:\nIs brucellosis a major cause of feb...             no
3060   Question:\nDoes sertraline alter the beta-adre...             no
7219   Question:\nAre salivary biomarkers suitable fo...             no
...                                                  ...            ...
1469   Question:\nDoes varus malalignment negate the ...            yes
3216   Question:\nDo inflammatory protein levels and ...            yes
8590   Question:\nDoes intraaortic balloon pumping im...            yes
4770   Question:\nDo girls ' childhood trajectories o...            yes
6134   Question:\nIs activation of phospholipase A2 a...            yes

[100 rows x 2 columns]


In [12]:
from sklearn.model_selection import train_test_split

X_train = combined_df_train['input']
y_train = combined_df_train['final_decision']

X_valid = combined_df_valid['input']
y_valid = combined_df_valid['final_decision']

# test 데이터셋
X_test = combined_df_test['input']
y_test = combined_df_test['final_decision']

In [13]:
combined_df_train['final_decision'].value_counts(), combined_df_valid['final_decision'].value_counts(), combined_df_test['final_decision'].value_counts()

(final_decision
 no     400
 yes    400
 Name: count, dtype: int64,
 final_decision
 no     50
 yes    50
 Name: count, dtype: int64,
 final_decision
 no     50
 yes    50
 Name: count, dtype: int64)

In [14]:
train_data_prompt_list = []
for x,y in zip(X_train, y_train):
    train_data_prompt_list.append(make_prompt(x,y))

valid_data_prompt_list = []
for x2,y2 in zip(X_valid, y_valid):
    valid_data_prompt_list.append(make_prompt(x2,y2))

test_data_prompt_list = []
for x3,y3 in zip(X_test, y_test):
    test_data_prompt_list.append(make_prompt(x3,y3))
    test_data_prompt_list = [test_data.split('<|end|>')[0] + '<|end|>' for test_data in test_data_prompt_list]

In [15]:
test_data_prompt_list[0].split('<|end|>')[0]

'<|user|>\nQuestion:\nDoes trimetazidine modify blood levels and immunosuppressant effects of cyclosporine A in renal allograft recipients?\nPlease give me the answer in formats: yes or no\nContext:\n(OBJECTIVE) In renal allograft recipients, trimetazidine (Vastarel) was proposed to be associated with the classic immunosuppressant treatments because it displays anti-ischaemic effects which may protect against cyclosporine A nephrotoxicity. The objective of this work was to assess the possibility of coadministering cyclosporin A, Sandimmun, and trimetazidine.\n(METHODS) Twelve renal transplant patients were selected on the basis of the stability of their cyclosporine A blood concentrations for the previous 3 months. They received trimetazidine, 40 mg twice daily orally for 5 days. Other coadministered drugs were kept unchanged during the study. Before and after trimetazidine administration, cyclosporine A blood concentrations, plasma interleukin-2 and soluble interleukin-2 receptor leve

In [16]:
test_data_prompt_list[0]

'<|user|>\nQuestion:\nDoes trimetazidine modify blood levels and immunosuppressant effects of cyclosporine A in renal allograft recipients?\nPlease give me the answer in formats: yes or no\nContext:\n(OBJECTIVE) In renal allograft recipients, trimetazidine (Vastarel) was proposed to be associated with the classic immunosuppressant treatments because it displays anti-ischaemic effects which may protect against cyclosporine A nephrotoxicity. The objective of this work was to assess the possibility of coadministering cyclosporin A, Sandimmun, and trimetazidine.\n(METHODS) Twelve renal transplant patients were selected on the basis of the stability of their cyclosporine A blood concentrations for the previous 3 months. They received trimetazidine, 40 mg twice daily orally for 5 days. Other coadministered drugs were kept unchanged during the study. Before and after trimetazidine administration, cyclosporine A blood concentrations, plasma interleukin-2 and soluble interleukin-2 receptor leve

In [17]:
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [18]:
train_dataset = Dataset(train_data_prompt_list)
valid_dataset = Dataset(valid_data_prompt_list)

In [19]:
def train(epoch, loader):

    model.train()
    loss_avg = 0
    for i, prompt in enumerate(loader):
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)
            outputs = model(**inputs, labels=inputs['input_ids'])
            loss = outputs.loss

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        #loss.backward()
        #optimizer.step()
        scaler.update()
        # print(f"epoch : {epoch} - step : {i}/{len(loader)} - loss: {loss.item()}")
        loss_avg += loss.item()
        
        del inputs
        del outputs
        del loss

    # wandb.log({"loss": loss_avg/len(loader), "epoch": epoch})    
    print(f'Epoch: {epoch}, train_Loss:  {loss_avg/len(loader)}')
    loss_dic['Train'].append(loss_avg/len(loader))

        

In [20]:
def validate(epoch,loader):  
    model.eval()
    loss_avg = 0
    with torch.no_grad():       
        for i, prompt in enumerate(loader):
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)
            outputs = model(**inputs, labels=inputs["input_ids"])
            loss = outputs.loss
            loss_avg += loss.item()
            
            del inputs
            del outputs
            del loss
            
    print(f'Epoch: {epoch}, Valid_Loss:  {loss_avg/len(loader)}')
    loss_dic['Val'].append(loss_avg/len(loader))

In [21]:
valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=True, num_workers=0)

In [22]:
# optimizer = AdamW(model.parameters(), lr = 3e-4)
# # optimizer = SGD(model.parameters(), lr=3e-4)
# scheduler = CosineAnnealingLR(optimizer, T_max=10)
scaler = GradScaler()

  scaler = GradScaler()


# Loss 기반 데이터 정렬 및 저장

In [23]:
def loss_based_sorting(dataset):
    # Loss 기반 Dataloader 정렬
    data_loss_dict = {}

    model.eval()
    with torch.no_grad():
        for prompt in tqdm(dataset):
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)
            outputs = model(**inputs, labels=inputs["input_ids"])
            loss = outputs.loss.item()
            # # print(prompt[0])
            # # print(loss)
            data_loss_dict[prompt] = loss
            # break

    sorted_train_dict = dict(sorted(data_loss_dict.items(), key=lambda item: item[1]))

    sorted_li = list(sorted_train_dict.keys())
    # print(f'sorted_li의 길이 : {len(sorted_li)}')

    sorted_train_dataset = Dataset(sorted_li)
    # print(type(train_dataset[0]))
    sorted_train_loader = DataLoader(sorted_train_dataset, batch_size=2, shuffle=False, num_workers=0)

    return sorted_train_loader

# lr 수정 해당 파트 (optimizer)

In [None]:
optimizer = AdamW(model.parameters(), lr=5e-4)

In [24]:
lr_scheduler = get_scheduler(
    name='cosine',
    optimizer=optimizer,
    num_warmup_steps=227,
    num_training_steps=15000
)

In [25]:
from tqdm import tqdm
import itertools
from transformers import pipeline
from sklearn.metrics import accuracy_score 

loss_dic = {"epoch":[],"Train":[], "Val":[]}
best_loss = 100
early_stop_count = 0

for epoch in tqdm(range(1, 11)):

    loss_dic['epoch'].append(epoch)
    sorted_train_loader = loss_based_sorting(train_dataset)

    train(epoch, sorted_train_loader)
    validate(epoch, sorted_train_loader)
    lr_scheduler.step()
    
    if loss_dic['Val'][epoch - 1] > best_loss:
        early_stop_count += 1       
        if early_stop_count >= 2:
            loss_dic_df = pd.DataFrame(loss_dic)
            loss_dic_df.to_csv('./results/240822_our_loss_5e-4.csv', index=False)
            torch.save(model.state_dict(), f'./savedmodel/240822_our_bestmodel_5e-4.pth')
            break
    else:
        best_loss = loss_dic['Val'][epoch - 1]
        early_stop_count = 0

  0%|          | 0/10 [00:00<?, ?it/s]You are not running the flash-attention implementation, expect numerical differences.
100%|██████████| 800/800 [04:32<00:00,  2.94it/s]
  with torch.cuda.amp.autocast():
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch: 1, train_Loss:  2.3611921179294586


 10%|█         | 1/10 [13:44<2:03:40, 824.51s/it]

Epoch: 1, Valid_Loss:  2.3612437799572943


100%|██████████| 800/800 [04:31<00:00,  2.95it/s]


Epoch: 2, train_Loss:  1.5020258857309818


 20%|██        | 2/10 [27:28<1:49:51, 823.95s/it]

Epoch: 2, Valid_Loss:  1.3316953828930855


100%|██████████| 800/800 [04:31<00:00,  2.95it/s]


In [25]:
# Early stopping 안됐을때 모델, 결과 따로 저장
torch.save(model.state_dict(), './savedmodel/240822_our_bestmodel_5e-4.pth')
loss_dic_df = pd.DataFrame(loss_dic)
loss_dic_df.to_csv('./results/240822_our_loss_5e-4.csv', index=False)

# 추론 시작

In [1]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [2]:
import torch
from transformers import  AutoTokenizer, PreTrainedTokenizerFast, AdamW, AutoModelForCausalLM, BitsAndBytesConfig,HfArgumentParser, get_scheduler, set_seed

import pandas as pd
import numpy as np

from torch import nn
from torch.utils.data import Dataset, Subset
from torch.utils.data import DataLoader
from torch import cuda
from torch.optim import AdamW, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler

from tqdm import tqdm

from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import bitsandbytes as bnb
import os
import random

import numpy as np
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
config = {'mode_ID':"microsoft/Phi-3-mini-4k-instruct",
          'seed': 1 ,
          'max_seq_len' : 4096,
          'epochs': 3,
          'lr': 2e-4,
          'batch': 4,
          'lora_r':8,
          'lora_alpha':32,
          'target_module':["q_proj", "up_proj", "o_proj", "k_proj", "down_proj","gate_proj", "v_proj"],
          'lora_dropout':0.05,
          'lora_tasktype' :'CAUSAL_LM',
          'lora_bias' : 'none',
          'optimizer': 'paged_adamw_8bit',
          'scheduler':'cosine'}

In [4]:
from peft import (
    get_peft_config,  # PEFT 설정을 가져오기 위한 함수
    get_peft_model,  # PEFT 모델을 가져오기 위한 함수
    get_peft_model_state_dict,  # PEFT 모델 상태 사전을 가져오기 위한 함수
    set_peft_model_state_dict,  # PEFT 모델 상태 사전을 설정하기 위한 함수
    LoraConfig,  # LoRA 모델 구성을 정의하는 클래스
    PeftType,  # PEFT 모델의 타입을 정의
    PrefixTuningConfig,  # PrefixTuning 모델 구성을 정의하는 클래스
    PromptEncoderConfig,  # PromptEncoder 모델 구성을 정의하는 클래스
    PeftModel,  # PEFT 모델을 정의하는 클래스
    PeftConfig,  # PEFT 모델의 구성을 정의하는 클래스
)

# PEFT 모델의 타입 설정 (LoRA로 설정)
peft_type = PeftType.LORA

# LoRA 모델을 위한 설정
peft_config = LoraConfig(
    r=config['lora_r'],  # LoRA 모델의 r 값
    lora_alpha=config['lora_alpha'],  # LoRA 모델의 alpha 값
    target_modules=config['target_module'],  # LoRA 모델의 타겟 모듈 리스트
    lora_dropout=config['lora_dropout'],  # LoRA 모델의 드롭아웃 비율
    bias=config['lora_bias'],  # LoRA 모델의 편향 설정
    task_type=config['lora_tasktype']  # LoRA 모델의 태스크 유형
)

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16
# )


In [5]:
model = AutoModelForCausalLM.from_pretrained(
	config['mode_ID'],
	device_map="cuda",
	torch_dtype=torch.float16,
	trust_remote_code=True, 
	use_cache=False,
    # attn_implementation='flash_attention_2'
	# quantization_config=bnb_config,
)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:10<00:00,  5.26s/it]


In [6]:
model.gradient_checkpointing_enable()

model = prepare_model_for_kbit_training(model)

In [7]:
model = get_peft_model(model, peft_config) # PEFT 적용 

In [8]:
model.load_state_dict(torch.load('./savedmodel/240822_our_bestmodel_5e-4.pth'))

  model.load_state_dict(torch.load('./savedmodel/240822_our_bestmodel_3e-4.pth'))


<All keys matched successfully>

In [9]:
# AutoTokenizer를 사용하여 토크나이저 생성
tokenizer = AutoTokenizer.from_pretrained(config['mode_ID'], trust_remote_code=True, eos_token='</s>')
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

def make_prompt(user_request, answer):
    
    conversation = [ {'role': 'user', 'content': user_request},
                  {'role': 'assistant', 'content': answer}]
    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    return prompt


import pandas as pd
import json

with open('./data/pqaa_dev_set.json','r') as f:
    test_data = json.load(f)
    
# 데이터프레임에 넣을 리스트 초기화
rows = []

# 딕셔너리를 순회하며 데이터프레임용 리스트 생성
for num, details in test_data.items():
    contexts_with_labels = '\n'.join([f"({label}) {context}" for label, context in zip(details['LABELS'], details['CONTEXTS'])])
    input = 'Question:\n' + details['QUESTION'] + '\nPlease give me the answer in formats: yes or no' + '\n' + 'Context:\n' + contexts_with_labels
    row = {
        'input' : input,
        'final_decision': details['final_decision']
    }
    rows.append(row)

# 데이터프레임 생성
df = pd.DataFrame(rows)

no_df = df[df['final_decision'] == 'no'].sample(n=50, random_state=42)

# 'yes'인 값 10000개 추출
yes_df = df[df['final_decision'] == 'yes'].sample(n=50, random_state=42)

# 두 데이터 프레임 합치기
combined_df_test = pd.concat([no_df, yes_df])

# print(combined_df_test)

X_test = combined_df_test['input']
y_test = combined_df_test['final_decision']

test_data_prompt_list = []
for x3,y3 in zip(X_test, y_test):
    test_data_prompt_list.append(make_prompt(x3,y3))
    test_data_prompt_list = [test_data.split('<|end|>')[0] + '<|end|>\n<|assistant|>\n' for test_data in test_data_prompt_list]

class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [10]:
test_dataset = Dataset(test_data_prompt_list)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)

In [11]:
from transformers import pipeline 

def test(loader):
    output_li = []
    model.eval()

    pipe = pipeline( 
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
    ) 

    generation_args = { 
        "max_new_tokens": 500, 
        "return_full_text": False, 
        "temperature": 0.5, 
        "do_sample": False, 
    } 

    with torch.no_grad():
        for output in tqdm(pipe(loader, **generation_args)):
            output_li.append(output)
            
    return output_li

In [12]:
outputs = test(test_dataset)
print(outputs[0])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausal

[{'generated_text': ' no'}]





In [13]:
pred_li = []
for output in outputs:
    # print(output[0].get('generated_text').strip())
    pred_li.append(output[0].get('generated_text').strip().lower())


In [14]:
pred_li

['no',
 'yes',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'yes',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'no',
 'yes',
 'no',
 'no',
 'yes',
 'no',
 'no',
 'no',
 'no',
 'no',
 'yes',
 'yes',
 'yes',
 'no',
 'yes',
 'no',
 'no',
 'yes',
 'no',
 'yes',
 'no',
 'no',
 'no',
 'no',
 'yes',
 'no',
 'no',
 'no',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'no',
 'no',
 'no',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'no',
 'yes',
 'yes',
 'yes',
 'yes']

In [15]:
df = pd.DataFrame({'true': y_test, 'pred':pred_li})
df

Unnamed: 0,true,pred
2774,no,no
4083,no,yes
10063,no,no
3060,no,no
7219,no,no
...,...,...
1469,yes,no
3216,yes,yes
8590,yes,yes
4770,yes,yes


In [16]:
from sklearn.metrics import accuracy_score
accuracy_score(df['true'],df['pred'])

0.85

# Acc 기록

- 1e-5 : 0.8
- 3e-5 : 0.77
- 5e-5 : 0.83
- 1e-4 : 0.86
- 3e-4 : 0.85
- 5e-4 :
- 1e-3 :
- 3e-3 :
- 5e-3 :