## 학습된 모델 가중치 가져오기

In [1]:
import os
import easydict
import requests
import torch
import numpy as np
import pandas as pd

from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras_preprocessing.sequence import pad_sequences

In [2]:
def flat_accuracy(preds, labels):
    if len(preds.shape) > 1 and preds.shape[1] > 1:
        pred_flat = np.argmax(preds, axis=1).flatten()
    else:
        pred_flat = preds.flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [3]:
def generate_data_loader(file_path, tokenizer, args):

    def get_input_ids(data):
        document_bert = ["[CLS] " + str(s) + " [SEP]" for s in data]
        tokenized_texts = [tokenizer.tokenize(s) for s in tqdm(document_bert, "Tokenizing")]
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tqdm(tokenized_texts, "Converting tokens to ids")]
        print("Padding sequences...")
        input_ids = pad_sequences(input_ids, maxlen=args.maxlen, dtype='long', truncating='post', padding='post')
        return input_ids

    def get_attention_masks(input_ids):
        attention_masks = []
        for seq in tqdm(input_ids, "Generating attention masks"):
            seq_mask = [float(i > 0) for i in seq]
            attention_masks.append(seq_mask)
        return attention_masks

    def get_data_loader(inputs, masks, labels, batch_size=args.batch):
        data = TensorDataset(torch.tensor(inputs), torch.tensor(masks), torch.tensor(labels))
        sampler = RandomSampler(data) if args.mode == 'train' else SequentialSampler(data)
        data_loader = DataLoader(data, sampler=sampler, batch_size=batch_size)
        return data_loader

    data_df = pd.read_csv(file_path)
    input_ids = get_input_ids(data_df['text'].values)
    attention_masks = get_attention_masks(input_ids)
    labels = data_df['label'].values  # 수정된 부분
    data_loader = get_data_loader(input_ids, attention_masks, labels)

    return data_loader, labels

In [4]:
def predict(model, args, data_loader):
    print('start predict')
    model.eval()

    eval_loss = []
    eval_accuracy = []
    logits = []

    for step, batch in tqdm(enumerate(data_loader)):
        batch = tuple(t.to(args.device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            if args.mode == 'test':
                outputs = model(b_input_ids, attention_mask=b_input_mask)
                logit = outputs[0]
            else:
                outputs = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
                loss, logit = outputs[:2]
                eval_loss.append(loss.item())

        logit = logit.detach().cpu().numpy()
        label = b_labels.cpu().numpy()

        logits.append(logit)

        if args.mode != 'test':
            accuracy = flat_accuracy(logit, label)
            eval_accuracy.append(accuracy)

    logits = np.vstack(logits)
    predict_labels = np.argmax(logits, axis=1)

    if args.mode == 'test':
        return predict_labels, None

    avg_eval_loss = np.mean(eval_loss)
    avg_eval_accuracy = np.mean(eval_accuracy)

    return predict_labels, avg_eval_loss, avg_eval_accuracy

In [6]:
# 상위 5개 모델 선택
top_n = 5

# 검증 정확도를 기준으로 내림차순 정렬하고 상위 N개 선택
df = pd.read_csv("./hparams_table.csv")
df

top_models = df.nlargest(top_n, 'best_val_accuracy')

# 선택된 모델들의 가중치 파일 경로 생성
model_paths = [f"best_models/model{i+1}.pth" for i in top_models.index]

model_paths


# 파라미터 설정
args = easydict.EasyDict({
    "valid_path": "./valid.csv",
    "device" : 'cpu',
    "mode" : "valid",
    "batch" : 64,
    "maxlen" : 32,
    "model_ckpt" : "monologg/koelectra-small-v3-discriminator",
})

# 토크나이저 생성
tokenizer = AutoTokenizer.from_pretrained(args.model_ckpt)

# 검증 데이터 로더 생성
valid_dataloader, valid_labels = generate_data_loader(args.valid_path, tokenizer, args)

# 각 모델에 대한 성능을 저장할 리스트
all_accuracies = []

# 모든 모델에 대해 반복
for model_path in model_paths:
    # 모델 생성
    model = AutoModelForSequenceClassification.from_pretrained(args.model_ckpt, num_labels=3)
    # 모델 가중치 로드
    model.load_state_dict(torch.load(model_path))
    model.to(args.device)
    model.eval()

    # 모델 예측 및 정확도 계산
    _, _, avg_eval_accuracy = predict(model, args, valid_dataloader)
    all_accuracies.append(avg_eval_accuracy)

for i, acc in enumerate(all_accuracies):
    print(f'모델 {i+1}의 검증 정확도: {acc}')

Tokenizing: 100%|██████████████████████████| 973/973 [00:00<00:00, 24849.04it/s]
Converting tokens to ids: 100%|███████████| 973/973 [00:00<00:00, 247351.83it/s]


Padding sequences...


Generating attention masks: 100%|█████████| 973/973 [00:00<00:00, 152757.07it/s]
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

16it [00:01,  9.97it/s]
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


start predict


16it [00:01, 10.15it/s]
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


start predict


16it [00:01, 10.23it/s]
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


start predict


16it [00:01, 10.12it/s]
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


start predict


16it [00:01, 10.67it/s]

모델 1의 검증 정확도: 0.94140625
모델 2의 검증 정확도: 0.9385516826923077
모델 3의 검증 정확도: 0.9404296875
모델 4의 검증 정확도: 0.9404296875
모델 5의 검증 정확도: 0.935546875





## 하드 보팅

In [7]:
def ensemble_predict(models, args, data_loader):
    print('start predict')
    all_logits = []

    for model in models:
        model.eval()
        model_logits = []

        for step, batch in tqdm(enumerate(data_loader)):
            batch = tuple(t.to(args.device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            with torch.no_grad():
                outputs = model(b_input_ids, attention_mask=b_input_mask)
                logits = outputs[0]

            logits = logits.detach().cpu().numpy()
            model_logits.append(logits)

        model_logits = np.vstack(model_logits)
        all_logits.append(model_logits)

    all_logits = np.stack(all_logits)
    ensemble_logits = np.sum(all_logits, axis=0)
    ensemble_preds = np.argmax(ensemble_logits, axis=1)

    return ensemble_preds

In [8]:
models = []

# 모든 모델에 대해 반복
for model_path in model_paths:
    # 모델 생성
    model = AutoModelForSequenceClassification.from_pretrained(args.model_ckpt, num_labels=3)
    # 모델 가중치 로드
    model.load_state_dict(torch.load(model_path))
    model.to(args.device)
    models.append(model)

# 앙상블 예측
ensemble_predictions = ensemble_predict(models, args, valid_dataloader)

# 앙상블 예측의 정확도 계산
accuracy = flat_accuracy(ensemble_predictions, valid_labels)
print(f'앙상블의 검증 정확도: {accuracy}')

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.wei

start predict


16it [00:01, 10.65it/s]
16it [00:01, 10.59it/s]
16it [00:01, 10.65it/s]
16it [00:01, 10.67it/s]
16it [00:01, 10.66it/s]

앙상블의 검증 정확도: 0.9383350462487153





## 소프트 보팅

In [9]:
def predict_proba(model, args, data_loader):
    print('start predict')
    model.eval()

    all_logits = []

    for step, batch in tqdm(enumerate(data_loader)):
        batch = tuple(t.to(args.device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            outputs = model(b_input_ids, attention_mask=b_input_mask)
            logits = outputs[0]

        logits = logits.detach().cpu().numpy()
        all_logits.append(logits)

    all_logits = np.vstack(all_logits)
    predict_proba = np.exp(all_logits) / np.sum(np.exp(all_logits), axis=1, keepdims=True)

    return predict_proba

def ensemble_predict(models, args, data_loader):
    print('start predict')
    all_predict_proba = []

    for model in models:
        model.eval()
        model_predict_proba = predict_proba(model, args, data_loader)
        all_predict_proba.append(model_predict_proba)

    all_predict_proba = np.array(all_predict_proba)
    ensemble_predict_proba = np.mean(all_predict_proba, axis=0)
    ensemble_preds = np.argmax(ensemble_predict_proba, axis=1)

    return ensemble_preds

In [10]:
models = []

# 모든 모델에 대해 반복
for model_path in model_paths:
    # 모델 생성
    model = AutoModelForSequenceClassification.from_pretrained(args.model_ckpt, num_labels=3)
    # 모델 가중치 로드
    model.load_state_dict(torch.load(model_path))
    model.to(args.device)
    models.append(model)

# 앙상블 예측
ensemble_predictions = ensemble_predict(models, args, valid_dataloader)

# 앙상블 예측의 정확도 계산
accuracy = flat_accuracy(ensemble_predictions, valid_labels)
print(f'앙상블의 검증 정확도: {accuracy}')

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.wei

start predict
start predict


16it [00:01, 10.09it/s]


start predict


16it [00:01, 10.04it/s]


start predict


16it [00:01, 10.09it/s]


start predict


16it [00:01, 10.10it/s]


start predict


16it [00:01, 10.04it/s]

앙상블의 검증 정확도: 0.9393627954779034



