# 실험 1: AG News Full Text + SBERT-style BERT

## AG News + SBERT 실험 노트북

- 본 노트북은 HuggingFace `datasets`에서 AG News 데이터를 자동으로 다운로드합니다.
- 데이터 로딩과 라벨 구조는 `Baseline model(BERT)-agnews.ipynb`와 동일합니다.
- 이 노트북에서는 SBERT-style 구조로 의미 기반 분류를 수행합니다.

---

이 섹션에서는 AG News 데이터셋의 **전체 뉴스 본문**을 기반으로  
BERT(`bert-base-uncased`)에 Mean Pooling을 적용한 **SBERT-style 분류기**를 학습합니다.

- `datasets` 라이브러리를 통해 AG News 전체 본문 데이터를 로드합니다.
- 각 문장은 `bert-base-uncased`로 임베딩되며, [CLS] 벡터 대신 Mean Pooling을 적용합니다.
- short text 실험과 비교를 위해 동일한 구조의 분류기(`SBERTClassifier`)를 사용합니다.

---

## 데이터 로딩 및 라이브러리 안내

이 노트북은 HuggingFace `datasets` 라이브러리를 통해 AG News 데이터를 자동으로 다운로드합니다.  
따라서 별도의 데이터 파일을 GitHub에 업로드하지 않아도 됩니다.

### 필요한 라이브러리:

- `datasets`
- `transformers`
- `torch`

설치 명령어:

```bash
pip install datasets transformers torch


In [43]:
# 필수 라이브러리 불러오기
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW  # ✅ PyTorch에서 직접 불러오기

# HuggingFace Transformers 관련
from transformers import BertModel, BertTokenizer

# 기타 유틸
import random
import numpy as np

# 시드 고정 함수 정의
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# 시드 적용
set_seed()


In [44]:
class SBERTClassifier(nn.Module):
    def __init__(self, pretrained_model_name='bert-base-uncased', hidden_dim=768, num_classes=2):
        super(SBERTClassifier, self).__init__()
        # 사전 학습된 BERT 모델 로드
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.dropout = nn.Dropout(0.3)
        # 분류기: BERT의 [CLS] 벡터 또는 pooled 벡터를 이용
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def mean_pooling(self, model_output, attention_mask):
        # SBERT 스타일 Mean Pooling
        token_embeddings = model_output.last_hidden_state  # (batch, seq_len, hidden)
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, dim=1)
        sum_mask = input_mask_expanded.sum(dim=1)
        return sum_embeddings / sum_mask  # 평균 벡터 반환

    def forward(self, input_ids, attention_mask):
        # BERT forward
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = self.mean_pooling(output, attention_mask)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

In [45]:
from transformers import BertTokenizer

# BERT 기반 토크나이저 로드
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [46]:
from torch.utils.data import Dataset
import torch

class SentenceClassificationDataset(Dataset):
    def __init__(self, sentences, labels, tokenizer, max_length=128):
        # 토큰화된 입력 저장
        self.encodings = tokenizer(
            sentences,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        )
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # idx번째 샘플 구성
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

In [47]:
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from tqdm import tqdm  # 일반 tqdm 사용

def train_model(model, dataset, epochs=3, batch_size=16, learning_rate=2e-5, device='cuda' if torch.cuda.is_available() else 'cpu'):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    loss_fn = CrossEntropyLoss()

    model.to(device)
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        print(f"\n[Epoch {epoch+1}/{epochs}]")
        
        for i, batch in enumerate(tqdm(dataloader, desc="Training", leave=True)):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item()

            loss.backward()
            optimizer.step()

            # tqdm 외에도 주기적으로 직접 출력
            if i % 100 == 0:
                print(f"Batch {i}/{len(dataloader)} | Loss: {loss.item():.4f}")

        avg_loss = total_loss / len(dataloader)
        print(f"\nEpoch {epoch+1} Completed | Avg Loss: {avg_loss:.4f}")

In [48]:
def evaluate_model(model, dataset, batch_size=16, device='cuda' if torch.cuda.is_available() else 'cpu'):
    dataloader = DataLoader(dataset, batch_size=batch_size)
    correct = 0
    total = 0

    model.to(device)
    model.eval()

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)  # logits
            predictions = torch.argmax(outputs, dim=1)  # 예측 결과
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

In [49]:
from datasets import load_dataset

# AG News 데이터셋 로드
dataset = load_dataset("ag_news")
train_data = dataset['train']
test_data = dataset['test']

# ✅ 수정된 문장 및 라벨 추출
train_sentences = [x['text'] for x in train_data]
train_labels = [x['label'] for x in train_data]

test_sentences = [x['text'] for x in test_data]
test_labels = [x['label'] for x in test_data]

In [50]:
# 전처리된 문장 + 라벨 → SBERT 전용 Dataset 생성
train_dataset = SentenceClassificationDataset(train_sentences, train_labels, tokenizer)
test_dataset = SentenceClassificationDataset(test_sentences, test_labels, tokenizer)

Exception ignored in: <function tqdm.__del__ at 0x7f0f4e50f0a0>
Traceback (most recent call last):
  File "/home/elicer/.local/lib/python3.10/site-packages/tqdm/std.py", line 1148, in __del__
    self.close()
  File "/home/elicer/.local/lib/python3.10/site-packages/tqdm/notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


In [31]:
# 4개 클래스 분류기
model = SBERTClassifier(num_classes=4)

# 학습 실행 (필요시 epoch/batch_size 조절)
train_model(model, train_dataset, epochs=3, batch_size=32, learning_rate=2e-5)


[Epoch 1/3]




Batch 0/3750 | Loss: 1.3450




Batch 100/3750 | Loss: 0.2515




Batch 200/3750 | Loss: 0.2855




Batch 300/3750 | Loss: 0.4033




Batch 400/3750 | Loss: 0.1755




Batch 500/3750 | Loss: 0.2962




Batch 600/3750 | Loss: 0.4930




Batch 700/3750 | Loss: 0.2413




Batch 800/3750 | Loss: 0.2850




Batch 900/3750 | Loss: 0.2274




Batch 1000/3750 | Loss: 0.2119




Batch 1100/3750 | Loss: 0.1052




Batch 1200/3750 | Loss: 0.2090




Batch 1300/3750 | Loss: 0.1032




Batch 1400/3750 | Loss: 0.2014




Batch 1500/3750 | Loss: 0.1441




Batch 1600/3750 | Loss: 0.2463




Batch 1700/3750 | Loss: 0.2210




Batch 1800/3750 | Loss: 0.1737




Batch 1900/3750 | Loss: 0.4950




Batch 2000/3750 | Loss: 0.1680




Batch 2100/3750 | Loss: 0.4011




Batch 2200/3750 | Loss: 0.2395




Batch 2300/3750 | Loss: 0.1074




Batch 2400/3750 | Loss: 0.1123




Batch 2500/3750 | Loss: 0.2951




Batch 2600/3750 | Loss: 0.5128




Batch 2700/3750 | Loss: 0.1056




Batch 2800/3750 | Loss: 0.0832




Batch 2900/3750 | Loss: 0.1587




Batch 3000/3750 | Loss: 0.2182




Batch 3100/3750 | Loss: 0.1006




Batch 3200/3750 | Loss: 0.1617




Batch 3300/3750 | Loss: 0.1148




Batch 3400/3750 | Loss: 0.1581




Batch 3500/3750 | Loss: 0.0846




Batch 3600/3750 | Loss: 0.1638




Batch 3700/3750 | Loss: 0.0456


Training: 100%|██████████| 3750/3750 [32:04<00:00,  1.95it/s]



Epoch 1 Completed | Avg Loss: 0.2074

[Epoch 2/3]




Batch 0/3750 | Loss: 0.1345




Batch 100/3750 | Loss: 0.0109




Batch 200/3750 | Loss: 0.1389




Batch 300/3750 | Loss: 0.0703




Batch 400/3750 | Loss: 0.1772




Batch 500/3750 | Loss: 0.0325




Batch 600/3750 | Loss: 0.4111




Batch 700/3750 | Loss: 0.0390




Batch 800/3750 | Loss: 0.0433




Batch 900/3750 | Loss: 0.1573




Batch 1000/3750 | Loss: 0.0746




Batch 1100/3750 | Loss: 0.1424




Batch 1200/3750 | Loss: 0.1051




Batch 1300/3750 | Loss: 0.2084




Batch 1400/3750 | Loss: 0.2008




Batch 1500/3750 | Loss: 0.1814




Batch 1600/3750 | Loss: 0.1972




Batch 1700/3750 | Loss: 0.1699




Batch 1800/3750 | Loss: 0.0871




Batch 1900/3750 | Loss: 0.1221




Batch 2000/3750 | Loss: 0.0195




Batch 2100/3750 | Loss: 0.1394




Batch 2200/3750 | Loss: 0.1001




Batch 2300/3750 | Loss: 0.0758




Batch 2400/3750 | Loss: 0.1325




Batch 2500/3750 | Loss: 0.2318




Batch 2600/3750 | Loss: 0.0718




Batch 2700/3750 | Loss: 0.1731




Batch 2800/3750 | Loss: 0.2541




Batch 2900/3750 | Loss: 0.2263




Batch 3000/3750 | Loss: 0.1089




Batch 3100/3750 | Loss: 0.0346




Batch 3200/3750 | Loss: 0.0853




Batch 3300/3750 | Loss: 0.1244




Batch 3400/3750 | Loss: 0.1276




Batch 3500/3750 | Loss: 0.0298




Batch 3600/3750 | Loss: 0.0851




Batch 3700/3750 | Loss: 0.1799


Training: 100%|██████████| 3750/3750 [32:04<00:00,  1.95it/s]



Epoch 2 Completed | Avg Loss: 0.1252

[Epoch 3/3]




Batch 0/3750 | Loss: 0.1562




Batch 100/3750 | Loss: 0.0275




Batch 200/3750 | Loss: 0.1072




Batch 300/3750 | Loss: 0.2004




Batch 400/3750 | Loss: 0.1178




Batch 500/3750 | Loss: 0.0431




Batch 600/3750 | Loss: 0.0041




Batch 700/3750 | Loss: 0.1524




Batch 800/3750 | Loss: 0.0975




Batch 900/3750 | Loss: 0.1148




Batch 1000/3750 | Loss: 0.0742




Batch 1100/3750 | Loss: 0.0571




Batch 1200/3750 | Loss: 0.0109




Batch 1300/3750 | Loss: 0.0213




Batch 1400/3750 | Loss: 0.0420




Batch 1500/3750 | Loss: 0.0158




Batch 1600/3750 | Loss: 0.0765




Batch 1700/3750 | Loss: 0.0397




Batch 1800/3750 | Loss: 0.0052




Batch 1900/3750 | Loss: 0.0228




Batch 2000/3750 | Loss: 0.2482




Batch 2100/3750 | Loss: 0.0205




Batch 2200/3750 | Loss: 0.0332




Batch 2300/3750 | Loss: 0.1910




Batch 2400/3750 | Loss: 0.0239




Batch 2500/3750 | Loss: 0.0326




Batch 2600/3750 | Loss: 0.1153




Batch 2700/3750 | Loss: 0.3288




Batch 2800/3750 | Loss: 0.1358




Batch 2900/3750 | Loss: 0.1001




Batch 3000/3750 | Loss: 0.1769




Batch 3100/3750 | Loss: 0.1537




Batch 3200/3750 | Loss: 0.1987




Batch 3300/3750 | Loss: 0.0766




Batch 3400/3750 | Loss: 0.1221




Batch 3500/3750 | Loss: 0.0059




Batch 3600/3750 | Loss: 0.0874




Batch 3700/3750 | Loss: 0.0627


Training: 100%|██████████| 3750/3750 [32:04<00:00,  1.95it/s]


Epoch 3 Completed | Avg Loss: 0.0842





In [None]:
# 모델 훈련이 끝난 후 저장 (파일명은 자유롭게 지정 가능)
torch.save(model.state_dict(), "sbert_model.pt")
print("✅ 모델이 'sbert_model.pt'로 저장되었습니다.")

In [53]:
def evaluate_model(model, dataset, batch_size=32, device='cuda' if torch.cuda.is_available() else 'cpu'):
    dataloader = DataLoader(dataset, batch_size=batch_size)
    model.to(device)
    model.eval()

    correct = 0
    total = 0
    total_loss = 0
    loss_fn = CrossEntropyLoss()

    from time import time
    start_time = time()

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            preds = torch.argmax(outputs, dim=1)

            total_loss += loss.item()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    end_time = time()
    acc = correct / total
    avg_loss = total_loss / len(dataloader)
    runtime = end_time - start_time

    result = {
        'eval_loss': avg_loss,
        'eval_accuracy': acc,
        'eval_runtime': round(runtime, 4),
        'eval_samples_per_second': round(total / runtime, 2),
        'eval_steps_per_second': round(len(dataloader) / runtime, 2)
    }

    print("SBERT Evaluation Results:", result)  # ✅ baseline 스타일로 맞춤
    return result

In [54]:
sbert_eval_results = evaluate_model(model, test_dataset)
print(sbert_eval_results)

Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Evaluating: 100%|██████████| 238/238 [00:39<00:00,  6.05it/s]

SBERT Evaluation Results: {'eval_loss': 0.17676264306279918, 'eval_accuracy': 0.9432894736842106, 'eval_runtime': 39.3363, 'eval_samples_per_second': 193.21, 'eval_steps_per_second': 6.05}
{'eval_loss': 0.17676264306279918, 'eval_accuracy': 0.9432894736842106, 'eval_runtime': 39.3363, 'eval_samples_per_second': 193.21, 'eval_steps_per_second': 6.05}





# 실험 2: AG News Short Text (Headline Only) + SBERT-style BERT

이 섹션에서는 AG News 데이터셋에서 **헤드라인(첫 문장)**만 추출한 short text를 기반으로  
SBERT-style BERT 분류기를 학습합니다.

- `text.split(".")[0]`을 사용해 뉴스 본문에서 첫 번째 문장(헤드라인)만 추출합니다.
- 전체 본문 실험과 동일하게 `bert-base-uncased` 모델과 Mean Pooling 기반 구조를 사용합니다.
- short text 환경에서 SBERT-style 모델의 표현력과 분류 성능을 확인하고,  
  **full text 실험(실험 1)**과 성능을 비교합니다.

---

## 실험 목표

- 짧은 문장에서는 의미 기반 임베딩(SBERT-style)의 성능이 어떻게 나타나는지 측정
- 동일한 모델 구조에서 입력 문장의 길이에 따른 성능 변화를 관찰

In [57]:
from datasets import load_dataset

# 셀 1: 데이터 로드 및 헤드라인만 추출
dataset = load_dataset("ag_news")
train_data = dataset["train"]
test_data = dataset["test"]

# 헤드라인만 사용 (일부 샘플만 추출 가능)
train_sentences = [x['text'].split('.')[0] for x in train_data]
train_labels = [x['label'] for x in train_data]
test_sentences = [x['text'].split('.')[0] for x in test_data]
test_labels = [x['label'] for x in test_data]

import pandas as pd
# 데이터 일부 확인
sample_df = pd.DataFrame({
    'headline': train_sentences[:5],
    'label': train_labels[:5]
})

In [58]:
from torch.utils.data import Dataset

class SentenceClassificationDataset(Dataset):
    def __init__(self, sentences, labels, tokenizer, max_length=32):
        # max_length를 줄여서 짧은 헤드라인 중심으로 실험
        self.encodings = tokenizer(
            sentences,
            padding='max_length',
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        )
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# 토크나이저 로드 (bert-base-uncased)
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Dataset 생성
train_dataset = SentenceClassificationDataset(train_sentences, train_labels, tokenizer)
test_dataset = SentenceClassificationDataset(test_sentences, test_labels, tokenizer)

In [60]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from tqdm import tqdm

# SBERT 모델 정의
class SBERTClassifier(nn.Module):
    def __init__(self, pretrained_model_name='bert-base-uncased', hidden_dim=768, num_classes=4):
        super(SBERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output.last_hidden_state
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, dim=1)
        sum_mask = input_mask_expanded.sum(dim=1)
        return sum_embeddings / sum_mask

    def forward(self, input_ids, attention_mask):
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = self.mean_pooling(output, attention_mask)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# 학습 함수 정의
def train_model(model, dataset, epochs=3, batch_size=32, learning_rate=2e-5, device='cuda' if torch.cuda.is_available() else 'cpu'):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    loss_fn = CrossEntropyLoss()

    model.to(device)
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        print(f"\n[Epoch {epoch+1}/{epochs}]")
        for i, batch in enumerate(tqdm(dataloader, desc="Training", leave=True)):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item()

            loss.backward()
            optimizer.step()

            if i % 100 == 0:
                print(f"Batch {i}/{len(dataloader)} | Loss: {loss.item():.4f}")

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1} Completed | Avg Loss: {avg_loss:.4f}")

In [61]:
# SBERT 모델 인스턴스 생성 (4개 클래스 분류)
model = SBERTClassifier(num_classes=4)

# 학습 실행
train_model(
    model=model,
    dataset=train_dataset,
    epochs=3,
    batch_size=32,
    learning_rate=2e-5
)


[Epoch 1/3]


Training:   0%|          | 1/3750 [00:00<10:18,  6.06it/s]

Batch 0/3750 | Loss: 1.4609


Training:   3%|▎         | 101/3750 [00:15<10:57,  5.55it/s]

Batch 100/3750 | Loss: 0.3670


Training:   5%|▌         | 201/3750 [00:30<10:20,  5.72it/s]

Batch 200/3750 | Loss: 0.4710


Training:   8%|▊         | 301/3750 [00:46<10:08,  5.66it/s]

Batch 300/3750 | Loss: 0.2923


Training:  11%|█         | 401/3750 [01:01<09:46,  5.71it/s]

Batch 400/3750 | Loss: 0.3085


Training:  13%|█▎        | 501/3750 [01:16<09:45,  5.55it/s]

Batch 500/3750 | Loss: 0.4772


Training:  16%|█▌        | 601/3750 [01:32<09:11,  5.71it/s]

Batch 600/3750 | Loss: 0.4319


Training:  19%|█▊        | 701/3750 [01:47<09:09,  5.55it/s]

Batch 700/3750 | Loss: 0.4810


Training:  21%|██▏       | 801/3750 [02:02<08:38,  5.68it/s]

Batch 800/3750 | Loss: 0.1381


Training:  24%|██▍       | 901/3750 [02:18<08:18,  5.71it/s]

Batch 900/3750 | Loss: 0.2241


Training:  27%|██▋       | 1001/3750 [02:33<08:00,  5.72it/s]

Batch 1000/3750 | Loss: 0.2318


Training:  29%|██▉       | 1101/3750 [02:48<07:48,  5.65it/s]

Batch 1100/3750 | Loss: 0.0997


Training:  32%|███▏      | 1201/3750 [03:04<07:38,  5.56it/s]

Batch 1200/3750 | Loss: 0.2517


Training:  35%|███▍      | 1301/3750 [03:19<07:12,  5.67it/s]

Batch 1300/3750 | Loss: 0.2958


Training:  37%|███▋      | 1401/3750 [03:34<07:02,  5.56it/s]

Batch 1400/3750 | Loss: 0.1993


Training:  40%|████      | 1501/3750 [03:50<06:33,  5.72it/s]

Batch 1500/3750 | Loss: 0.1975


Training:  43%|████▎     | 1601/3750 [04:05<06:27,  5.55it/s]

Batch 1600/3750 | Loss: 0.0977


Training:  45%|████▌     | 1701/3750 [04:20<06:08,  5.56it/s]

Batch 1700/3750 | Loss: 0.3212


Training:  48%|████▊     | 1801/3750 [04:36<05:51,  5.55it/s]

Batch 1800/3750 | Loss: 0.1885


Training:  51%|█████     | 1901/3750 [04:51<05:25,  5.69it/s]

Batch 1900/3750 | Loss: 0.2833


Training:  53%|█████▎    | 2001/3750 [05:06<05:06,  5.71it/s]

Batch 2000/3750 | Loss: 0.3454


Training:  56%|█████▌    | 2101/3750 [05:22<04:52,  5.63it/s]

Batch 2100/3750 | Loss: 0.3297


Training:  59%|█████▊    | 2201/3750 [05:37<04:38,  5.55it/s]

Batch 2200/3750 | Loss: 0.3913


Training:  61%|██████▏   | 2301/3750 [05:52<04:21,  5.55it/s]

Batch 2300/3750 | Loss: 0.2703


Training:  64%|██████▍   | 2401/3750 [06:08<03:57,  5.68it/s]

Batch 2400/3750 | Loss: 0.2434


Training:  67%|██████▋   | 2501/3750 [06:23<03:38,  5.71it/s]

Batch 2500/3750 | Loss: 0.4154


Training:  69%|██████▉   | 2601/3750 [06:38<03:24,  5.63it/s]

Batch 2600/3750 | Loss: 0.3675


Training:  72%|███████▏  | 2701/3750 [06:54<03:06,  5.62it/s]

Batch 2700/3750 | Loss: 0.5195


Training:  75%|███████▍  | 2801/3750 [07:09<02:51,  5.55it/s]

Batch 2800/3750 | Loss: 0.1487


Training:  77%|███████▋  | 2901/3750 [07:24<02:29,  5.70it/s]

Batch 2900/3750 | Loss: 0.2606


Training:  80%|████████  | 3001/3750 [07:40<02:14,  5.55it/s]

Batch 3000/3750 | Loss: 0.3122


Training:  83%|████████▎ | 3101/3750 [07:55<01:54,  5.66it/s]

Batch 3100/3750 | Loss: 0.1872


Training:  85%|████████▌ | 3201/3750 [08:10<01:38,  5.55it/s]

Batch 3200/3750 | Loss: 0.1878


Training:  88%|████████▊ | 3301/3750 [08:26<01:18,  5.72it/s]

Batch 3300/3750 | Loss: 0.2766


Training:  91%|█████████ | 3401/3750 [08:41<01:02,  5.58it/s]

Batch 3400/3750 | Loss: 0.2316


Training:  93%|█████████▎| 3501/3750 [08:56<00:43,  5.71it/s]

Batch 3500/3750 | Loss: 0.2144


Training:  96%|█████████▌| 3601/3750 [09:11<00:26,  5.55it/s]

Batch 3600/3750 | Loss: 0.0655


Training:  99%|█████████▊| 3701/3750 [09:27<00:08,  5.55it/s]

Batch 3700/3750 | Loss: 0.1905


Training: 100%|██████████| 3750/3750 [09:34<00:00,  6.52it/s]


Epoch 1 Completed | Avg Loss: 0.2883

[Epoch 2/3]


Training:   0%|          | 1/3750 [00:00<14:28,  4.32it/s]

Batch 0/3750 | Loss: 0.3707


Training:   3%|▎         | 101/3750 [00:15<10:56,  5.56it/s]

Batch 100/3750 | Loss: 0.1695


Training:   5%|▌         | 201/3750 [00:30<10:40,  5.54it/s]

Batch 200/3750 | Loss: 0.2187


Training:   8%|▊         | 301/3750 [00:46<10:07,  5.67it/s]

Batch 300/3750 | Loss: 0.2144


Training:  11%|█         | 401/3750 [01:01<10:03,  5.55it/s]

Batch 400/3750 | Loss: 0.0689


Training:  13%|█▎        | 501/3750 [01:16<09:45,  5.55it/s]

Batch 500/3750 | Loss: 0.3691


Training:  16%|█▌        | 601/3750 [01:32<09:11,  5.71it/s]

Batch 600/3750 | Loss: 0.3975


Training:  19%|█▊        | 701/3750 [01:47<08:53,  5.71it/s]

Batch 700/3750 | Loss: 0.1111


Training:  21%|██▏       | 801/3750 [02:02<08:39,  5.67it/s]

Batch 800/3750 | Loss: 0.3413


Training:  24%|██▍       | 901/3750 [02:18<08:18,  5.71it/s]

Batch 900/3750 | Loss: 0.1257


Training:  27%|██▋       | 1001/3750 [02:33<08:05,  5.66it/s]

Batch 1000/3750 | Loss: 0.1021


Training:  29%|██▉       | 1101/3750 [02:48<07:43,  5.71it/s]

Batch 1100/3750 | Loss: 0.1611


Training:  32%|███▏      | 1201/3750 [03:04<07:26,  5.72it/s]

Batch 1200/3750 | Loss: 0.4914


Training:  35%|███▍      | 1301/3750 [03:19<07:13,  5.66it/s]

Batch 1300/3750 | Loss: 0.1770


Training:  37%|███▋      | 1401/3750 [03:34<06:51,  5.71it/s]

Batch 1400/3750 | Loss: 0.1659


Training:  40%|████      | 1501/3750 [03:50<06:36,  5.68it/s]

Batch 1500/3750 | Loss: 0.1952


Training:  43%|████▎     | 1601/3750 [04:05<06:18,  5.67it/s]

Batch 1600/3750 | Loss: 0.1765


Training:  45%|████▌     | 1701/3750 [04:20<05:58,  5.72it/s]

Batch 1700/3750 | Loss: 0.0771


Training:  48%|████▊     | 1801/3750 [04:36<05:42,  5.69it/s]

Batch 1800/3750 | Loss: 0.0605


Training:  51%|█████     | 1901/3750 [04:51<05:33,  5.55it/s]

Batch 1900/3750 | Loss: 0.2818


Training:  53%|█████▎    | 2001/3750 [05:06<05:15,  5.55it/s]

Batch 2000/3750 | Loss: 0.0610


Training:  56%|█████▌    | 2101/3750 [05:22<04:49,  5.69it/s]

Batch 2100/3750 | Loss: 0.1272


Training:  59%|█████▊    | 2201/3750 [05:37<04:33,  5.65it/s]

Batch 2200/3750 | Loss: 0.1809


Training:  61%|██████▏   | 2301/3750 [05:52<04:14,  5.68it/s]

Batch 2300/3750 | Loss: 0.0794


Training:  64%|██████▍   | 2401/3750 [06:08<03:56,  5.70it/s]

Batch 2400/3750 | Loss: 0.1630


Training:  67%|██████▋   | 2501/3750 [06:23<03:38,  5.72it/s]

Batch 2500/3750 | Loss: 0.3667


Training:  69%|██████▉   | 2601/3750 [06:38<03:27,  5.55it/s]

Batch 2600/3750 | Loss: 0.1112


Training:  72%|███████▏  | 2701/3750 [06:54<03:03,  5.71it/s]

Batch 2700/3750 | Loss: 0.5044


Training:  75%|███████▍  | 2801/3750 [07:09<02:50,  5.56it/s]

Batch 2800/3750 | Loss: 0.0195


Training:  77%|███████▋  | 2901/3750 [07:24<02:28,  5.71it/s]

Batch 2900/3750 | Loss: 0.3347


Training:  80%|████████  | 3001/3750 [07:40<02:14,  5.55it/s]

Batch 3000/3750 | Loss: 0.2125


Training:  83%|████████▎ | 3101/3750 [07:55<01:56,  5.55it/s]

Batch 3100/3750 | Loss: 0.1313


Training:  85%|████████▌ | 3201/3750 [08:10<01:36,  5.71it/s]

Batch 3200/3750 | Loss: 0.3233


Training:  88%|████████▊ | 3301/3750 [08:26<01:19,  5.67it/s]

Batch 3300/3750 | Loss: 0.1772


Training:  91%|█████████ | 3401/3750 [08:41<01:01,  5.71it/s]

Batch 3400/3750 | Loss: 0.0959


Training:  93%|█████████▎| 3501/3750 [08:56<00:43,  5.72it/s]

Batch 3500/3750 | Loss: 0.3200


Training:  96%|█████████▌| 3601/3750 [09:12<00:26,  5.69it/s]

Batch 3600/3750 | Loss: 0.3981


Training:  99%|█████████▊| 3701/3750 [09:27<00:08,  5.58it/s]

Batch 3700/3750 | Loss: 0.2772


Training: 100%|██████████| 3750/3750 [09:34<00:00,  6.52it/s]


Epoch 2 Completed | Avg Loss: 0.1847

[Epoch 3/3]


Training:   0%|          | 1/3750 [00:00<13:26,  4.65it/s]

Batch 0/3750 | Loss: 0.1354


Training:   3%|▎         | 101/3750 [00:15<10:57,  5.55it/s]

Batch 100/3750 | Loss: 0.0072


Training:   5%|▌         | 201/3750 [00:30<10:39,  5.55it/s]

Batch 200/3750 | Loss: 0.2057


Training:   8%|▊         | 301/3750 [00:46<10:07,  5.68it/s]

Batch 300/3750 | Loss: 0.1573


Training:  11%|█         | 401/3750 [01:01<09:45,  5.72it/s]

Batch 400/3750 | Loss: 0.0783


Training:  13%|█▎        | 501/3750 [01:16<09:41,  5.58it/s]

Batch 500/3750 | Loss: 0.0867


Training:  16%|█▌        | 601/3750 [01:32<09:11,  5.71it/s]

Batch 600/3750 | Loss: 0.1226


Training:  19%|█▊        | 701/3750 [01:47<08:54,  5.70it/s]

Batch 700/3750 | Loss: 0.2597


Training:  21%|██▏       | 801/3750 [02:02<08:43,  5.64it/s]

Batch 800/3750 | Loss: 0.1239


Training:  24%|██▍       | 901/3750 [02:18<08:19,  5.71it/s]

Batch 900/3750 | Loss: 0.0736


Training:  27%|██▋       | 1001/3750 [02:33<08:03,  5.68it/s]

Batch 1000/3750 | Loss: 0.0949


Training:  29%|██▉       | 1101/3750 [02:48<07:44,  5.70it/s]

Batch 1100/3750 | Loss: 0.0141


Training:  32%|███▏      | 1201/3750 [03:04<07:25,  5.72it/s]

Batch 1200/3750 | Loss: 0.1008


Training:  35%|███▍      | 1301/3750 [03:19<07:10,  5.69it/s]

Batch 1300/3750 | Loss: 0.1410


Training:  37%|███▋      | 1401/3750 [03:34<06:51,  5.72it/s]

Batch 1400/3750 | Loss: 0.0914


Training:  40%|████      | 1501/3750 [03:50<06:39,  5.63it/s]

Batch 1500/3750 | Loss: 0.0709


Training:  43%|████▎     | 1601/3750 [04:05<06:27,  5.55it/s]

Batch 1600/3750 | Loss: 0.1164


Training:  45%|████▌     | 1701/3750 [04:20<05:58,  5.71it/s]

Batch 1700/3750 | Loss: 0.0904


Training:  48%|████▊     | 1801/3750 [04:36<05:43,  5.68it/s]

Batch 1800/3750 | Loss: 0.0454


Training:  51%|█████     | 1901/3750 [04:51<05:23,  5.72it/s]

Batch 1900/3750 | Loss: 0.1788


Training:  53%|█████▎    | 2001/3750 [05:06<05:15,  5.55it/s]

Batch 2000/3750 | Loss: 0.2940


Training:  56%|█████▌    | 2101/3750 [05:22<04:48,  5.71it/s]

Batch 2100/3750 | Loss: 0.1993


Training:  59%|█████▊    | 2201/3750 [05:37<04:31,  5.71it/s]

Batch 2200/3750 | Loss: 0.0532


Training:  61%|██████▏   | 2301/3750 [05:52<04:14,  5.68it/s]

Batch 2300/3750 | Loss: 0.0391


Training:  64%|██████▍   | 2401/3750 [06:08<03:56,  5.71it/s]

Batch 2400/3750 | Loss: 0.1092


Training:  67%|██████▋   | 2501/3750 [06:23<03:38,  5.72it/s]

Batch 2500/3750 | Loss: 0.0308


Training:  69%|██████▉   | 2601/3750 [06:38<03:22,  5.68it/s]

Batch 2600/3750 | Loss: 0.0446


Training:  72%|███████▏  | 2701/3750 [06:54<03:04,  5.70it/s]

Batch 2700/3750 | Loss: 0.2531


Training:  75%|███████▍  | 2801/3750 [07:09<02:47,  5.67it/s]

Batch 2800/3750 | Loss: 0.1164


Training:  77%|███████▋  | 2901/3750 [07:24<02:28,  5.70it/s]

Batch 2900/3750 | Loss: 0.0596


Training:  80%|████████  | 3001/3750 [07:40<02:11,  5.71it/s]

Batch 3000/3750 | Loss: 0.2763


Training:  83%|████████▎ | 3101/3750 [07:55<01:55,  5.63it/s]

Batch 3100/3750 | Loss: 0.0752


Training:  85%|████████▌ | 3201/3750 [08:10<01:36,  5.71it/s]

Batch 3200/3750 | Loss: 0.0418


Training:  88%|████████▊ | 3301/3750 [08:26<01:19,  5.67it/s]

Batch 3300/3750 | Loss: 0.0094


Training:  91%|█████████ | 3401/3750 [08:41<01:01,  5.67it/s]

Batch 3400/3750 | Loss: 0.1062


Training:  93%|█████████▎| 3501/3750 [08:56<00:43,  5.72it/s]

Batch 3500/3750 | Loss: 0.2004


Training:  96%|█████████▌| 3601/3750 [09:12<00:26,  5.69it/s]

Batch 3600/3750 | Loss: 0.1418


Training:  99%|█████████▊| 3701/3750 [09:27<00:08,  5.71it/s]

Batch 3700/3750 | Loss: 0.1292


Training: 100%|██████████| 3750/3750 [09:34<00:00,  6.52it/s]

Epoch 3 Completed | Avg Loss: 0.1303





In [63]:
from torch.nn import CrossEntropyLoss
from time import time

def evaluate_model(model, dataset, batch_size=32, device='cuda' if torch.cuda.is_available() else 'cpu'):
    dataloader = DataLoader(dataset, batch_size=batch_size)
    model.to(device)
    model.eval()

    correct = 0
    total = 0
    total_loss = 0
    loss_fn = CrossEntropyLoss()

    start_time = time()

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            preds = torch.argmax(outputs, dim=1)

            total_loss += loss.item()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    end_time = time()
    acc = correct / total
    avg_loss = total_loss / len(dataloader)
    runtime = end_time - start_time

    result = {
        'eval_loss': avg_loss,
        'eval_accuracy': acc,
        'eval_runtime': round(runtime, 4),
        'eval_samples_per_second': round(total / runtime, 2),
        'eval_steps_per_second': round(len(dataloader) / runtime, 2)
    }

    print(f"\n✅ SBERT Evaluation Results: {result}")
    return result


# 평가 실행
sbert_eval_results = evaluate_model(model, test_dataset)

Evaluating: 100%|██████████| 238/238 [00:11<00:00, 20.48it/s]


✅ SBERT Evaluation Results: {'eval_loss': 0.4764830847309918, 'eval_accuracy': 0.8331578947368421, 'eval_runtime': 11.6217, 'eval_samples_per_second': 653.95, 'eval_steps_per_second': 20.48}



