In [10]:
import torch
# Report the installed PyTorch build and whether CUDA is usable; the GPU name
# is printed only when a CUDA device is actually available.
_cuda_available = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {_cuda_available}")
if _cuda_available:
    print(f"Current GPU: {torch.cuda.get_device_name()}")

PyTorch version: 2.5.1+cpu
CUDA available: False
Using device: cpu


In [8]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
import pandas as pd
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import os

In [9]:
# Device selection: use CUDA when PyTorch can see a GPU, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Hyperparameters
EPOCHS = 20            # number of passes over the training data
BATCH_SIZE = 32        # samples per optimisation step
LEARNING_RATE = 2e-5   # AdamW learning rate
WARMUP_STEPS = 100     # scheduler warm-up steps before the linear decay

# Location of the preprocessed CSV.
# The default is the original author's local path; set the TEXT_MODEL_DATA_PATH
# environment variable to run the notebook on another machine without editing it.
DATA_PATH = os.environ.get(
    "TEXT_MODEL_DATA_PATH",
    r"D:\project\OSS_Project\AI\text-model\data\preprocessed_data.csv",
)

class MultiLabelClassifier(nn.Module):
    """Pretrained encoder with a shared MLP trunk and three classification heads.

    The encoder's first-token representation is passed through a shared
    two-layer trunk (Linear -> ReLU -> BatchNorm1d -> Dropout, twice) and then
    through three independent heads that emit raw logits:

    * ``'도수'``   -> 4 classes
    * ``'술종류'`` -> 5 classes
    * ``'맛'``     -> 6 classes

    Args:
        tokenizer_name: Hugging Face model identifier used for BOTH the encoder
            and the tokenizer. Defaults to ``"klue/bert-base"`` when ``None``.
    """

    def __init__(self, tokenizer_name=None):
        super().__init__()
        if tokenizer_name is None:
            tokenizer_name = "klue/bert-base"

        self.bert = AutoModel.from_pretrained(tokenizer_name)
        # Kept as an attribute so existing code that reads `model.tokenizer`
        # keeps working; it is not used inside forward().
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

        # Size the trunk from the encoder's own config instead of assuming 768,
        # so encoders with a different hidden width do not hit a shape mismatch.
        hidden_size = self.bert.config.hidden_size

        # Shared intermediate trunk
        self.intermediate = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3)
        )

        # Per-task classifier heads (attribute names are part of the state_dict keys)
        self.classifier_도수 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 4)
        )
        self.classifier_술종류 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 5)
        )
        self.classifier_맛 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 6)
        )

    def forward(self, input_ids, attention_mask):
        """Run the encoder and all three heads.

        Args:
            input_ids: Token-id tensor forwarded to the encoder as ``input_ids``.
            attention_mask: Mask tensor forwarded to the encoder as ``attention_mask``.

        Returns:
            dict with keys ``'도수'``, ``'술종류'`` and ``'맛'``; each value is the
            raw (un-normalised) logit tensor produced by the matching head.

        Note:
            The trunk contains ``BatchNorm1d``; in training mode PyTorch rejects
            a batch that holds a single sample.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # outputs[0] is the encoder's first output (the last hidden state for
        # Hugging Face BERT models); position 0 along dim 1 is the first token.
        pooled_output = outputs[0][:, 0, :]
        intermediate_output = self.intermediate(pooled_output)

        return {
            '도수': self.classifier_도수(intermediate_output),
            '술종류': self.classifier_술종류(intermediate_output),
            '맛': self.classifier_맛(intermediate_output)
        }

# Initialise the model and tokenizer
tokenizer_name = "klue/bert-base"
model = MultiLabelClassifier(tokenizer_name=tokenizer_name)
model = model.to(device)  # move the model to the selected device
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Load the preprocessed dataset
data = pd.read_csv(DATA_PATH)

# Label text -> class index. These indices are what the model heads and the
# loss functions see, so every per-class quantity below must follow this order.
도수_매핑 = {'낮은': 0, '중간': 1, '높은': 2, '알 수 없음': 3}
술종류_매핑 = {'칵테일': 0, '럼': 1, '위스키': 2, '보드카': 3, '알 수 없음': 4}
맛_매핑 = {'달달한': 0, '쓴맛': 1, '상큼한': 2, '신맛': 3, '부드러운': 4, '알 수 없음': 5}

# Tokenize the input sentences (padded to the longest sentence, capped at 512 tokens)
inputs = tokenizer(
    list(data['입력 문장']),
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt"
)
inputs = {k: v.to(device) for k, v in inputs.items()}  # move inputs to the selected device

# Encode the label columns as integer class indices (KeyError on an unmapped label)
_도수_ids = np.array([도수_매핑[도수] for 도수 in data['도수']], dtype=np.int64)
_술종류_ids = np.array([술종류_매핑[종류] for 종류 in data['술 종류']], dtype=np.int64)
_맛_ids = np.array([맛_매핑[맛] for 맛 in data['맛']], dtype=np.int64)

도수_labels = torch.tensor(_도수_ids).to(device)
술종류_labels = torch.tensor(_술종류_ids).to(device)
맛_labels = torch.tensor(_맛_ids).to(device)


def _class_weights_by_index(label_ids, num_classes):
    """Return 'balanced' class weights laid out by class index.

    The weights are computed on the integer-encoded labels, so position ``i``
    of the result is the weight of class index ``i``. Computing them on the raw
    label strings would order them by sorted label text, which does not match
    the index mappings used for the targets. Classes that never occur in
    ``label_ids`` get weight 1.0 (they cannot contribute to the loss), which
    keeps the vector length equal to the number of logits.
    """
    present = np.unique(label_ids)
    weights = np.ones(num_classes, dtype=np.float64)
    weights[present] = compute_class_weight('balanced', classes=present, y=label_ids)
    return weights


# Class weights, aligned with the class indices, then moved to the selected device
도수_weights = _class_weights_by_index(_도수_ids, len(도수_매핑))
술종류_weights = _class_weights_by_index(_술종류_ids, len(술종류_매핑))
맛_weights = _class_weights_by_index(_맛_ids, len(맛_매핑))

도수_class_weights = torch.tensor(도수_weights, dtype=torch.float32).to(device)
술종류_class_weights = torch.tensor(술종류_weights, dtype=torch.float32).to(device)
맛_class_weights = torch.tensor(맛_weights, dtype=torch.float32).to(device)

# Dataset and dataloader
dataset = TensorDataset(
    inputs['input_ids'],
    inputs['attention_mask'],
    도수_labels,
    술종류_labels,
    맛_labels
)
# BatchNorm1d raises in training mode on a batch of one sample, so drop the
# trailing batch only when it would contain exactly one sample.
_drop_singleton_batch = len(dataset) % BATCH_SIZE == 1
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=_drop_singleton_batch
)
if len(dataloader) == 0:
    raise ValueError("No training batches: the dataset is empty or holds a single sample.")

# Loss functions (one weighted cross-entropy per head)
loss_fn_도수 = nn.CrossEntropyLoss(weight=도수_class_weights)
loss_fn_술종류 = nn.CrossEntropyLoss(weight=술종류_class_weights)
loss_fn_맛 = nn.CrossEntropyLoss(weight=맛_class_weights)

# Optimizer and linear warm-up/decay schedule (stepped once per batch)
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=WARMUP_STEPS,
    num_training_steps=len(dataloader) * EPOCHS
)

# Checkpoint directory (created once, up front)
os.makedirs('bert_model', exist_ok=True)

# Training loop
best_loss = float('inf')
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for batch in dataloader:
        # The dataset tensors already live on `device`, so no per-batch transfer is needed
        input_ids, attention_mask, 도수_label, 술종류_label, 맛_label = batch

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        도수_loss = loss_fn_도수(outputs['도수'], 도수_label)
        술종류_loss = loss_fn_술종류(outputs['술종류'], 술종류_label)
        맛_loss = loss_fn_맛(outputs['맛'], 맛_label)

        # The three task losses are summed with equal weight
        loss = 도수_loss + 술종류_loss + 맛_loss
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{EPOCHS}, Average Loss: {avg_loss:.4f}")

    # Save a checkpoint whenever the average TRAINING loss improves
    # (there is no validation split in this script).
    if avg_loss < best_loss:
        best_loss = avg_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': best_loss,
        }, "bert_model/best_model.pt")

# Save the tokenizer next to the checkpoint
tokenizer.save_pretrained("bert_model/")
print("학습 완료!")

Using device: cpu


KeyboardInterrupt: 

In [7]:
# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# 'cuda' is selected exactly when CUDA is available, so the device type can
# gate the GPU-name report.
if device.type == 'cuda':
    print(f"Current GPU: {torch.cuda.get_device_name()}")

Using device: cpu
