# [3주차] 심화과제: Machine translation(기계 번역)

In [10]:
# !pip install tqdm boto3 requests regex sentencepiece sacremoses datasets safetensors transformers tokenizers matplotlib torchinfo tqdm sacrebleu pandas scikit-learn

# [MY CODE] Language Translation (English-French) dataset 준비

## ✅ 1. 데이터 불러오기 & 확인

In [11]:
import pandas as pd

# Read the English/French sentence pairs from a CSV given by relative path,
# then print the frame's shape, its column labels and its first rows as a sanity check.
eng_french_data = pd.read_csv('eng_-french.csv')
for overview in (eng_french_data.shape, eng_french_data.columns, eng_french_data.head()):
    print(overview)

(175621, 2)
Index(['English words/sentences', 'French words/sentences'], dtype='object')
  English words/sentences French words/sentences
0                     Hi.                 Salut!
1                    Run!                Cours !
2                    Run!               Courez !
3                    Who?                  Qui ?
4                    Wow!             Ça alors !


## ✅ 2. 훈련/테스트 셋 분리 (Train/Test Split)

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state is fixed so the split is the same on every run.
train_data, test_data = train_test_split(
    eng_french_data,
    random_state=42,
    test_size=0.2,
)

print(f"훈련 데이터 크기: {len(train_data)}")
print(f"테스트 데이터 크기: {len(test_data)}")

훈련 데이터 크기: 140496
테스트 데이터 크기: 35125


## ✅ 3. T5 토크나이저 준비 & 토크나이징

In [16]:
import torch
from transformers import T5Tokenizer
# Tokenizer matching the t5-small checkpoint used by the experiments below.
tokenizer = T5Tokenizer.from_pretrained('t5-small')  # t5-small, t5-base, t5-large

english_column = 'English words/sentences'
french_column  = 'French words/sentences'

# NOTE(review): the English inputs are tokenized without a task prefix such as
# "translate English to French: " — confirm this is intended for the pre-trained runs.
# NOTE(review): padding=True is applied to a whole split in one call, so every example
# is presumably padded to the longest sentence of that split — confirm memory use is acceptable.

# Source side (English): tokenize the train split first, then the test split.
train_encodings, test_encodings = (
    tokenizer(list(frame[english_column]), padding=True, truncation=True, max_length=512)
    for frame in (train_data, test_data)
)

# Target side (French), used as labels; same settings and same split order.
train_labels, test_labels = (
    tokenizer(list(frame[french_column]), padding=True, truncation=True, max_length=512)
    for frame in (train_data, test_data)
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


## ✅ 4. 데이터셋 클래스로 변환 (PyTorch Dataset)

In [17]:
import torch

class TranslationDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized source sentences with tokenized targets.

    Args:
        encodings: Mapping with 'input_ids' and 'attention_mask' entries, each an
            indexable collection holding one sequence per example.
        labels: Mapping whose 'input_ids' entry holds one target sequence per example.

    Raises:
        ValueError: If the source and target sides contain a different number of examples.
    """

    def __init__(self, encodings, labels):
        # Fail early on misaligned inputs instead of hitting an IndexError (or silently
        # ignoring trailing targets) in the middle of an epoch.
        n_sources = len(encodings['input_ids'])
        n_targets = len(labels['input_ids'])
        if n_sources != n_targets:
            raise ValueError(
                f"encodings has {n_sources} examples but labels has {n_targets}"
            )
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # Key access (rather than attribute access) keeps this consistent with
        # __getitem__ and lets plain dicts be used as well as tokenizer outputs.
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        """Return the idx-th example as a dict with 'input_ids', 'attention_mask' and 'labels'."""
        return {
            'input_ids': self.encodings['input_ids'][idx],
            'attention_mask': self.encodings['attention_mask'][idx],
            'labels': self.labels['input_ids'][idx],
        }

# Wrap each split's tokenized source sentences and tokenized target sentences
# in a TranslationDataset so a DataLoader can index them example by example.
train_dataset = TranslationDataset(train_encodings, train_labels)
test_dataset = TranslationDataset(test_encodings, test_labels)

## ✅ 5. DataLoader 준비

In [30]:
from torch.utils.data import DataLoader

# Batch assembly function passed to the DataLoaders.
def collate_fn(batch):
    """Stack a list of per-example dicts into one dict of tensors.

    Args:
        batch: List of dicts, each with 'input_ids', 'attention_mask' and 'labels'
            entries. All examples must have equal-length sequences per field,
            since each field is converted with a single torch.tensor call.

    Returns:
        dict: The same three keys, each mapped to a tensor built from the
        corresponding field of every example, in batch order.
    """
    field_names = ('input_ids', 'attention_mask', 'labels')
    return {
        field: torch.tensor([example[field] for example in batch])
        for field in field_names
    }



batch_size = 64

# Both loaders share the batch size and collate function; only the training
# loader shuffles, while the test loader keeps the dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
# Choose the compute device: CUDA when available, otherwise MPS when available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device_name = "cuda"
elif torch.backends.mps.is_available():
    device_name = "mps"
else:
    device_name = "cpu"
device = torch.device(device_name)

print(f"Using device: {device}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sacrebleu

def accuracy(model, dataloader, tokenizer):
    """Return the exact-match sentence accuracy of `model` over `dataloader`.

    Each batch is translated with `model.generate`, predictions and labels are
    decoded to text (special tokens stripped), and a prediction counts as
    correct only when its text is identical to the reference text.

    Args:
        model: Model exposing `eval()` and `generate()`.
        dataloader: Iterable of batches with 'input_ids', 'attention_mask' and
            'labels' tensors.
        tokenizer: Tokenizer whose `decode` turns id sequences back into text.

    Returns:
        float: Number of exact matches divided by the number of sentences.
    """
    cnt = 0
    acc = 0

    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Inputs are padded, so the attention mask must be forwarded to
            # generate(); otherwise the padding positions are attended to as
            # if they were real tokens. Output length is capped at the label width.
            preds = model.generate(input_ids=input_ids,
                                   attention_mask=attention_mask,
                                   max_length=labels.size(1))

            # Token ids -> text, for both predictions and references.
            pred_texts = [tokenizer.decode(p, skip_special_tokens=True) for p in preds]
            label_texts = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]

            # Sentence-level comparison: only identical strings count as correct.
            for p, l in zip(pred_texts, label_texts):
                if p == l:
                    acc += 1
                cnt += 1

    return acc / cnt



# Corpus-level BLEU computation.
def calculate_bleu(model, dataloader, tokenizer):
    """Return the sacrebleu corpus BLEU score of `model` over `dataloader`.

    Args:
        model: Model exposing `eval()` and `generate()`.
        dataloader: Iterable of batches with 'input_ids', 'attention_mask' and
            'labels' tensors.
        tokenizer: Tokenizer whose `decode` turns id sequences back into text.

    Returns:
        The `.score` of `sacrebleu.corpus_bleu` computed over all decoded
        predictions against the decoded labels as the single reference set.
    """
    model.eval()
    predictions = []
    references = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward the attention mask so padding positions in the padded
            # inputs are ignored during generation.
            outputs = model.generate(input_ids=input_ids,
                                     attention_mask=attention_mask,
                                     max_length=labels.size(1))

            pred_texts = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
            ref_texts = [tokenizer.decode(label, skip_special_tokens=True) for label in labels]

            predictions.extend(pred_texts)
            references.extend(ref_texts)

    # sacrebleu expects a list of reference streams; there is exactly one here.
    bleu_score = sacrebleu.corpus_bleu(predictions, [references]).score
    return bleu_score

def evaluate_model(model, dataloader, tokenizer):
    """Return (accuracy, bleu_score) for `model` over `dataloader`.

    NOTE(review): a second function with the same name is defined later in this
    cell and replaces this one when the cell runs, so callers get the later
    four-value version. Consider deleting or renaming this definition.

    Args:
        model: Model exposing `eval()` and `generate()`.
        dataloader: Iterable of batches with 'input_ids', 'attention_mask' and
            'labels' tensors.
        tokenizer: Tokenizer whose `decode` turns id sequences back into text.

    Returns:
        tuple: (exact-match sentence accuracy, sacrebleu corpus BLEU `.score`).
    """
    model.eval()
    predictions = []
    references = []
    acc = 0
    cnt = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Generate predictions; the attention mask keeps padding positions
            # of the padded inputs from being attended to.
            preds = model.generate(input_ids=input_ids,
                                   attention_mask=attention_mask,
                                   max_length=labels.size(1))

            # Token ids -> text.
            pred_texts = [tokenizer.decode(p, skip_special_tokens=True) for p in preds]
            ref_texts = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]

            # Accuracy: sentence-level exact match.
            for p, l in zip(pred_texts, ref_texts):
                if p == l:
                    acc += 1
                cnt += 1

            # Accumulate texts for the corpus-level BLEU below.
            predictions.extend(pred_texts)
            references.extend(ref_texts)

    # Corpus BLEU against the single reference set.
    bleu_score = sacrebleu.corpus_bleu(predictions, [references]).score
    accuracy = acc / cnt

    return accuracy, bleu_score


def evaluate_model(model, dataloader, tokenizer):
    """Evaluate `model` on `dataloader`: loss, perplexity, exact-match accuracy and BLEU.

    Args:
        model: Model that returns an object with a `.loss` when called with
            `labels`, and that exposes `generate()`.
        dataloader: Sized iterable of batches with 'input_ids',
            'attention_mask' and 'labels' tensors.
        tokenizer: Tokenizer whose `decode` turns id sequences back into text.

    Returns:
        tuple: (avg_loss, ppl, accuracy, bleu_score) where
            avg_loss is the mean of the per-batch losses,
            ppl is `exp(avg_loss)` as a 0-d torch tensor,
            accuracy is the fraction of predictions whose decoded text equals
            the decoded reference, and
            bleu_score is the `.score` of `sacrebleu.corpus_bleu`.
    """
    model.eval()
    total_loss = 0
    predictions = []
    references = []
    acc = 0
    cnt = 0

    with torch.no_grad():
        print("Evaluating ...")
        for batch in tqdm(dataloader, desc="Evaluation"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # 1) Loss via a teacher-forced forward pass.
            # NOTE(review): labels are passed as produced by the data pipeline; if they
            # contain pad ids (not -100), padding positions contribute to this loss — confirm.
            outputs = model(input_ids=input_ids,
                            attention_mask=attention_mask,
                            labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            # 2) Free-running generation for the text metrics. The attention mask is
            #    forwarded so padding in the inputs is ignored, matching the forward pass above.
            preds = model.generate(input_ids=input_ids,
                                   attention_mask=attention_mask,
                                   max_length=labels.size(1))
            pred_texts = [tokenizer.decode(p, skip_special_tokens=True) for p in preds]
            ref_texts  = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]

            # 3) Exact-match accuracy.
            for p, r in zip(pred_texts, ref_texts):
                if p == r:
                    acc += 1
                cnt += 1

            # 4) Collect texts for corpus BLEU.
            predictions.extend(pred_texts)
            references.extend(ref_texts)

    # Mean per-batch loss and the corresponding perplexity.
    avg_loss = total_loss / len(dataloader)
    ppl = torch.exp(torch.tensor(avg_loss))

    # Corpus BLEU against the single reference set.
    bleu_score = sacrebleu.corpus_bleu(predictions, [references]).score
    accuracy = acc / cnt

    return avg_loss, ppl, accuracy, bleu_score

def plot_acc(ax, title, train_accuracies, test_accuracies, label1='train', label2='test'):
    """Draw up to two per-epoch metric curves on `ax`.

    Args:
        ax: Axes-like object providing `set_title`, `plot` and `legend`.
        title: Title for the subplot.
        train_accuracies: Per-epoch values for the first curve, or None to skip it.
        test_accuracies: Per-epoch values for the second curve, or None to skip it.
        label1: Legend label for the first curve.
        label2: Legend label for the second curve.
    """
    ax.set_title(title)

    drew_any = False
    for series, label in ((train_accuracies, label1), (test_accuracies, label2)):
        if series is None:
            continue
        # The x axis is derived from each series itself, so passing None for the
        # train series (as the accuracy/BLEU plots do) no longer fails on len(None).
        ax.plot(np.arange(len(series)), series, label=label)
        drew_any = True

    # Only build a legend when there is at least one labelled curve.
    if drew_any:
        ax.legend()

# [MY CODE] T5 모델 학습

In [32]:
from transformers import T5ForConditionalGeneration
from torch.optim import AdamW
import time
from tqdm import tqdm

## ✅ 1. pre-trained + Full Fine-tuning

In [35]:
# Experiment 1: start from the pre-trained t5-small weights and update every
# parameter (nothing is frozen).
model = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)

lr = 0.001
optimizer = AdamW(model.parameters(), lr=lr)

n_epochs = 10
start_epoch = 0

# Per-epoch history; replaced by the stored history when a checkpoint is resumed.
time_list = []
train_average_loss_list = []
test_average_loss_list = []
test_accuracies = []
train_perplexity_list = []
test_perplexity_list = []
test_bleu_scores = []

checkpoint_path = 'checkpoint_t5_1.pth'


# Resume from the checkpoint file when it exists; otherwise start from epoch 0.
try:
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects,
    # so only load checkpoint files that this notebook wrote itself.
    checkpoint = torch.load(checkpoint_path, weights_only=False, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']  # stored as epoch + 1, i.e. the next epoch index to run
    time_list = checkpoint['time_list']
    train_average_loss_list = checkpoint['train_average_loss_list']
    test_average_loss_list = checkpoint['test_average_loss_list']
    test_accuracies = checkpoint['test_accuracies']
    train_perplexity_list = checkpoint['train_perplexity_list']
    test_perplexity_list = checkpoint['test_perplexity_list']
    test_bleu_scores = checkpoint['test_bleu_scores']

    # Replay the log lines of the epochs that were already completed.
    for epoch in range(0, start_epoch):
        print(f"Epoch {epoch+1:3d} |"
              f" Time: {time_list[epoch]:.2f} seconds |"
              f" Train Loss: {train_average_loss_list[epoch]:.2f} |"
              f" Test Loss: {test_average_loss_list[epoch]:.2f} |"
              f" Test Acc: {test_accuracies[epoch]:.3f} |"
              f" Train Perplexity: {train_perplexity_list[epoch]:.2f} |"
              f" Test Perplexity: {test_perplexity_list[epoch]:.2f} |"
              f" Test BLEU Score: {test_bleu_scores[epoch]:.2f}")

    # Announce the resume whenever at least one epoch is still left to run
    # (the previous `n_epochs - 1` bound stayed silent when exactly one epoch remained).
    if start_epoch < n_epochs:
        print(f"이어서 시작~ {start_epoch + 1}.")
except FileNotFoundError:
    print("새롭게 시작~")


# Training loop: one pass over train_loader per epoch, then evaluation and a checkpoint.
for epoch in range(start_epoch, n_epochs):
    start_time = time.time()  # epoch start, used for the per-epoch timing

    total_train_loss = 0.
    model.train()

    for batch in tqdm(train_loader):  # tqdm shows per-batch progress
        optimizer.zero_grad()  # clear gradients from the previous step
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # The model computes the loss itself when labels are supplied.
        # NOTE(review): the labels still contain the tokenizer's pad ids, so padding
        # positions contribute to the loss; the Hugging Face T5 docs recommend replacing
        # them with -100 — confirm whether that is intended (evaluate_model computes
        # its loss on the same unmasked labels).
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backpropagate and update the parameters.
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()


    # End of epoch: mean per-batch training loss and its perplexity (a 0-d tensor).
    train_average_loss = total_train_loss / len(train_loader)
    train_perplexity = torch.exp(torch.tensor(train_average_loss))
    train_average_loss_list.append(train_average_loss)
    train_perplexity_list.append(train_perplexity)

    with torch.no_grad():
        model.eval()

        # Test-set loss, perplexity, exact-match accuracy and BLEU.
        test_average_loss, test_perplexity, test_acc, test_bleu_score = evaluate_model(model, test_loader, tokenizer)

        test_average_loss_list.append(test_average_loss)
        test_accuracies.append(test_acc)
        test_bleu_scores.append(test_bleu_score)
        test_perplexity_list.append(test_perplexity)

        # Epoch wall-clock time, including the evaluation above.
        end_time = time.time()
        epoch_time = end_time - start_time
        time_list.append(epoch_time)


    # Save a checkpoint after every epoch so an interrupted run can resume.
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'time_list': time_list,
        'train_average_loss_list': train_average_loss_list,
        'test_average_loss_list': test_average_loss_list,
        'test_accuracies': test_accuracies,
        'train_perplexity_list': train_perplexity_list,
        'test_perplexity_list': test_perplexity_list,
        'test_bleu_scores': test_bleu_scores

    }, checkpoint_path)


    # Log this epoch's metrics.
    print(f"Epoch {epoch+1:3d} |"
          f" Time: {epoch_time:.2f} seconds |"
          f" Train Loss: {train_average_loss:.2f} |"
          f" Test Loss: {test_average_loss:.2f} |"
          f" Test Acc: {test_acc:.3f} |"
          f" Train Perplexity: {train_perplexity:.2f} |"
          f" Test Perplexity: {test_perplexity:.2f} |"
          f" Test BLEU Score: {test_bleu_score:.2f}")


# 2x2 grid of subplots for the training curves.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# Loss and perplexity have train and test curves; accuracy and BLEU are test-only.
plot_acc(axes[0, 0], "Loss", train_average_loss_list, test_average_loss_list)
plot_acc(axes[0, 1], "Perplexity", train_perplexity_list, test_perplexity_list)
plot_acc(axes[1, 0], "Accuracy", None, test_accuracies)
plot_acc(axes[1, 1], "BLEU Score", None, test_bleu_scores)

# Adjust spacing between subplots.
plt.tight_layout()
plt.show()

새롭게 시작~


  0%|          | 0/2196 [00:00<?, ?it/s]


KeyboardInterrupt: 

## ✅ 2. pre-trained + Decoder Fine-tuning

In [None]:
# Experiment 2: start from the pre-trained t5-small weights but train with the
# encoder frozen.
model = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)

# Freeze every encoder parameter so that only the remaining parameters are updated.
# NOTE(review): in Hugging Face T5 the encoder's token embedding is presumably the same
# parameter as the embedding shared with the decoder, so this likely freezes that shared
# embedding as well — confirm this is the intended "decoder-only" setup.
for param in model.encoder.parameters():
    param.requires_grad = False

lr = 0.001
optimizer = AdamW(model.parameters(), lr=lr)

n_epochs = 10
start_epoch = 0

# Per-epoch history; replaced by the stored history when a checkpoint is resumed.
time_list = []
train_average_loss_list = []
test_average_loss_list = []
test_accuracies = []
train_perplexity_list = []
test_perplexity_list = []
test_bleu_scores = []

checkpoint_path = 'checkpoint_t5_2.pth'


# Resume from the checkpoint file when it exists; otherwise start from epoch 0.
try:
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects,
    # so only load checkpoint files that this notebook wrote itself.
    checkpoint = torch.load(checkpoint_path, weights_only=False, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']  # stored as epoch + 1, i.e. the next epoch index to run
    time_list = checkpoint['time_list']
    train_average_loss_list = checkpoint['train_average_loss_list']
    test_average_loss_list = checkpoint['test_average_loss_list']
    test_accuracies = checkpoint['test_accuracies']
    train_perplexity_list = checkpoint['train_perplexity_list']
    test_perplexity_list = checkpoint['test_perplexity_list']
    test_bleu_scores = checkpoint['test_bleu_scores']

    # Replay the log lines of the epochs that were already completed.
    for epoch in range(0, start_epoch):
        print(f"Epoch {epoch+1:3d} |"
              f" Time: {time_list[epoch]:.2f} seconds |"
              f" Train Loss: {train_average_loss_list[epoch]:.2f} |"
              f" Test Loss: {test_average_loss_list[epoch]:.2f} |"
              f" Test Acc: {test_accuracies[epoch]:.3f} |"
              f" Train Perplexity: {train_perplexity_list[epoch]:.2f} |"
              f" Test Perplexity: {test_perplexity_list[epoch]:.2f} |"
              f" Test BLEU Score: {test_bleu_scores[epoch]:.2f}")

    # Announce the resume whenever at least one epoch is still left to run
    # (the previous `n_epochs - 1` bound stayed silent when exactly one epoch remained).
    if start_epoch < n_epochs:
        print(f"이어서 시작~ {start_epoch + 1}.")
except FileNotFoundError:
    print("새롭게 시작~")


# Training loop: one pass over train_loader per epoch, then evaluation and a checkpoint.
for epoch in range(start_epoch, n_epochs):
    start_time = time.time()  # epoch start, used for the per-epoch timing

    total_train_loss = 0.
    model.train()

    for batch in tqdm(train_loader):  # tqdm shows per-batch progress
        optimizer.zero_grad()  # clear gradients from the previous step
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # The model computes the loss itself when labels are supplied.
        # NOTE(review): the labels still contain the tokenizer's pad ids, so padding
        # positions contribute to the loss; the Hugging Face T5 docs recommend replacing
        # them with -100 — confirm whether that is intended (evaluate_model computes
        # its loss on the same unmasked labels).
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backpropagate and update the parameters.
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()


    # End of epoch: mean per-batch training loss and its perplexity (a 0-d tensor).
    train_average_loss = total_train_loss / len(train_loader)
    train_perplexity = torch.exp(torch.tensor(train_average_loss))
    train_average_loss_list.append(train_average_loss)
    train_perplexity_list.append(train_perplexity)

    with torch.no_grad():
        model.eval()

        # Test-set loss, perplexity, exact-match accuracy and BLEU.
        test_average_loss, test_perplexity, test_acc, test_bleu_score = evaluate_model(model, test_loader, tokenizer)

        test_average_loss_list.append(test_average_loss)
        test_accuracies.append(test_acc)
        test_bleu_scores.append(test_bleu_score)
        test_perplexity_list.append(test_perplexity)

        # Epoch wall-clock time, including the evaluation above.
        end_time = time.time()
        epoch_time = end_time - start_time
        time_list.append(epoch_time)


    # Save a checkpoint after every epoch so an interrupted run can resume.
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'time_list': time_list,
        'train_average_loss_list': train_average_loss_list,
        'test_average_loss_list': test_average_loss_list,
        'test_accuracies': test_accuracies,
        'train_perplexity_list': train_perplexity_list,
        'test_perplexity_list': test_perplexity_list,
        'test_bleu_scores': test_bleu_scores

    }, checkpoint_path)


    # Log this epoch's metrics.
    print(f"Epoch {epoch+1:3d} |"
          f" Time: {epoch_time:.2f} seconds |"
          f" Train Loss: {train_average_loss:.2f} |"
          f" Test Loss: {test_average_loss:.2f} |"
          f" Test Acc: {test_acc:.3f} |"
          f" Train Perplexity: {train_perplexity:.2f} |"
          f" Test Perplexity: {test_perplexity:.2f} |"
          f" Test BLEU Score: {test_bleu_score:.2f}")


# 2x2 grid of subplots for the training curves.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# Loss and perplexity have train and test curves; accuracy and BLEU are test-only.
plot_acc(axes[0, 0], "Loss", train_average_loss_list, test_average_loss_list)
plot_acc(axes[0, 1], "Perplexity", train_perplexity_list, test_perplexity_list)
plot_acc(axes[1, 0], "Accuracy", None, test_accuracies)
plot_acc(axes[1, 1], "BLEU Score", None, test_bleu_scores)

# Adjust spacing between subplots.
plt.tight_layout()
plt.show()

## ✅ 3. Non-pretrained T5 (trained from scratch)

In [None]:
from transformers import T5Config, T5ForConditionalGeneration
# Experiment 3: build the model from the t5-small configuration only, i.e. the
# model is constructed from `config` rather than via from_pretrained().
config = T5Config.from_pretrained('t5-small')
model = T5ForConditionalGeneration(config).to(device)

lr = 0.001
optimizer = AdamW(model.parameters(), lr=lr)

n_epochs = 10
start_epoch = 0

# Per-epoch history; replaced by the stored history when a checkpoint is resumed.
time_list = []
train_average_loss_list = []
test_average_loss_list = []
test_accuracies = []
train_perplexity_list = []
test_perplexity_list = []
test_bleu_scores = []

checkpoint_path = 'checkpoint_t5_3.pth'


# Resume from the checkpoint file when it exists; otherwise start from epoch 0.
try:
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects,
    # so only load checkpoint files that this notebook wrote itself.
    checkpoint = torch.load(checkpoint_path, weights_only=False, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']  # stored as epoch + 1, i.e. the next epoch index to run
    time_list = checkpoint['time_list']
    train_average_loss_list = checkpoint['train_average_loss_list']
    test_average_loss_list = checkpoint['test_average_loss_list']
    test_accuracies = checkpoint['test_accuracies']
    train_perplexity_list = checkpoint['train_perplexity_list']
    test_perplexity_list = checkpoint['test_perplexity_list']
    test_bleu_scores = checkpoint['test_bleu_scores']

    # Replay the log lines of the epochs that were already completed.
    for epoch in range(0, start_epoch):
        print(f"Epoch {epoch+1:3d} |"
              f" Time: {time_list[epoch]:.2f} seconds |"
              f" Train Loss: {train_average_loss_list[epoch]:.2f} |"
              f" Test Loss: {test_average_loss_list[epoch]:.2f} |"
              f" Test Acc: {test_accuracies[epoch]:.3f} |"
              f" Train Perplexity: {train_perplexity_list[epoch]:.2f} |"
              f" Test Perplexity: {test_perplexity_list[epoch]:.2f} |"
              f" Test BLEU Score: {test_bleu_scores[epoch]:.2f}")

    # Announce the resume whenever at least one epoch is still left to run
    # (the previous `n_epochs - 1` bound stayed silent when exactly one epoch remained).
    if start_epoch < n_epochs:
        print(f"이어서 시작~ {start_epoch + 1}.")
except FileNotFoundError:
    print("새롭게 시작~")


# Training loop: one pass over train_loader per epoch, then evaluation and a checkpoint.
for epoch in range(start_epoch, n_epochs):
    start_time = time.time()  # epoch start, used for the per-epoch timing

    total_train_loss = 0.
    model.train()

    for batch in tqdm(train_loader):  # tqdm shows per-batch progress
        optimizer.zero_grad()  # clear gradients from the previous step
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # The model computes the loss itself when labels are supplied.
        # NOTE(review): the labels still contain the tokenizer's pad ids, so padding
        # positions contribute to the loss; the Hugging Face T5 docs recommend replacing
        # them with -100 — confirm whether that is intended (evaluate_model computes
        # its loss on the same unmasked labels).
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backpropagate and update the parameters.
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()


    # End of epoch: mean per-batch training loss and its perplexity (a 0-d tensor).
    train_average_loss = total_train_loss / len(train_loader)
    train_perplexity = torch.exp(torch.tensor(train_average_loss))
    train_average_loss_list.append(train_average_loss)
    train_perplexity_list.append(train_perplexity)

    with torch.no_grad():
        model.eval()

        # Test-set loss, perplexity, exact-match accuracy and BLEU.
        test_average_loss, test_perplexity, test_acc, test_bleu_score = evaluate_model(model, test_loader, tokenizer)

        test_average_loss_list.append(test_average_loss)
        test_accuracies.append(test_acc)
        test_bleu_scores.append(test_bleu_score)
        test_perplexity_list.append(test_perplexity)

        # Epoch wall-clock time, including the evaluation above.
        end_time = time.time()
        epoch_time = end_time - start_time
        time_list.append(epoch_time)


    # Save a checkpoint after every epoch so an interrupted run can resume.
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'time_list': time_list,
        'train_average_loss_list': train_average_loss_list,
        'test_average_loss_list': test_average_loss_list,
        'test_accuracies': test_accuracies,
        'train_perplexity_list': train_perplexity_list,
        'test_perplexity_list': test_perplexity_list,
        'test_bleu_scores': test_bleu_scores

    }, checkpoint_path)


    # Log this epoch's metrics.
    print(f"Epoch {epoch+1:3d} |"
          f" Time: {epoch_time:.2f} seconds |"
          f" Train Loss: {train_average_loss:.2f} |"
          f" Test Loss: {test_average_loss:.2f} |"
          f" Test Acc: {test_acc:.3f} |"
          f" Train Perplexity: {train_perplexity:.2f} |"
          f" Test Perplexity: {test_perplexity:.2f} |"
          f" Test BLEU Score: {test_bleu_score:.2f}")


# 2x2 grid of subplots for the training curves.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# Loss and perplexity have train and test curves; accuracy and BLEU are test-only.
plot_acc(axes[0, 0], "Loss", train_average_loss_list, test_average_loss_list)
plot_acc(axes[0, 1], "Perplexity", train_perplexity_list, test_perplexity_list)
plot_acc(axes[1, 0], "Accuracy", None, test_accuracies)
plot_acc(axes[1, 1], "BLEU Score", None, test_bleu_scores)

# Adjust spacing between subplots.
plt.tight_layout()
plt.show()