<a href="https://colab.research.google.com/github/arumshin-dev/python_conda_jupyter/blob/main/codeit/3_2_4_Seq2Seq_Attention_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import kagglehub

# Download the latest version of the Amazon Fine Food Reviews dataset through kagglehub.
# `path` is the location returned by dataset_download; later cells read Reviews.csv from it.
path = kagglehub.dataset_download("snap/amazon-fine-food-reviews")

# Show the returned location so the path used by later cells can be checked by eye.
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'amazon-fine-food-reviews' dataset.
Path to dataset files: /kaggle/input/amazon-fine-food-reviews


In [None]:
import pandas as pd

# Load only the "Text" and "Summary" columns from the first 50,000 rows of Reviews.csv
df = pd.read_csv(f"{path}/Reviews.csv", usecols=["Text", "Summary"], nrows=50000)

# Quick look at the loaded data: shape first, then the first few rows
print(df.shape)
df.head()

(50000, 2)


Unnamed: 0,Summary,Text
0,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,"""Delight"" says it all",This is a confection that has been around a fe...
3,Cough Medicine,If you are looking for the secret ingredient i...
4,Great taffy,Great taffy at a great price. There was a wid...


# 데이터 전처리

In [None]:
# Pull the "Text" and "Summary" columns out of the DataFrame as plain Python lists.
# Both lists come from the same frame, so element k of each belongs to the same review.
reviews_texts = df["Text"].tolist()
reviews_summaries = df["Summary"].tolist()

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from sklearn.model_selection import train_test_split

# Hyperparameters and notebook-wide globals
MAX_LENGTH = 100  # maximum sequence length (applied to both the source text and the summary)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Special token ids (summarization targets need SOS and EOS markers)
SOS_token = 0
EOS_token = 1
PAD_token = 2
UNK_token = 3

# Vocabulary class (special tokens are registered up front)
class Lang:
    """Bidirectional word <-> index vocabulary with per-word occurrence counts.

    The four special tokens (PAD, SOS, EOS, <unk>) are registered at construction
    time under the ids given by the module-level ``*_token`` constants, so ordinary
    words receive ids starting at 4.
    """

    def __init__(self, name):
        """Create a vocabulary labelled ``name`` that contains only the special tokens."""
        self.name = name
        # word -> index; special tokens are present from the start
        self.word2index = {"PAD": PAD_token, "SOS": SOS_token, "EOS": EOS_token, "<unk>": UNK_token}
        # index -> word (inverse of word2index)
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS", UNK_token: "<unk>"}
        # word -> number of times addWord() has seen it; special tokens start with no entry
        self.word2count = {}
        # next free index; equals 4 because PAD, SOS, EOS and <unk> are already registered
        self.n_words = len(self.word2index)

    def addSentence(self, sentence, tokenizer):
        """Tokenize ``sentence`` with ``tokenizer`` and register every resulting token."""
        for word in tokenizer(sentence):
            self.addWord(word)

    def addWord(self, word):
        """Register ``word`` (assigning the next free index if it is new) and count it.

        A corpus token that happens to be spelled like a pre-registered special word
        ("PAD", "SOS", "EOS", "<unk>") is already in ``word2index`` but has no entry in
        ``word2count``; the count is therefore read with a default of 0 instead of
        assuming the key exists, which previously raised ``KeyError``.
        """
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.index2word[self.n_words] = word
            self.n_words += 1
        self.word2count[word] = self.word2count.get(word, 0) + 1

# Minimal tokenizer (splits on whitespace)
def simple_tokenizer(sentence):
    """Split ``sentence`` on whitespace.

    Anything that is not a usable string (non-``str`` values such as ``None`` or a
    float NaN from pandas) is treated as an empty sentence and yields ``[]``.
    """
    usable = isinstance(sentence, str) and not pd.isnull(sentence)
    return sentence.split() if usable else "".split()

# Data preparation helper
def prepareData(lang1, lang2, tokenizer_text, tokenizer_summary, texts, summaries):
    """Pair up texts with summaries and build one vocabulary for each side.

    Args:
        lang1: name given to the source-side ``Lang``.
        lang2: name given to the summary-side ``Lang``.
        tokenizer_text: callable used to tokenize each source text.
        tokenizer_summary: callable used to tokenize each summary.
        texts: iterable of source texts.
        summaries: iterable of summaries, aligned with ``texts``.

    Returns:
        ``(input_lang, output_lang, pairs)`` where ``pairs`` is a list of
        ``(text, summary)`` tuples. Every pair contributes to both vocabularies.
    """
    # For summarization each sample is a (source text, summary) pair
    pairs = list(zip(texts, summaries))
    print("Read %s sentence pairs" % len(pairs))

    input_lang, output_lang = Lang(lang1), Lang(lang2)
    for source_text, summary_text in pairs:
        input_lang.addSentence(source_text, tokenizer_text)
        output_lang.addSentence(summary_text, tokenizer_summary)
    return input_lang, output_lang, pairs

# Build both vocabularies from every (review text, summary) pair; the same whitespace
# tokenizer is used for the source side and the summary side.
input_lang, output_lang, pairs = prepareData("text", "summary", simple_tokenizer, simple_tokenizer,
                                               reviews_texts, reviews_summaries)

# Tensor conversion: map a sentence to token ids, then pad with PAD up to MAX_LENGTH
def tensorFromSentence(lang, sentence, tokenizer):
    """Encode ``sentence`` as a fixed-length 1-D ``torch.long`` tensor on ``device``.

    Layout: ``SOS``, then at most ``MAX_LENGTH - 2`` word ids (words missing from
    ``lang.word2index`` become ``UNK_token``), then ``EOS``, then ``PAD_token``
    repeated until the sequence is ``MAX_LENGTH`` long.
    """
    # Keep room for the SOS and EOS markers on both source texts and summaries
    kept_words = tokenizer(sentence)[:MAX_LENGTH - 2]
    token_ids = [SOS_token, *(lang.word2index.get(w, UNK_token) for w in kept_words), EOS_token]
    # Right-pad; a non-positive repeat count adds nothing
    token_ids.extend([PAD_token] * (MAX_LENGTH - len(token_ids)))
    return torch.tensor(token_ids[:MAX_LENGTH], dtype=torch.long, device=device)

# DataLoader factory
def get_dataloader(pairs, batch_size):
    """Encode ``pairs`` and wrap them in a randomly sampled ``DataLoader``.

    Args:
        pairs: sequence of ``(source_text, summary)`` tuples.
        batch_size: number of samples per batch.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of (input, target) id tensors,
        drawn through a ``RandomSampler``. The stacked shapes are printed.
    """
    # First element of each pair is the source text, second is the summary.
    # Stacking adds the sample dimension: one row per pair.
    input_tensors = torch.stack(
        [tensorFromSentence(input_lang, source, simple_tokenizer) for source, _ in pairs],
        dim=0,
    )
    target_tensors = torch.stack(
        [tensorFromSentence(output_lang, summary, simple_tokenizer) for _, summary in pairs],
        dim=0,
    )

    dataset = TensorDataset(input_tensors, target_tensors)
    loader = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)

    print(f"input_tensors.shape: {input_tensors.shape}, target_tensors.shape: {target_tensors.shape}")
    return loader

# Split all sentence pairs 80/20 into train and test (random_state is fixed for reproducibility)
train_pairs, test_pairs = train_test_split(pairs, test_size=0.2, random_state=42)
# Both loaders use the same batch size of 42
train_dataloader = get_dataloader(train_pairs, batch_size=42)
test_dataloader = get_dataloader(test_pairs, batch_size=42)

Read 50000 sentence pairs
input_tensors.shape: torch.Size([40000, 100]), target_tensors.shape: torch.Size([40000, 100])
input_tensors.shape: torch.Size([10000, 100]), target_tensors.shape: torch.Size([10000, 100])


# Seq2Seq

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import torch.nn.functional as F

class EncoderRNN(nn.Module):
    """GRU encoder: token ids -> embeddings -> dropout -> single-layer batch-first GRU.

    Args:
        input_size: number of rows in the embedding table (source vocabulary size).
        hidden_size: embedding width and GRU hidden width.
        dropout_p: dropout probability applied to the embeddings.
    """

    def __init__(self, input_size, hidden_size, dropout_p=0.1):
        super().__init__()
        # Exposed so callers can size the initial hidden state
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        """Encode a batch of id sequences.

        Args:
            input: integer tensor of token ids, ``[batch_size, seq_len]``.
            hidden: initial GRU state passed straight to ``nn.GRU``.

        Returns:
            ``(output, hidden)`` exactly as returned by the GRU.
        """
        token_vectors = self.embedding(input)  # [batch_size, seq_len, hidden_size]
        return self.gru(self.dropout(token_vectors), hidden)

class DecoderRNN(nn.Module):
    """Plain GRU decoder that unrolls for exactly ``MAX_LENGTH`` steps.

    Args:
        hidden_size: embedding width and GRU hidden width.
        output_size: target vocabulary size (embedding rows and output logits).
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        """Decode ``MAX_LENGTH`` steps starting from ``SOS_token``.

        Args:
            encoder_outputs: only its first dimension (batch size) is read here.
            encoder_hidden: used as the decoder's initial GRU state.
            target_tensor: optional ``[batch, >= MAX_LENGTH]`` id tensor. When given,
                column ``t`` is fed as the input of step ``t + 1`` (teacher forcing);
                otherwise the decoder feeds back its own top-1 prediction.

        Returns:
            ``(log_probs, final_hidden, None)`` with ``log_probs`` of shape
            ``[batch, MAX_LENGTH, output_size]``. The trailing ``None`` keeps the
            return arity consistent for the training loop.
        """
        n_rows = encoder_outputs.size(0)
        step_input = torch.full((n_rows, 1), SOS_token, dtype=torch.long, device=device)
        state = encoder_hidden
        teacher_forcing = target_tensor is not None
        step_logits = []

        for t in range(MAX_LENGTH):
            logits, state = self.forward_step(step_input, state)
            step_logits.append(logits)

            if teacher_forcing:
                # Next input is the ground-truth token at this position
                step_input = target_tensor[:, t].unsqueeze(1)
            else:
                # Next input is the model's own best guess, cut off from the graph
                _, best = logits.topk(1)
                step_input = best.squeeze(-1).detach()

        log_probs = F.log_softmax(torch.cat(step_logits, dim=1), dim=-1)
        return log_probs, state, None

    def forward_step(self, input, hidden):
        """Run one decoding step: embed -> ReLU -> GRU -> linear projection to logits."""
        activated = F.relu(self.embedding(input))
        gru_out, hidden = self.gru(activated, hidden)
        return self.out(gru_out), hidden

In [None]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, teacher_forcing_ratio=0.3):
    """Run one pass over ``dataloader`` and return the mean batch loss.

    For every batch a single random draw decides whether the decoder is given the
    targets (teacher forcing, probability ``teacher_forcing_ratio``) or decodes
    from its own predictions.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    optimizers = (encoder_optimizer, decoder_optimizer)
    total_loss = 0

    for input_batch, target_batch in dataloader:
        for optimizer in optimizers:
            optimizer.zero_grad()

        # Fresh all-zero encoder state sized to this batch
        initial_hidden = torch.zeros(1, input_batch.size(0), encoder.hidden_size, device=device)
        encoder_outputs, encoder_hidden = encoder(input_batch, initial_hidden)

        # Pass the targets only when this batch is teacher-forced
        forced_targets = target_batch if random.random() < teacher_forcing_ratio else None
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor=forced_targets)

        vocab_size = decoder_outputs.size(-1)
        loss = criterion(
            decoder_outputs.view(-1, vocab_size),  # [B*T, vocab]
            target_batch.view(-1),                 # [B*T]
        )
        loss.backward()
        for optimizer in optimizers:
            optimizer.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)


def train_model(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001, teacher_forcing_ratio=0.5):
    """Train an encoder/decoder pair with Adam and print the mean loss per epoch.

    Args:
        train_dataloader: yields ``(input_batch, target_batch)`` id tensors.
        encoder: module exposing ``hidden_size``; called as ``encoder(input, hidden)``.
        decoder: module called as ``decoder(encoder_outputs, encoder_hidden[, target_tensor])``
            that returns log-probabilities as its first output.
        n_epochs: number of passes over ``train_dataloader``.
        learning_rate: Adam learning rate used for both optimizers.
        teacher_forcing_ratio: per-batch probability of teacher forcing.
    """
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # Targets are right-padded with PAD_token up to MAX_LENGTH. Without ignore_index
    # the averaged loss would be dominated by padding positions instead of summary
    # tokens; this also matches the criterion used for the Transformer model.
    criterion = nn.NLLLoss(ignore_index=PAD_token)

    for epoch in range(1, n_epochs + 1):
        epoch_loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, teacher_forcing_ratio)
        print(f"Epoch {epoch}/{n_epochs}, Average Loss: {epoch_loss:.4f}")

In [None]:
# Model definition and training hyperparameters (author-reported run time: 18 minutes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hidden_size = 128

# Encoder is sized by the source vocabulary, decoder by the summary vocabulary
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder = DecoderRNN(hidden_size, output_lang.n_words).to(device)

train_model(train_dataloader, encoder, decoder, n_epochs=10)

Epoch 1/10, Average Loss: 0.5052
Epoch 2/10, Average Loss: 0.3505
Epoch 3/10, Average Loss: 0.3412
Epoch 4/10, Average Loss: 0.3309
Epoch 5/10, Average Loss: 0.3216
Epoch 6/10, Average Loss: 0.3120
Epoch 7/10, Average Loss: 0.3022
Epoch 8/10, Average Loss: 0.2959
Epoch 9/10, Average Loss: 0.2893
Epoch 10/10, Average Loss: 0.2780


In [None]:
def indices_to_sentence(indices, lang):
    """Turn a sequence of token ids back into a space-joined string.

    Decoding stops at the first ``EOS_token``. Ids missing from ``lang.index2word``
    are rendered as ``"<unk>"``; the words "PAD", "SOS" and "EOS" are dropped.

    Args:
        indices: iterable of ints or scalar tensors.
        lang: object exposing an ``index2word`` mapping.
    """
    hidden_words = ("PAD", "SOS", "EOS")
    kept = []
    for raw in indices:
        token_id = raw.item() if isinstance(raw, torch.Tensor) else raw
        if token_id == EOS_token:
            break
        token = lang.index2word.get(token_id, "<unk>")
        if token in hidden_words:
            continue
        kept.append(token)
    return " ".join(kept)

def generate_summary(encoder, decoder, input_tensor):
    """Greedy-decode a summary for one encoded source sequence.

    The modules are switched to eval mode for the duration of the call so dropout
    is inactive, and the forward pass runs under ``torch.no_grad()`` so no autograd
    graph is built. Each module's previous train/eval flag is restored before
    returning, so calling this between training runs does not alter training.

    Args:
        encoder: module exposing ``hidden_size``; called as ``encoder(input, hidden)``.
        decoder: module called as ``decoder(encoder_outputs, encoder_hidden)`` that
            returns per-step scores as its first output.
        input_tensor: 1-D tensor of source token ids.

    Returns:
        1-D tensor with the arg-max token id for every decoder step.
    """
    batch = input_tensor.unsqueeze(0)  # add the batch dimension: [1, seq_len]

    modules = (encoder, decoder)
    previous_modes = [module.training for module in modules]
    for module in modules:
        module.eval()
    try:
        with torch.no_grad():
            # Initialise the hidden state on the same device as the input, then encode
            encoder_hidden = torch.zeros(1, batch.size(0), encoder.hidden_size, device=batch.device)
            encoder_outputs, encoder_hidden = encoder(batch, encoder_hidden)

            # Inference: call the decoder without a target tensor
            decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden)
    finally:
        for module, was_training in zip(modules, previous_modes):
            module.train(was_training)

    predicted_ids = decoder_outputs.argmax(dim=-1)  # [1, decoded_len]
    return predicted_ids[0]

In [None]:
import pandas as pd

# Collect (source text, reference summary, generated summary) rows for inspection
generated_data = []

for input_batch, target_batch in test_dataloader:
    for i in range(input_batch.size(0)):
        input_tensor = input_batch[i].to(device)
        target_tensor = target_batch[i].to(device)

        pred_tensor = generate_summary(encoder, decoder, input_tensor)

        # Decode all three id sequences back to text with the matching vocabulary
        text = indices_to_sentence(input_tensor, input_lang)
        reference_summary = indices_to_sentence(target_tensor, output_lang)
        generated_summary = indices_to_sentence(pred_tensor, output_lang)

        generated_data.append({
            'text': text,
            'summary': reference_summary,
            'summary_generated': generated_summary
        })

    # Checked once per batch, so a whole batch is decoded before stopping
    if len(generated_data) >= 20:
        break

# Build a DataFrame from the first 20 rows and display it
df_generated = pd.DataFrame(generated_data[:20])
df_generated.head(20)

Unnamed: 0,text,summary,summary_generated
0,My daughter is allergic to everything this ite...,Best bread out there for allergic people,Great for
1,Been buying greenies for years. Decide to try ...,Dog got sick,My dog
2,I ordered these nuts off a whim because of the...,Great Snack,Great
3,I've not been a big tea drinker but wanted to ...,Great tea,Great
4,Bertolli Extra Virgin Olive Oil is a fixture o...,You SHOULDN'T be using anything else!,A little to but
5,All the subtlety of coconut milk with the rich...,best of both worlds,Not
6,"Yummy, mild, and delicious. My new favorite! M...",My New Favorite,Great for
7,What an amazing find. The price on Amazon blow...,Nutrient Powerhouse,Great
8,I receiced the order quickly of both the Hicko...,WideRide Hickory Beef Jerky,Not what I expected
9,I was afraid at firt to try this- I really hat...,Zipfizz grape-,A is the but


# Attention

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Luong-style attention
class LuongAttention(nn.Module):
    """Multiplicative attention: keys are passed through a learned linear layer
    before being dotted with the query.

    Args:
        hidden_size: feature width of both the query and the keys.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.Linear(hidden_size, hidden_size)

    def forward(self, query, keys):
        """Attend over ``keys`` with ``query``.

        Args:
            query: tensor of shape ``(1, B, H)``.
            keys: tensor of shape ``(B, T, H)``.

        Returns:
            ``(context, weights)`` with shapes ``(B, 1, H)`` and ``(B, 1, T)``.
            ``weights`` is a softmax over ``T``; ``context`` is the weighted sum of
            the unprojected ``keys``.
        """
        batch_first_query = query.transpose(0, 1)          # (B, 1, H)
        projected_keys = self.attn(keys)                   # (B, T, H)
        # Score each key position: query . projected_key
        scores = torch.bmm(batch_first_query, projected_keys.transpose(1, 2))  # (B, 1, T)
        weights = F.softmax(scores, dim=-1)                # (B, 1, T)
        return torch.bmm(weights, keys), weights

# Attention-based decoder
class AttnDecoderRNN(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    Args:
        hidden_size: embedding width and GRU hidden width.
        output_size: target vocabulary size.
        dropout_p: dropout probability applied to the token embeddings.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1):
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attention = LuongAttention(hidden_size)
        # GRU input is the token embedding concatenated with the attention context
        self.gru = nn.GRU(hidden_size * 2, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        """Decode ``MAX_LENGTH`` steps starting from ``SOS_token``.

        Args:
            encoder_outputs: encoder states attended over at every step.
            encoder_hidden: used as the decoder's initial GRU state.
            target_tensor: optional id tensor. When given, column ``t`` is fed as the
                input of step ``t + 1``; otherwise the top-1 prediction is fed back.

        Returns:
            ``(log_probs, final_hidden, attentions)``, the last two concatenated
            over the step dimension.
        """
        n_rows = encoder_outputs.size(0)
        step_input = torch.full((n_rows, 1), SOS_token, dtype=torch.long, device=device)
        state = encoder_hidden
        teacher_forcing = target_tensor is not None
        step_logits, step_weights = [], []

        for t in range(MAX_LENGTH):
            logits, state, weights = self.forward_step(step_input, state, encoder_outputs)
            step_logits.append(logits)
            step_weights.append(weights)

            if teacher_forcing:
                step_input = target_tensor[:, t].unsqueeze(1)
            else:
                _, best = logits.topk(1)
                step_input = best.squeeze(-1).detach()

        log_probs = F.log_softmax(torch.cat(step_logits, dim=1), dim=-1)
        return log_probs, state, torch.cat(step_weights, dim=1)

    def forward_step(self, input, hidden, encoder_outputs):
        """One step: embed the token, attend with the current hidden state as the
        query, feed [embedding ; context] to the GRU, and project to logits."""
        token_vec = self.dropout(self.embedding(input))                       # (B, 1, H)
        context, attn_weights = self.attention(hidden, encoder_outputs)       # (B, 1, H), (B, 1, T)
        gru_out, hidden = self.gru(torch.cat((token_vec, context), dim=2), hidden)
        return self.out(gru_out), hidden, attn_weights

In [None]:
# Author-reported run time: 29 minutes.
# Rebinds `encoder` / `decoder` to fresh models; the decoder is now the attention variant.
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)

train_model(train_dataloader, encoder, decoder, n_epochs=10)

Epoch 1/10, Average Loss: 0.4863
Epoch 2/10, Average Loss: 0.3521
Epoch 3/10, Average Loss: 0.3405
Epoch 4/10, Average Loss: 0.3286
Epoch 5/10, Average Loss: 0.3214
Epoch 6/10, Average Loss: 0.3105
Epoch 7/10, Average Loss: 0.3016
Epoch 8/10, Average Loss: 0.2900
Epoch 9/10, Average Loss: 0.2844
Epoch 10/10, Average Loss: 0.2758


In [None]:
# Collect (source text, reference summary, generated summary) rows for the attention model
generated_data = []

for input_batch, target_batch in test_dataloader:
    for i in range(input_batch.size(0)):
        input_tensor = input_batch[i].to(device)
        target_tensor = target_batch[i].to(device)

        pred_tensor = generate_summary(encoder, decoder, input_tensor)

        # Decode all three id sequences back to text with the matching vocabulary
        text = indices_to_sentence(input_tensor, input_lang)
        reference_summary = indices_to_sentence(target_tensor, output_lang)
        generated_summary = indices_to_sentence(pred_tensor, output_lang)

        generated_data.append({
            'text': text,
            'summary': reference_summary,
            'summary_generated': generated_summary
        })

    # Checked once per batch, so a whole batch is decoded before stopping
    if len(generated_data) >= 20:
        break

# Build a DataFrame from the first 20 rows and display it
df_generated = pd.DataFrame(generated_data[:20])
df_generated.head(20)

Unnamed: 0,text,summary,summary_generated
0,Switch Orange/Tangerine soda comes in a smalle...,"Pleasing Orange Soda, A Tad Pricy but worth it",Not as good as
1,This coffee is surprisingly delicious. We neve...,Yummy hazelnut,Great
2,Popchips are so good. You can eat quite a few ...,LOVE popchips,Great
3,These little chocolate cream filled cookies ar...,Hello Panda Chocolate Cream Filled Mini Cookies.,Great
4,By far the best coffee the USA has to offer an...,Simply amazing!,Best Coffee
5,You'd be surprised what you can do with a thre...,all I have to say is,Great
6,They were stale and hard. Had to suck on them ...,Chewey Sweet Tarts,Great
7,Dog treats. What do you do with dog treats? We...,Crunchy treats for the dogs in our life,My dog loves
8,I am surprised just how yummy these chips are ...,LOVE these with one caveat,Great for and
9,I am buying this product on a monthly basis to...,Allure Mano,Great


# Transformer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Positional encoding (injects token-position information)
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a batch-first input.

    Even feature columns hold ``sin(position * div_term)`` and odd columns hold
    ``cos(position * div_term)``. The table is stored as the non-trainable buffer
    ``pe`` with shape ``(1, max_len, d_model)``.

    Args:
        d_model: feature width; both even and odd values are supported.
        max_len: number of positions precomputed.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-torch.log(torch.tensor(10000.0)) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so drop the
        # surplus frequency; for even d_model the slice keeps every column.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        ``x`` is indexed as ``(batch, seq_len, d_model)``.
        """
        return x + self.pe[:, :x.size(1)]

# Encoder
class TransformerEncoder(nn.Module):
    """Token embedding + positional encoding + a stack of ``nn.TransformerEncoderLayer``.

    Args:
        vocab_size: source vocabulary size.
        emb_size: embedding / model width (also exposed as ``hidden_size``).
        n_heads: attention heads per layer.
        ff_dim: feed-forward width inside each layer.
        num_layers: number of stacked encoder layers.
        dropout: dropout probability inside each layer.
    """

    def __init__(self, vocab_size, emb_size, n_heads, ff_dim, num_layers, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_token)
        self.pos_encoding = PositionalEncoding(emb_size)
        layer = nn.TransformerEncoderLayer(
            d_model=emb_size,
            nhead=n_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        self.hidden_size = emb_size

    def forward(self, src, src_key_padding_mask=None):
        """Encode ``src`` token ids.

        When ``src_key_padding_mask`` is omitted it is derived as ``src == PAD_token``.
        """
        padding_mask = (src == PAD_token) if src_key_padding_mask is None else src_key_padding_mask
        positioned = self.pos_encoding(self.embedding(src))
        return self.encoder(positioned, src_key_padding_mask=padding_mask)

# Decoder
class TransformerDecoder(nn.Module):
    """Token embedding + positional encoding + ``nn.TransformerDecoder`` + vocabulary projection.

    Args:
        vocab_size: target vocabulary size (embedding rows and output logits).
        emb_size: embedding / model width.
        n_heads: attention heads per layer.
        ff_dim: feed-forward width inside each layer.
        num_layers: number of stacked decoder layers.
        dropout: dropout probability inside each layer.
    """

    def __init__(self, vocab_size, emb_size, n_heads, ff_dim, num_layers, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_token)
        self.pos_encoding = PositionalEncoding(emb_size)
        layer = nn.TransformerDecoderLayer(
            d_model=emb_size,
            nhead=n_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            batch_first=True,
        )
        self.decoder = nn.TransformerDecoder(layer, num_layers=num_layers)
        self.fc_out = nn.Linear(emb_size, vocab_size)

    def forward(self, tgt, memory, tgt_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None):
        """Return vocabulary logits for every position of ``tgt`` given encoder ``memory``.

        All mask arguments are forwarded unchanged to ``nn.TransformerDecoder``.
        """
        positioned = self.pos_encoding(self.embedding(tgt))
        decoded = self.decoder(
            positioned,
            memory,
            tgt_mask=tgt_mask,
            tgt_key_padding_mask=tgt_key_padding_mask,
            memory_key_padding_mask=memory_key_padding_mask,
        )
        return self.fc_out(decoded)

# Decoder masking helper (look-ahead mask)
def generate_square_subsequent_mask(sz):
    """Return an ``sz x sz`` boolean mask on ``device`` that is ``True`` strictly
    above the diagonal, i.e. at (row i, column j) whenever ``j > i``."""
    positions = torch.arange(sz, device=device)
    return positions.unsqueeze(0) > positions.unsqueeze(1)

# Summary generation (greedy decoding)
def evaluate_transformer(encoder, decoder, src, max_len, sos_token):
    """Greedy-decode a summary for a single source sequence.

    Both modules are put in eval mode and left there. Decoding runs under
    ``torch.no_grad()`` so no autograd graph is accumulated across steps, and the
    source padding mask is passed to the decoder as ``memory_key_padding_mask``,
    as it is during training, so cross-attention ignores padded source positions.

    Args:
        encoder: called as ``encoder(src, src_key_padding_mask=...)``.
        decoder: called as ``decoder(tgt, memory, tgt_mask=..., memory_key_padding_mask=...)``
            and expected to return ``(1, tgt_len, vocab)`` scores.
        src: 1-D tensor of source token ids.
        max_len: maximum number of ids in the result, including ``sos_token``.
        sos_token: id used to start decoding.

    Returns:
        List of int token ids starting with ``sos_token``; it ends with ``EOS_token``
        if that token was produced before ``max_len`` was reached.
    """
    encoder.eval()
    decoder.eval()
    src = src.unsqueeze(0).to(device)
    src_mask = (src == PAD_token)

    tgt_indices = [sos_token]
    with torch.no_grad():
        memory = encoder(src, src_key_padding_mask=src_mask)
        # NOTE(review): some PyTorch releases appear to return `memory` trimmed to the
        # longest unpadded length on the inference fast path; slicing keeps the mask
        # aligned with `memory` (assumes right-padding) and is a no-op otherwise.
        memory_mask = src_mask[:, :memory.size(1)]

        for _ in range(max_len - 1):
            tgt_tensor = torch.tensor(tgt_indices, dtype=torch.long, device=device).unsqueeze(0)
            tgt_mask = generate_square_subsequent_mask(tgt_tensor.size(1))
            output = decoder(
                tgt_tensor, memory,
                tgt_mask=tgt_mask,
                memory_key_padding_mask=memory_mask,
            )
            # Only the newest position decides the next token
            next_token = output[:, -1, :].argmax(-1).item()
            tgt_indices.append(next_token)
            if next_token == EOS_token:
                break
    return tgt_indices

# Transformer training loop
def train_transformer(encoder, decoder, dataloader, optimizer_enc, optimizer_dec, criterion, num_epochs=10):
    """Train the Transformer encoder/decoder pair and print the mean loss per epoch.

    Each batch is trained with shifted targets: the decoder receives ``tgt[:, :-1]``
    and is scored against ``tgt[:, 1:]``. Padding masks are derived by comparing
    ids with ``PAD_token``; a look-ahead mask hides future target positions.
    """
    for epoch in range(1, num_epochs + 1):
        encoder.train()
        decoder.train()
        total_loss = 0

        for src, tgt in dataloader:
            src = src.to(device)
            tgt = tgt.to(device)
            # Decoder input drops the last position; expected output drops the first
            decoder_in, expected = tgt[:, :-1], tgt[:, 1:]

            optimizer_enc.zero_grad()
            optimizer_dec.zero_grad()

            src_pad = (src == PAD_token)
            memory = encoder(src, src_key_padding_mask=src_pad)
            logits = decoder(
                decoder_in, memory,
                tgt_mask=generate_square_subsequent_mask(decoder_in.size(1)),
                tgt_key_padding_mask=(decoder_in == PAD_token),
                memory_key_padding_mask=src_pad,
            )

            loss = criterion(logits.view(-1, logits.size(-1)), expected.reshape(-1))
            loss.backward()

            optimizer_enc.step()
            optimizer_dec.step()

            total_loss += loss.item()

        print(f"Epoch {epoch}: Loss = {total_loss / len(dataloader):.4f}")

In [None]:
# Model initialisation: vocabulary sizes come from the Lang objects built earlier
input_dim = input_lang.n_words
output_dim = output_lang.n_words
emb_dim = 256
n_heads = 8
ff_dim = 512
num_layers = 3

tf_encoder = TransformerEncoder(input_dim, emb_dim, n_heads, ff_dim, num_layers).to(device)
tf_decoder = TransformerDecoder(output_dim, emb_dim, n_heads, ff_dim, num_layers).to(device)

# Optimizers and loss function (PAD positions are excluded from the loss)
optimizer_enc = torch.optim.Adam(tf_encoder.parameters(), lr=0.0005)
optimizer_dec = torch.optim.Adam(tf_decoder.parameters(), lr=0.0005)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_token)

# Run training
train_transformer(
    encoder=tf_encoder,
    decoder=tf_decoder,
    dataloader=train_dataloader,
    optimizer_enc=optimizer_enc,
    optimizer_dec=optimizer_dec,
    criterion=criterion,
    num_epochs=10
)

Epoch 1: Loss = 6.3808
Epoch 2: Loss = 5.4202
Epoch 3: Loss = 4.8060
Epoch 4: Loss = 4.2550
Epoch 5: Loss = 3.7700
Epoch 6: Loss = 3.3696
Epoch 7: Loss = 3.0516
Epoch 8: Loss = 2.7944
Epoch 9: Loss = 2.5758
Epoch 10: Loss = 2.3951


In [None]:
# Collect (source text, reference summary, generated summary) rows for the Transformer
generated_data = []

for input_batch, target_batch in test_dataloader:
    for i in range(input_batch.size(0)):
        input_tensor = input_batch[i].to(device)
        target_tensor = target_batch[i].to(device)

        # Call evaluate_transformer directly instead of the earlier generate_summary()
        pred_tensor = evaluate_transformer(tf_encoder, tf_decoder, input_tensor, max_len=MAX_LENGTH, sos_token=SOS_token)

        # Decode all three id sequences back to text with the matching vocabulary
        text = indices_to_sentence(input_tensor, input_lang)
        reference_summary = indices_to_sentence(target_tensor, output_lang)
        generated_summary = indices_to_sentence(pred_tensor, output_lang)

        generated_data.append({
            'text': text,
            'summary': reference_summary,
            'summary_generated': generated_summary
        })

    # Checked once per batch, so a whole batch is decoded before stopping
    if len(generated_data) >= 20:
        break

# Build a DataFrame from the first 20 rows and display it
df_generated = pd.DataFrame(generated_data[:20])
df_generated.head(20)

Unnamed: 0,text,summary,summary_generated
0,Gustaf's Wine Gums have all the things I like ...,A Tasty Change of Pace,"Great taste, shame about the pieces."
1,Good idea but sadly the product does not deliv...,These things suck!,"Good price, good taste"
2,The product is delicious and was delivered in ...,angelhair,Great product
3,"Nice Aroma, very tasty and the crema....<br />...",Great Coffee,Great taste
4,I am usually not a fan of flavored coffe or co...,Surprisingly delicious,Great taste and convenient
5,Having been a professional cook for over 20 ye...,ok,Great product
6,"If I were a Sleeping Beauty, I would so want m...",Once Upon A Popchip,Great product
7,I was pleasantly surprised making our holiday ...,Works well for cookies!,Great product
8,"exactly what I ordered, shipped immediately, a...",awesome,Great product
9,"Spicy, but not ridiculously so, with a great f...",One of my Favorite Salsas,"Great taste, shame about the pieces."
