In [1]:
import pickle # 데이터 불러오는 용도 (벡터 리스트, 전처리 자료들 불러오는 용도)
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

In [2]:
# Load the sound-vector list and the token dictionary.
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The pickled vector list becomes a float tensor placed on the chosen device.
with open("sound_vector_list.pkl", mode="rb") as f:
    vectors = torch.tensor(
        pickle.load(f),
        dtype=torch.float,
        device=device,
    )

# Dictionary that is looked up below with the special-token strings.
with open("sound_dict.pkl", mode="rb") as f:
    sound_dict = pickle.load(f)

print(vectors.shape)

# Dictionary entries for the padding, start-of-sequence and end-of-sequence tokens.
pad_idx, sos_idx, eos_idx = (
    sound_dict[token] for token in ("<pad>", "<sos>", "<eos>")
)

torch.Size([11181, 76])


In [3]:
# 데이터 불러오기

def dataload(x_file_name, t_file_name, batch_size, shuffle, device):
    """Build a DataLoader of (input, target) pairs from two pickle files.

    Args:
        x_file_name: Path to the pickle file with the input samples; the
            unpickled object is converted to a float tensor.
        t_file_name: Path to the pickle file with the targets; the unpickled
            object is converted to a long tensor.
        batch_size: Number of samples per batch.
        shuffle: Whether the DataLoader reshuffles the samples on every pass.
        device: Device on which both tensors are created (e.g. "cpu", "cuda").

    Returns:
        A DataLoader whose batches unpack as ``x_batch, t_batch``.

    Raises:
        ValueError: If the two files hold a different number of samples.
    """
    # NOTE(security): pickle.load can execute arbitrary code while unpickling;
    # only point this function at files you created or otherwise trust.
    with open(x_file_name, mode="rb") as f:
        tensor_x = torch.tensor(pickle.load(f), dtype=torch.float, device=device)
        print(tensor_x.shape)
    with open(t_file_name, mode="rb") as f:
        tensor_t = torch.tensor(pickle.load(f), dtype=torch.long, device=device)
        print(tensor_t.shape)

    # zip() stops at the shorter input, so a mismatch would silently drop
    # samples; fail loudly instead.
    if tensor_x.shape[0] != tensor_t.shape[0]:
        raise ValueError(
            f"sample count mismatch: {x_file_name} has {tensor_x.shape[0]} samples "
            f"but {t_file_name} has {tensor_t.shape[0]}"
        )

    return DataLoader(list(zip(tensor_x, tensor_t)), batch_size=batch_size, shuffle=shuffle)

# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 100

# Training samples are reshuffled on every pass over the loader.
train_dataloader = dataload("data/sound_data_train.pkl", "data/text_data_train.pkl", BATCH_SIZE, True, device)
# The test split is only used to accumulate an overall accuracy, which does not
# depend on sample order, so keep it unshuffled: evaluation stays deterministic
# and does not draw from the global random number generator.
test_dataloader = dataload("data/sound_data_test.pkl", "data/text_data_test.pkl", BATCH_SIZE, False, device)

# Number of batches per pass for each split.
print(len(train_dataloader))
print(len(test_dataloader))

torch.Size([10002, 196, 64])
torch.Size([10002, 6])
torch.Size([1250, 182, 64])
torch.Size([1250, 6])
101
13


In [4]:
# Load the pre-built neural network classes (Encoder, Attention, Decoder).

from NN import Encoder
from NN import Attention
from NN import Decoder

In [5]:

# ---- Model, loss and optimizer setup ----
# NOTE(review): 64 presumably matches the last dimension of the sound inputs — confirm against NN.Encoder.
encoder = Encoder(
    64,
    vectors.shape[1],
    num_layers=2,
    dropout_p=0.1,
    bidirectional=True,
)
decoder = Decoder(
    vectors,
    vectors.shape[1],
    num_layers=2,
    dropout_p=0.1,
    bidirectional=True,
    max_len=6,
    pad_idx=pad_idx,
    sos_idx=sos_idx,
)
# Move both networks onto the selected device (GPU when available).
encoder.to(device)
decoder.to(device)

# ignore_index: positions whose target equals pad_idx are left out of the loss,
# so they contribute no gradient.
loss_function = nn.CrossEntropyLoss(ignore_index=pad_idx)
encoder_optim = torch.optim.Adam(encoder.parameters(), lr=0.01)
decoder_optim = torch.optim.Adam(decoder.parameters(), lr=0.02)

epoch = 100   # maximum number of epochs
prev_acc = 0  # best test accuracy seen so far
cnt = 0       # consecutive epochs without a sufficient accuracy gain

for e in range(epoch):
    # ---- Training pass ----
    encoder.train()  # enable dropout
    decoder.train()
    loss_sum = 0
    for x, t in train_dataloader:
        # Clear the gradients left over from the previous step.
        decoder_optim.zero_grad()
        encoder_optim.zero_grad()
        # Forward pass: the encoder outputs and states feed the decoder.
        y, h, c = encoder(x)
        y, h, c = decoder(y, h, c)
        # Loss: collapse every leading dimension so each position is one classification.
        loss = loss_function(
            y.reshape(-1, y.shape[-1]),
            t.reshape(-1),
        )
        loss_sum += loss.item()
        # Backward pass and parameter update.
        loss.backward()
        decoder_optim.step()
        encoder_optim.step()
    # Mean loss per batch for this epoch.
    loss_sum /= len(train_dataloader)

    # ---- Accuracy check on the test split ----
    encoder.eval()  # disable dropout
    decoder.eval()
    correct = 0
    total = 0
    # Gradients are not needed for evaluation; skipping them saves some computation.
    with torch.no_grad():
        for x, t in test_dataloader:
            # Only non-<pad> target positions take part in the comparison.
            mask = (t != pad_idx)

            y, h, c = encoder(x)
            y, h, c = decoder(y, h, c)
            hits = (y.argmax(dim=-1) == t) & mask
            correct += hits.sum().item()
            total += mask.sum().item()
    acc = correct / total

    # ---- Early stopping ----
    # An epoch counts as an improvement only when accuracy beats the best by more than 0.001.
    if acc > prev_acc + 0.001:
        cnt = 0
        prev_acc = acc
        # Checkpoint the improved models.
        torch.save(encoder, "encoder.pt")
        torch.save(decoder, "decoder.pt")
    else:
        cnt += 1
    print(f"epoch {e} | loss {loss_sum} | acc {acc} | cnt {cnt}")
    # Stop after 5 consecutive epochs without improvement.
    if cnt >= 5:
        print("train stopped")
        break


epoch 0 | loss 3.789317780201978 | acc 0.31575262543757293 | cnt 0
epoch 1 | loss 2.7851523892714245 | acc 0.40233372228704783 | cnt 0
epoch 2 | loss 2.039760982636178 | acc 0.5439906651108518 | cnt 0
epoch 3 | loss 1.4886220793912905 | acc 0.6224037339556593 | cnt 0
epoch 4 | loss 1.1296302658496518 | acc 0.6865810968494749 | cnt 0
epoch 5 | loss 0.8852840160379315 | acc 0.7176196032672112 | cnt 0
epoch 6 | loss 0.7874724084492957 | acc 0.7388564760793466 | cnt 0
epoch 7 | loss 0.6069534226588094 | acc 0.7542590431738623 | cnt 0
epoch 8 | loss 0.4897121382268644 | acc 0.784830805134189 | cnt 0
epoch 9 | loss 0.4181017959767049 | acc 0.7717619603267212 | cnt 1
epoch 10 | loss 0.718812396657644 | acc 0.7568261376896149 | cnt 2
epoch 11 | loss 0.42295567285601454 | acc 0.7722287047841306 | cnt 3
epoch 12 | loss 0.35812473518423515 | acc 0.7922987164527421 | cnt 0
epoch 13 | loss 0.31275543600025746 | acc 0.7969661610268378 | cnt 0
epoch 14 | loss 0.3109368655351129 | acc 0.80256709451575

In [6]:
# Reload the checkpoints that were saved from the GPU and store them again on the CPU.
# weights_only=False unpickles arbitrary objects rather than plain tensors, so only load trusted files.
checkpoint_paths = ("encoder.pt", "decoder.pt")

# map_location="cpu" places every stored tensor on the CPU while loading.
encoder, decoder = (
    torch.load(path, weights_only=False, map_location="cpu")
    for path in checkpoint_paths
)

# Overwrite the original files with the CPU-resident copies.
for model, path in zip((encoder, decoder), checkpoint_paths):
    torch.save(model, path)