In [None]:
# from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
from modules.util import load_pkl
# Load the pre-processed dataset from disk. Later cells read the
# "padded_token" and "sentiment" columns from this object.
# NOTE(review): load_pkl presumably unpickles the file -- unpickling can execute
# arbitrary code, so only point this at a trusted, locally produced file.
IMDB_512 = load_pkl("./database/embedded_pad_token.pkl")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.dataset import random_split

# Seed PyTorch's global random number generator.
torch.manual_seed(42)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# ---- Hyperparameters ----
# Optimisation settings.
LEARNING_RATE = 1e-3
N_EPOCHS = 25
BATCH_SIZE = 256

# Model dimensions (used as the default constructor arguments of the model).
VOCAB_SIZE = 28996
EMBED_SIZE = 128
HIDDEN_SIZE = 64
HIDDEN_LAYER_DEPTH = 1
OUTPUT_DIM = 1

In [None]:
# Keep only the two columns needed for training.
IMDB_trainset = IMDB_512.loc[:, ["padded_token", "sentiment"]]

# Inputs become an int32 tensor; labels become a float32 tensor with an extra
# trailing axis of size 1.
# NOTE(review): assumes every "padded_token" entry has the same length so the
# nested list forms a rectangular 2-D tensor -- confirm upstream padding.
x_train = torch.tensor(IMDB_trainset["padded_token"].tolist(), dtype=torch.int32)
y_train = torch.tensor(IMDB_trainset["sentiment"].tolist(), dtype=torch.float32)[:, None]

# Unpack the two dimensions of x_train and display them as the cell output.
total_sample_size, content_length = x_train.size()
total_sample_size, content_length

(47745, 512)

In [None]:
# Wrap the tensors in a DataLoader so the loss can be back-propagated one
# shuffled mini-batch at a time.
dataset = TensorDataset(x_train, y_train)
dataloader = DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

In [None]:
class SentimentAnalysisRNN(nn.Module):
    """Sequence classifier: embedding -> RNN -> linear -> sigmoid.

    The forward pass returns sigmoid-activated values in (0, 1), which is the
    input range required by ``nn.BCELoss``.

    Args:
        vocab_dim: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (RNN input size).
        hidden_dim: Size of the RNN hidden state.
        layer_depth: Number of stacked RNN layers.
        output_dim: Number of output units of the final linear layer.
        device: Stored on the instance as ``self.device``; the constructor
            itself does not move any parameters.
    """

    def __init__(
        self,
        vocab_dim=VOCAB_SIZE,
        embedding_dim=EMBED_SIZE,
        hidden_dim=HIDDEN_SIZE,
        layer_depth=HIDDEN_LAYER_DEPTH,
        output_dim=OUTPUT_DIM,
        device=device,
    ) -> None:
        super().__init__()

        self.embed = nn.Embedding(vocab_dim, embedding_dim)
        # batch_first=True: the RNN consumes and produces (batch, seq, feature).
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=layer_depth, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()  # squashes the linear output into (0, 1)

        self.hidden_dim = hidden_dim
        self.layer_depth = layer_depth
        self.device = device

    def forward(self, x):
        """Run the network on a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, indexed as (batch, sequence).

        Returns:
            Tensor of shape (batch, output_dim) holding sigmoid outputs.
        """
        embed = self.embed(x)
        # Only the per-step outputs are needed; the final hidden state is unused.
        y_t_list, _ = self.rnn(embed)
        # Use the RNN output at the last sequence position.
        # NOTE(review): if sequences are right-padded, this position corresponds
        # to a padding token -- confirm the padding side used upstream.
        y_t = y_t_list[:, -1, :]
        out = self.fc(y_t)
        # Apply the sigmoid that __init__ constructs; previously the raw linear
        # output was returned, which is outside the [0, 1] range BCELoss needs.
        return self.sigmoid(out)


# Training setup: build the model, the loss, and the optimizer, and place the
# model and loss modules on the selected device.
model = SentimentAnalysisRNN(device=device)
model = model.to(device)

# nn.BCELoss expects its input to already be a probability in [0, 1].
loss_fn = nn.BCELoss()
loss_fn = loss_fn.to(device)

optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# NOTE: disabled smoke-test loop, kept for reference only. As written it would
# run one optimisation step on the first mini-batch, print the loss, the
# predictions and the targets, and then stop (the inner `break` fires at
# batch_idx 0 and the outer `break` ends the first epoch).
# `tqdm` is not imported in the cells shown here; import it before re-enabling.
# `loss_obs` is created but never appended to.
# loss_obs = []
# for i in tqdm(range(100)):
#     for batch_idx, samples in enumerate(dataloader):
#         x_batch,y_batch = samples
#         x_batch = x_batch.to(device)
#         y_batch = y_batch.to(device)
        
#         optimizer.zero_grad()
#         predicted = model(x_batch)
        
#         loss = loss_fn(predicted,y_batch.float())
#         loss.backward()
#         optimizer.step()
        
#         if batch_idx%15==0:
#             print(loss,predicted,y_batch)
#             break
        

#     break