In [1]:
import torch
import torch.nn as nn

model = torch.nn.RNN(300, 512)

In [2]:
print(torch.__version__)

2.6.0+cu124


In [31]:
from fastai.text.all import *

# Recurrent Neural Networks(RNN)

In [34]:
from datasets import load_dataset

ds = load_dataset("stanfordnlp/imdb")

In [35]:
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [5]:
class RNNCell(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(RNNCell, self).__init__()     # nn.Module의 __init__ 호출

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.ih = nn.Linear(input_size, hidden_size)
        self.hh = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, h = None):
        if h is None:
            h = torch.zeros(x.size(0), self.hidden_size)
        h = torch.tanh(self.ih(x) + self.hh(h))
        return h

In [6]:
class RNN(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(RNN, self).__init__()
        self.cell = RNNCell(input_size, hidden_size)

    def forward(self, x, h = None):

        print(x.shape)
        for i in range(x.shape[1]):
            h = self.cell(x[:,i], h)

        return h

In [None]:
class TextClassifier(nn.Module):

    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = RNN(hidden_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 1)

    def forward(self, x):

        x = self.emb(x)
        x = self.rnn(x)
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [38]:
class_names = ds['train'].features['label'].names
class_names

['neg', 'pos']

In [45]:
dls.show_batch()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# DatasetDict -> DataBlock (참고 : https://barisdeniz.is-a.dev/posts/fastai-huggingface-datasets/)
def nosplit(items): 
    return list(range(len(items))), []

dblock = DataBlock(
    blocks=(TextBlock(tok_tfm=None), CategoryBlock),
    get_x=lambda record: record['text'],
    get_y=lambda record: class_names[record['label']],
    splitter = nosplit,
)

train_dl = dblock.dataloaders(ds['train']).train
test_dl = dblock.dataloaders(ds['test']).train
dls = DataLoaders(train_dl, test_dl)

dls.show_batch()

AttributeError: 'list' object has no attribute 'truncate'

In [42]:
learn = Learner(dls, TextClassifier(5, 100),
                loss_func=CrossEntropyLossFlat(),
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time


torch.Size([64, 13704, 100])


RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`

In [None]:
# torch의 nn.RNN 사용할 경우
class TextClassifier(nn.Module):

    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):

        x = self.emb(x)
        _, x = self.rnn(x)
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [None]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(),
                metrics=accuracy)
learn.fit(10)

# Bidirectional RNN

In [None]:
class TextClassifier(nn.Module):

    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size,
                          bidirectional=True, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * 2, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):

        x = self.emb(x)
        _, x = self.rnn(x)
        x = torch.cat((x[0], x[1]), dim=-1)
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [None]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(),
                metrics=accuracy)
learn.fit(10)

# Long Short-Term Memory(LSTM)

In [None]:
class TextClassifier(nn.Module):

    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):

        x = self.emb(x)
        x, _ = self.rnn(x)[1]
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [None]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(),
                metrics=accuracy)
learn.fit(10)

# Gated Recurrent Units(GRU)

In [None]:
class TextClassifier(nn.Module):

    def __init__(self, vocab_size, hidden_size):
        super(TextClassifier, self).__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):

        x = self.emb(x)
        _, x = self.rnn(x)
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [None]:
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(),
                metrics=accuracy)
learn.fit(10)