# Natural Language Processing Assignment: Spam Filter
## Import necessary libs and datasets

In [1]:
import numpy as np
import pandas as pd
import urllib.request

import torch
from torch import nn, optim, autograd
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

urllib.request.urlretrieve("https://raw.githubusercontent.com/mohitgupta-omg/Kaggle-SMS-Spam-Collection-Dataset-/master/spam.csv", filename="spam.csv")
data = pd.read_csv('spam.csv', encoding='latin1')

In [2]:
# check data
data.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [3]:
del data['Unnamed: 2']
del data['Unnamed: 3']
del data['Unnamed: 4']

data['v1'] = data['v1'].replace(['ham','spam'],[0,1])
data['text'] = data['v2']
data['isSpam'] = data['v1']

del data['v1'], data['v2']

print(f'Data Shape: {data.shape}')
# imbalanced data
print(data['isSpam'].value_counts())
data.head()

Data Shape: (5572, 2)
0    4825
1     747
Name: isSpam, dtype: int64


Unnamed: 0,text,isSpam
0,"Go until jurong point, crazy.. Available only ...",0
1,Ok lar... Joking wif u oni...,0
2,Free entry in 2 a wkly comp to win FA Cup fina...,1
3,U dun say so early hor... U c already then say...,0
4,"Nah I don't think he goes to usf, he lives aro...",0


## train, test split
### 평가에 사용할 예정이니 트레인, 테스트 스플릿 코드는 그대로 유지시켜주세요

In [4]:
from sklearn.model_selection import train_test_split

X, y = data['text'], data['isSpam']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0,
                                                   stratify=y, test_size=0.1)

print(len(X_train), len(X_test))

5014 558


## Preprocessing
### 텍스트 전처리함수입니다.

In [5]:
import re
from nltk.corpus import stopwords
# nltk.download()

def preprocess(string: str, *args, **kwargs) -> str:
    '''
    1. remove puctuations
    2. remove stop words
    3. lower case
    '''
    # to lower case
    low_str = string.lower()
    
    # remove punctuation
    nopunc = re.sub(r'[^\w\s]', '', low_str)
    
    # remove stop words
    STOPWORDS = stopwords.words('english') + ['u', 'ü', 'ur', '4', '2', 'im', 'dont', 'doin', 'ure']
    return ' '.join([word for word in nopunc.split() if word not in STOPWORDS])

In [6]:
# Walkthrough
cleaned = X_train.apply(preprocess)
cleaned.head()

5448                              aight pick open tonight
1707                test earlier appreciate call tomorrow
2117      wish many many returns day happy birthday vikky
1357    good afternoon loverboy goes day luck come way...
787     ever thought living good life perfect partner ...
Name: text, dtype: object

#### 앞에서 보셨다시피 raw text를 그대로 사용하기엔 무리가 있습니다.(특수기호 및 불용어 문제 등)
#### 따라서 전처리되지 않은 raw string을 전처리하는 함수를 만들어주세요. <br>
```python
preprocess('Helllllo World-!') = 'hello world'
```
<br>

#### ```re``` library를 이용해서 전처리를 쉽게 할 수 있습니다.


[re documentation](https://docs.python.org/3/library/re.html)

## Tokenizing
### 전처리된 텍스트를 토크나이징 해주는 함수입니다.
#### ```SpaCy, nltk``` 등 영어 tokenizing 라이브러리를 쓰셔도 괜찮습니다.

In [7]:
def tokenize(string: str, *args, **kwargs) -> list:
    '''
    return tokenized text as a list
    '''
    return string.split()

In [8]:
# Walkthrough
tokens = cleaned.apply(tokenize)
tokens.head()

5448                         [aight, pick, open, tonight]
1707          [test, earlier, appreciate, call, tomorrow]
2117    [wish, many, many, returns, day, happy, birthd...
1357    [good, afternoon, loverboy, goes, day, luck, c...
787     [ever, thought, living, good, life, perfect, p...
Name: text, dtype: object

<br>

Ex) 
```python
tokenize('hello world!',  *args, **kwargs) = ['hello', 'world']
```

## Build Vocabulary
### 토큰들을 이용해서 자주 등장한 순서대로 n개의 원소를 갖는 딕셔너리를 만들어주세요.

In [9]:
from collections import Counter

def build_vocab(n, tokens, *args, **kwargs):
    '''
    In custom dataset, get list of tokenized sentences as input
    padding_idx = 0
    unk_idx = 1
    '''
    # clean_txt = data.text.apply(preprocess)
    # tokens = clean_txt.apply(tokenize)
    vocab = Counter()

    for msg in tokens:
        vocab.update(msg)
    
    vocabulary = dict()
    vocabulary['padding_idx'] = 0
    vocabulary['unk_idx'] = 1
    
    for i in range(n-2):
        vocabulary[vocab.most_common()[i][0]] = i+2
    return vocabulary

In [10]:
# Walkthrough
vocab = build_vocab(5000, tokens)
vocab

{'padding_idx': 0,
 'unk_idx': 1,
 'call': 2,
 'get': 3,
 'go': 4,
 'free': 5,
 'ok': 6,
 'ltgt': 7,
 'know': 8,
 'got': 9,
 'like': 10,
 'ill': 11,
 'good': 12,
 'come': 13,
 'time': 14,
 'day': 15,
 'love': 16,
 'send': 17,
 'want': 18,
 'text': 19,
 'one': 20,
 'going': 21,
 'need': 22,
 'txt': 23,
 'r': 24,
 'home': 25,
 'lor': 26,
 'still': 27,
 'see': 28,
 'back': 29,
 'sorry': 30,
 'today': 31,
 'stop': 32,
 'tell': 33,
 'n': 34,
 'mobile': 35,
 'reply': 36,
 'later': 37,
 'new': 38,
 'hi': 39,
 'think': 40,
 'well': 41,
 'please': 42,
 'da': 43,
 'cant': 44,
 'phone': 45,
 'take': 46,
 'week': 47,
 'night': 48,
 'oh': 49,
 'ì_': 50,
 'hey': 51,
 'happy': 52,
 'great': 53,
 'much': 54,
 'dear': 55,
 'pls': 56,
 'claim': 57,
 'hope': 58,
 'way': 59,
 'make': 60,
 'work': 61,
 'give': 62,
 'thats': 63,
 'wat': 64,
 'right': 65,
 'already': 66,
 'say': 67,
 'prize': 68,
 'ask': 69,
 'said': 70,
 'number': 71,
 'yes': 72,
 '1': 73,
 'yeah': 74,
 'c': 75,
 'message': 76,
 'tomorrow':

<br>

Ex) 
```python
vocab = build_vocab(4, *args, **kwargs)
vocab = {'padding_idx': 0, 'unk_idx': 1, 'hello': 2, 'world': 3}
```

#### 여기서 ```padding_idx```는 패딩에 쓰이는 인덱스, ```unk_idx```는 unknown token을 의미합니다.

### toTensor
#### 토큰들을 텐서로 바꿔주는 함수입니다.

In [11]:
def toTensor(max_length, tokens, vocab, labels, *args, **kwargs):
    # Tokenize & Vectorize sequences
    vectorized_seqs = []
    for msg in tokens:
        vec = [0] * len(msg)
        for i, token in enumerate(msg):
            if token in vocab.keys():
                vec[i] = vocab[token]
            else:
                vec[i] = vocab['unk_idx']
        vectorized_seqs.append(vec)

    # 전처리 과정에서 seq_length = 0 되는 것들 제거
    # e.g. pd.Series(vec_seq)[pd.Series(vec_seq).apply(sum) == 0]
    label_tensor = torch.tensor(np.array(labels))
    idx = pd.Series(vectorized_seqs)[pd.Series(vectorized_seqs).apply(sum) != 0].index
    vectorized_seqs = pd.Series(vectorized_seqs)[idx]
    label_tensor = pd.Series(label_tensor)[idx]
    vectorized_seqs = vectorized_seqs.tolist()
    label_tensor = torch.tensor(np.array(label_tensor))
    
    # Save the lengths of sequences
    seq_lengths = torch.LongTensor(list(map(len, vectorized_seqs)))
    
    
    # Add padding(0)
    seq_tensor = Variable(torch.zeros((len(vectorized_seqs), max_length))).long()
    for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
        
        if seqlen <= max_length:
            seq_tensor[idx, :seqlen] = torch.LongTensor(seq)
        else:
            seq_tensor[idx, ] = torch.LongTensor(seq[:max_length])
   
    return seq_tensor, seq_lengths, label_tensor

In [12]:
# Walkthrough
seq_tensor, seq_lengths, label_tensor = toTensor(100, tokens, vocab, y_train)
print(len(seq_tensor), len(seq_lengths), len(label_tensor))

5006 5006 5006


시퀀스의 max length가 5일 때 다음과 같습니다.
<br>

Ex)
```python
toTensor(5, ['hello', 'world!', 'yonsei']) = torch.LongTensor([2, 3, 1, 0, 0])
```

여기서 ```yonsei``` 단어는 아까 만든 단어장(vocab)에 포함되지 않은 단어로 ```unk_idx```로 처리됩니다.

### 위의 함수들을 이용하고 적절한 코드 및 parameter를 적용해서 
### MailDataset과 train에 쓸 DataLoader를 만들어주세요.

In [13]:
# train/test, input/label을 앞서 나눠놓았으므로 shuffle은 하지 않는걸로,,, 귀찮,,,
import torch.utils.data.sampler as splr

class CustomDataLoader(object):
    def __init__(self, seq_tensor, seq_lengths, label_tensor, batch_size):
        self.batch_size = batch_size
        self.seq_tensor = seq_tensor
        self.seq_lengths = seq_lengths
        self.label_tensor = label_tensor
        self.sampler = splr.BatchSampler(splr.RandomSampler(self.label_tensor), self.batch_size, False)
        self.sampler_iter = iter(self.sampler)
        
    def __iter__(self):
        self.sampler_iter = iter(self.sampler) # reset sampler iterator
        return self

    def _next_index(self):
        return next(self.sampler_iter) # may raise StopIteration

    def __next__(self):
        index = self._next_index()

        subset_seq_tensor = self.seq_tensor[index]
        subset_seq_lengths = self.seq_lengths[index]
        subset_label_tensor = self.label_tensor[index]

        # order by length to use pack_padded_sequence()
        subset_seq_lengths, perm_idx = subset_seq_lengths.sort(0, descending=True)
        subset_seq_tensor = subset_seq_tensor[perm_idx]
        subset_label_tensor = subset_label_tensor[perm_idx]

        return subset_seq_tensor, subset_seq_lengths, subset_label_tensor

    def __len__(self):
        return len(self.sampler)

In [14]:
batch_size = 32
max_length = 100

train_cleaned, test_cleaned = X_train.apply(preprocess), X_test.apply(preprocess)

train_tokens, test_tokens = train_cleaned.apply(tokenize), test_cleaned.apply(tokenize)

vocab = build_vocab(5000, X.apply(preprocess).apply(tokenize))

train_seq_tensor, train_seq_lengths, train_label = toTensor(max_length, train_tokens, vocab, y_train)
test_seq_tensor, test_seq_lengths, test_label = toTensor(max_length, test_tokens, vocab, y_test)

train_loader = CustomDataLoader(train_seq_tensor, train_seq_lengths, train_label, batch_size)
test_loader = CustomDataLoader(test_seq_tensor, test_seq_lengths, test_label, batch_size)

train_size = len(train_loader.seq_tensor)
test_size = len(test_loader.seq_tensor)

In [15]:
# sanity check
print(len(train_loader.seq_tensor), len(train_loader.seq_lengths), len(train_loader.label_tensor))

5006 5006 5006


### 훈련 인스턴스를 사용해서 train 함수를 통해 training을 해주시고,
### eval 함수를 통해 40개의 test example에 대해서 accuracy를 측정해주세요.
### 함수 및 클래스 signature와 내부 코드는 적절히 알아서 짜주시면 됩니다.

In [16]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)

print(device)

cpu


In [17]:
class SpamClassifier(nn.Module):
    '''
    vanila LSTM
    '''
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_size, n_layers,\
                 drop_lstm=0.1, drop_out = 0.1):

        super().__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        
        # embedding 
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # LSTM layers
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, 
                            dropout=drop_lstm, batch_first=True)
        
        # dropout layer
        self.dropout = nn.Dropout(drop_out)
        
        # linear and sigmoid layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()
        

    def forward(self, x, seq_lengths):

        # embeddings
        embedded_seq_tensor = self.embedding(x)
                
        # pack, remove pads
        packed_input = pack_padded_sequence(embedded_seq_tensor, seq_lengths.cpu().numpy(), batch_first=True)
        
        # lstm
        packed_output, (ht, ct) = self.lstm(packed_input, None)
          # https://pytorch.org/docs/stable/_modules/torch/nn/modules/rnn.html
          # If `(h_0, c_0)` is not provided, both **h_0** and **c_0** default to zero

        # unpack, recover padded sequence
        output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
       
        # collect the last output in each batch
        # last_idxs = (input_sizes - 1).to(device)
        last_idxs = input_sizes - torch.ones_like(input_sizes)
        output = torch.gather(output, 1, last_idxs.view(-1, 1).unsqueeze(2).repeat(1, 1, self.hidden_dim)).squeeze() # [batch_size, hidden_dim]
        
        # dropout and fully-connected layer
        output = self.dropout(output)
        output = self.fc(output).squeeze()
               
        # sigmoid function
        output = self.sig(output)
        
        return output

In [18]:
class SpamClassifier2(nn.Module):
    '''
    Bi-LSTM
    '''
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_size, n_layers,
                 drop_lstm=0.1, drop_out = 0.1):

        super().__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        
        # embedding 
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # LSTM layers
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, 
                            dropout=drop_lstm, bidirectional=True, batch_first=True)
        
        # dropout layer
        self.dropout = nn.Dropout(drop_out)
        
        # linear and sigmoid layers
        self.fc = nn.Linear(hidden_dim*2, output_size)
        self.sig = nn.Sigmoid()
        

    def forward(self, x, seq_lengths):

        # embeddings
        embedded_seq_tensor = self.embedding(x)
                
        # pack, remove pads
        packed_input = pack_padded_sequence(embedded_seq_tensor, seq_lengths.cpu().numpy(), batch_first=True)
        # [num_layers*2, batch_size, hidden_dim]
        (h_0, c_0) = (Variable(torch.zeros(self.n_layers*2, x.size(0), self.hidden_dim)),
                      Variable(torch.zeros(self.n_layers*2, x.size(0), self.hidden_dim)))
        # lstm
        packed_output, (ht, ct) = self.lstm(packed_input, (h_0, c_0))
        # https://pytorch.org/docs/stable/_modules/torch/nn/modules/rnn.html
        # If `(h_0, c_0)` is not provided, both **h_0** and **c_0** default to zero

        # unpack, recover padded sequence
        output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
       
        # collect the last output in each batch
        # last_idxs = (input_sizes - 1).to(device)
        last_idxs = input_sizes - torch.ones_like(input_sizes)
        # print(last_idxs.size())
        # print(last_idxs.view(-1, 1).size())
        # print(last_idxs.view(-1, 1).unsqueeze(2).size())
        # print(last_idxs.view(-1, 1).unsqueeze(2).repeat(1, 1, self.hidden_dim).size())
        # print(last_idxs.view(-1, 1).unsqueeze(2).repeat(1, 1, self.hidden_dim).squeeze().size())
        # output = torch.gather(output, 1, last_idxs.view(-1, 1).unsqueeze(2).repeat(1, 1, self.hidden_dim)).squeeze() # [batch_size, hidden_dim]
        output = torch.gather(output, 1, last_idxs.view(-1, 1).unsqueeze(2).repeat(1, 1, 2*self.hidden_dim)).squeeze()
        # print(output.size())
        
        # dropout and fully-connected layer
        output = self.dropout(output)
        output = self.fc(output).squeeze()
               
        # sigmoid function
        output = self.sig(output)
        
        return output

In [27]:
def train(model, loss_criterion, optimizer, epochs):
    '''
    
    '''
    clip = 5 # gradient clipping
    history = []
    best_acc = 0.0

    for epoch in range(epochs):
        print("Epoch: {}/{}".format(epoch+1, epochs))
        
        # Set to training mode
        model.train()
        
        # Loss and Accuracy within the epoch
        train_loss = 0.0
        train_acc = 0.0
        i = 0
        
        for seq_tensor, seq_tensor_lengths, label in iter(train_loader):
            i += 1
            
            seq_tensor = seq_tensor.to(device)
            seq_tensor_lengths = seq_tensor_lengths.to(device)
            label = label.to(device)
 
            # get the output from the model
            output = net(seq_tensor, seq_tensor_lengths)
    
            # get the loss and backprop
            loss = loss_criterion(output, label.float())
            optimizer.zero_grad() 
            loss.backward()
        
            # prevent the exploding gradient
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()
            
            # Compute the total loss for the batch and add it to train_loss
            train_loss += loss.item() * seq_tensor.size(0)
            
            # Compute the accuracy
            binary_output = (output >= 0.5).short() # short(): torch.int16
            correct_counts = torch.eq(binary_output, label)
            
            # Convert correct_counts to float and then compute the mean
            acc = torch.mean(correct_counts.type(torch.FloatTensor))
            
            # Compute total accuracy in the whole batch and add to train_acc
            train_acc += acc.item() * seq_tensor.size(0)
            
            if i%20==0:
                print("Batch number: {}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item()))
        
        # Find average training loss and training accuracy
        avg_train_loss = train_loss/train_size
        avg_train_acc = train_acc/train_size
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%".format(epoch+1, avg_train_loss, avg_train_acc*100))

In [20]:
def eval(model, criterion):
    test_losses = []
    sums = []
    sizes = []

    model.eval()
    
    test_losses = []
    for seq_tensor, seq_tensor_lengths, label in iter(test_loader):
        seq_tensor = seq_tensor.to(device)
        seq_tensor_lengths = seq_tensor_lengths.to(device)
        label = label.to(device)
        output = net(seq_tensor, seq_tensor_lengths)
        
        # losses
        test_loss = criterion(output, label.float())     
        test_losses.append(test_loss.item())

        # accuracy
        binary_output = (output >= 0.5).short() # short(): torch.int16
        right_or_not = torch.eq(binary_output, label)
        sums.append(torch.sum(right_or_not).float().item())
        sizes.append(right_or_not.shape[0])

    accuracy = np.sum(sums) / np.sum(sizes)
    print("Test Loss: {:.6f}\t".format(np.mean(test_losses)),
          "Accuracy: {:.3f}".format(accuracy))

## Execution

In [28]:
# Instantiate the model w/ hyperparams
vocab_size = len(vocab)
embedding_dim = 100 # int(vocab_size ** 0.25) # 8
hidden_dim = 8
output_size = 1
n_layers = 2

# LSTM
net = SpamClassifier(vocab_size, embedding_dim, hidden_dim,
                     output_size, n_layers, 0.2, 0.5)
net = net.to(device)
print(net)
print()

# Bi-LSTM
net2 = SpamClassifier2(vocab_size, embedding_dim, hidden_dim,
                     output_size, n_layers, 0.2, 0.5)
net2 = net2.to(device)
print(net2)

SpamClassifier(
  (embedding): Embedding(5000, 100)
  (lstm): LSTM(100, 8, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=8, out_features=1, bias=True)
  (sig): Sigmoid()
)

SpamClassifier2(
  (embedding): Embedding(5000, 100)
  (lstm): LSTM(100, 8, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=16, out_features=1, bias=True)
  (sig): Sigmoid()
)


In [29]:
# loss and optimization functions
epochs = 10
lr=0.03

In [30]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

train(net, criterion, optimizer, epochs)

Epoch: 1/10
Batch number: 20, Training: Loss: 0.1635, Accuracy: 0.9688
Batch number: 40, Training: Loss: 0.3579, Accuracy: 0.8438
Batch number: 60, Training: Loss: 0.1673, Accuracy: 0.9375
Batch number: 80, Training: Loss: 0.2966, Accuracy: 0.9062
Batch number: 100, Training: Loss: 0.0718, Accuracy: 1.0000
Batch number: 120, Training: Loss: 0.1166, Accuracy: 0.9688
Batch number: 140, Training: Loss: 0.0209, Accuracy: 1.0000
Epoch : 001, Training: Loss: 0.2296, Accuracy: 91.8098%
Epoch: 2/10
Batch number: 20, Training: Loss: 0.0104, Accuracy: 1.0000
Batch number: 40, Training: Loss: 0.0058, Accuracy: 1.0000
Batch number: 60, Training: Loss: 0.0345, Accuracy: 1.0000
Batch number: 80, Training: Loss: 0.2216, Accuracy: 0.9688
Batch number: 100, Training: Loss: 0.1836, Accuracy: 0.8750
Batch number: 120, Training: Loss: 0.1081, Accuracy: 0.9688
Batch number: 140, Training: Loss: 0.0155, Accuracy: 1.0000
Epoch : 002, Training: Loss: 0.0683, Accuracy: 98.1023%
Epoch: 3/10
Batch number: 20, Tr

In [31]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net2.parameters(), lr=lr)

train(net2, criterion, optimizer, epochs)

Epoch: 1/10
Batch number: 20, Training: Loss: 0.0140, Accuracy: 1.0000
Batch number: 40, Training: Loss: 0.0124, Accuracy: 1.0000
Batch number: 60, Training: Loss: 0.0067, Accuracy: 1.0000
Batch number: 80, Training: Loss: 0.0990, Accuracy: 0.9375
Batch number: 100, Training: Loss: 0.2961, Accuracy: 0.9062
Batch number: 120, Training: Loss: 0.0059, Accuracy: 1.0000
Batch number: 140, Training: Loss: 0.0280, Accuracy: 1.0000
Epoch : 001, Training: Loss: 0.0412, Accuracy: 98.7615%
Epoch: 2/10
Batch number: 20, Training: Loss: 0.0062, Accuracy: 1.0000
Batch number: 40, Training: Loss: 0.0117, Accuracy: 1.0000
Batch number: 60, Training: Loss: 0.0178, Accuracy: 1.0000
Batch number: 80, Training: Loss: 0.0089, Accuracy: 1.0000
Batch number: 100, Training: Loss: 0.0596, Accuracy: 0.9688
Batch number: 120, Training: Loss: 0.1348, Accuracy: 0.9375
Batch number: 140, Training: Loss: 0.0874, Accuracy: 0.9688
Epoch : 002, Training: Loss: 0.0398, Accuracy: 98.8813%
Epoch: 3/10
Batch number: 20, Tr

In [32]:
print("Using LSTM")
eval(net, criterion)
print("Using Bi-LSTM")
eval(net2, criterion)

Using LSTM
Test Loss: 0.096052	 Accuracy: 0.978
Using Bi-LSTM
Test Loss: 0.104543	 Accuracy: 0.978


**More to Improve...**  
- activation function : `nn.Sigmoid()`외에 `F.log_softmax()` 등 사용 가능  

- validation set 나눠서 가장 성능 좋은 모델 저장해두기  

- epoch 수 늘리기, dropout rate 달리하기  

- loss function : `BCELoss`외에 `NLLLoss`, `CrossEntropyLoss` 등 사용 가능