In [None]:
import os
import torch
import torch.nn as nn
from torchtext.legacy import data, datasets 
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Fixed seeds so repeated runs start from the same random state.
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# Hyperparameters shared by the iterator and optimizer cells below.
batch_size = 20
learning_rate = 0.001

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
cpu


In [None]:
# Sentence field: sequential input, lower-cased, batches laid out batch-first.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
# Label field: one non-sequential value per example.
LABEL = data.Field(sequential=False, batch_first=True)
# fine_grained=True keeps all five sentiment labels (very negative ... very positive).
# Without it SST merges them into three, which would not match the 5-way output
# layer configured further down in this notebook.
trainset, validset, testset = datasets.SST.splits(TEXT, LABEL, fine_grained=True)

In [None]:
# Both vocabularies are built from the training split only.
LABEL.build_vocab(trainset)
# Words seen fewer than 5 times in the training split are left out of the vocabulary.
TEXT.build_vocab(trainset, min_freq=5)

In [None]:
# One bucketed iterator per split; each is shuffled and makes a single pass
# over its split (repeat=False).
train_iter, valid_iter, test_iter = data.BucketIterator.splits(
    (trainset, validset, testset),
    batch_size=batch_size,
    shuffle=True,
    repeat=False,
)

In [None]:
# Number of entries in the text vocabulary; used as the embedding table size.
vocab_size = len(TEXT.vocab)
# Five sentiment labels: very negative, negative, neutral, positive, very positive.
n_classes = 5

In [None]:
# One-line summary of split sizes, vocabulary size and number of classes.
print(
    "[TrainSet]: %d [VallidSet]: %d [TestSet]: %d [Vocab]: %d [Classes] %d"
    % (len(trainset), len(validset), len(testset), vocab_size, n_classes)
)

[TrainSet]: 8544 [VallidSet]: 1101 [TestSet]: 2210 [Vocab]: 3428 [Classes] 5


In [None]:
class BasicGRU(nn.Module):
    """GRU sequence classifier: embedding -> GRU -> dropout -> linear layer.

    Args:
        n_layers: number of stacked GRU layers.
        hidden_dim: size of the GRU hidden state.
        n_vocab: number of rows in the embedding table.
        embed_dim: size of each embedding vector.
        n_classes: number of output logits.
        dropout_p: dropout probability applied to the last GRU output.
    """

    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
        super(BasicGRU, self).__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(embed_dim, self.hidden_dim,
                          num_layers=self.n_layers,
                          batch_first=True)
        self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        """Return class logits for a batch of token-id sequences.

        Args:
            x: integer tensor of token ids indexed [batch, time]
               (the GRU is configured with batch_first=True).

        Returns:
            Tensor of shape [batch, n_classes] with unnormalised logits.
        """
        x = self.embed(x)       # [b, t] -> [b, t, e]
        x, _ = self.gru(x)      # [b, t, e] -> [b, t, h]

        # Output of the top GRU layer at the last time step.
        # NOTE(review): for padded batches the last step may be a padding token -- confirm.
        h_t = x[:, -1, :]

        # nn.Dropout returns a new tensor rather than modifying its input, so the
        # result has to be re-assigned; calling it and discarding the result
        # leaves h_t untouched and disables dropout entirely.
        h_t = self.dropout(h_t)

        out = self.out(h_t)     # [b, h] -> [b, o]
        return out

In [None]:
# Single-layer GRU classifier with 256 hidden units, 128-d embeddings and 50% dropout.
model = BasicGRU(
    n_layers=1,
    hidden_dim=256,
    n_vocab=vocab_size,
    embed_dim=128,
    n_classes=n_classes,
    dropout_p=0.5,
).to(device)

In [None]:
# Train until the validation accuracy stops improving between consecutive epochs,
# then save the model weights to Google Drive.

# The loss and optimizer are created once, outside the loop, so Adam keeps its
# running moment estimates across epochs and the learning rate stays fixed.
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# The counters are also initialised once so they carry over between epochs.
epoch = 0
accuracy_old = 0
accuracy_new = 0
accuracy_dif = 0

while True:
    accuracy_old = accuracy_new

    # ---- one training pass over the training iterator ----
    model.train()
    for batch in train_iter:
        X = batch.text.to(device)
        # Shift labels down by one so they start at 0 (presumably because the
        # label vocabulary reserves index 0 for an unknown token -- confirm).
        Y = batch.label.to(device) - 1
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

    # ---- accuracy over the whole validation iterator ----
    model.eval()
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for batch in valid_iter:
            X_test = batch.text.to(device)
            # Same label shift as in training, so predictions and targets
            # use the same index range.
            Y_test = batch.label.to(device) - 1
            prediction = model(X_test)
            n_correct += (torch.argmax(prediction, 1) == Y_test).sum().item()
            n_total += Y_test.size(0)
    # Guard against an empty validation iterator.
    accuracy_new = n_correct / n_total if n_total else 0.0
    print("Accuracy: ", accuracy_new)

    epoch += 1
    accuracy_dif = accuracy_new - accuracy_old
    # Stop once accuracy improves by less than 0.0001 over the previous epoch.
    if accuracy_dif < 0.0001:
        endstr = " 2 epoch 동안 accuracy 증가가 없어 {} epoch 학습 후 종료합니다.".format(epoch)
        print(endstr)
        break

# Leave the model in training mode, as the original loop did on exit.
model.train()

print('Learning Finished!')
torch.save(model.state_dict(), '/content/drive/MyDrive/2017732054_남유상_fin_model.pt')

Accuracy:  0.0
Accuracy:  0.05000000074505806
Accuracy:  0.0
Accuracy:  0.15000000596046448
Accuracy:  0.05000000074505806
Accuracy:  0.15000000596046448
Accuracy:  0.20000000298023224
Accuracy:  0.05000000074505806
Accuracy:  0.30000001192092896
Accuracy:  0.0
Accuracy:  0.10000000149011612
Accuracy:  0.10000000149011612
Accuracy:  0.15000000596046448
Accuracy:  0.05000000074505806
Accuracy:  0.10000000149011612
Accuracy:  0.0
Accuracy:  0.20000000298023224
Accuracy:  0.10000000149011612
Accuracy:  0.25
Accuracy:  0.0
Accuracy:  0.15000000596046448
Accuracy:  0.3499999940395355
Accuracy:  0.10000000149011612
Accuracy:  0.10000000149011612
Accuracy:  0.05000000074505806
Accuracy:  0.05000000074505806
Accuracy:  0.05000000074505806
Accuracy:  0.20000000298023224
Accuracy:  0.05000000074505806
Accuracy:  0.0
Accuracy:  0.15000000596046448
Accuracy:  0.10000000149011612
Accuracy:  0.25
Accuracy:  0.0
Accuracy:  0.15000000596046448
Accuracy:  0.05000000074505806
Accuracy:  0.30000001192092