In [7]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
    

In [9]:
dtype = torch.FloatTensor
## 데이터 타입 

# Text-CNN Parameter
embedding_size = 2 # n-gram
sequence_length = 3
num_classes = 2  # 0 or 1
filter_sizes = [2, 2, 2] # n-gram window
num_filters = 3
## Text-CNN 파라미터들을 결정해 줍니다.

# 3 words sentences (=sequence_length is 3)
sentences = ["i love you", "he loves him", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
## 단어 쪼개서 중복 제거해서 리스트만든당
word_dict = {w : i for i, w in enumerate(word_list)} 
## 단어 쪼갠 것들에 인덱스 부여해서 단어를 키, 인덱스를 밸류로 갖는 딕셔너리 ㅅ애성
vocab_size = len(word_dict)
#

inputs = []
for sen in sentences:
    inputs.append(np.asarray([word_dict[n] for n in sen.split()]))
    # 한문장씩 sen 이걸 쪼갠거에서 n을 뽑고, 
    # word_dict[n] 으로 n을 넣는다. 한 워드겠지, 그럼이걸 딕셔너리에 넣으면 인덱스가 뽑아진다. 
    # 즉, ASARRAY()  는 단어들의 문장을 단어 인덱스의 리스트로 만들어주는 것이고
    # 각 문장들을 숫자배열로 바꾼 INPUT을 만들어주게 된다. 

targets = []
for out in labels:
    targets.append(out) # To using Torch Softmax Loss function
    # LABELS 는 긍정부정잉ㄴ데 하나씪 꺼내서TARGETS에 추가. 근데 걍 복사같은데 이거 왜하지 

input_batch = Variable(torch.LongTensor(inputs))
target_batch = Variable(torch.LongTensor(targets))



In [10]:
class TextCNN(nn.Module):
    def __init__(self):
        super(TextCNN, self).__init__()

        self.num_filters_total = num_filters * len(filter_sizes)
        self.W = nn.Parameter(torch.empty(vocab_size, embedding_size).uniform_(-1, 1)).type(dtype)
        self.Weight = nn.Parameter(torch.empty(self.num_filters_total, num_classes).uniform_(-1, 1)).type(dtype)
        self.Bias = nn.Parameter(0.1 * torch.ones([num_classes])).type(dtype)

        
        
    def forward(self, X):
        embedded_chars = self.W[X] # [batch_size, sequence_length, sequence_length]
        embedded_chars = embedded_chars.unsqueeze(1) 
        # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]

        pooled_outputs = []
        for filter_size in filter_sizes:
            # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]
            conv = nn.Conv2d(1, num_filters, (filter_size, embedding_size), bias=True)(embedded_chars)
            h = F.relu(conv)
            
            # mp : ((filter_height, filter_width))
            mp = nn.MaxPool2d((sequence_length - filter_size + 1, 1))
            #mp 틀을 만들어주고
            
            # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]
            pooled = mp(h).permute(0, 3, 2, 1)
            # pooled는 만들어준 것들을 채워준다. 그리고 나서 크기를 바꿔주는거고
            pooled_outputs.append(pooled)
            
        ## 여기서 풀링된 결과가 pooled outputs로 나오게 되고@
        
        h_pool = torch.cat(pooled_outputs, len(filter_sizes)) 
        # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]
        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) 
        # [batch_size(=6), output_height * output_width * (output_channel * 3)]

        model = torch.mm(h_pool_flat, self.Weight) + self.Bias # [batch_size, num_classes]
        return model

model = TextCNN()
# 모델을 생성했고, 

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# crossentropyloss를 사용, optimizer는 아담 옵티마이져를 사용한다. 


In [13]:
pooled_outputs

NameError: name 'pooled_outputs' is not defined

In [11]:

# Training
for epoch in range(5000):
    optimizer.zero_grad()
    output = model(input_batch)

    # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()
    #backward로 가고, optimizer 다음단계로~~


Epoch: 1000 cost = 0.617534
Epoch: 2000 cost = 0.197633
Epoch: 3000 cost = 0.164820
Epoch: 4000 cost = 0.076932
Epoch: 5000 cost = 0.063234


In [12]:

# Test
test_text = 'she hate awful he '
tests = [np.asarray([word_dict[n] for n in test_text.split()])]
test_batch = Variable(torch.LongTensor(tests))

# Predict
predict = model(test_batch).data.max(1, keepdim=True)[1]
if predict[0][0] == 0:
    print(test_text,"is Bad Mean...")
else:
    print(test_text,"is Good Mean!!")

she hate awful he  is Good Mean!!
