# In [0]:
'''
  code by Minho Ryu @bzantium
  reference : https://github.com/graykode/nlp-tutorial/blob/master/2-1.TextCNN/TextCNN_Torch.ipynb
'''
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

from torch import LongTensor as LT
from torch import FloatTensor as FT

# Text-CNN hyperparameters
embedding_size = 2
sequence_length = 3  # every sentence below has exactly three words
num_classes = 2  # 0 or 1
filter_sizes = [2, 2, 2]  # n-gram window widths, one convolution branch per entry
num_filters = 3

# Toy corpus: three-word sentences, each with a sentiment label.
sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is bad.

# Sort the vocabulary so every word gets the same index on every run.
# Iterating a bare set of strings depends on hash randomisation, which would
# silently change the word -> index mapping between interpreter sessions.
word_list = sorted(set(" ".join(sentences).split()))
word_dict = {w: i for i, w in enumerate(word_list)}
vocab_size = len(word_dict)

# One array of word indices per sentence.
inputs = [np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences]

# Class indices (not one-hot vectors), as expected by nn.CrossEntropyLoss.
targets = list(labels)

# Stack into a single [num_sentences, sequence_length] array first; building a
# tensor straight from a list of ndarrays goes element by element.
input_batch = torch.tensor(np.stack(inputs), dtype=torch.long)
target_batch = torch.tensor(targets, dtype=torch.long)


class TextCNN(nn.Module):
    """Text classifier built from parallel 1-D convolutions over word embeddings.

    Token ids are embedded, every convolution in ``convs`` slides an n-gram
    window over the sequence, each ReLU-activated feature map is max-pooled,
    and the concatenated pooled features feed a linear layer that produces
    the class logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        sequence_length: Number of tokens per input sequence. It sizes each
            pooling window as ``sequence_length - filter_size + 1`` so that a
            feature map of that length collapses to a single value.
        embedding_size: Dimension of each word embedding.
        filter_sizes: Convolution kernel widths; one branch per entry.
        num_filters: Output channels of each convolution branch.
        num_classes: Number of logits produced per input sequence.
    """

    def __init__(self, vocab_size, sequence_length, embedding_size,
                 filter_sizes, num_filters, num_classes):
        super().__init__()
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.embedding_size = embedding_size
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.num_classes = num_classes

        # Width of the concatenated feature vector fed to the classifier.
        self.num_filters_total = self.num_filters * len(self.filter_sizes)

        # Sub-modules are created in the same order as before so that a fixed
        # random seed yields the same initial weights.
        self.embeddings = nn.Embedding(self.vocab_size, self.embedding_size)
        self.linear = nn.Linear(self.num_filters_total, self.num_classes)
        self.convs = nn.ModuleList([
            nn.Conv1d(self.embedding_size, self.num_filters, filter_size)
            for filter_size in self.filter_sizes
        ])
        self.maxpools = nn.ModuleList([
            nn.MaxPool1d(self.sequence_length - filter_size + 1)
            for filter_size in self.filter_sizes
        ])

    def forward(self, X):
        """Compute class logits for a batch of token-id sequences.

        Args:
            X: Integer tensor of shape [batch_size, sequence_length].

        Returns:
            Tensor of shape [batch_size, num_classes] holding unnormalised
            class scores.
        """
        embedded_chars = self.embeddings(X)  # [batch_size, sequence_length, embedding_size]
        # Conv1d expects channels first: [batch_size, embedding_size, sequence_length]
        embedded_chars = embedded_chars.permute(0, 2, 1)

        pooled_outputs = []
        for conv, maxpool in zip(self.convs, self.maxpools):
            h = F.relu(conv(embedded_chars))  # [batch_size, num_filters, sequence_length - k + 1]
            pooled = maxpool(h).permute(0, 2, 1)  # [batch_size, 1, num_filters]
            pooled_outputs.append(pooled)

        # Always concatenate along the feature axis. Deriving the axis from
        # the number of filter sizes only happens to be right for three
        # branches and is out of range for four or more.
        h_pool = torch.cat(pooled_outputs, dim=-1)  # [batch_size, 1, num_filters_total]
        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total])  # [batch_size, num_filters_total]
        logits = self.linear(h_pool_flat)
        return logits

# Build the network from the hyperparameters defined above.
model = TextCNN(
    vocab_size=vocab_size,
    sequence_length=sequence_length,
    embedding_size=embedding_size,
    filter_sizes=filter_sizes,
    num_filters=num_filters,
    num_classes=num_classes,
)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training: full-batch gradient steps over the whole toy corpus.
for epoch in range(5000):
    optimizer.zero_grad()

    # output: [batch_size, num_classes]; target_batch: [batch_size] class
    # indices (LongTensor, not one-hot).
    output = model(input_batch)
    loss = criterion(output, target_batch)
    loss.backward()
    optimizer.step()

    # Report every 1000th epoch. The loss tensor was computed before this
    # epoch's parameter update, so the printed value is the pre-step cost.
    if epoch % 1000 == 999:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    
# Test: encode unseen word combinations with the training vocabulary.
# Every word must already be a key of word_dict; an unknown word raises KeyError.
test_text = ['sorry hate you', 'you love me']
tests = [np.asarray([word_dict[n] for n in text.split()]) for text in test_text]
test_batch = torch.tensor(np.stack(tests), dtype=torch.long)

# Predict: take the highest-scoring class per sentence. No gradients are
# needed for inference, so skip building the autograd graph instead of
# reaching into `.data`.
with torch.no_grad():
    result = model(test_batch).argmax(dim=1)

for i, text in enumerate(test_text):
    if result[i] == 0:
        print("'" + text + "'", "is bad :(")
    else:
        print("'" + text + "'", "is good :)")

# Example output (exact costs vary from run to run because no random seed is set):
# Epoch: 1000 cost = 0.003092
# Epoch: 2000 cost = 0.000566
# Epoch: 3000 cost = 0.000192
# Epoch: 4000 cost = 0.000079
# Epoch: 5000 cost = 0.000037
# 'sorry hate you' is bad :(
# 'you love me' is good :)
