In [2]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

In [3]:
'''
  code by Tae Hwan Jung(Jeff Jung) @graykode
'''
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F


dtype = torch.FloatTensor

# Text-CNN Parameter
embedding_size = 2 # n-gram
sequence_length = 3
num_classes = 2  # 0 or 1
filter_sizes = [2, 2, 2] # n-gram window
num_filters = 3

# 3 words sentences (=sequence_length is 3)
sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

word_list = " ".join(sentences).split()#空格分词
word_list = list(set(word_list))#去重建立列表
word_dict = {w: i for i, w in enumerate(word_list)} #创建词->index的dict
vocab_size = len(word_dict)#词表大小

inputs = []
for sen in sentences:
    inputs.append(np.asarray([word_dict[n] for n in sen.split()])) #把句子转换为各个词编号的组合 np.asarray()把输入转换为array

targets = []
for out in labels:
    targets.append(out) # To using Torch Softmax Loss function
print(inputs)
print(targets)


[array([4, 1, 2]), array([13,  3,  8]), array([9, 0, 5]), array([ 4, 14,  2]), array([10, 11, 12]), array([ 6, 15,  7])]
[1, 1, 1, 0, 0, 0]


In [62]:

#输入输出Variable化
input_batch = Variable(torch.LongTensor(inputs))
target_batch = Variable(torch.LongTensor(targets))


class TextCNN(nn.Module):
    def __init__(self):
        super(TextCNN, self).__init__()

        self.num_filters_total = num_filters * len(filter_sizes)
        self.W = nn.Parameter(torch.empty(vocab_size, embedding_size).uniform_(-1, 1)).type(dtype) #[vocab_size,embedding_size]
        #uniform对这个tensor操作,具有从连续均匀分布中采样的数字 
        self.Weight = nn.Parameter(torch.empty(self.num_filters_total, num_classes).uniform_(-1, 1)).type(dtype)#[num_filters_total,num_classes]
        self.Bias = nn.Parameter(0.1 * torch.ones([num_classes])).type(dtype)#偏置

    def forward(self, X):
        embedded_chars = self.W[X] # [batch_size, sequence_length, sequence_length] 这是一个元素为三个词嵌入的向量的列表的列表
        embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]  #对数据维度进行扩充。给指定位置加上维数为一的维度

        pooled_outputs = []
        for filter_size in filter_sizes:
            # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option] 进行高维卷积
            conv = nn.Conv2d(1, num_filters, (filter_size, embedding_size), bias=True)(embedded_chars)#embedding_size = 2 # n-gram sequence_length = 3 num_classes = 2  # 0 or 1 filter_sizes = [2, 2, 2] num_filters = 3       这里是2*2的卷积核 等于说对一个3*2的矩阵进行卷积
            h = F.relu(conv) #激活函数Relu
            # mp : ((filter_height, filter_width))
            mp = nn.MaxPool2d((sequence_length - filter_size + 1, 1)) #池化
            # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]
            pooled = mp(h).permute(0, 3, 2, 1) #将tensor的维度换位
            pooled_outputs.append(pooled)

        h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3] #拼接矩阵
        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]#整理形状

        model = torch.mm(h_pool_flat, self.Weight) + self.Bias # [batch_size, num_classes] #输出
        return model

model = TextCNN()
#这里最好先看完下面具体的形状再来看上面的代码
print(model)
print(model.W[:2])
print(model.W[input_batch][:2])
embedded_chars=model.W[input_batch].unsqueeze(1)
print('embedded_chars:',embedded_chars[:2])
print(embedded_chars.shape)

TextCNN()
tensor([[ 0.5544,  0.0455],
        [-0.1759,  0.1618]], grad_fn=<SliceBackward>)
tensor([[[ 0.8345,  0.8912],
         [-0.1759,  0.1618],
         [-0.2388, -0.0422]],

        [[ 0.8417,  0.3048],
         [ 0.3007,  0.5591],
         [ 0.9753, -0.5198]]], grad_fn=<SliceBackward>)
embedded_chars: tensor([[[[ 0.8345,  0.8912],
          [-0.1759,  0.1618],
          [-0.2388, -0.0422]]],


        [[[ 0.8417,  0.3048],
          [ 0.3007,  0.5591],
          [ 0.9753, -0.5198]]]], grad_fn=<SliceBackward>)
torch.Size([6, 1, 3, 2])


In [37]:

criterion = nn.CrossEntropyLoss() #交叉熵损失函数
optimizer = optim.Adam(model.parameters(), lr=0.001) #Adam优化器

# Training
for epoch in range(5000):
    optimizer.zero_grad() #梯度清零
    output = model(input_batch) #进行预测

    # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch) #误差计算
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()#反向传播
    optimizer.step()#下一步

# Test
test_text = 'sorry hate you'
tests = [np.asarray([word_dict[n] for n in test_text.split()])]
test_batch = Variable(torch.LongTensor(tests))

# Predict
predict = model(test_batch).data.max(1, keepdim=True)[1]
if predict[0][0] == 0:
    print(test_text,"is Bad Mean...")
else:
    print(test_text,"is Good Mean!!")

Epoch: 1000 cost = 0.510522
Epoch: 2000 cost = 0.201033
Epoch: 3000 cost = 0.143718
Epoch: 4000 cost = 0.153680
Epoch: 5000 cost = 0.026367
sorry hate you is Bad Mean...
