In [14]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" 

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import os
from torch.nn import init
from torchtext import data
from torchtext.vocab import Vectors
from torchtext.vocab import GloVe
import time

# data processing

In [16]:
df=pd.read_csv('data/train.tsv',sep='\t')
df.head()

Unnamed: 0,id,text,aspect,polarity
0,0,out of the hundreds of italian restaurants in ...,anecdotes miscellaneous,1
1,1,i go twice a month,anecdotes miscellaneous,0
2,2,my fiance took me to scopa last week for my bi...,anecdotes miscellaneous,0
3,3,incredible food at a very agreable price bring...,price,1
4,4,when you are sitting in their main dining room...,ambience,1


In [17]:
tokenize = lambda x: x.split()
TEXT = data.Field(sequential=True, tokenize=tokenize, lower=True)
ASPECT = data.Field(sequential=True,
                    tokenize=tokenize,
                    lower=True)
LABEL = data.Field(sequential=False, use_vocab=False)

In [18]:
train, val = data.TabularDataset.splits(path='data/',
                                        skip_header=True,
                                        train='train.tsv',
                                        validation='test.tsv',
                                        format='tsv',
                                        fields=[('text', TEXT),
                                                ('aspect', ASPECT),
                                                ('polarity', LABEL)])

In [19]:
cache = 'data/.vector_cache'
if not os.path.exists(cache):
    os.mkdir(cache)
vectors = Vectors(name='data/glove.6B/glove.6B.300d.txt')

In [20]:
TEXT.build_vocab(train, val, vectors=vectors)
ASPECT.build_vocab(train, val, vectors=vectors)
LABEL.build_vocab(train, val)

In [21]:
print(train[2].text)
print(train[5].__dict__.keys())

['2']
dict_keys(['text', 'aspect', 'polarity'])


In [22]:
TEXT.vocab.freqs.most_common(10)
print(TEXT.vocab.vectors.shape)

[('0', 1),
 ('1', 1),
 ('2', 1),
 ('3', 1),
 ('4', 1),
 ('5', 1),
 ('6', 1),
 ('7', 1),
 ('8', 1),
 ('9', 1)]

torch.Size([3520, 300])


In [23]:
ASPECT.vocab.freqs
print(ASPECT.vocab.vectors.shape)

Counter({'out': 139,
         'of': 711,
         'the': 3154,
         'hundreds': 2,
         'italian': 34,
         'restaurants': 72,
         'in': 612,
         'queens': 5,
         'this': 447,
         'is': 1222,
         'among': 7,
         'best': 138,
         'i': 1032,
         'go': 189,
         'twice': 16,
         'a': 1282,
         'month': 7,
         'my': 264,
         'fiance': 4,
         'took': 22,
         'me': 75,
         'to': 1046,
         'scopa': 3,
         'last': 42,
         'week': 22,
         'for': 702,
         'birthday': 15,
         'and': 1984,
         'could': 68,
         'not': 577,
         'believe': 9,
         'food': 704,
         'incredible': 16,
         'at': 312,
         'very': 227,
         'agreable': 2,
         'price': 87,
         'brings': 4,
         'back': 108,
         'just': 98,
         'about': 99,
         'every': 32,
         'other': 67,
         'day': 17,
         'authentic': 38,
         'thai':

torch.Size([4214, 300])


In [24]:
batch_size=128
train_iter, val_iter = data.Iterator.splits(
            (train, val),
            sort_key=lambda x: len(x.text),
            batch_sizes=(batch_size, len(val)), # 训练集设置batch_size,验证集整个集合用于测试
    )

In [25]:
text_vocab_size = len(TEXT.vocab)
aspect_vocab_size = len(ASPECT.vocab)

In [26]:
text_vector=TEXT.vocab.vectors
aspect_vector=ASPECT.vocab.vectors

In [27]:
batch = next(iter(train_iter))
data = batch.text
print(batch.text.shape)#(seq_len,batch_size)
print(batch.polarity.shape)
print(batch.aspect.shape)
print(batch.text)
print(batch.polarity)

ValueError: invalid literal for int() with base 10: 'anecdotes miscellaneous'

# model

In [None]:
class ATAE_LSTM(nn.Module):
    def __init__(self, embedding_dim, num_hiddens, num_layers):
        super(ATAE_LSTM, self).__init__()
        self.text_embeddings = nn.Embedding(text_vocab_size, embedding_dim)
        self.aspect_embeddings = nn.Embedding(aspect_vocab_size, embedding_dim)
        self.text_embeddings = nn.Embedding.from_pretrained(text_vector,
                                                            freeze=False)
        self.aspect_embeddings = nn.Embedding.from_pretrained(aspect_vector,
                                                              freeze=False)
        self.lstm = nn.LSTM(input_size=2 * embedding_dim,
                            hidden_size=num_hiddens,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=True)
        self.wh = nn.Parameter(torch.Tensor(num_hiddens * 2, num_hiddens * 2))
        self.wv = nn.Parameter(torch.Tensor(embedding_dim, embedding_dim))
        self.omega = nn.Parameter(
            torch.Tensor(1, embedding_dim*2))
        self.wp = nn.Parameter(torch.Tensor(num_hiddens * 2, num_hiddens * 2))
        self.wx = nn.Parameter(torch.Tensor(num_hiddens * 2, num_hiddens * 2))
        self.ws = nn.Parameter(torch.Tensor(3, num_hiddens * 2))
        nn.init.uniform_(self.wh, -0.1, 0.1)
        nn.init.uniform_(self.wv, -0.1, 0.1)
        nn.init.uniform_(self.omega, -0.1, 0.1)
        nn.init.uniform_(self.wp, -0.1, 0.1)
        nn.init.uniform_(self.wx, -0.1, 0.1)
        nn.init.uniform_(self.ws, -0.1, 0.1)
        self.bs = nn.Parameter(torch.zeros((3, 1)))

    def forward(self, text, aspect):
        seq_len = len(text.t())
        e1 = self.text_embeddings(text)
        # e1 形状是(batch_size,seq_len, embedding_dim)
        e2 = self.aspect_embeddings(aspect).expand(e1.size())

        wv = torch.cat((e1, e2), dim=2)
        # e.g.
        # wv torch.Size([batch_size,seq_len,2*embedding_dim])

        out, (h, c) = self.lstm(wv)  # output, (h, c)
        # out形状是(batch_size,seq_len, 2 * num_hiddens)
        # h形状是(num_layers * num_directions, batch_size, 2*num_hiddens)

        H = out.permute(0, 2, 1)
        # H形状是(batch_size,2 * num_hiddens,seq_len)
        #print(H.shape)
        #print(self.wh.shape)

        Wh_H = torch.matmul(self.wh, H)
        # wh 形状是(2*num_hiddens, 2*num_hiddens)
        # wh_H 形状是(batch_size, 2*num_hiddens, seq_len)
        #print('Wh_H: ', Wh_H.shape)
        Wv_Va_eN = torch.matmul(
            self.wv,
            self.aspect_embeddings(aspect).permute(0, 2, 1).expand(
                -1, embedding_dim, seq_len))
        # Wv 形状是(seq_len, seq_len) embedding_dim=2*num_hiddens
        # Wv_Va_eN 形状是(batch_size, embedding_dim, seq_len)
        #print('Wv_Va_eN: ', Wv_Va_eN.shape)

        vh = torch.cat((Wh_H, Wv_Va_eN), dim=1)
        # vh 形状是(batch_size, 2*embedding_dim, seq_len)
        #print('vh: ', vh.shape)

        M = torch.tanh(vh)
        # M 形状是(batch_size, 2*embedding_dim, seq_len)
        #print('M: ', M.shape)

        alpha = F.softmax(torch.matmul(self.omega, M),dim=2)
        # omega 形状为(1, 2*embedding_dim))
        # alpha 形状为(batch_size, 1, seq_len)
        #print('alpha: ', alpha.shape)

        r = torch.matmul(H, alpha.permute(0, 2, 1))
        # H形状是(batch_size,2 * num_hiddens,seq_len)
        # r 形状为(batch_size,2*num_hiddens,1)
        #print('r: ', r.shape)

        h_star = torch.tanh(
            torch.matmul(self.wp, r) +
            torch.matmul(self.wx, torch.unsqueeze(H[:, :, -1], 2)))
        # h_star形状是(batch_size,2 * num_hiddens,1)
        #print('h_star: ', h_star.shape)

        y = torch.matmul(self.ws, h_star) + self.bs  #不需要手动求softmax
        # y 形状(batch_size,3,1)
        
        y = y.reshape([-1, 3])
        # y 形状(batch_size,3)
        # ws 形状(3, 2*num_hiddens)
        #print('y: ', y.shape)

        return y

In [None]:
embedding_dim, num_hiddens, num_layers = 300, 150, 1
net = ATAE_LSTM(embedding_dim, num_hiddens, num_layers)
print(net)

# train

In [None]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for batch_idx, batch in enumerate(data_iter):
            X1, X2, y = batch.text, batch.aspect, batch.polarity
            X1 = X1.permute(1, 0)
            X2 = X2.permute(1, 0)
            y.data.add_(1)  #下标从0开始
            if isinstance(net, torch.nn.Module):
                net.eval()  # 评估模式, 这会关闭dropout
                acc_sum += (net(X1,
                                X2).argmax(dim=1) == y).float().sum().item()
                net.train()  # 改回训练模式
            else:
                if ('is_training'
                        in net.__code__.co_varnames):  # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X1, X2, is_training=False).argmax(
                        dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(
                        X1, X2).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

In [None]:
def train(train_iter, test_iter, net, loss, optimizer, num_epochs):
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for batch_idx, batch in enumerate(train_iter):
            X1, X2, y = batch.text, batch.aspect, batch.polarity
            X1 = X1.permute(1, 0)
            X2 = X2.permute(1, 0)
            y.data.add_(1)  #下标从0开始 
            y_hat = net(X1,X2)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print(
            'epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
            % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
               test_acc, time.time() - start))

In [None]:
lr, num_epochs = 0.01, 20
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()
train(train_iter, val_iter, net, loss, optimizer, num_epochs)