In [1]:
import tensorflow as tf
import os
import numpy as np
import tensorflow_datasets as tfds

### 读取数据

In [9]:
train_path = './Chinese_conversation_sentiment/sentiment_XS_30k.txt'
test_path = './Chinese_conversation_sentiment/sentiment_XS_test.txt'

In [3]:
def load_data(path):
    with open(path, 'r', encoding='utf-8')as f:
        data = f.readlines()
    return data

In [4]:
def split_label(data: str):
    label_list = []
    data_list = []
    for line in data:
        line = line.rstrip()
        data_split = line.split(',')
        if data_split[0] == 'positive':
            label_list.append(1)
            data_list.append(data_split[1])
        elif data_split[0] == 'negative':
            label_list.append(0)
            data_list.append(data_split[1])
    return label_list, data_list

In [5]:
def split_word(data):
    corpus = []
    for line in data:
        line_corpus = [char for char in line if char != ' ']
        corpus.append(line_corpus)
    return corpus

In [6]:
def transform_corpus(data):
    corpus = []
    for line in data:
        line_start = ''
        for char in line:
            line_start += char
            line_start += ' '
        line_start = line_start.rstrip(' ')
        corpus.append(line_start)
    return corpus

In [7]:
def get_train_test(train_path, test_path):
    
    train_data = load_data(train_path)
    test_data = load_data(test_path)
    
    train_label, train_data = split_label(train_data)
    test_label, test_data = split_label(test_data)
    
    train_corpus = split_word(train_data)
    test_corpus = split_word(test_data)
    
    train_corpus = transform_corpus(train_corpus)
    test_corpus = transform_corpus(test_corpus)
    
    return train_corpus, test_corpus, train_label, test_label

In [10]:
train_corpus, test_corpus, train_label, test_label = get_train_test(train_path, test_path)

In [11]:
tokenizer = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
      train_corpus + test_corpus, target_vocab_size=2**13)

In [12]:
def data_token(tokenizer, train_corpus):
    train_token = []
    for train_line in train_corpus:
        train_sentence = [tokenizer.vocab_size + 1] + tokenizer.encode(train_line) + [tokenizer.vocab_size + 2]
        
        train_token.append(train_sentence)
    train_token = tf.keras.preprocessing.sequence.pad_sequences(
            train_token, maxlen=25, padding='post')
    return train_token

In [134]:
train_token = data_token(tokenizer, train_corpus)
test_token = data_token(tokenizer, test_corpus)

In [139]:
dataset = tf.data.Dataset.from_tensor_slices((train_token, train_label))

In [140]:
dataset = dataset.batch(64)

In [141]:
dataset = dataset.shuffle(len(train_corpus))

### 建模

In [144]:
vocabulary_size = tokenizer.vocab_size + 2

In [158]:
class BiLSTMSentiment(tf.keras.Model):
    
    def __init__(self, vocabulary_size):
        super(BiLSTMSentiment, self).__init__()
        self.vocabulary_size = vocabulary_size 
        self.embedding = tf.keras.layers.Embedding(input_dim = self.vocabulary_size, 
                                                   output_dim=256)
        self.bilstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True), 
                                                    input_shape=(25, 256), merge_mode='concat')
        self.drop_out = tf.keras.layers.Dropout(0.5)
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(128, activation='relu')
        self.dense_out = tf.keras.layers.Dense(2)
        self.sort_out = tf.keras.layers.Softmax()
    
    def call(inputs):
        out1 = self.embedding(inputs)
        out2 = self.bilstm(out1)
        out3 = self.drop_out(out2)
        out4 = self.flatten(out3)
        out5 = self.dense(out4)
        out6 = self.dense_out(out5)
        out7 = self.sort_out(out6)
        return out7

In [159]:
model = BiLSTMSentiment(vocabulary_size)

In [163]:
model.build((None, 40, 25))

ValueError: You can only call `build` on a model if its `call` method accepts an `inputs` argument.

In [160]:
model.summary()

ValueError: This model has not yet been built. Build the model first by calling `build()` or calling `fit()` with some data, or specify an `input_shape` argument in the first layer(s) for automatic build.

In [None]:
class MultiHeadAttention(layers.Layer):

    def __init__(self, hparams, name="multi_head_attention"):
        super(MultiHeadAttention, self).__init__(name=name)
        self.num_heads = hparams.num_heads
        self.d_model = hparams.d_model

        assert self.d_model % self.num_heads == 0

        self.depth = self.d_model // self.num_heads

        self.query_dense = layers.Dense(self.d_model)
        self.key_dense = layers.Dense(self.d_model)
        self.value_dense = layers.Dense(self.d_model)

        self.dense = layers.Dense(self.d_model)

    def get_config(self):
        config = super(MultiHeadAttention, self).get_config()
        config.update({'num_heads': self.num_heads, 'd_model': self.d_model})
        return config

    def split_heads(self, inputs, batch_size):
        inputs = tf.reshape(
            inputs, shape=(batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(inputs, perm=[0, 2, 1, 3])

    def call(self, inputs, **kwargs):
        query, key, value, mask = inputs['query'], inputs['key'], inputs[
            'value'], inputs['mask']
        batch_size = tf.shape(query)[0]

        # linear layers
        query = self.query_dense(query)
        key = self.key_dense(key)
        value = self.value_dense(value)

        # split heads
        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)

        # scaled dot-product attention
        scaled_attention = scaled_dot_product_attention(query, key, value, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        # concatenation of heads
        concat_attention = tf.reshape(scaled_attention,
                                      (batch_size, -1, self.d_model))

        # final linear layer
        outputs = self.dense(concat_attention)

        return outputs