In [1]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import seaborn as sns
sns.set_style("white")
%matplotlib inline

In [2]:
# Load the labelled training rows and the public test rows from the shared data folder.
train = pd.read_csv('../data/train_2.csv')
test = pd.read_csv('../data/test_public_2v3.csv')

In [3]:
# Distinct subject labels; a label's position in this list is used as its integer id.
subject = list(train['subject'].unique())
def get_subject(x):
    """Return the integer id of subject label ``x``.

    The id is the position of the first entry of the module-level ``subject``
    list that compares equal to ``x``; -1 is returned when nothing matches.
    """
    return next((pos for pos, label in enumerate(subject) if x == label), -1)
# Y1: integer id of each row's subject.
train['Y1'] = train['subject'].apply(get_subject)

In [4]:
# Distinct sentiment values; a value's position in this list is used as its integer id.
sentiment_value = list(train['sentiment_value'].unique())
def get_sentiment_value(x):
    """Return the integer id of sentiment value ``x``.

    The id is the position of the first entry of the module-level
    ``sentiment_value`` list that compares equal to ``x``; -1 is returned
    when nothing matches.
    """
    return next((pos for pos, value in enumerate(sentiment_value) if x == value), -1)
# Y2: integer id of each row's sentiment value.
train['Y2'] = train['sentiment_value'].apply(get_sentiment_value)

In [5]:
# Combine subject and sentiment into a single class id (30 classes in total):
# each subject id owns three consecutive ids, one per sentiment id.
train['Y3'] = train['Y1']*3+train['Y2']

In [6]:
print (train.shape,train['content_id'].nunique())

(12572, 8) 10654


In [7]:
# Number of labelled rows per content_id, sorted so the most-duplicated ids come first.
gp = train[['content_id']].groupby(['content_id']).size().rename('counts').reset_index()
gp = gp.sort_values(by='counts',ascending=False)
print (gp.head())

      content_id  counts
505          875       7
3846        6330       6
8601       14078       6
4304        7033       6
5250        8614       5


In [8]:
gp[gp['counts']>1].shape

(1472, 2)

In [9]:
gp.shape

(10654, 2)

In [10]:
# Merge duplicated rows to obtain multi-label targets
def get_ys(x, num_classes=30):
    """Build a multi-hot vector from an iterable of class ids.

    Args:
        x: iterable of integer class ids (for example the ``Y3`` values of
            one ``content_id`` group). Repeated ids are harmless.
        num_classes: length of the returned vector. Defaults to 30, the
            number of combined subject/sentiment classes used in this notebook.

    Returns:
        1-D float numpy array of length ``num_classes`` holding 1.0 at every
        id present in ``x`` and 0.0 elsewhere.
    """
    ans = np.zeros(num_classes)
    for class_id in x:
        ans[class_id] = 1
    return ans

In [11]:
# One row per content_id; Y4 is the multi-hot vector built by get_ys from that id's Y3 values.
gpy = train.groupby(['content_id'])['Y3'].apply(get_ys).rename('Y4').reset_index()

In [12]:
gpy.shape

(10654, 2)

In [13]:
# One row per distinct (content_id, content) pair; 'counts' is the number of source rows for the pair.
train1 = train.groupby(['content_id','content']).size().rename('counts').reset_index()

In [14]:
# Attach the text to each multi-hot label row via content_id.
train0 = pd.merge(gpy,train1[['content_id','content']],on=['content_id'],how='left')

In [15]:
train0.shape

(10654, 3)

In [16]:
# NOTE: rebinds `train` to the merged frame (content_id, Y4, content); the per-label rows
# loaded from CSV are no longer reachable under this name, so this cell is not idempotent.
train = train0.copy()

In [17]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import *
from keras.layers.embeddings import Embedding
from keras.models import Model
from keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau,Callback
from keras import backend as K
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.layers import Conv1D, GlobalMaxPool1D, GlobalAveragePooling1D
from keras.layers.core import Layer
import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [18]:
import jieba

In [19]:
# Use pre-trained word and character vectors
# https://github.com/Embedding/Chinese-Word-Vectors
embeddings_index = {}
EMBEDDING_DIM = 300
embfile = '../data/sgns.baidubaike.bigram-char'
with open(embfile, encoding='utf-8') as f:
    for line in f:
        values = line.split()
        # A usable line holds a token followed by EMBEDDING_DIM numbers. Shorter lines
        # (for example a "<count> <dim>" header) would otherwise be stored under a bogus
        # key with a vector of the wrong length.
        if len(values) <= EMBEDDING_DIM:
            continue
        # The token itself may contain whitespace; everything before the last
        # EMBEDDING_DIM fields is glued back together.
        word = ''.join(values[:-EMBEDDING_DIM])
        try:
            coefs = np.asarray(values[-EMBEDDING_DIM:], dtype='float32')
        except ValueError:
            # Only malformed numeric fields are skipped; any other error should surface.
            continue
        embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))

Found 635793 word vectors.


In [20]:
# Word segmentation
rls = ['？','！','“','”','：','…','（','）',
      '—','《','》','、','‘','’','"','\n','.',
       '；','#','【','】','\'',':','(','」','∠','+',',',
       '!','|',
      ]
def cut_words(x):
    """Segment text into a single space-separated string of jieba tokens.

    Every character listed in ``rls`` is replaced by a space, whitespace is
    collapsed, the text is cut with ``jieba.cut(..., cut_all=True)`` and the
    resulting tokens are joined with single spaces.
    """
    to_space = str.maketrans({ch: ' ' for ch in rls})
    cleaned = ' '.join(str(x).strip().translate(to_space).split())
    tokens = ' '.join(jieba.cut(cleaned, cut_all=True))
    return ' '.join(tokens.split())
       

In [21]:
# Character segmentation
def cut_chars(x):
    """Return the characters of ``str(x)``, spaces removed, joined by single spaces."""
    return ' '.join(str(x).replace(' ', ''))

In [22]:
# Character-level view of every text, for both train and test.
train['chars'] = train['content'].apply(cut_chars)
test['chars'] = test['content'].apply(cut_chars)


In [23]:
# Characters per text (tokens in 'chars' are space-separated); print both
# distributions to compare train against test.
train['chars_len'] = train['chars'].apply(lambda x:len(x.split()))
test['chars_len'] = test['chars'].apply(lambda x:len(x.split()))
print (train['chars_len'].describe())
print (test['chars_len'].describe())

count    10654.000000
mean        44.624273
std         31.318177
min          9.000000
25%         22.000000
50%         36.000000
75%         57.000000
max        200.000000
Name: chars_len, dtype: float64
count    2753.000000
mean       43.404286
std        30.659615
min         9.000000
25%        21.000000
50%        35.000000
75%        55.000000
max       199.000000
Name: chars_len, dtype: float64


In [24]:
# Word-level view of every training text (space-separated jieba tokens).
train['words'] = train['content'].apply(cut_words)

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\CHANTC~1\AppData\Local\Temp\jieba.cache
Loading model cost 0.636 seconds.
Prefix dict has been built succesfully.


In [25]:
train['words'].head(10)

0    建议 议定 定做 地毯 还有 后尾 箱 的 垫 前后 后尾 箱 大概 650 700 元 主...
1                            2 5 的 综合 油耗 好像 普遍 在 10 左右
2                                   确实 该 检查 了 油耗 太 高 了
3    5 3 万公里 公里 目前 什么 都 没有 换 异 响 还好 2 0L 自 吸 8 4 油耗...
4                       SE 2 5T 的 是 EJ 不是 FB 发动 发动机 动机
5    我 试过 过晚 晚上 开 错 路 去 了 山间 小路 荒无人烟 无人 人烟 突然 导航 来 ...
6    北京 石景 石景山 景山 古城 的 那个 店 丰台 有 一个 也 这个 价格 置换 还有 4...
7                                  油耗 没 那么 高 吧 10 个 左右
8    16 款 2 5 春节 高速 一个 CRV 不服 我 以 170 180 奔跑 数分 数分钟...
9    优点 省油 全 时 四 驱 空间 缺点 车 漆 薄 高速 烧 机油 保养 贵 我 的 车 三...
Name: words, dtype: object

In [26]:
# Remove low-frequency words
# word_count1: total occurrences of each token over all training texts.
# word_count2: number of training texts that contain each token at least once.
word_count1 = {}
word_count2 = {}
for row in tqdm(range(len(train))):
    tokens = train.loc[row, 'words'].split()
    for tok in tokens:
        word_count1[tok] = word_count1.get(tok, 0) + 1
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    for tok in dict.fromkeys(tokens):
        word_count2[tok] = word_count2.get(tok, 0) + 1

100%|█████████████████████████████████████████████████████████████████████████| 10654/10654 [00:00<00:00, 43002.56it/s]


In [27]:
def remove_low_words(x):
    """Drop rare tokens from a space-separated token string.

    A token is kept only when its count is greater than 1 in both
    ``word_count1`` and ``word_count2``; tokens missing from either dict are
    dropped. Order and repetitions of the kept tokens are preserved.
    """
    kept = [tok for tok in x.split()
            if word_count1.get(tok, 0) > 1 and word_count2.get(tok, 0) > 1]
    return ' '.join(kept)

In [28]:
# Training tokens with rare words removed.
train['words1'] = train['words'].apply(remove_low_words)

In [29]:
train['words1'].head()

0    建议 议定 定做 地毯 还有 后尾 箱 的 垫 前后 后尾 箱 大概 650 700 元 主...
1                            2 5 的 综合 油耗 好像 普遍 在 10 左右
2                                   确实 该 检查 了 油耗 太 高 了
3    5 3 万公里 公里 目前 什么 都 没有 换 异 响 还好 2 0L 自 吸 8 4 油耗...
4                       SE 2 5T 的 是 EJ 不是 FB 发动 发动机 动机
Name: words1, dtype: object

In [30]:
# Tokens per training text after rare-word removal.
train['words_len'] = train['words1'].apply(lambda x:len(x.split()))

In [31]:
print (train['words_len'].describe())

count    10654.000000
mean        26.570021
std         18.490809
min          3.000000
25%         13.000000
50%         22.000000
75%         34.000000
max        127.000000
Name: words_len, dtype: float64


In [32]:
# Same word pipeline for the test set. remove_low_words relies on the counts gathered
# from the training texts, so test tokens never seen in training are dropped as well.
test['words'] = test['content'].apply(cut_words)
test['words1'] = test['words'].apply(remove_low_words)
test['words_len'] = test['words1'].apply(lambda x:len(x.split()))
print (test['words_len'].describe())

count    2753.000000
mean       25.388667
std        17.658873
min         4.000000
25%        13.000000
50%        21.000000
75%        32.000000
max       121.000000
Name: words_len, dtype: float64


In [33]:
# Vocabulary cap handed to both tokenizers as num_words.
MAX_NB_WORDS = 10000
# Padded length of word-id sequences (see get_pad_seq).
MAX_SEQUENCE_LENGTH = 128
# Padded length of character-id sequences (see get_pad_char_seq).
MAX_SEQUENCE_LENGTH1 = 200

In [34]:
# Word-level tokenizer fitted on the rare-word-filtered training tokens only.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(train['words1'])
word_index = tokenizer.word_index
print (len(word_index))
# Number of word rows (excluding index 0) the embedding matrices are sized for.
nb_words = min(MAX_NB_WORDS,len(word_index))

9905


In [35]:
# Word-id sequences (plain Python lists, not yet padded) for train and test.
train_words = tokenizer.texts_to_sequences(train['words1'])
test_words = tokenizer.texts_to_sequences(test['words1'])

In [36]:
# Row i holds the pre-trained vector of the word whose tokenizer index is i;
# row 0 and words without a pre-trained vector stay all-zero.
word_embedding_matrix = np.zeros((nb_words + 1, EMBEDDING_DIM))
cc = 0   # words found in embeddings_index
cc1 = 0  # words missing from embeddings_index
for word, i in word_index.items():
    if i > MAX_NB_WORDS:
        continue
    if word not in embeddings_index:
        cc1 += 1
        continue
    word_embedding_matrix[i] = embeddings_index[word]
    cc += 1
print (cc,cc1)

9263 642


In [37]:
# Character-level tokenizer fitted on the training character strings only.
tokenizer1 = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer1.fit_on_texts(train['chars'])
word_index1 = tokenizer1.word_index
print (len(word_index1))
# Number of character rows (excluding index 0) the embedding matrices are sized for.
nb_words1 = min(MAX_NB_WORDS,len(word_index1))

2812


In [38]:
# Character-id sequences (plain Python lists, not yet padded) for train and test.
train_chars = tokenizer1.texts_to_sequences(train['chars'])
test_chars = tokenizer1.texts_to_sequences(test['chars'])

In [39]:
# Row i holds the pre-trained vector of the character whose tokenizer1 index is i;
# row 0 and characters without a pre-trained vector stay all-zero.
word_embedding_matrix1 = np.zeros((nb_words1 + 1, EMBEDDING_DIM))
cc = 0   # characters found in embeddings_index
cc1 = 0  # characters missing from embeddings_index
for word, i in word_index1.items():
    if i > MAX_NB_WORDS:
        continue
    if word not in embeddings_index:
        cc1 += 1
        continue
    word_embedding_matrix1[i] = embeddings_index[word]
    cc += 1
print (cc,cc1)

2793 19


In [40]:
def get_pad_char_seq(x):
    """Pad character-id sequences to MAX_SEQUENCE_LENGTH1 using pad_sequences."""
    padded = pad_sequences(x, maxlen=MAX_SEQUENCE_LENGTH1)
    return padded

In [41]:
def get_pad_seq(x):
    """Pad word-id sequences to MAX_SEQUENCE_LENGTH using pad_sequences."""
    padded = pad_sequences(x, maxlen=MAX_SEQUENCE_LENGTH)
    return padded

In [42]:
# Fixed-length inputs: word ids for train and test, character ids for test.
# The padding helpers defined above apply the same maxlen values as before.
X = get_pad_seq(train_words)
test_X = get_pad_seq(test_words)
test_X1 = get_pad_char_seq(test_chars)

In [43]:
X.shape

(10654, 128)

In [44]:
# Stack the per-text multi-hot vectors into one 2-D label array (one row per text).
Y = np.array(list(train['Y4']))

In [45]:
Y.shape

(10654, 30)

In [46]:
# f1_score: the metric kept coming out as NaN, so it must evaluate to 0 whenever
# there are no true positives, no predicted positives or no actual positives.
def f1_score(y_true, y_pred):
    """F1 computed over every label of the batch at once, usable as a Keras metric.

    Both inputs are clipped to [0, 1] and rounded to 0/1 before counting.

    Args:
        y_true: ground-truth label tensor.
        y_pred: predicted score tensor with the same layout as ``y_true``.

    Returns:
        Scalar F1 value. It is exactly 0 when the true-positive count is 0,
        because the numerator is 0 while every denominator is padded with a
        small positive constant.
    """
    pred_pos = K.round(K.clip(y_pred, 0, 1))
    true_pos = K.round(K.clip(y_true, 0, 1))

    c1 = K.sum(pred_pos * true_pos)  # true positives
    c2 = K.sum(pred_pos)             # predicted positives
    c3 = K.sum(true_pos)             # actual positives

    # No Python `if` on c1/c2/c3: they are backend tensors, so a Python-level
    # comparison cannot branch on their runtime value while the graph is built.
    # The padded denominators below already give 0 in the degenerate cases and
    # can never produce NaN.

    # How many selected items are relevant?
    precision = c1 / (c2+0.000001)

    # How many relevant items are selected?
    recall = c1 / (c3+0.000001)

    # Harmonic mean of precision and recall.
    f1 = 2 * (precision * recall) / (precision + recall+0.000001)
    return f1

def c1(y_true, y_pred):
    """Sum of round(clip(y_pred) * clip(y_true)), both clipped to [0, 1]."""
    overlap = K.clip(y_pred, 0, 1) * K.clip(y_true, 0, 1)
    return K.sum(K.round(overlap))

def c2(y_true, y_pred):
    """Sum of y_pred after clipping to [0, 1] and rounding; y_true is not used."""
    return K.sum(K.round(K.clip(y_pred, 0, 1)))

def c3(y_true, y_pred):
    """Sum of y_true after clipping to [0, 1] and rounding; y_pred is not used."""
    return K.sum(K.round(K.clip(y_true, 0, 1)))

In [47]:
import keras

In [48]:
# Data augmentation and sampling
np.random.seed(1992)
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence yielding ``([word_batch, char_batch], label_batch)``.

    Every batch contains ``batch_size`` samples taken from the current epoch
    ordering followed by ``aug`` synthetic samples. A synthetic sample is the
    concatenation of one sample of the batch with one sample drawn from the
    whole data set (word and character sequences alike); its label is the
    element-wise sum of both labels capped at 1.
    """

    def __init__(self, data, data1, datay,
                 batch_size=256, shuffle=True, aug=0):
        # data / data1: word-id and character-id sequences; datay: label rows.
        self.data, self.data1, self.datay = data, data1, datay
        self.batch_size = batch_size
        self.aug = aug
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # Whole batches only; a trailing partial batch is not counted.
        return len(self.data) // self.batch_size

    def __getitem__(self, index):
        first = index * self.batch_size
        # Wrap around the data length, then map through the epoch ordering.
        positions = np.array(range(first, first + self.batch_size)) % len(self.data)
        return self.__data_generation(self.indexes[positions])

    def on_epoch_end(self):
        # Fresh identity ordering, reshuffled when shuffling is switched on.
        self.indexes = np.array(range(len(self.data)))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __draw_augmented(self, indexes):
        """Draw random pairs until the concatenation fits both length limits."""
        while True:
            a = indexes[np.random.randint(self.batch_size)]
            b = np.random.randint(len(self.data))
            xx = self.data[a] + self.data[b]
            xx1 = self.data1[a] + self.data1[b]
            if len(xx) >= MAX_SEQUENCE_LENGTH or len(xx1) >= MAX_SEQUENCE_LENGTH1:
                continue
            yy = np.minimum(self.datay[a] + self.datay[b], 1)
            return xx, xx1, yy

    def __data_generation(self, indexes):
        picked = [indexes[k] for k in range(self.batch_size)]
        X = [self.data[p] for p in picked]
        X1 = [self.data1[p] for p in picked]
        y = [self.datay[p] for p in picked]

        if self.aug > 0:
            for _ in range(self.aug):
                xx, xx1, yy = self.__draw_augmented(indexes)
                X.append(xx)
                X1.append(xx1)
                y.append(yy)

        return [get_pad_seq(X), get_pad_char_seq(X1)], np.array(y)
    
# Module-level generator settings. The per-fold training functions build their own
# local `params` dict, so this one only serves the commented-out example below.
params = {'batch_size': 64,
          'shuffle': True}

# NOTE: stale example - DataGenerator's constructor takes (data, data1, datay).
#training_generator = DataGenerator(X_train,y_train, **params)

In [49]:
from sklearn.model_selection import StratifiedKFold,KFold

In [50]:
# Number of cross-validation folds.
N = 5
# Plain shuffled K-fold with a fixed seed (not stratified, despite the variable name).
skf = KFold(n_splits=N,shuffle=True,random_state=1337)



In [51]:
# Attention layer
from keras import backend as K
from keras.layers import Layer
from keras import initializers, regularizers, constraints
 
def dot_product(x, kernel):
    """
    Backend-agnostic dot product of ``x`` with ``kernel`` (Theano / TensorFlow).
    Args:
        x (): input
        kernel (): weights
    Returns:
        ``K.dot(x, kernel)`` on non-TensorFlow backends. On TensorFlow the
        kernel gets a trailing axis before the dot and that axis is squeezed
        out of the result again.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)
    expanded_kernel = K.expand_dims(kernel)
    return K.squeeze(K.dot(x, expanded_kernel), axis=-1)

class AttentionWithContext(Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports Masking.
    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
    "Hierarchical Attention Networks for Document Classification"
    by using a context vector to assist the attention
    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.
    How to use:
    Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred based on the output shape of the RNN.
    Note: The layer has been tested with Keras 2.0.6
    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
        # next add a Dense layer (for classification/regression) or whatever...
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """Store regularizers/constraints for W, u and b; ``bias`` toggles the b term."""
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create W (features x features), optional b (features) and context vector u (features)."""
        assert len(input_shape) == 3
        feature_dim = input_shape[-1]

        # `shape` is passed by keyword: on Keras versions where the first positional
        # parameter of add_weight is `name`, a positional shape collides with `name=`.
        self.W = self.add_weight(shape=(feature_dim, feature_dim,),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(feature_dim,),
                                     initializer='zeros',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight(shape=(feature_dim,),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1 (the steps axis)."""
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        # The steps axis is summed away: (samples, steps, features) -> (samples, features).
        return input_shape[0], input_shape[-1]

In [52]:
# Capsule network
def squash(x, axis=-1):
    """Scale ``x`` to (almost) unit L2 norm along ``axis``.

    The squared norm is padded with ``K.epsilon()`` before the square root so
    an all-zero vector does not divide by zero. This is plain normalisation,
    not the ``sqrt(n) / (0.5 + n)`` scaling that an earlier version of this
    function used.
    """
    return x / K.sqrt(K.sum(K.square(x), axis, keepdims=True) + K.epsilon())

# A Capsule Implement with Pure Keras
class Capsule(Layer):
    """Capsule layer with an iterative routing loop.

    Turns a sequence of input capsules into ``num_capsule`` output capsules of
    size ``dim_capsule``. Per the shape comments in ``call``, the input is
    treated as ``(batch, input_num_capsule, input_dim_capsule)``.
    """

    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        # routings: number of iterations of the routing loop in call().
        # kernel_size: stored, but not read anywhere in this class.
        # share_weights: True -> one kernel shared by all input capsules,
        #                False -> a separate kernel per input capsule position.
        # activation: 'default' selects the module-level squash(); any other value is
        #             wrapped in a Keras Activation layer.
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights
        if activation == 'default':
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        # Creates the single trainable kernel self.W; its leading dimension is 1 when
        # weights are shared and input_num_capsule otherwise.
        super(Capsule, self).build(input_shape)
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     # shape=self.kernel_size,
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            input_num_capsule = input_shape[-2]
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        # Project every input capsule to num_capsule * dim_capsule values.
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        # Routing logits start at zero.
        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            # b is permuted so that K.softmax (last axis) normalises over num_capsule,
            # then both c and b are permuted back to [None, num_capsule, input_num_capsule].
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            # Weighted sum of the projected inputs, followed by the activation.
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            # The logits are recomputed from the new outputs on every iteration but the last.
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)


In [53]:
import os
# Output folders for per-fold checkpoints and prediction CSVs.
# makedirs(..., exist_ok=True) also creates a missing parent folder and does nothing when
# the folder is already there, so no separate existence check is needed.
st_model = '../data/st_model/'
os.makedirs(st_model, exist_ok=True)
st_csv = '../data/st_csv/'
os.makedirs(st_csv, exist_ok=True)

# What follows are four models with slightly tweaked architectures: the first scored 0.6858
# on semi-final leaderboard A, the fourth scored 0.6825, the second was marginally lower,
# and the third failed to train.

In [54]:
def get_model_att0(x0,x1):
    """Build the att0 graph and return its output tensor.

    Args:
        x0: input tensor of word ids (fed to the embeddings sized by nb_words).
        x1: input tensor of character ids (fed to the embeddings sized by nb_words1).

    Returns:
        Output tensor of a 30-unit sigmoid Dense layer.
    """
    # Word branch: frozen pre-trained vectors plus a trainable 100-d embedding.
    Embedding_layer0 = Embedding(nb_words + 1, EMBEDDING_DIM, 
                                weights=[word_embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH, 
                                name = 'lc_embed0',
                                trainable = False,
                                )
    Embedding_layer1 = Embedding(nb_words + 1, 100, 
                                input_length=MAX_SEQUENCE_LENGTH, 
                                name = 'lc_embed1',
                                trainable = True,
                                )
    # Character branch: same pairing of frozen pre-trained and trainable embeddings.
    Embedding_layer2 = Embedding(nb_words1 + 1, EMBEDDING_DIM, 
                                weights=[word_embedding_matrix1],
                                input_length=MAX_SEQUENCE_LENGTH1, 
                                name = 'lc_embed2',
                                trainable = False,
                                )
    Embedding_layer3 = Embedding(nb_words1 + 1, 100, 
                                input_length=MAX_SEQUENCE_LENGTH1, 
                                name = 'lc_embed3',
                                trainable = True,
                                )
    
    x = Embedding_layer0(x0)
    x01 = Embedding_layer1(x0)
    x2 = Embedding_layer2(x1)
    x3 = Embedding_layer3(x1)
    # Spatial dropout is applied to the trainable embeddings only.
    x01 = SpatialDropout1D(0.5)(x01)
    x3 = SpatialDropout1D(0.5)(x3)
    
    # Concatenate frozen and trainable embeddings along the feature axis.
    xa0 = Concatenate(axis=2)([x,x01])
    xb0 = Concatenate(axis=2)([x2,x3])
        
    # One bidirectional GRU per branch, each pooled by its own attention layer.
    xa = Bidirectional(GRU(256, return_sequences=True,dropout=0.25))(xa0)    
    xb = Bidirectional(GRU(256, return_sequences=True,dropout=0.25))(xb0)

    xa2 = AttentionWithContext()(xa)
    xb2 = AttentionWithContext()(xb)

    ya = xa2
    yb = xb2
    
    ya = Dropout(0.5)(ya)
    yb = Dropout(0.5)(yb)
    # Both branch vectors are concatenated and classified jointly.
    y = Concatenate(axis=1)([ya,yb])
    y = Dense(30, kernel_initializer='he_normal', activation='sigmoid')(y)        
    return y
    
#inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
#inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
#outputya = get_model_att0(inputxa,inputxb)
#model1 = Model([inputxa,inputxb], outputya)
#model1.summary() 

def get_result_att0(timei):
    """Train the att0 model on every fold of ``skf`` and write its predictions to CSV.

    For each fold a fresh model is trained on augmented batches, the checkpoint
    with the best ``val_f1_score`` is reloaded, and predictions are made for the
    test set and for the fold's held-out part. Afterwards three CSV files are
    written to ``st_csv``: the test predictions averaged over the folds, the
    out-of-fold predictions, and the matching out-of-fold labels.

    Args:
        timei: run index; used only in the checkpoint and CSV file names.
    """
    xx_cv = []      # best val_f1_score per fold (collected, not reported)
    xx_pre = []     # test-set predictions per fold
    xx_train = []   # out-of-fold predictions per fold
    yy_train = []   # out-of-fold labels per fold
    early_stopping = EarlyStopping(patience=3,
                                   verbose=1,
                                   monitor='val_f1_score',
                                   mode='max'
                                  )

    reduce_lr = ReduceLROnPlateau(factor=0.1,
                                  patience=2,
                                  min_lr=0.00001,
                                  verbose=1,
                                  monitor='val_f1_score',
                                  mode='max',
                                 )
    foldi = -1
    for train_in,test_in in skf.split(train_words,Y):
        foldi = foldi+1

        # Training sequences stay unpadded: DataGenerator concatenates raw id lists
        # for augmentation and pads each batch itself.
        X_train = [train_words[i] for i in train_in]
        X_train1 = [train_chars[i] for i in train_in]

        X_test = get_pad_seq([train_words[i] for i in test_in])
        X_test1 = get_pad_char_seq([train_chars[i] for i in test_in])

        y_train,y_test = Y[train_in],Y[test_in]

        params = {'batch_size': 64,
                  'aug':128+64,
                  'shuffle': True}

        training_generator = DataGenerator(X_train,X_train1,y_train, **params)

        inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
        inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
        outputya = get_model_att0(inputxa,inputxb)
        model1 = Model([inputxa,inputxb], outputya)

        model1.compile(loss='binary_crossentropy',
                      optimizer="adam",
                      metrics=[f1_score],
                     )
        filename = st_model+str(foldi)+'_'+str(timei)+'.att0'
        model_checkpoint = ModelCheckpoint(filename,
                                       save_best_only=True,
                                       verbose=1,
                                       monitor='val_f1_score',
                                       mode='max'
                                      )

        # validation_data is passed as the documented (inputs, targets) tuple.
        history = model1.fit_generator(generator=training_generator,
                        validation_data=([X_test,X_test1],y_test),
                        epochs=100,
                        callbacks=[early_stopping, model_checkpoint, reduce_lr],
                       )

        # Predict with the best checkpoint rather than the last epoch's weights.
        model1.load_weights(filename)
        xx_pre.append(model1.predict([test_X,test_X1],batch_size = 128))
        xx_cv.append(np.max(history.history['val_f1_score']))

        xx_train.append(model1.predict([X_test,X_test1],batch_size = 128))
        yy_train.append(y_test)

    # Average over the folds actually trained instead of a hard-coded 5.
    s = sum(xx_pre) / len(xx_pre)

    cols = [str(j) for j in range(30)]

    # After the loop foldi is the index of the last fold; it is kept in the file
    # names so existing consumers of these CSVs still find them.
    res = pd.DataFrame(s,columns=cols)
    res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att0.csv',index=False)

    train_res = np.concatenate(xx_train, axis=0)
    train_yy = np.concatenate(yy_train, axis=0)

    train_res = pd.DataFrame(train_res,columns=cols)
    train_res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att0_train.csv',index=False)

    train_yy = pd.DataFrame(train_yy,columns=cols)
    train_yy.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att0_train_yy.csv',index=False)

In [61]:
# Run the full cross-validated att0 training five times; the loop index becomes
# the run id in checkpoint and CSV file names.
for i in range(0,5):
    get_result_att0(i)

Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.57235, saving model to ../data/st_model/0_3.att0
Epoch 2/100

Epoch 00002: val_f1_score improved from 0.57235 to 0.59129, saving model to ../data/st_model/0_3.att0
Epoch 3/100

Epoch 00003: val_f1_score improved from 0.59129 to 0.61354, saving model to ../data/st_model/0_3.att0
Epoch 4/100

Epoch 00004: val_f1_score improved from 0.61354 to 0.62038, saving model to ../data/st_model/0_3.att0
Epoch 5/100

Epoch 00005: val_f1_score did not improve from 0.62038
Epoch 6/100

Epoch 00006: val_f1_score did not improve from 0.62038

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/100

Epoch 00007: val_f1_score did not improve from 0.62038
Epoch 00007: early stopping
Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.54730, saving model to ../data/st_model/1_3.att0
Epoch 2/100

Epoch 00002: val_f1_score improved from 0.54730 to 0.57627, saving model to ../data/st_model/1_3.att0
Ep

In [54]:
def get_model_att1(x0,x1):
    """Build the att1 graph and return its output tensor.

    Same embedding setup as get_model_att0, but each branch uses two stacked
    bidirectional LSTM layers (128 then 64 units) instead of one GRU layer.

    Args:
        x0: input tensor of word ids (fed to the embeddings sized by nb_words).
        x1: input tensor of character ids (fed to the embeddings sized by nb_words1).

    Returns:
        Output tensor of a 30-unit sigmoid Dense layer.
    """
    # Word branch: frozen pre-trained vectors plus a trainable 100-d embedding.
    Embedding_layer0 = Embedding(nb_words + 1, EMBEDDING_DIM, 
                                weights=[word_embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH, 
                                name = 'lc_embed0',
                                trainable = False,
                                )
    Embedding_layer1 = Embedding(nb_words + 1, 100, 
                                input_length=MAX_SEQUENCE_LENGTH, 
                                name = 'lc_embed1',
                                trainable = True,
                                )
    # Character branch: same pairing of frozen pre-trained and trainable embeddings.
    Embedding_layer2 = Embedding(nb_words1 + 1, EMBEDDING_DIM, 
                                weights=[word_embedding_matrix1],
                                input_length=MAX_SEQUENCE_LENGTH1, 
                                name = 'lc_embed2',
                                trainable = False,
                                )
    Embedding_layer3 = Embedding(nb_words1 + 1, 100, 
                                input_length=MAX_SEQUENCE_LENGTH1, 
                                name = 'lc_embed3',
                                trainable = True,
                                )
    
    x = Embedding_layer0(x0)
    x01 = Embedding_layer1(x0)
    x2 = Embedding_layer2(x1)
    x3 = Embedding_layer3(x1)
    # Spatial dropout is applied to the trainable embeddings only.
    x01 = SpatialDropout1D(0.5)(x01)
    x3 = SpatialDropout1D(0.5)(x3)
    
    # Concatenate frozen and trainable embeddings along the feature axis.
    xa0 = Concatenate(axis=2)([x,x01])
    xb0 = Concatenate(axis=2)([x2,x3])
        
    # Two stacked bidirectional LSTMs per branch, with input and recurrent dropout.
    xa = Bidirectional(LSTM(128, return_sequences=True,dropout=0.5,recurrent_dropout=0.5))(xa0) 
    xa = Bidirectional(LSTM(64, return_sequences=True,dropout=0.5,recurrent_dropout=0.5))(xa) 
    
    xb = Bidirectional(LSTM(128, return_sequences=True,dropout=0.5,recurrent_dropout=0.5))(xb0)
    xb = Bidirectional(LSTM(64, return_sequences=True,dropout=0.5,recurrent_dropout=0.5))(xb) 
    # Each branch is pooled by its own attention layer.
    xa2 = AttentionWithContext()(xa)
    xb2 = AttentionWithContext()(xb)

    ya = xa2
    yb = xb2
    
    ya = Dropout(0.5)(ya)
    yb = Dropout(0.5)(yb)
    # Both branch vectors are concatenated and classified jointly.
    y = Concatenate(axis=1)([ya,yb])
    y = Dense(30, kernel_initializer='he_normal', activation='sigmoid')(y)        
    return y
    
#inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
#inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
#outputya = get_model_att0(inputxa,inputxb)
#model1 = Model([inputxa,inputxb], outputya)
#model1.summary() 

def get_result_att1(timei):
    """Train the att1 model on every fold of ``skf`` and write its predictions to CSV.

    For each fold a fresh model is trained on augmented batches, the checkpoint
    with the best ``val_f1_score`` is reloaded, and predictions are made for the
    test set and for the fold's held-out part. Afterwards two CSV files are
    written to ``st_csv``: the test predictions averaged over the folds and the
    out-of-fold predictions. Out-of-fold labels are not written by this
    function (get_result_att0 writes them as ``*_att0_train_yy.csv``).

    Args:
        timei: run index; used only in the checkpoint and CSV file names.
    """
    xx_cv = []      # best val_f1_score per fold (collected, not reported)
    xx_pre = []     # test-set predictions per fold
    xx_train = []   # out-of-fold predictions per fold
    early_stopping = EarlyStopping(patience=3,
                                   verbose=1,
                                   monitor='val_f1_score',
                                   mode='max'
                                  )

    reduce_lr = ReduceLROnPlateau(factor=0.1,
                                  patience=2,
                                  min_lr=0.00001,
                                  verbose=1,
                                  monitor='val_f1_score',
                                  mode='max',
                                 )
    foldi = -1
    for train_in,test_in in skf.split(train_words,Y):
        foldi = foldi+1

        # Training sequences stay unpadded: DataGenerator concatenates raw id lists
        # for augmentation and pads each batch itself.
        X_train = [train_words[i] for i in train_in]
        X_train1 = [train_chars[i] for i in train_in]

        X_test = get_pad_seq([train_words[i] for i in test_in])
        X_test1 = get_pad_char_seq([train_chars[i] for i in test_in])

        y_train,y_test = Y[train_in],Y[test_in]

        params = {'batch_size': 128,
                  'aug':128,
                  'shuffle': True}

        training_generator = DataGenerator(X_train,X_train1,y_train, **params)

        inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
        inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
        outputya = get_model_att1(inputxa,inputxb)
        model1 = Model([inputxa,inputxb], outputya)

        model1.compile(loss='binary_crossentropy',
                      optimizer="adam",
                      metrics=[f1_score],
                     )
        filename = st_model+str(foldi)+'_'+str(timei)+'.att1'
        model_checkpoint = ModelCheckpoint(filename,
                                       save_best_only=True,
                                       verbose=1,
                                       monitor='val_f1_score',
                                       mode='max'
                                      )

        # validation_data is passed as the documented (inputs, targets) tuple.
        history = model1.fit_generator(generator=training_generator,
                        validation_data=([X_test,X_test1],y_test),
                        epochs=100,
                        callbacks=[early_stopping, model_checkpoint, reduce_lr],
                       )

        # Predict with the best checkpoint rather than the last epoch's weights.
        model1.load_weights(filename)
        xx_pre.append(model1.predict([test_X,test_X1],batch_size = 128))
        xx_cv.append(np.max(history.history['val_f1_score']))

        xx_train.append(model1.predict([X_test,X_test1],batch_size = 128))

    # Average over the folds actually trained instead of a hard-coded 5.
    s = sum(xx_pre) / len(xx_pre)

    cols = [str(j) for j in range(30)]

    # After the loop foldi is the index of the last fold; it is kept in the file
    # names so existing consumers of these CSVs still find them.
    res = pd.DataFrame(s,columns=cols)
    res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att1.csv',index=False)

    train_res = np.concatenate(xx_train, axis=0)
    train_res = pd.DataFrame(train_res,columns=cols)
    train_res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att1_train.csv',index=False)

In [55]:
# Run the full cross-validated att1 training five times; the loop index becomes
# the run id in checkpoint and CSV file names.
for i in range(0,5):
    get_result_att1(i)

Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.00000, saving model to ../data/st_model/0_4.att1
Epoch 2/100

Epoch 00002: val_f1_score improved from 0.00000 to 0.01106, saving model to ../data/st_model/0_4.att1
Epoch 3/100

Epoch 00003: val_f1_score improved from 0.01106 to 0.27308, saving model to ../data/st_model/0_4.att1
Epoch 4/100

Epoch 00004: val_f1_score improved from 0.27308 to 0.36915, saving model to ../data/st_model/0_4.att1
Epoch 5/100

Epoch 00005: val_f1_score improved from 0.36915 to 0.39085, saving model to ../data/st_model/0_4.att1
Epoch 6/100

Epoch 00006: val_f1_score improved from 0.39085 to 0.39239, saving model to ../data/st_model/0_4.att1
Epoch 7/100

Epoch 00007: val_f1_score improved from 0.39239 to 0.46331, saving model to ../data/st_model/0_4.att1
Epoch 8/100

Epoch 00008: val_f1_score improved from 0.46331 to 0.50314, saving model to ../data/st_model/0_4.att1
Epoch 9/100

Epoch 00009: val_f1_score improved from 0.50314 to 0.56175, saving mod

In [57]:
def get_model_att2(x0,x1):
    """Wire up the two-branch BiGRU + attention classifier.

    Each input goes through a frozen pre-trained embedding and a trainable
    100-d embedding (the latter with spatial dropout); both are concatenated,
    encoded by a bidirectional GRU, pooled with ``AttentionWithContext`` and
    mapped to 30 sigmoid scores.  The two branches' scores are averaged.

    Args:
        x0: Input tensor of length ``MAX_SEQUENCE_LENGTH`` (the callers in
            this notebook feed padded word sequences).
        x1: Input tensor of length ``MAX_SEQUENCE_LENGTH1`` (the callers in
            this notebook feed padded character sequences).

    Returns:
        Tensor with 30 values per sample: the mean of both branch outputs.
    """
    word_emb_fixed = Embedding(nb_words + 1, EMBEDDING_DIM,
                               weights=[word_embedding_matrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               name='lc_embed0',
                               trainable=False)
    word_emb_learned = Embedding(nb_words + 1, 100,
                                 input_length=MAX_SEQUENCE_LENGTH,
                                 name='lc_embed1',
                                 trainable=True)
    char_emb_fixed = Embedding(nb_words1 + 1, EMBEDDING_DIM,
                               weights=[word_embedding_matrix1],
                               input_length=MAX_SEQUENCE_LENGTH1,
                               name='lc_embed2',
                               trainable=False)
    char_emb_learned = Embedding(nb_words1 + 1, 100,
                                 input_length=MAX_SEQUENCE_LENGTH1,
                                 name='lc_embed3',
                                 trainable=True)

    # Embed both inputs; only the trainable embeddings get spatial dropout.
    word_fixed = word_emb_fixed(x0)
    word_learned = word_emb_learned(x0)
    char_fixed = char_emb_fixed(x1)
    char_learned = char_emb_learned(x1)
    word_learned = SpatialDropout1D(0.5)(word_learned)
    char_learned = SpatialDropout1D(0.5)(char_learned)

    # Join frozen and trainable embeddings along the feature axis.
    word_feats = Concatenate(axis=2)([word_fixed, word_learned])
    char_feats = Concatenate(axis=2)([char_fixed, char_learned])

    word_seq = Bidirectional(GRU(256, return_sequences=True, dropout=0.25))(word_feats)
    char_seq = Bidirectional(GRU(256, return_sequences=True, dropout=0.25))(char_feats)

    # Attention pooling over the time axis, then dropout before the classifier.
    word_vec = AttentionWithContext()(word_seq)
    char_vec = AttentionWithContext()(char_seq)

    word_vec = Dropout(0.5)(word_vec)
    char_vec = Dropout(0.5)(char_vec)

    word_out = Dense(30, kernel_initializer='he_normal', activation='sigmoid')(word_vec)
    char_out = Dense(30, kernel_initializer='he_normal', activation='sigmoid')(char_vec)

    # Final prediction is the plain average of the two branch outputs.
    return Lambda(lambda pair: (pair[0] + pair[1]) / 2)([word_out, char_out])
    
# Build one instance of the att2 network so its architecture can be printed.
inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
outputya = get_model_att2(inputxa, inputxb)
model1 = Model(inputs=[inputxa, inputxb], outputs=outputya)
model1.summary()

def get_result_att2(timei):
    """Cross-validate the ``att2`` network and write its predictions to CSV.

    For every fold yielded by ``skf.split(train_words, Y)`` a fresh model is
    built with ``get_model_att2``, trained through ``DataGenerator`` and
    checkpointed on its best ``val_f1_score``.  The best weights are reloaded
    to predict the test set and the fold's held-out rows.

    Files written to ``st_csv`` (``<k>`` is the index of the last fold):
        ``<k>_<timei>_att2.csv``        test predictions averaged over folds.
        ``<k>_<timei>_att2_train.csv``  held-out predictions, concatenated in
                                        the order the folds were processed.

    Args:
        timei: Repetition index; only used in checkpoint and CSV file names.

    Returns:
        list: best ``val_f1_score`` of each fold.  The function used to return
        ``None``; callers that ignore the result are unaffected.
    """
    fold_scores = []   # best validation F1 per fold
    test_preds = []    # one test-set prediction matrix per fold
    oof_preds = []     # predictions on each fold's held-out rows

    # The same callback objects are handed to every fold's training run.
    early_stopping = EarlyStopping(patience=3,
                                   verbose=1,
                                   monitor='val_f1_score',
                                   mode='max'
                                  )

    reduce_lr = ReduceLROnPlateau(factor=0.1,
                                  patience=2,
                                  min_lr=0.00001,
                                  verbose=1,
                                  monitor='val_f1_score',
                                  mode='max',
                                 )
    foldi = -1
    for train_in, test_in in skf.split(train_words, Y):
        foldi = foldi + 1

        # Training sequences are passed to DataGenerator as plain lists;
        # only the validation sequences are padded here.
        X_train = [train_words[i] for i in train_in]
        X_train1 = [train_chars[i] for i in train_in]
        X_test = get_pad_seq([train_words[i] for i in test_in])
        X_test1 = get_pad_char_seq([train_chars[i] for i in test_in])
        y_train, y_test = Y[train_in], Y[test_in]

        params = {'batch_size': 64,
                  'aug': 128+64,
                  'shuffle': True}
        training_generator = DataGenerator(X_train, X_train1, y_train, **params)

        # Fresh model for every fold so no weights carry over between folds.
        inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
        inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
        outputya = get_model_att2(inputxa, inputxb)
        model1 = Model([inputxa, inputxb], outputya)

        model1.compile(loss='binary_crossentropy',
                       optimizer="nadam",
                       metrics=[f1_score],
                      )
        filename = st_model+str(foldi)+'_'+str(timei)+'.att2'
        model_checkpoint = ModelCheckpoint(filename,
                                           save_best_only=True,
                                           verbose=1,
                                           monitor='val_f1_score',
                                           mode='max'
                                          )

        history = model1.fit_generator(generator=training_generator,
                                       validation_data=([X_test, X_test1], y_test),
                                       epochs=100,
                                       callbacks=[early_stopping, model_checkpoint, reduce_lr],
                                      )

        # Go back to the best checkpoint before predicting.
        model1.load_weights(filename)
        test_preds.append(model1.predict([test_X, test_X1], batch_size=128))
        fold_scores.append(np.max(history.history['val_f1_score']))
        oof_preds.append(model1.predict([X_test, X_test1], batch_size=128))

    # Average over the folds that were actually run instead of assuming 5.
    s = sum(test_preds) / len(test_preds)

    cols = [str(j) for j in range(s.shape[1])]
    # foldi now holds the index of the last fold, so it becomes the file prefix.
    res = pd.DataFrame(s, columns=cols)
    res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att2.csv', index=None)

    train_res = pd.DataFrame(np.concatenate(oof_preds, axis=0), columns=cols)
    train_res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_att2_train.csv', index=None)

    return fold_scores

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 128)          0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 200)          0                                            
__________________________________________________________________________________________________
lc_embed1 (Embedding)           (None, 128, 100)     990600      input_5[0][0]                    
__________________________________________________________________________________________________
lc_embed3 (Embedding)           (None, 200, 100)     281300      input_6[0][0]                    
__________________________________________________________________________________________________
lc_embed0 

In [58]:
# Repeat the complete cross-validated att2 training for repetitions 0..4.
for i in range(5):
    get_result_att2(i)

Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.07486, saving model to ../data/st_model/0_0.att2
Epoch 2/100

Epoch 00002: val_f1_score did not improve from 0.07486
Epoch 3/100

Epoch 00003: val_f1_score did not improve from 0.07486

Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 4/100

Epoch 00004: val_f1_score did not improve from 0.07486
Epoch 00004: early stopping
Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.53959, saving model to ../data/st_model/1_0.att2
Epoch 2/100

Epoch 00002: val_f1_score did not improve from 0.53959
Epoch 3/100

Epoch 00003: val_f1_score did not improve from 0.53959

Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 4/100

Epoch 00004: val_f1_score did not improve from 0.53959
Epoch 00004: early stopping
Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.58139, saving model to ../data/st_model/2_0.att2
Epoch 2/100

Epoch 00002: val_f1_sco

In [57]:
def get_model_cap0(x0,x1):
    """Wire up the two-branch BiGRU + capsule classifier.

    Each input goes through a frozen pre-trained embedding and a trainable
    100-d embedding (the latter with spatial dropout); both are concatenated,
    encoded by a bidirectional GRU and summarised by a ``Capsule`` layer.
    The flattened capsule outputs of both branches are concatenated and fed
    to one 30-unit sigmoid layer.

    Args:
        x0: Input tensor of length ``MAX_SEQUENCE_LENGTH`` (the callers in
            this notebook feed padded word sequences).
        x1: Input tensor of length ``MAX_SEQUENCE_LENGTH1`` (the callers in
            this notebook feed padded character sequences).

    Returns:
        Tensor with 30 sigmoid scores per sample.
    """
    word_emb_fixed = Embedding(nb_words + 1, EMBEDDING_DIM,
                               weights=[word_embedding_matrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               name='lc_embed0',
                               trainable=False)
    word_emb_learned = Embedding(nb_words + 1, 100,
                                 input_length=MAX_SEQUENCE_LENGTH,
                                 name='lc_embed1',
                                 trainable=True)
    char_emb_fixed = Embedding(nb_words1 + 1, EMBEDDING_DIM,
                               weights=[word_embedding_matrix1],
                               input_length=MAX_SEQUENCE_LENGTH1,
                               name='lc_embed2',
                               trainable=False)
    char_emb_learned = Embedding(nb_words1 + 1, 100,
                                 input_length=MAX_SEQUENCE_LENGTH1,
                                 name='lc_embed3',
                                 trainable=True)

    # Embed both inputs; only the trainable embeddings get spatial dropout.
    word_fixed = word_emb_fixed(x0)
    word_learned = word_emb_learned(x0)
    char_fixed = char_emb_fixed(x1)
    char_learned = char_emb_learned(x1)
    word_learned = SpatialDropout1D(0.5)(word_learned)
    char_learned = SpatialDropout1D(0.5)(char_learned)

    # Join frozen and trainable embeddings along the feature axis.
    word_feats = Concatenate(axis=2)([word_fixed, word_learned])
    char_feats = Concatenate(axis=2)([char_fixed, char_learned])

    word_seq = Bidirectional(GRU(128, return_sequences=True, dropout=0.5))(word_feats)
    char_seq = Bidirectional(GRU(128, return_sequences=True, dropout=0.5))(char_feats)

    # One capsule layer per branch, flattened to a vector.
    word_caps = Capsule(num_capsule=10, dim_capsule=32, routings=5, share_weights=True)(word_seq)
    word_vec = Flatten()(word_caps)

    char_caps = Capsule(num_capsule=10, dim_capsule=32, routings=5, share_weights=True)(char_seq)
    char_vec = Flatten()(char_caps)

    word_vec = Dropout(0.5)(word_vec)
    char_vec = Dropout(0.5)(char_vec)

    # A single classifier sees both branches side by side.
    merged = Concatenate(axis=1)([word_vec, char_vec])
    return Dense(30, kernel_initializer='he_normal', activation='sigmoid')(merged)


def get_result_cap0(timei):
    """Cross-validate the ``cap0`` network and write its predictions to CSV.

    For every fold yielded by ``skf.split(train_words, Y)`` a fresh model is
    built with ``get_model_cap0``, trained through ``DataGenerator`` and
    checkpointed on its best ``val_f1_score``.  The best weights are reloaded
    to predict the test set and the fold's held-out rows.

    Files written to ``st_csv`` (``<k>`` is the index of the last fold):
        ``<k>_<timei>_cap0.csv``        test predictions averaged over folds.
        ``<k>_<timei>_cap0_train.csv``  held-out predictions, concatenated in
                                        the order the folds were processed.

    Args:
        timei: Repetition index; only used in checkpoint and CSV file names.

    Returns:
        list: best ``val_f1_score`` of each fold.  The function used to return
        ``None``; callers that ignore the result are unaffected.
    """
    fold_scores = []   # best validation F1 per fold
    test_preds = []    # one test-set prediction matrix per fold
    oof_preds = []     # predictions on each fold's held-out rows

    # The same callback objects are handed to every fold's training run.
    early_stopping = EarlyStopping(patience=3,
                                   verbose=1,
                                   monitor='val_f1_score',
                                   mode='max'
                                  )

    reduce_lr = ReduceLROnPlateau(factor=0.1,
                                  patience=2,
                                  min_lr=0.00001,
                                  verbose=1,
                                  monitor='val_f1_score',
                                  mode='max',
                                 )
    foldi = -1
    for train_in, test_in in skf.split(train_words, Y):
        foldi = foldi + 1

        # Training sequences are passed to DataGenerator as plain lists;
        # only the validation sequences are padded here.
        X_train = [train_words[i] for i in train_in]
        X_train1 = [train_chars[i] for i in train_in]
        X_test = get_pad_seq([train_words[i] for i in test_in])
        X_test1 = get_pad_char_seq([train_chars[i] for i in test_in])
        y_train, y_test = Y[train_in], Y[test_in]

        params = {'batch_size': 128,
                  'aug': 128,
                  'shuffle': True}
        training_generator = DataGenerator(X_train, X_train1, y_train, **params)

        # Fresh model for every fold so no weights carry over between folds.
        inputxa = Input(shape=(MAX_SEQUENCE_LENGTH,))
        inputxb = Input(shape=(MAX_SEQUENCE_LENGTH1,))
        outputya = get_model_cap0(inputxa, inputxb)
        model1 = Model([inputxa, inputxb], outputya)

        model1.compile(loss='binary_crossentropy',
                       optimizer="nadam",
                       metrics=[f1_score],
                      )
        filename = st_model+str(foldi)+'_'+str(timei)+'.cap0'
        model_checkpoint = ModelCheckpoint(filename,
                                           save_best_only=True,
                                           verbose=1,
                                           monitor='val_f1_score',
                                           mode='max'
                                          )

        history = model1.fit_generator(generator=training_generator,
                                       validation_data=([X_test, X_test1], y_test),
                                       epochs=100,
                                       callbacks=[early_stopping, model_checkpoint, reduce_lr],
                                      )

        # Go back to the best checkpoint before predicting.
        model1.load_weights(filename)
        test_preds.append(model1.predict([test_X, test_X1], batch_size=128))
        fold_scores.append(np.max(history.history['val_f1_score']))
        oof_preds.append(model1.predict([X_test, X_test1], batch_size=128))

    # Average over the folds that were actually run instead of assuming 5.
    s = sum(test_preds) / len(test_preds)

    cols = [str(j) for j in range(s.shape[1])]
    # foldi now holds the index of the last fold, so it becomes the file prefix.
    res = pd.DataFrame(s, columns=cols)
    res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_cap0.csv', index=None)

    train_res = pd.DataFrame(np.concatenate(oof_preds, axis=0), columns=cols)
    train_res.to_csv(st_csv+str(foldi)+'_'+str(timei)+'_cap0_train.csv', index=None)

    return fold_scores

In [59]:
# Repeat the complete cross-validated cap0 training for repetitions 0..4.
for i in range(5):
    get_result_cap0(i)

Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.01509, saving model to ../data/st_model/0_0.cap0
Epoch 2/100

Epoch 00002: val_f1_score improved from 0.01509 to 0.51536, saving model to ../data/st_model/0_0.cap0
Epoch 3/100

Epoch 00003: val_f1_score improved from 0.51536 to 0.56590, saving model to ../data/st_model/0_0.cap0
Epoch 4/100

Epoch 00004: val_f1_score improved from 0.56590 to 0.59298, saving model to ../data/st_model/0_0.cap0
Epoch 5/100

Epoch 00005: val_f1_score improved from 0.59298 to 0.60363, saving model to ../data/st_model/0_0.cap0
Epoch 6/100

Epoch 00006: val_f1_score improved from 0.60363 to 0.60767, saving model to ../data/st_model/0_0.cap0
Epoch 7/100

Epoch 00007: val_f1_score improved from 0.60767 to 0.61975, saving model to ../data/st_model/0_0.cap0
Epoch 8/100

Epoch 00008: val_f1_score did not improve from 0.61975
Epoch 9/100

Epoch 00009: val_f1_score did not improve from 0.61975

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00

In [54]:
# Weighted average of the saved test predictions.
# Per repetition the three model weights add up to 1; the total is then
# divided by the 5 repetitions.
blend_weights = [('_att0.csv', 0.85),
                 ('_att1.csv', 0.05),
                 ('_cap0.csv', 0.1)]
s = 0
for i in range(5):
    for file_suffix, model_weight in blend_weights:
        fn = st_csv+'4_'+str(i)+file_suffix
        a = pd.read_csv(fn)
        s = s+np.array(a.values)*model_weight

testy = s/5


In [59]:
# Quick-and-dirty stacking; it scores slightly worse than plain averaging.
# Feature layout: 3 models x 5 repetitions x 30 class scores = 450 columns,
# ordered att0 (0-149), att1 (150-299), cap0 (300-449).
# NOTE(review): this assumes the rows of every *_train.csv line up with the
# rows of the label file read below - confirm the fold order is identical
# across repetitions and models.
new_test = np.zeros((test_X.shape[0],30*15))
new_train = np.zeros((len(train),30*15))
new_trainy = np.zeros((len(train),30))

stack_sources = [('_att0.csv', '_att0_train.csv'),
                 ('_att1.csv', '_att1_train.csv'),
                 ('_cap0.csv', '_cap0_train.csv')]
for model_pos, (test_suffix, train_suffix) in enumerate(stack_sources):
    for i in range(5):
        col_lo = 30*5*model_pos + 30*i
        col_hi = col_lo + 30
        new_test[:,col_lo:col_hi] = pd.read_csv(st_csv+'4_'+str(i)+test_suffix).values
        new_train[:,col_lo:col_hi] = pd.read_csv(st_csv+'4_'+str(i)+train_suffix).values

# Labels come from the file saved next to the first att0 repetition.
fn = st_csv+'4_0_att0_train_yy.csv'
trainfyy = pd.read_csv(fn)
new_trainy = trainfyy.values


In [60]:
# Folder for the stacking checkpoints.  makedirs creates missing parent
# folders too and does not fail when the folder already exists.
st_stack = '../data/st_stack/'
os.makedirs(st_stack, exist_ok=True)

In [61]:
def stackingmodel(x):
    """Build the stacking head on top of the input tensor ``x``.

    Five identical stages of Dense(1024) -> BatchNormalization -> PReLU ->
    Dropout(0.5) are followed by a 30-unit sigmoid output layer.

    Args:
        x: Keras tensor holding the stacked first-level predictions.

    Returns:
        Tensor with 30 sigmoid scores per sample.
    """
    hidden = x
    for _ in range(5):
        hidden = Dense(1024, kernel_initializer='he_normal')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = PReLU()(hidden)
        hidden = Dropout(0.5)(hidden)
    return Dense(30, kernel_initializer='he_normal', activation='sigmoid')(hidden)
# Instantiate the stacking network once so its architecture can be printed.
inputx = Input(shape=(new_train.shape[1],))
outputy = stackingmodel(inputx)
stackmodel = Model(inputs=inputx, outputs=outputy)
stackmodel.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_57 (InputLayer)        (None, 450)               0         
_________________________________________________________________
dense_57 (Dense)             (None, 1024)              461824    
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
p_re_lu_1 (PReLU)            (None, 1024)              1024      
_________________________________________________________________
dropout_57 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_58 (Dense)             (None, 1024)              1049600   
_________________________________________________________________
batch_normalization_2 (Batch (None, 1024)              4096      
__________

In [62]:
# Train the stacking network with N-fold cross-validation on the first-level
# predictions and keep one test-set prediction per fold.
N = 5
# Fixed seed: the shuffled folds used to change on every run, which made the
# reported CV score impossible to reproduce.
STACK_SEED = 2018
skf1 = KFold(n_splits=N, shuffle=True, random_state=STACK_SEED)
xx_cv = []    # best validation F1 per fold
xx_pre = []   # test-set predictions per fold

early_stopping = EarlyStopping(patience=10,
                               verbose=1,
                               monitor='val_f1_score',
                               mode='max'
                              )

reduce_lr = ReduceLROnPlateau(factor=0.1,
                              patience=5,
                              min_lr=0.00001,
                              verbose=1,
                              monitor='val_f1_score',
                              mode='max',
                             )
epochs = 100
batch_size = 128
for foldi, (train_in, test_in) in enumerate(skf1.split(new_train, new_trainy)):
    traina, vala = new_train[train_in], new_train[test_in]
    trainay, valay = new_trainy[train_in], new_trainy[test_in]

    # Fresh model for every fold so no weights carry over between folds.
    inputx = Input(shape=(new_train.shape[1],))
    outputy = stackingmodel(inputx)
    stackmodel = Model(inputx, outputy)

    stackmodel.compile(loss='binary_crossentropy',
                       optimizer="adam",
                       metrics=[f1_score],
                      )
    filename = st_stack+str(foldi)+'.stacking'
    model_checkpoint = ModelCheckpoint(filename,
                                       save_best_only=True,
                                       verbose=1,
                                       monitor='val_f1_score',
                                       mode='max'
                                      )

    history = stackmodel.fit(traina, trainay,
                             validation_data=(vala, valay),
                             epochs=epochs,
                             batch_size=batch_size,
                             callbacks=[early_stopping, model_checkpoint, reduce_lr])

    # Predict the test set with the best checkpoint of this fold.
    stackmodel.load_weights(filename)
    ttesty = stackmodel.predict(new_test, batch_size=batch_size)
    xx_pre.append(ttesty)
    xx_cv.append(np.max(history.history['val_f1_score']))
    

Train on 8523 samples, validate on 2131 samples
Epoch 1/100

Epoch 00001: val_f1_score improved from -inf to 0.56802, saving model to ../data/st_stack/0.stacking
Epoch 2/100

Epoch 00002: val_f1_score improved from 0.56802 to 0.60164, saving model to ../data/st_stack/0.stacking
Epoch 3/100

Epoch 00003: val_f1_score improved from 0.60164 to 0.60347, saving model to ../data/st_stack/0.stacking
Epoch 4/100

Epoch 00004: val_f1_score improved from 0.60347 to 0.61974, saving model to ../data/st_stack/0.stacking
Epoch 5/100

Epoch 00005: val_f1_score did not improve from 0.61974
Epoch 6/100

Epoch 00006: val_f1_score improved from 0.61974 to 0.62203, saving model to ../data/st_stack/0.stacking
Epoch 7/100

Epoch 00007: val_f1_score improved from 0.62203 to 0.62972, saving model to ../data/st_stack/0.stacking
Epoch 8/100

Epoch 00008: val_f1_score did not improve from 0.62972
Epoch 9/100

Epoch 00009: val_f1_score did not improve from 0.62972
Epoch 10/100

Epoch 00010: val_f1_score did not i

In [63]:
# Mean of the best validation F1 over the stacking folds.
print(np.mean(xx_cv))

0.6388790937156792


In [82]:
# Same check as the previous cell, left over from a later execution.
print(np.mean(xx_cv))

0.6412328714541581


In [64]:
# Average the test predictions of the stacking folds.  Dividing by the number
# of collected predictions keeps this correct if N is changed above.
s = 0
for fold_pred in xx_pre:
    s = s + fold_pred
testy = s / len(xx_pre)

In [59]:
# Threshold tuning; the second-round submission had about 3030 rows.
th = 0.53
yy = testy.copy()
ans = []
n_labels = yy.shape[1]
for i in range(len(testy)):
    scores = yy[i]
    if np.max(scores) <= th:
        # Nothing clears the threshold: fall back to the single best class.
        chosen = [np.argmax(scores)]
    else:
        # Otherwise emit every class whose score exceeds the threshold.
        chosen = [j for j in range(n_labels) if scores[j] > th]
    for x in chosen:
        # Class index x encodes subject * 3 + sentiment (see Y3 above).
        ans.append({'content_id': test.loc[i,'content_id'],
                    'subject': subject[x//3],
                    'sentiment_value': sentiment_value[x%3]})
print (len(ans))

3049


In [60]:
# Turn the collected records into a frame and add an empty sentiment_word column.
ans = pd.DataFrame(ans).assign(sentiment_word=None)
print(ans.shape)


(3049, 4)


In [61]:
# Write the submission file with its columns in a fixed order, without the index.
submit_cols = ['content_id','subject','sentiment_value','sentiment_word']
ans[submit_cols].to_csv('../submit/1111_3049_75.csv',index=None)