In [1]:
import numpy as np
import pandas as pd
from keras.layers import Dense, Input, Flatten
from keras.layers import GlobalMaxPool1D, Bidirectional, Convolution1D, Embedding, BatchNormalization,MaxPooling1D, Dropout, LSTM
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints
from keras.models import Model
from keras.layers.merge import Concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [2]:
INPUT_PATH = '../input/'
CACHE_PATH = '../cache/'
OUTPUT_PATH ='../output/'

In [3]:
data = np.load(CACHE_PATH + 'data.npz')
X_train = data['X_train']
y_train = data['y_train']
X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
embedding_matrix = np.load(CACHE_PATH + 'embedding_matrix.npy')

In [4]:
MAX_FEATURES = 20000
MAX_SEQUENCE_LENGTH = 300
EMBEDDING_DIM = 300

In [5]:
embedding_layer = Embedding(MAX_FEATURES,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

adam 优化器比SGD要好，自适应学习率

In [6]:
def get_lstm_model():
    inp = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(inp)
    x = Bidirectional(LSTM(64, dropout=0.25, recurrent_dropout=0.25, return_sequences=True))(embedded_sequences)
    x = Bidirectional(LSTM(32, dropout=0.25, recurrent_dropout=0.25, return_sequences=True))(embedded_sequences)
    x = Bidirectional(LSTM(32, dropout=0.25, recurrent_dropout=0.25, return_sequences=True))(embedded_sequences)
    x = GlobalMaxPool1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.1)(x)
    x = Dense(1, activation="linear")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='mse',optimizer='adam')
    return model

In [7]:
def train_lstm_model(model):
    model_path = CACHE_PATH + "lstm_weights_best.hdf5"
    early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
    checkpoint = ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint, early]
    model.fit(X_train, y_train, batch_size=128, epochs=100, validation_data=(X_val, y_val), callbacks=callbacks_list)
    model.load_weights(model_path)
    return model

In [8]:
model = get_lstm_model()

In [9]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 300)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 300, 300)          6000000   
_________________________________________________________________
bidirectional_3 (Bidirection (None, 300, 64)           85248     
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 64)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               8320      
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
__________

In [10]:
from keras.utils.vis_utils import plot_model
plot_model(model,to_file='./lstm.png')

In [None]:
model = train_lstm_model(model)

Train on 209000 samples, validate on 11000 samples
Epoch 1/100
  6144/209000 [..............................] - ETA: 15:31 - loss: 3.7755

In [12]:
y_test = model.predict(X_test,batch_size=128,verbose=1)
y_test[y_test < 1] = 1
y_test[y_test > 4.7] = 5



In [13]:
import datetime
time = datetime.datetime.now()

In [14]:
sub = pd.read_csv(INPUT_PATH + 'sample.csv',header=None,names=['Id','Score'])
sub['Score'] = y_test
sub.to_csv(OUTPUT_PATH + 'lstm_{}.csv'.format(time.strftime('%Y-%m-%d-%H:%M:%S')),index=False, header=False)

In [16]:
df_predict = pd.read_csv(CACHE_PATH+'test_pred_model.csv')
columnsname = time.strftime("%d-%H-%M")
df_predict[columnsname] = y_test
df_predict.to_csv(CACHE_PATH+'test_pred_model.csv',index=False)

df_train = pd.read_csv(CACHE_PATH + 'train_pred_model.csv')
df_train = df_train[df_train['Score'].notnull()].reset_index(drop=True)
y_train = model.predict(X_train,batch_size=128,verbose=1)
y_train[y_train < 1] = 1
y_train[y_train > 4.7] = 5
df_train[columnsname] = y_train
df_train.to_csv(CACHE_PATH+'train_pred_model.csv',index=False)



ValueError: Length of values does not match length of index