In [90]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout

from keras.callbacks import EarlyStopping, ModelCheckpoint

## 数据准备

读取数据

In [15]:
train_path = './data/train.csv'
test_path = './data/test.csv'

df = pd.read_csv(train_path)

显示数据

In [16]:
df.head()

Unnamed: 0,id,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_85,feat_86,feat_87,feat_88,feat_89,feat_90,feat_91,feat_92,feat_93,target
0,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,Class_1
1,2,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,Class_1
2,3,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,Class_1
3,4,1,0,0,1,6,1,5,0,0,...,0,1,2,0,0,0,0,0,0,Class_1
4,5,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,Class_1


导入数据

In [30]:
def load_data(path, train=True):
    df = pd.read_csv(path)
    X = df.values.copy()
    
    if train:
        np.random.shuffle(X)
        X, label = X[:, 1:-1].astype(np.float32), X[:, -1]
        return X, label
    else:
        X, ids = X[:, 1:].astype(np.float32), X[:, 0].astype(str)
        return X, ids

In [47]:
X_train, y_train = load_data(train_path)
X_test, ids = load_data(test_path, train=False)

预处理，属性归一化

In [48]:
def preprocess_data(X, scaler=None):
    if not scaler:
        scaler = StandardScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    return X, scaler

In [49]:
X_train, scaler = preprocess_data(X_train)
X_test, _ = preprocess_data(X_test, scaler)

One-hot 编码

In [50]:
def preprocess_label(labels, encoder=None, categorical=True):
    if not encoder:
        encoder = LabelEncoder()
        encoder.fit(labels)
    y = encoder.transform(labels).astype(np.int32)
    if categorical:
        y = np_utils.to_categorical(y)
    return y, encoder

In [51]:
y_train, encoder = preprocess_label(y_train)

## 搭建网络模型

In [83]:
dim = X_train.shape[1]
print(dim, 'dims')
print('Building model')

nb_classes = y_train.shape[1]

model = Sequential()

model.add(Dense(256, input_shape=(dim, )))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))


93 dims
Building model


In [84]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [85]:
batch_size = 128
epochs = 50

训练，同时保持最佳模型

In [91]:
fBestModel = 'best_model.h5'
early_stop = EarlyStopping(monitor='val_acc', patience=2, verbose=1)
best_model = ModelCheckpoint(fBestModel, verbose=0, save_best_only=True)

model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, validation_split=0.1, callbacks=[best_model, early_stop])

Train on 55690 samples, validate on 6188 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 00003: early stopping


<keras.callbacks.History at 0x223254d8748>

预测并保存结果

In [95]:
prediction = model.predict(X_test)

In [118]:
columns = ['Class_'+str(post+1) for post in range(9)]
columns

['Class_1',
 'Class_2',
 'Class_3',
 'Class_4',
 'Class_5',
 'Class_6',
 'Class_7',
 'Class_8',
 'Class_9']

In [136]:
1:2

SyntaxError: illegal target for annotation (<ipython-input-136-aaee9a7a446f>, line 1)

In [137]:
df2 = pd.DataFrame({'id' : range(1,num_pre+1)})
df2.head()

Unnamed: 0,id
0,1
1,2
2,3
3,4
4,5


In [120]:
df3 = pd.DataFrame(prediction, columns=columns)
df3.head()

Unnamed: 0,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9
0,3.895429e-07,0.3359313,0.4422675,0.2206697,6.750323e-09,6.259824e-07,0.001129791,6.536018e-08,3.746376e-07
1,0.002526483,0.000145772,2.459417e-05,6.785819e-05,2.699431e-07,0.2312342,0.002153061,0.7614354,0.00241237
2,2.949907e-19,1.045081e-36,0.0,6.838075e-19,0.0,1.0,6.540133999999999e-20,1.165196e-12,7.902789e-21
3,3.705485e-08,0.6742248,0.3131838,0.01257828,3.170758e-08,3.369495e-09,1.303122e-05,5.405844e-08,1.896132e-08
4,0.2703277,6.044691e-08,6.72167e-10,3.815687e-11,1.268215e-09,0.0008218087,2.449777e-05,0.009322176,0.7195038


In [138]:
df_pre = pd.concat([df2, df3], axis=1)


In [139]:
df_pre.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9
0,1,3.895429e-07,0.3359313,0.4422675,0.2206697,6.750323e-09,6.259824e-07,0.001129791,6.536018e-08,3.746376e-07
1,2,0.002526483,0.000145772,2.459417e-05,6.785819e-05,2.699431e-07,0.2312342,0.002153061,0.7614354,0.00241237
2,3,2.949907e-19,1.045081e-36,0.0,6.838075e-19,0.0,1.0,6.540133999999999e-20,1.165196e-12,7.902789e-21
3,4,3.705485e-08,0.6742248,0.3131838,0.01257828,3.170758e-08,3.369495e-09,1.303122e-05,5.405844e-08,1.896132e-08
4,5,0.2703277,6.044691e-08,6.72167e-10,3.815687e-11,1.268215e-09,0.0008218087,2.449777e-05,0.009322176,0.7195038


In [141]:
df_pre.to_csv('predition.csv', index=False)

In [131]:
?df_pre.to_csv