In [1]:
from time import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout, MaxPooling1D, Conv1D
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.callbacks import TensorBoard

from numpy.random import seed
from tensorflow import set_random_seed
# Use one seed value for both NumPy and TensorFlow so runs are repeatable.
SEED = 3141593
seed(SEED)
set_random_seed(SEED)

# Raw competition tables: labelled training rows and unlabelled test rows.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

def encode(train, test):
    """Encode the species names as integers and strip non-feature columns.

    Parameters
    ----------
    train : pandas.DataFrame
        Training table; must contain the `species` and `id` columns.
    test : pandas.DataFrame
        Test table; must contain the `id` column.

    Returns
    -------
    tuple
        ``(train_features, labels, test_features, classes)`` where
        ``train_features`` is `train` without `species`/`id`, ``labels`` is
        the integer-encoded `species` column, ``test_features`` is `test`
        without `id`, and ``classes`` is the list of species names indexed
        by their encoded label.
    """
    species_encoder = LabelEncoder()
    species_encoder.fit(train.species)

    encoded_species = species_encoder.transform(train.species)
    species_names = list(species_encoder.classes_)

    train_features = train.drop(columns=['species', 'id'])
    test_features = test.drop(columns='id')

    return train_features, encoded_species, test_features, species_names

# Split the raw tables into feature frames, integer labels and class names.
train, labels, test, classes = encode(train, test)

# Standardize features: the scaler is fitted on the training rows only and the
# same transform is then applied to both train and test.
# Both calls receive plain ndarrays (`.values`) so the input type at transform
# time matches what the scaler was fitted on.
scaler = StandardScaler().fit(train.values)
scaled_train = scaler.transform(train.values)
scaled_test = scaler.transform(test.values)

# Split train data into train and validation (90% / 10%), stratified by label.
# Only one split is needed, so request exactly one and take it directly;
# with the default n_splits a `for` loop would generate every split and
# silently keep only the last one.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=123)
train_index, valid_index = next(sss.split(scaled_train, labels))
X_train, X_valid = scaled_train[train_index], scaled_train[valid_index]
y_train, y_valid = labels[train_index], labels[valid_index]

# Width of each of the three column groups the features are reshaped into.
feature_number = 64
# Number of distinct species (output units of the classifier).
class_number = len(classes)

# One-hot encode the integer labels for categorical cross-entropy.
y_train = np_utils.to_categorical(y_train, class_number)
y_valid = np_utils.to_categorical(y_valid, class_number)

def to_channels(flat, width, channels=3):
    """Rearrange flat feature rows into a (samples, width, channels) array.

    Column block ``c`` of `flat` (columns ``c * width`` up to
    ``(c + 1) * width``) becomes channel ``c`` of the result.

    Parameters
    ----------
    flat : array-like, shape (samples, width * channels)
        Flat feature matrix.
    width : int
        Number of columns that make up one channel.
    channels : int, optional
        Number of consecutive column blocks in `flat` (default 3).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(samples, width, channels)``.

    Raises
    ------
    ValueError
        If `flat` is not 2-D or does not have ``width * channels`` columns.
    """
    flat = np.asarray(flat)
    if flat.ndim != 2 or flat.shape[1] != width * channels:
        raise ValueError(
            "expected a 2-D array with {} columns, got shape {}".format(
                width * channels, flat.shape))
    stacked = np.zeros((len(flat), width, channels))
    for channel in range(channels):
        start = channel * width
        stacked[:, :, channel] = flat[:, start:start + width]
    return stacked


# reshape train, validation and test data into (samples, feature_number, 3)
X_train_r = to_channels(X_train, feature_number)
X_valid_r = to_channels(X_valid, feature_number)
test_r = to_channels(scaled_test, feature_number)

# one-hot labels for the complete training set; sizes are derived from the
# data instead of being hard-coded
y_train_all = np.zeros((len(labels), class_number), dtype=int)
y_train_all[np.arange(len(labels)), labels] = 1

# 1-D convolutional classifier: a single convolution over the feature axis,
# followed by dropout and a stack of fully connected layers.
model = Sequential()
# Input shape is tied to feature_number so it cannot drift from the reshaped
# data (feature_number positions x 3 channels).
model.add(Conv1D(input_shape=(feature_number, 3), filters=1024, kernel_size=6))
model.add(Activation('relu'))
model.add(Flatten())
# NOTE(review): Keras' `rate` is the fraction of units that is DROPPED, so this
# drops 80% of the activations. If the intent was to drop 20% (i.e. the
# expression was copied from the TensorFlow `keep_prob` deprecation notice),
# this should be `rate=0.2` -- confirm before changing, since it alters the
# trained model.
model.add(Dropout(rate=1 - 0.2))
model.add(Dense(2048, activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
# One output unit per species, normalised to probabilities by softmax.
model.add(Dense(class_number))
model.add(Activation('softmax'))

adam = Adam(lr=1e-4, decay=1e-5)

# Each run logs to its own timestamped directory.
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

model.fit(X_train_r, y_train, epochs=100, validation_data=(X_valid_r, y_valid), batch_size=32, callbacks = [tensorboard])

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 891 samples, validate on 99 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x2d1e23eccc0>

In [2]:
# Predict class probabilities for the test set and write them in the same
# layout as the sample submission (id first, then one column per class).
pred = model.predict(test_r)
sample_sub = pd.read_csv('sample_submission.csv')

# Guard against silently misaligned ids: predictions are matched to the sample
# file purely by row position.
if len(pred) != len(sample_sub):
    raise ValueError(
        "prediction count ({}) does not match sample submission rows ({})".format(
            len(pred), len(sample_sub)))

# assumes the sample file's class columns are in the same order as the model's
# output units (the label-encoded classes) -- TODO confirm
submission = pd.DataFrame(data=pred, columns=sample_sub.columns[1:])
# Put `id` in the first column, matching the sample file, rather than appending it last.
submission.insert(0, 'id', sample_sub['id'])
submission.to_csv("submission.csv", sep=',', index=False)