In [1]:
import pandas as pd
from keras.models import Sequential, Model, Input
from keras.layers import RNN, GRU, Dense, Softmax, LSTM, SimpleRNNCell, SimpleRNN, concatenate, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
df_train = pd.read_csv("X_train.csv")
df_targets = pd.read_csv("y_train.csv")
df_test = pd.read_csv("X_test.csv")

In [3]:
# df_targets

In [4]:
df_train = df_train.drop('row_id', axis=1)
df_train = df_train.drop('measurement_number', axis=1)
df_test = df_test.drop('row_id', axis=1)
df_test = df_test.drop('measurement_number', axis=1)

In [5]:
def normalize_orientations(df): #normalizing orientation by taking the square then the sqroot
    q_norm = df['orientation_X'] ** 2 + df['orientation_Y'] ** 2 + df['orientation_Z'] ** 2 + df['orientation_W'] ** 2
    qmod = q_norm ** .5
    df['X_normalized'] = df['orientation_X'] / qmod
    df['Y_normalized'] = df['orientation_Y'] / qmod
    df['Z_normalized'] = df['orientation_Z'] / qmod
    df['W_normalized'] = df['orientation_W'] / qmod
    return df

def sum_columns(df): #taking the sqrt of squared XYZ data for velocity, acceleration, orientation
    df['total_angular_velocity'] = (df['angular_velocity_X'] ** 2 + df['angular_velocity_Y'] ** 2 + df['angular_velocity_Z'] ** 2) * 0.5
    df['total_linear_acceleration'] = (df['linear_acceleration_X'] ** 2 + df['linear_acceleration_Y'] ** 2 + df['linear_acceleration_Z'] ** 2) ** 0.5
#     df['total_orientation'] = (df['orientation_X'] ** 2 + df['orientation_Y'] ** 2 + df['orientation_Z'] ** 2) ** 0.5
#     df['acceleration_vs_velocity'] = df['total_linear_acceleration'] / df['total_angular_velocity']
    return df

In [6]:
df_train = normalize_orientations(df_train)
df_train = sum_columns(df_train)
df_test = normalize_orientations(df_test)
df_test = sum_columns(df_test)

In [7]:
df_train_agg = df_train.groupby('series_id').apply(np.mean)

if 'series_id' in df_train_agg.columns:
    df_train_agg = df_train_agg.drop('series_id', axis=1)

X_train_agg = np.array(df_train_agg)

In [8]:
df_test_agg = df_test.groupby('series_id').apply(np.mean)

if 'series_id' in df_test_agg.columns:
    df_test_agg = df_test_agg.drop('series_id', axis=1)

X_test_agg = np.array(df_test_agg)

In [9]:
# df_train_agg

In [10]:
standards = df_train.std()

df_train = df_train / standards
df_test = df_test / standards

In [11]:
# df_train_agg.describe().T


In [12]:
x_train = df_train.groupby('series_id').apply(lambda group: np.array(group.drop('series_id', axis=1)))
x_test = df_test.groupby('series_id').apply(lambda group: np.array(group.drop('series_id', axis=1)))

y = df_targets.surface

X_train = []
X_test = []

for example in x_train:
    X_train.append(example)
    
for example in x_test:
    X_test.append(example)

    
X_train = np.array(X_train)
X_test = np.array(X_test)

In [13]:
X_train.shape, X_train_agg.shape

((3810, 128, 16), (3810, 16))

In [14]:
X_test.shape, X_test_agg.shape

((3816, 128, 16), (3816, 16))

In [15]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y)

In [16]:
onehotencoder = OneHotEncoder()

y_train = onehotencoder.fit_transform(y_train.reshape(-1, 1))

In [17]:
# df_test.describe()

In [18]:
# stds = df_train.std()

In [19]:
main_input = Input(shape=(X_train.shape[1:]))
agg_input = Input(shape=(X_train_agg.shape[1],))

x = GRU(32, input_shape=(None, X_train[2]))(main_input)

y = Dense(32, activation="tanh")(agg_input)

z = concatenate([x, y])

z = Dense(64, activation="tanh")(z)
z = Dropout(.6)(z)
z = Dense(32, activation="tanh")(z)
predictions = Dense(encoder.classes_.size, activation="softmax")(z)

model = Model(inputs=[main_input, agg_input], outputs=predictions)
# model = Sequential()
# model.add(GRU(32, input_shape=(None, X_train.shape[2])))
# model.add(Dense(64, activation="tanh"))
# model.add(Dense(32, activation="tanh"))
# model.add()
# # model.load_weights('weights.hdf5')

In [20]:
checkpoint_file = 'weights2.hdf5'
checkpoint = ModelCheckpoint(checkpoint_file, monitor='val_acc', verbose=1, save_best_only=True, mode='max', save_weights_only=True)


# early_stopping = EarlyStopping(patience=20)

In [21]:
# df_train.columns


In [22]:
callbacks=[checkpoint]

In [23]:
model.load_weights('weights2.hdf5')

In [24]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [25]:
X_train, X_val, X_train_agg, X_val_agg, y_train, y_val = train_test_split(X_train, X_train_agg, y_train)
# y_trai.shape

In [26]:
# df_train.describe()

In [28]:
# X_train

model.fit([X_train, X_train_agg], y_train, epochs=1, validation_data=([X_val, X_val_agg], y_val), callbacks=callbacks, batch_size=24, verbose=1)

Train on 2857 samples, validate on 953 samples
Epoch 1/1
Epoch 00001: val_acc improved from -inf to 0.88877, saving model to weights2.hdf5


<keras.callbacks.History at 0x1237a7860>

In [29]:
# model.predict(X_test)
raw_preds = model.predict([X_test, X_test_agg]).argmax(axis=1)

In [30]:
raw_preds

array([7, 1, 8, ..., 1, 1, 8])

In [890]:
# raw_preds = model.predict(X_test)

In [891]:
preds = encoder.inverse_transform(raw_preds)

  if diff:


In [892]:
# df_test.describe().T

In [893]:
# acc = (raw_preds == y_train).sum() / raw_preds.size
# acc
# preds

In [894]:
pred_df = pd.DataFrame()
pred_df['surface'] = preds

In [895]:
# pred_df.to_csv('submission.csv')
pred_df.index.name = 'series_id'
pred_df.to_csv('submission.csv')

In [645]:
df_train.describe()

Unnamed: 0,series_id,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z,X_normalized,Y_normalized,Z_normalized,W_normalized,total_angular_velocity,total_linear_acceleration
count,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0,487680.0
mean,1.731594,-0.026323,0.105986,0.117556,-0.036469,0.001507,0.094022,-0.083715,0.069112,1.348774,-3.291305,-0.026323,0.105986,0.117556,-0.036469,0.5061279,4.160892
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,0.0,-1.442476,-1.397364,-1.536535,-1.501639,-20.133554,-10.463414,-5.536902,-19.28098,-56.769244,-26.494538,-1.442477,-1.39737,-1.53654,-1.501646,8.978396e-07,0.198812
25%,0.86557,-1.028327,-0.972825,-0.844243,-1.01688,-0.346049,-0.374293,-0.395991,-0.283777,0.914878,-3.582347,-1.02833,-0.972827,-0.844243,-1.016881,0.03883079,3.763558
50%,1.731594,-0.154529,0.335846,0.301489,-0.179335,0.000715,0.061032,-0.02328,0.066813,1.345565,-3.291451,-0.154529,0.335846,0.30149,-0.179334,0.1248068,4.010295
75%,2.597619,0.950571,1.143067,1.159455,0.932079,0.344141,0.54206,0.281925,0.423534,1.775084,-2.995317,0.950571,1.143065,1.159451,0.932076,0.4165537,4.387901
max,3.463189,1.442476,1.396418,1.469347,1.483901,19.379501,12.168937,6.054023,19.67123,34.114816,23.139229,1.442481,1.396424,1.469349,1.483902,40.64174,56.434945


(3816, 128, 21)