In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib notebook

from IPython.display import display, HTML

pd.set_option('display.max_columns', None)

from pandas.api.types import CategoricalDtype
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

In [33]:
from tensorflow import keras
import tensorflow as tf

ModuleNotFoundError: No module named 'tensorflow'

In [3]:
!ls 'data'

train.csv   X_test.csv.zip  X_train.csv.zip
X_test.csv  X_train.csv     y_train.csv


In [50]:
# Load only a leading slice of the training measurements.
N_SERIES = 200        # number of series to read
SERIES_LENGTH = 128   # rows per series -- presumably the same for every series; TODO confirm
data = pd.read_csv(
    "data/X_train.csv",
    index_col=None,
    nrows=N_SERIES * SERIES_LENGTH,
)

In [51]:
data.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.75853,-0.63435,-0.10488,-0.10597,0.10765,0.017561,0.000767,-0.74857,2.103,-9.7532
1,0_1,0,1,-0.75853,-0.63434,-0.1049,-0.106,0.067851,0.029939,0.003385,0.33995,1.5064,-9.4128
2,0_2,0,2,-0.75853,-0.63435,-0.10492,-0.10597,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,0_3,0,3,-0.75852,-0.63436,-0.10495,-0.10597,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.096
4,0_4,0,4,-0.75852,-0.63435,-0.10495,-0.10596,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.441


In [52]:
data.columns

Index(['row_id', 'series_id', 'measurement_number', 'orientation_X',
       'orientation_Y', 'orientation_Z', 'orientation_W', 'angular_velocity_X',
       'angular_velocity_Y', 'angular_velocity_Z', 'linear_acceleration_X',
       'linear_acceleration_Y', 'linear_acceleration_Z'],
      dtype='object')

In [53]:
# Bookkeeping columns that identify a row rather than carry a sensor reading.
ID_COLUMNS = ['series_id', 'row_id', 'measurement_number']

# Capture the series ids only while the column is still present, so that
# re-running this cell after the drop neither raises nor loses the saved ids.
if 'series_id' in data.columns:
    pids = data['series_id'].unique()            # distinct series ids, in order of appearance
    process_id_array = data['series_id'].values  # series id of every row

# Rebind instead of mutating in place; errors='ignore' turns the drop into a
# no-op when the columns have already been removed by an earlier run.
data = data.drop(columns=ID_COLUMNS, errors='ignore')

In [55]:
data.head()

Unnamed: 0,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,-0.75853,-0.63435,-0.10488,-0.10597,0.10765,0.017561,0.000767,-0.74857,2.103,-9.7532
1,-0.75853,-0.63434,-0.1049,-0.106,0.067851,0.029939,0.003385,0.33995,1.5064,-9.4128
2,-0.75853,-0.63435,-0.10492,-0.10597,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,-0.75852,-0.63436,-0.10495,-0.10597,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.096
4,-0.75852,-0.63435,-0.10495,-0.10596,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.441


In [66]:
# Read the first 200 target rows, indexed by series_id.
labels = pd.read_csv(
    'data/y_train.csv',
    index_col='series_id',
    nrows=200,
)

In [67]:
# group_id is not used anywhere below. Rebinding (instead of inplace=True) with
# errors='ignore' keeps this cell safe to re-run once the column is gone.
labels = labels.drop(columns=['group_id'], errors='ignore')

In [88]:
labels.head()

Unnamed: 0_level_0,surface
series_id,Unnamed: 1_level_1
0,fine_concrete
1,concrete
2,concrete
3,concrete
4,soft_tiles


# Check Inputs

In [69]:
# Standardise every sensor column. The scaler is fitted through the pipeline's
# own fit() so that `preprocessor` itself is the fitted object, rather than an
# unfitted Pipeline wrapped around a step that was fitted on the side.
preprocessor = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
    ]
).fit(data)

In [70]:
process_id_array

array([  0,   0,   0, ..., 199, 199, 199])

In [71]:
data.shape

(25600, 10)

In [72]:
labels.shape

(200, 1)

In [74]:
# Number of feature columns fed to the model (one per remaining column of `data`).
N_ENCODED_FEATURES = len(data.columns)

In [82]:
# The recurrent model consumes (n_series, timesteps, n_features), whereas
# transform() yields a flat (n_rows, n_features) matrix. Folding it with a plain
# reshape is only valid when rows are grouped by series and every series has
# the same number of rows, so both conditions are checked first.
assert (np.diff(process_id_array) >= 0).all(), "rows must be grouped by series_id"
_, rows_per_series = np.unique(process_id_array, return_counts=True)
assert (rows_per_series == rows_per_series[0]).all(), "series must have equal length"

arr = preprocessor.transform(data).reshape(
    len(rows_per_series), rows_per_series[0], data.shape[1]
)

In [89]:
# Keras cannot train on string targets, so map each surface name to an integer
# class id (ids follow the alphabetical order of the names). `surface_classes`
# keeps the names so that predictions can be decoded: surface_classes[id].
y_codes, surface_classes = pd.factorize(labels['surface'], sort=True)
y = y_codes.reshape(-1, 1)  # column vector, same (n_series, 1) shape as labels.values
# NOTE(review): many_to_one_model ends in a single sigmoid unit trained with
# MSE, which cannot represent more than two class ids -- a softmax head with a
# sparse categorical cross-entropy loss is presumably what is wanted; confirm.

# Model

In [83]:
class RNNConfig(object):
    """Plain bag of hyper-parameters for the recurrent model.

    Every constructor argument is stored, unchanged and unvalidated, as an
    attribute of the same name.
    """

    def __init__(
        self, cell_type='lstm', window=20, forget_bias=1.0,
        n_hidden_cells=100, n_layers=1, keep_prob=1.0, batch_size=1,
        epoch_num=100, learning_rate=0.02, max_grad_norm=1.0, init_scale=0.1,
    ):
        # Network shape: read by many_to_one_model.
        self.window = window                   # time steps per input sequence
        self.n_hidden_cells = n_hidden_cells   # units in each LSTM layer
        self.n_layers = n_layers               # number of stacked LSTM layers

        # Training: learning_rate feeds the optimizer, the other two feed fit().
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epoch_num = epoch_num

        # Stored, but not read by any code visible in this notebook.
        self.cell_type = cell_type
        self.forget_bias = forget_bias
        self.keep_prob = keep_prob
        self.max_grad_norm = max_grad_norm
        self.init_scale = init_scale

In [84]:
def many_to_one_model(config):
    """Build and compile a many-to-one LSTM network.

    Layers, in order: a ``Masking`` layer over inputs of shape
    ``(config.window, N_ENCODED_FEATURES)``, one or two ReLU-activated LSTM
    layers of ``config.n_hidden_cells`` units each, and a single sigmoid
    output unit. The model is compiled with mean-squared-error as both loss
    and metric, optimised by RMSprop at ``config.learning_rate``.

    Only ``window``, ``n_hidden_cells``, ``n_layers`` and ``learning_rate``
    are read from ``config``; its other fields are ignored here.

    Args:
        config: an ``RNNConfig`` instance.

    Returns:
        The compiled ``tf.keras`` ``Sequential`` model.

    Raises:
        AssertionError: if ``config`` is not an ``RNNConfig``.
        NotImplementedError: if ``config.n_layers`` is neither 1 nor 2.
    """
    assert isinstance(config, RNNConfig)

    # Reject unsupported depths before any layer is created.
    if config.n_layers not in (1, 2):
        raise NotImplementedError("Keep n_layers <= 2.")

    model = tf.keras.models.Sequential()

    # Masking layer: time steps whose features all equal 0. are skipped by the
    # layers downstream. It is the first layer, so it alone declares the input
    # shape; repeating input_shape on the LSTM layers is redundant.
    model.add(
        tf.keras.layers.Masking(
            mask_value=0., input_shape=(config.window, N_ENCODED_FEATURES)))

    # With two layers the first LSTM must emit the whole sequence so the second
    # has something to iterate over; the last LSTM only emits its final state.
    stacked = config.n_layers == 2
    model.add(
        tf.keras.layers.LSTM(
            config.n_hidden_cells, return_sequences=stacked, activation='relu'))
    if stacked:
        model.add(
            tf.keras.layers.LSTM(config.n_hidden_cells, activation='relu'))

    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    loss = 'mean_squared_error'
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and rejected outright by recent Keras releases.
    rmsprop = tf.keras.optimizers.RMSprop(learning_rate=config.learning_rate)
    model.compile(
        loss=loss, optimizer=rmsprop,
        metrics=[tf.keras.metrics.mean_squared_error]
    )

    return model

# Sanity Check

In [85]:
# The model's `window` is the number of time steps in one sequence, i.e. the
# number of rows per series -- not the number of series (which is what the
# previous literal 200 matched). Derive it from the loaded data; this assumes
# every series has the same number of rows.
max_length_test = len(process_id_array) // len(pids)

In [86]:
# Smallest useful setup for a smoke test: one 16-unit LSTM layer, trained for
# a single epoch with one sequence per batch.
basic_config = RNNConfig(
    n_layers=1,
    n_hidden_cells=16,
    window=max_length_test,
    epoch_num=1,
    batch_size=1,
)

model = many_to_one_model(basic_config)

NameError: name 'tf' is not defined

In [None]:
# Train with the batch size and epoch count from the sanity-check config,
# passing validation_split=0.2 so part of the samples is used for validation.
model.fit(
    x=arr,
    y=y,
    epochs=basic_config.epoch_num,
    batch_size=basic_config.batch_size,
    validation_split=0.2,
)