In [1]:
import os
from datetime import datetime

import tensorflow as tf

In [2]:
# Start from an empty default graph so that re-running this cell does not add
# a second copy of every op to the graph left behind by the previous run.
tf.reset_default_graph()

# Directory that contains train.csv. Set the LANL_DATA_DIR environment
# variable to point somewhere else; when it is unset the original location
# is used unchanged.
data_dir = os.environ.get("LANL_DATA_DIR", "/media/data/ai/kaggle/lanl_earthquake/")

window_size = 150000  # CSV rows grouped into one sliding window (one example)
window_shift = 9      # rows the window advances between consecutive examples
batch_size = 32       # windows per training batch

def to_train(acoustic_data_vec, time_to_failure_vec):
    """Turn one sliding window into a (features, label) training pair.

    The acoustic window is passed through untouched; the label is the final
    entry of the matching time-to-failure window.
    """
    label = time_to_failure_vec[-1]
    features = acoustic_data_vec
    return features, label

# Stream train.csv as rows of two float32 columns, group the rows into
# overlapping windows of `window_size` rows that advance by `window_shift`
# rows, and reduce every window to one (signal, label) example via to_train.
# tf.data.experimental.CsvDataset is the replacement that TensorFlow's own
# deprecation warning names for tf.contrib.data.CsvDataset.
acoustic_dataset = tf.data.experimental.CsvDataset(
    os.path.join(data_dir, 'train.csv'),
    [tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)],
    header=True
).apply(
    # NOTE(review): sliding_window_batch is deprecated too. The suggested
    # Dataset.window(...).flat_map(...) replacement needs care with
    # two-column elements, so it is intentionally left as-is here.
    tf.contrib.data.sliding_window_batch(window_size=window_size, window_shift=window_shift)
).map(to_train)

# Number of ready batches to keep buffered ahead of the training step.
# One batch is batch_size * window_size float32 values (about 19 MB with the
# settings above), so the previous buffer of 1000 batches could grow to
# roughly 19 GB of host memory; a handful is enough to overlap input
# processing with training.
prefetch_batches = 4

# test_dataset = acoustic_dataset.take(100).batch(100).cache().repeat()
# prefetch goes last so it buffers the final stream; the sequence of batches
# is the same as with batch -> prefetch -> repeat.
train_dataset = acoustic_dataset.batch(batch_size).repeat().prefetch(prefetch_batches)

train_iterator = train_dataset.make_initializable_iterator()
X, y = train_iterator.get_next()
# Pin the window length in the static shape and add a trailing channel axis:
# X becomes (batch, window_size, 1), y becomes (batch, 1).
X = tf.expand_dims(tf.reshape(X, (-1, window_size)), -1)
y = tf.expand_dims(y, -1)

# test_iterator = test_dataset.make_initializable_iterator()
# test_X, test_y = test_iterator.get_next()
# test_X = tf.expand_dims(test_X, -1)

Instructions for updating:
Use `tf.data.experimental.CsvDataset(...)`.
Instructions for updating:
Use `tf.data.Dataset.window(size=window_size, shift=window_shift, stride=window_stride).flat_map(lambda x: x.batch(window.size))` instead.


In [3]:
with tf.variable_scope('input', reuse=tf.AUTO_REUSE):
    # The network inputs default to the tensors produced by the training
    # iterator, so running the graph without a feed_dict behaves exactly as
    # before. Feeding `input_signal` (and `time_to_failure` when the loss is
    # needed) now actually reaches the model, which makes evaluation on
    # held-out windows possible; previously these placeholders were not
    # connected to anything.
    input_signal = tf.placeholder_with_default(X, [None, window_size, 1], name='input_signal')
    time_to_failure = tf.placeholder_with_default(y, [None, 1], name='time_to_failure')
    # Feed False at inference time to switch dropout off.
    is_training = tf.placeholder_with_default(True, [], name='is_training')
    dropout_rate = tf.cast(tf.placeholder_with_default(0.5, [], name='dropout_rate'), tf.float32)

    # Learning rate handed to the Adam optimizer below.
    training_rate = tf.placeholder_with_default(0.00001, [], name='training_rate')

with tf.variable_scope('convolution', reuse=tf.AUTO_REUSE):
    # Four stages of: strided conv (stride 3) -> conv (stride 1) -> max-pool
    # by 2, all with kernel size 9 and VALID padding.
    # NOTE(review): none of the conv layers specifies an activation, so the
    # two convolutions in each stage compose linearly -- confirm this is
    # intended before relying on the extra depth.
    conv1_1 = tf.layers.conv1d(input_signal, 64, 9, strides=3, padding='VALID', name='conv1_1')
    conv1_2 = tf.layers.conv1d(conv1_1, 64, 9, strides=1, padding='VALID', name='conv1_2')
    pool1 = tf.layers.max_pooling1d(conv1_2, 2, 2, name='pool1')

    conv2_1 = tf.layers.conv1d(pool1, 512, 9, strides=3, padding='VALID', name='conv2_1')
    conv2_2 = tf.layers.conv1d(conv2_1, 512, 9, strides=1, padding='VALID', name='conv2_2')
    pool2 = tf.layers.max_pooling1d(conv2_2, 2, 2, name='pool2')

    conv3_1 = tf.layers.conv1d(pool2, 256, 9, strides=3, padding='VALID', name='conv3_1')
    conv3_2 = tf.layers.conv1d(conv3_1, 256, 9, strides=1, padding='VALID', name='conv3_2')
    pool3 = tf.layers.max_pooling1d(conv3_2, 2, 2, name='pool3')

    conv4_1 = tf.layers.conv1d(pool3, 512, 9, strides=3, padding='VALID', name='conv4_1')
    conv4_2 = tf.layers.conv1d(conv4_1, 512, 9, strides=1, padding='VALID', name='conv4_2')
    pool4 = tf.layers.max_pooling1d(conv4_2, 2, 2, name='pool4')

    # Regression head: flatten -> dropout -> 1000-unit ReLU layer -> scalar.
    flatten = tf.layers.flatten(pool4, name='flatten')
    dropout = tf.layers.dropout(flatten, rate=dropout_rate, training=is_training, name='dropout')

    feature = tf.layers.dense(dropout, 1000, activation=tf.nn.relu, name='feature')
    time_to_failure_pred = tf.layers.dense(feature, 1, name='time_to_failure_pred')

with tf.variable_scope('training', reuse=tf.AUTO_REUSE):
    # Mean squared error between the label and the predicted time to failure.
    loss = tf.losses.mean_squared_error(time_to_failure, time_to_failure_pred)
    tf.summary.scalar('train_loss', loss)
    merged = tf.summary.merge_all()
    optimizer = tf.train.AdamOptimizer(training_rate)
    train_step = optimizer.minimize(loss)
    

In [None]:
# Upper bound on the number of optimizer steps to run.
max_iter = 100000

# One TensorBoard run directory per launch, named by the UTC start time.
train_writer = tf.summary.FileWriter('tensorboard/%s' % datetime.utcnow().isoformat())

# Defined up front so the name exists even if no training step completes.
loss_val = None

sess = tf.Session()

try:
    with sess:
        sess.run(tf.global_variables_initializer())
        sess.run(train_iterator.initializer)

        for i in range(max_iter):
            # One optimizer step; also fetch the loss and its summary for logging.
            loss_val, summary, _ = sess.run([loss, merged, train_step])
            train_writer.add_summary(summary, global_step=i)
finally:
    # Flush and close the event file even when the loop is interrupted
    # (e.g. the cell is stopped by hand), so logged summaries are not lost.
    train_writer.close()

In [5]:
# Display the loss value fetched by the most recent training step that ran.
loss_val

0.12119138