In [1]:
import itertools
import pandas as pd
import numpy as np
from keras import layers
from keras import models
from keras import optimizers
from keras import backend as K
from tqdm import tqdm_notebook

# Number of rows in one sample window: it sizes the random training windows,
# the model's input layer, and the reshape applied to every test segment.
TEST_SIZE = 150000

Using TensorFlow backend.


In [2]:
def load_data(path="../input/train.csv"):
    """Read the training CSV into a two-column float32 DataFrame.

    The file's own header row is skipped and the two columns are relabelled
    ``x`` and ``y``; both are parsed as float32.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the previously hard-coded
        ``"../input/train.csv"``, so existing ``load_data()`` calls behave
        exactly as before.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``x`` and ``y``, both of dtype float32.
    """
    return pd.read_csv(
        path,
        names=["x", "y"],
        skiprows=1,  # drop the header line; `names` supplies the labels instead
        dtype={"x": "float32", "y": "float32"},
    )
# Load the whole training set once at module level; yield_case() samples from it.
BIG_FRAME = load_data()

In [3]:
def yield_case(data=None, size=None):
    """Endlessly yield random ``(window, target)`` training cases.

    A start row is drawn uniformly at random and the ``size`` consecutive rows
    beginning there form one case. A draw is discarded when column 1 at the
    window's first row is smaller than column 1 at its last row.
    NOTE(review): column 1 is presumably a time-to-failure countdown, so this
    skips windows that straddle a reset -- confirm against the dataset.
    If no window passes that check the generator never yields.

    Parameters
    ----------
    data : numpy.ndarray, optional
        2-D array whose column 0 is the input signal and column 1 the target.
        Defaults to ``BIG_FRAME.values``.
    size : int, optional
        Number of rows per window. Defaults to ``TEST_SIZE``.

    Yields
    ------
    tuple
        ``(data[start:start + size, :1], data[start + size - 1, 1])`` -- the
        window of column 0 (kept 2-D) and the column-1 value at its last row.

    Raises
    ------
    ValueError
        On first iteration, if ``data`` has fewer than ``size`` rows.
    """
    if data is None:
        data = BIG_FRAME.values
    if size is None:
        size = TEST_SIZE

    last_start = len(data) - size
    if last_start < 0:
        raise ValueError(
            f"need at least {size} rows to cut a window, got {len(data)}"
        )

    while True:
        # randint's upper bound is exclusive: the +1 keeps the final window
        # (start == last_start) reachable and lets len(data) == size work.
        start = np.random.randint(last_start + 1)
        end = start + size
        if data[start, 1] < data[end - 1, 1]:
            continue
        yield data[start:end, :1], data[end - 1, 1]

In [4]:
def yield_batch(batch_size=16):
    """Endlessly yield ``(inputs, targets)`` batches built from yield_case().

    Every batch is a freshly allocated pair of zero-initialised arrays that is
    then filled case by case: inputs have shape ``(batch_size, TEST_SIZE, 1)``
    and targets have shape ``(batch_size,)``.
    """
    while True:
        inputs = np.zeros((batch_size, TEST_SIZE, 1))
        targets = np.zeros((batch_size, ))
        cases = yield_case()
        for row in range(batch_size):
            # Each case is a (window, target) pair; store both in this row.
            inputs[row], targets[row] = next(cases)
        yield inputs, targets

In [5]:
def primes(num=None):
    """Yield the prime factors of ``num``, with multiplicity, largest first.

    Parameters
    ----------
    num : int, optional
        Positive integer to factorise. When omitted, the current value of
        ``TEST_SIZE`` is used.

    Yields
    ------
    int
        Each prime factor in descending order, e.g. ``12 -> 3, 2, 2``.
        Nothing is yielded for ``num <= 1``.
    """
    if num is None:
        # Resolved at call time rather than as a def-time default, so editing
        # TEST_SIZE takes effect without re-running this cell.
        num = TEST_SIZE

    factors = []
    divisor = 2
    # Trial division only needs to go up to sqrt(num): whatever is left above 1
    # afterwards is itself prime.
    while divisor * divisor <= num:
        if num % divisor:
            divisor += 1
        else:
            factors.append(divisor)
            num //= divisor
    if num > 1:
        factors.append(num)

    # Factors were collected in ascending order, so reversing gives descending.
    yield from reversed(factors)

In [6]:
def make_model(filters):
    """Build, compile and print a summary of the 1-D convolutional regressor.

    The Keras session is cleared first. The network takes an input of shape
    ``(TEST_SIZE, 1)`` and applies one stage per value yielded by ``primes()``:
    a convolution whose kernel size and stride both equal that value (so the
    sequence length shrinks by that factor), followed by two kernel-size-1
    convolutions. The result is flattened and passed through two dense layers
    of ``filters`` units and a final single-unit dense layer. Every layer uses
    ReLU activation.
    NOTE(review): ReLU on the single output unit clamps predictions at zero --
    confirm that is intended for this target.

    Parameters
    ----------
    filters : int
        Number of filters in every convolution and units in the hidden dense
        layers.

    Returns
    -------
    keras Model
        Compiled with the Nadam optimizer and mean-absolute-error loss.
    """
    K.clear_session()
    inputs = layers.Input(shape=(TEST_SIZE, 1))
    net = inputs

    for factor in primes():
        # (kernel_size, strides) for the three convolutions of one stage.
        for width, step in ((factor, factor), (1, 1), (1, 1)):
            net = layers.Conv1D(
                filters=filters,
                kernel_size=width,
                strides=step,
                activation="relu",
            )(net)

    net = layers.Flatten()(net)
    for units in (filters, filters, 1):
        net = layers.Dense(units=units, activation="relu")(net)

    model = models.Model(inputs=inputs, outputs=net)
    nadam = optimizers.Nadam(
        lr=0.0001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=None,
        schedule_decay=0.004,
    )
    model.compile(optimizer=nadam, loss='mean_absolute_error')
    model.summary()
    return model

In [7]:
def train_model(filters, batch_start, batch_max=100, samples_per_epoch=4000,
                model_path='model_conv.h5', batch_step=1):
    """Train a fresh model while growing the batch size after every epoch.

    One epoch is run per batch size, starting at ``batch_start`` and increasing
    by ``batch_step`` until the size would exceed ``batch_max`` (at least one
    epoch always runs). Whenever an epoch's training loss beats the best seen
    so far, the model is saved to ``model_path``.

    Parameters
    ----------
    filters : int
        Passed to ``make_model``.
    batch_start : int
        Batch size of the first epoch; must be at least 1.
    batch_max : int, optional
        Largest batch size to train with.
    samples_per_epoch : int, optional
        Approximate number of cases per epoch; the step count is this value
        floor-divided by the batch size, but never less than 1.
    model_path : str, optional
        File the best model is written to.
    batch_step : int, optional
        Amount the batch size grows by after each epoch; must be at least 1.

    Returns
    -------
    float
        The lowest per-epoch training loss observed.

    Raises
    ------
    ValueError
        If ``batch_start`` or ``batch_step`` is smaller than 1.
    """
    if batch_start < 1:
        raise ValueError(f"batch_start must be >= 1, got {batch_start}")
    if batch_step < 1:
        # A non-positive step would never pass batch_max and loop forever.
        raise ValueError(f"batch_step must be >= 1, got {batch_step}")

    model = make_model(filters)
    batch_size = batch_start
    best_loss = None
    for epoch in itertools.count():
        print(f"Batch size = {batch_size}")
        # Clamp to one step so a batch larger than samples_per_epoch still trains.
        steps = max(1, samples_per_epoch // batch_size)
        # epochs=epoch+1 with initial_epoch=epoch runs exactly one epoch per
        # call while keeping Keras' epoch numbering continuous across calls.
        history = model.fit_generator(
            yield_batch(batch_size),
            steps_per_epoch=steps,
            epochs=epoch + 1,
            initial_epoch=epoch,
        )
        epoch_loss = history.history["loss"][0]
        if best_loss is None or epoch_loss < best_loss:
            best_loss = epoch_loss
            model.save(model_path)
        batch_size += batch_step
        if batch_size > batch_max:
            break
    return best_loss

In [8]:
# Train a 128-filter model starting at batch size 1 (batch_max keeps its default).
# `loss` is reused later for the printed summary and the submission file name.
loss = train_model(128, 1)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150000, 1)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 30000, 128)        768       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 30000, 128)        16512     
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 30000, 128)        16512     
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 6000, 128)         82048     
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 6000, 128)         16512     
_________________________________________________________________
conv

In [9]:
# Report the best training loss, reload the model file written during training,
# and predict one value for every segment listed in the sample submission.
print(f"Minimum loss - {loss:0.3f}")
model = models.load_model("model_conv.h5")
submission = pd.read_csv(
    "../input/sample_submission.csv",
    index_col='seg_id',
    dtype={'time_to_failure': np.float32}
)
for seg_id in tqdm_notebook(submission.index):
    seg = pd.read_csv(f"../input/test/{seg_id}.csv")
    # Shape the segment as a batch of one: (1, TEST_SIZE, 1).
    X = seg["acoustic_data"].values.reshape(1, TEST_SIZE, 1)
    y = model.predict(X)
    # Assign through a single .loc[row, column] call. The previous chained form
    # (.loc[row][column] = ...) may write to a temporary copy and silently
    # leave the submission unchanged. The prediction has shape (1, 1), so the
    # scalar is extracted explicitly.
    submission.loc[seg_id, "time_to_failure"] = y[0, 0]
submission.to_csv(f"submission_{loss:0.3f}.csv")

Minimum loss - 1.978


HBox(children=(IntProgress(value=0, max=2624), HTML(value='')))


