In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import time
import keras
from keras import layers
from keras import models
from keras import optimizers
from keras import callbacks
from keras import backend as K
from sklearn import model_selection
import pathlib

# Names of the four bounding-box coordinates; assigned as the column names of
# the answers table after it is loaded.
COORDINATES = ["Xmin", "Ymin", "Xmax", "Ymax"]
# NOTE(review): not referenced by any visible cell -- presumably a cap on the
# number of votes per item used during training; confirm or remove.
MAX_TRAIN_FORECASTS = 14

Using TensorFlow backend.


In [3]:
# Training votes, indexed by itemId (the index is non-unique when an item has
# several rows, which is how the training generator looks up all votes of one item).
votes = pd.read_csv("../raw/train_data.csv", index_col="itemId")

In [4]:
# Ground-truth boxes indexed by itemId; the data columns are renamed to the
# notebook-wide COORDINATES names.
answers = pd.read_csv("../raw/train_answers.csv", index_col="itemId")
answers.columns = COORDINATES

In [5]:
# Test-set votes, indexed by itemId like the training votes.
votes_test = pd.read_csv("../raw/test_data.csv", index_col="itemId")

In [6]:
def yield_batch(batch_size):
    """Endlessly yield one randomly chosen training item at a time.

    Each iteration draws one entry from ``votes.index``, collects every vote
    row for that item and shuffles the rows, so the model sees the votes in a
    different order each time the item is drawn.

    Parameters
    ----------
    batch_size
        Accepted but not read: every yielded batch holds exactly one item,
        because items have different numbers of votes.

    Yields
    ------
    (features, target)
        ``features`` has shape ``(1, n_votes, 4)`` and ``target`` has shape
        ``(1, 4)``, taken from the ``answers`` row(s) of the drawn item.
    """
    while True:
        # An array of length one, so the .loc lookups below return frames.
        chosen = np.random.choice(votes.index, 1)
        item_votes = votes.loc[chosen].set_index("userId")
        n_votes = len(item_votes)

        features = np.zeros((1, n_votes, 4))
        target = np.zeros((1, 4))

        # Sampling all rows without replacement is a shuffle of the votes.
        features[0] = item_votes.sample(n_votes)
        target[0] = answers.loc[chosen]
        yield features, target

In [7]:
def intersection_over_union(boxes_pred, boxes_true):
    """Loss function: negative mean intersection-over-union of two box batches.

    Both arguments are indexed as ``[:, 0..3]`` in the order
    x_min, y_min, x_max, y_max. The expression is symmetric in its two
    arguments, so the order in which they are passed does not change the value.

    Box widths/heights and the overlap extents are clamped at zero, so
    degenerate boxes (max < min) contribute zero area. ``K.epsilon()`` in the
    denominator guards against division by zero when both boxes are empty.

    Returns the negated mean IoU so that minimising the loss maximises IoU.
    """
    def _larger(a, b):
        # Element-wise maximum, expressed as stack + reduce.
        return K.max(K.stack([a, b], axis=-1), axis=-1)

    def _smaller(a, b):
        # Element-wise minimum, expressed as stack + reduce.
        return K.min(K.stack([a, b], axis=-1), axis=-1)

    # Corners of the overlap rectangle.
    left = _larger(boxes_pred[:, 0], boxes_true[:, 0])
    top = _larger(boxes_pred[:, 1], boxes_true[:, 1])
    right = _smaller(boxes_pred[:, 2], boxes_true[:, 2])
    bottom = _smaller(boxes_pred[:, 3], boxes_true[:, 3])

    zeros = K.zeros_like(right)

    def _clamped_area(x_lo, y_lo, x_hi, y_hi):
        # Area of a rectangle whose negative extents are treated as zero.
        return _larger(zeros, x_hi - x_lo) * _larger(zeros, y_hi - y_lo)

    inter_area = _clamped_area(left, top, right, bottom)
    area_pred = _clamped_area(
        boxes_pred[:, 0], boxes_pred[:, 1], boxes_pred[:, 2], boxes_pred[:, 3]
    )
    area_true = _clamped_area(
        boxes_true[:, 0], boxes_true[:, 1], boxes_true[:, 2], boxes_true[:, 3]
    )

    union_area = area_pred + area_true - inter_area + K.epsilon()
    iou = inter_area / union_area

    return -K.mean(iou, axis=-1)

In [60]:
def make_model(filters):
    """Build the vote-aggregation network and print its summary.

    The input is a variable-length sequence of 4-value rows. The network output
    is ``relu(mean_over_sequence(input) + correction)``, where ``correction``
    is produced by a bidirectional LSTM followed by a funnel of Dense layers.

    Parameters
    ----------
    filters
        Base width: the LSTM uses ``filters * 4`` units per direction and the
        Dense layers use ``filters * 4``, ``filters * 2``, ``filters`` and
        ``filters // 2`` units, followed by a linear 4-unit layer.

    Returns
    -------
    The uncompiled Keras model. The Keras session is cleared first, so any
    previously built model in the session is discarded.
    """
    K.clear_session()

    inputs = layers.Input(shape=(None, 4))

    # Residual baseline: the plain average of all rows in the sequence.
    baseline = layers.GlobalAveragePooling1D()(inputs)

    # Sequence encoder; only the final state is kept.
    encoder = layers.Bidirectional(
        layers.LSTM(units=filters * 4, return_sequences=False)
    )
    hidden = encoder(inputs)

    # Funnel of ReLU layers, halving the width at each step.
    for width in (filters * 4, filters * 2, filters, filters // 2):
        hidden = layers.Dense(units=width, activation="relu")(hidden)

    # Linear correction added on top of the averaged input.
    correction = layers.Dense(units=4, activation=None)(hidden)
    merged = layers.add([baseline, correction])
    outputs = layers.Activation("relu")(merged)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.summary()
    return model

In [58]:
def train_model(batch_size, units, epochs=100):
    """Build, compile and fit the model on batches from ``yield_batch``.

    Parameters
    ----------
    batch_size
        Forwarded to ``yield_batch`` and used to derive the number of steps
        per epoch (``1000 // batch_size``, but never fewer than one step).
    units
        Base layer width forwarded to ``make_model``.
    epochs
        Maximum number of epochs; early stopping uses a patience of
        ``epochs // 10``.

    Returns
    -------
    (rez, model)
        The object returned by ``fit_generator`` and the trained model.
        The best weights by training loss are also saved to
        ``../processed/model.h5``.
    """
    model = make_model(units)

    lr = 0.002
    optimizer = optimizers.Nadam(
        lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004
    )
    model.compile(
        optimizer=optimizer,
        loss=intersection_over_union,
        metrics=None
    )

    # The checkpoint callback does not create missing directories, so make
    # sure the target folder exists before training starts.
    checkpoint_file = "../processed/model.h5"
    pathlib.Path(checkpoint_file).parent.mkdir(parents=True, exist_ok=True)

    # No validation data is supplied, so every callback watches the training loss.
    cb = [
        callbacks.ModelCheckpoint(checkpoint_file, monitor="loss", verbose=1, save_best_only=True),
        callbacks.EarlyStopping(monitor='loss', patience=epochs // 10, verbose=10),
        callbacks.ReduceLROnPlateau(monitor='loss', factor=0.8, patience=1, verbose=1)
    ]
    rez = model.fit_generator(
        yield_batch(batch_size),
        # Guard against zero steps when batch_size exceeds 1000.
        steps_per_epoch=max(1, 1000 // batch_size),
        epochs=epochs,
        callbacks=cb,
    )
    return rez, model

In [61]:
# Train with one item per step and a base layer width of 32.
rez, model = train_model(batch_size=1, units=32)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 4)      0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 256)          136192      input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          32896       bidirectional_1[0][0]            
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 64)           8256        dense_1[0][0]                    
__________________________________________________________________________________________________
dense_3 (D


Epoch 00029: loss did not improve from -0.55618

Epoch 00029: ReduceLROnPlateau reducing learning rate to 4.50359977548942e-05.
Epoch 30/100

Epoch 00030: loss improved from -0.55618 to -0.55763, saving model to ../processed/model.h5
Epoch 31/100

Epoch 00031: loss did not improve from -0.55763

Epoch 00031: ReduceLROnPlateau reducing learning rate to 3.602879878599197e-05.
Epoch 32/100

Epoch 00032: loss did not improve from -0.55763

Epoch 00032: ReduceLROnPlateau reducing learning rate to 2.8823039610870185e-05.
Epoch 33/100

Epoch 00033: loss did not improve from -0.55763

Epoch 00033: ReduceLROnPlateau reducing learning rate to 2.3058432270772757e-05.
Epoch 34/100

Epoch 00034: loss did not improve from -0.55763

Epoch 00034: ReduceLROnPlateau reducing learning rate to 1.8446745525579902e-05.
Epoch 35/100

Epoch 00035: loss did not improve from -0.55763

Epoch 00035: ReduceLROnPlateau reducing learning rate to 1.475739700254053e-05.
Epoch 36/100

Epoch 00036: loss did not improve

In [None]:
# Plot the training curves. Training runs without validation data, so the
# history has no "val_loss" entry; select only the curves that are actually
# present instead of failing with a KeyError.
history_df = pd.DataFrame(rez.history)
curves = [name for name in ("loss", "val_loss") if name in history_df.columns]
history_df[curves].plot(figsize=(16, 8))

In [None]:
def make_forecast(model):
    """Predict a box for every test item and write the submission CSV.

    The model consumes one variable-length sequence of votes per item, so the
    test votes are fed item by item, in the same layout the training generator
    uses: all rows of the item, with ``userId`` moved out of the data columns,
    as an array of shape ``(1, n_votes, 4)``.

    Parameters
    ----------
    model
        Any object whose ``predict`` accepts such an array and returns an
        array of shape ``(1, 4)``.

    Side effects
    ------------
    Writes ``../processed/<YYYY-mm-dd_HH-MM>/_sub_full.csv`` without a header
    row: the first column is the itemId, followed by the four predicted values.
    """
    subdir = time.strftime('%Y-%m-%d_%H-%M')
    path = pathlib.Path(f"../processed/{subdir}")
    # parents=True: do not fail when ../processed itself does not exist yet.
    path.mkdir(parents=True, exist_ok=True)

    # One prediction per distinct item, in order of first appearance.
    item_ids = votes_test.index.unique()
    boxes = np.zeros((len(item_ids), 4))
    for row, item_id in enumerate(item_ids):
        # List indexer so a single-vote item still yields a DataFrame.
        forecasts = votes_test.loc[[item_id]].set_index("userId")
        x = forecasts.values.astype(float)[np.newaxis, ...]
        boxes[row] = model.predict(x)[0]

    df = pd.DataFrame(boxes, index=item_ids)
    df.to_csv(path / "_sub_full.csv", header=False)

In [None]:
# Write the submission file for the model trained above.
make_forecast(model=model)