In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import time
from keras import layers
from keras import models
from keras import optimizers
from keras import callbacks
from keras import backend as K
from sklearn import model_selection
from sklearn import preprocessing
import pathlib

Using TensorFlow backend.


In [2]:
# Names of the four bounding-box columns, in the order the rest of the notebook
# indexes them (0: Xmin, 1: Ymin, 2: Xmax, 3: Ymax).
COORDINATES = ["Xmin", "Ymin", "Xmax", "Ymax"]

In [3]:
# Ground-truth boxes indexed by itemId; the columns are renamed to the shared
# corner names so they can be addressed through COORDINATES.
answers = pd.read_csv("../raw/train_answers.csv").set_index("itemId")
answers.columns = COORDINATES
# The scaler is fitted on the boxes stacked on top of a copy whose columns are
# reordered to (Xmax, Ymax, Xmin, Ymin). Column 0 and column 2 therefore see the
# same set of values and get the same mean/scale, and likewise columns 1 and 3.
scaler = preprocessing.StandardScaler()
scaler.fit(np.concatenate([answers.values, answers.values[:, [2, 3, 0, 1]]], axis=0))
answers[COORDINATES] = scaler.transform(answers[COORDINATES])

  after removing the cwd from sys.path.


In [4]:
# Training votes indexed by itemId (an item may appear on several rows);
# the box columns are put on the same scale as the ground-truth answers.
votes = pd.read_csv("../raw/train_data.csv")
votes = votes.set_index("itemId")
votes[COORDINATES] = scaler.transform(votes[COORDINATES])

  


In [5]:
# Test votes indexed by itemId, scaled with the scaler fitted on the training answers.
votes_test = pd.read_csv("../raw/test_data.csv")
votes_test = votes_test.set_index("itemId")
votes_test[COORDINATES] = scaler.transform(votes_test[COORDINATES])

  


In [6]:
def yield_batch(data):
    """Endlessly yield single-item training batches in random order.

    Args:
        data: pair ``(votes, answers)``. ``votes`` is a frame indexed by item id
            with a ``userId`` column plus four box columns; ``answers`` is a
            frame indexed by item id with four box columns.

    Yields:
        ``(x, y)`` where ``x`` has shape ``(1, n_votes, 4)`` and holds the
        item's votes in a freshly shuffled row order, and ``y`` has shape
        ``(1, 4)`` and holds the item's answer row.
    """
    votes, answers = data
    candidates = list(set(votes.index))
    while True:
        # A length-1 array of labels, so both .loc lookups below return frames.
        picked = np.random.choice(candidates, 1)
        item_votes = votes.loc[picked].set_index("userId")
        n_votes = len(item_votes)

        features = np.zeros((1, n_votes, 4))
        target = np.zeros((1, 4))

        # Sampling all rows without replacement is a random permutation of the votes.
        features[0] = item_votes.sample(n_votes)
        target[0] = answers.loc[picked]
        yield features, target

In [7]:
def yield_batch_val(data):
    """Endlessly cycle over validation items, yielding one item per batch.

    Args:
        data: pair ``(votes, answers)``. ``votes`` is a frame indexed by item id
            with a ``userId`` column plus four box columns; ``answers`` is a
            frame indexed by item id with four box columns.

    Yields:
        ``(x, y)`` where ``x`` has shape ``(1, n_votes, 4)`` with the item's
        votes in stored order and ``y`` has shape ``(1, 4)`` with its answer.
        Items are visited in order of first appearance in ``votes.index`` and
        the cycle restarts after the last one.
    """
    votes, answers = data
    item_ids = votes.index.unique()
    while True:
        for item_id in item_ids:
            # A list label makes .loc return a DataFrame even when the item has
            # a single vote; a scalar label would return a Series there and
            # .set_index would fail.
            forecasts = votes.loc[[item_id]].set_index("userId")
            x = np.zeros((1, len(forecasts), 4))
            y = np.zeros((1, 4))

            x[0] = forecasts
            y[0] = answers.loc[item_id]
            yield x, y

In [9]:
def yield_batch_test(data):
    """Yield one feature batch per item, in order of first appearance.

    Args:
        data: frame indexed by item id with a ``userId`` column plus four box
            columns.

    Yields:
        Array of shape ``(1, n_votes, 4)`` with the item's votes in stored
        order. The generator is finite: it stops after the last item.
    """
    for item_id in data.index.unique():
        # A list label makes .loc return a DataFrame even when the item has a
        # single vote; a scalar label would return a Series there.
        forecasts = data.loc[[item_id]].set_index("userId")
        x = np.zeros((1, len(forecasts), 4))
        x[0] = forecasts
        yield x

In [10]:
def intersection_over_union(boxes_pred, boxes_true):
    """Negative mean intersection-over-union of two batches of boxes.

    Both arguments are indexed as ``[:, 0..3]`` = (x_min, y_min, x_max, y_max).
    The formula treats its two arguments symmetrically, so the result does not
    depend on which of them is the prediction.

    Returns:
        ``-mean(IoU)`` over the last axis, so that minimising it maximises IoU.
    """

    def elementwise(reduce_fn, first, second):
        # Element-wise max/min of two tensors via stack + reduce on the new axis.
        return reduce_fn(K.stack([first, second], axis=-1), axis=-1)

    # Edges of the overlap rectangle: largest lower edge, smallest upper edge.
    inter_x_min = elementwise(K.max, boxes_pred[:, 0], boxes_true[:, 0])
    inter_y_min = elementwise(K.max, boxes_pred[:, 1], boxes_true[:, 1])
    inter_x_max = elementwise(K.min, boxes_pred[:, 2], boxes_true[:, 2])
    inter_y_max = elementwise(K.min, boxes_pred[:, 3], boxes_true[:, 3])

    zeros = K.zeros_like(inter_x_max)

    def clipped_area(x_low, y_low, x_high, y_high):
        # Side lengths are clipped at zero, so an inverted or non-overlapping
        # rectangle contributes zero area rather than a negative one.
        width = elementwise(K.max, zeros, x_high - x_low)
        height = elementwise(K.max, zeros, y_high - y_low)
        return width * height

    inter_area = clipped_area(inter_x_min, inter_y_min, inter_x_max, inter_y_max)
    area_pred = clipped_area(boxes_pred[:, 0], boxes_pred[:, 1], boxes_pred[:, 2], boxes_pred[:, 3])
    area_true = clipped_area(boxes_true[:, 0], boxes_true[:, 1], boxes_true[:, 2], boxes_true[:, 3])

    # K.epsilon() keeps the ratio finite when both boxes have zero area.
    union_area = area_pred + area_true - inter_area + K.epsilon()
    iou = inter_area / union_area

    return -K.mean(iou, axis=-1)

In [11]:
def make_model(filters):
    """Build the vote-aggregation network and print its summary.

    The Keras session is cleared first. The model takes a variable-length
    sequence of 4-value boxes, passes it through six Conv1D layers of
    ``filters * 4`` channels each, averages over the sequence axis, and maps
    the result through a ReLU dense layer of ``filters * 2`` units to four
    linear outputs.

    Args:
        filters: base width; the conv and dense layer sizes are multiples of it.

    Returns:
        The uncompiled ``keras.models.Model``.
    """
    K.clear_session()

    inputs = layers.Input(shape=(None, 4))

    width_multiplier = 4
    n_conv_layers = 6

    hidden = inputs
    # NOTE(review): no `activation` is passed to these Conv1D layers, so they
    # use the library default - confirm that is the intended design.
    for _ in range(n_conv_layers):
        hidden = layers.Conv1D(
            filters=filters * width_multiplier,
            kernel_size=3,
            strides=1,
            padding="same",
        )(hidden)

    # Averaging over the sequence axis gives a fixed-size vector per item.
    pooled = layers.GlobalAveragePooling1D()(hidden)

    dense = layers.Dense(units=filters * 2, activation="relu")(pooled)
    outputs = layers.Dense(units=4, activation=None)(dense)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.summary()
    return model

In [12]:
class MaxLRTest(callbacks.Callback):
    """Learning-rate range test.

    The learning rate is increased geometrically from ``base_lr`` to ``max_lr``
    over ``steps`` batches while per-batch logs are recorded; training is
    flagged to stop once the step budget is exceeded. ``plot`` then shows the
    smoothed loss against the learning rate.

    Args:
        base_lr: learning rate used for the first batch.
        max_lr: learning rate reached after ``steps`` batches.
        steps: number of batches over which the rate is swept.
    """

    def __init__(self, base_lr=1.0e-9, max_lr=10, steps=10000):
        super().__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.steps = steps

        # Both are (re)initialised in on_train_begin.
        self.iterations = None
        self.history = None

    def clr(self):
        """Return the learning rate for the current iteration (geometric interpolation)."""
        return self.base_lr * (self.max_lr / self.base_lr) ** (self.iterations / self.steps)

    def on_train_begin(self, logs=None):
        """Reset the counters and start from ``base_lr``."""
        self.iterations = 0
        self.history = {}
        K.set_value(self.model.optimizer.lr, self.base_lr)

    def on_batch_end(self, batch, logs=None):
        """Record the batch logs and advance the learning rate."""
        # `logs` defaults to None in the signature; treat that as "no metrics".
        logs = logs or {}

        if self.iterations > self.steps:
            self.model.stop_training = True

        self.history.setdefault("iterations", []).append(self.iterations)
        self.history.setdefault("lr", []).append(K.get_value(self.model.optimizer.lr))
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        self.iterations += 1
        K.set_value(self.model.optimizer.lr, self.clr())

    def plot(self, smooth=None, t_score=2):
        """Plot the smoothed loss versus learning rate on a log x-axis.

        Args:
            smooth: rolling-window length in batches; defaults to ``steps // 10``.
            t_score: multiplier applied to the standard error of the rolling mean.

        Prints the learning rate at which ``mean + t_score * std / sqrt(smooth)``
        is smallest.
        """
        smooth = int(smooth or self.steps // 10)
        df = pd.DataFrame(self.history).set_index("lr").loss.rolling(smooth).agg(["mean", "std"])
        # The "std" column is overwritten with an upper bound on the smoothed loss.
        df["std"] = df["mean"] + t_score * df["std"] / smooth ** 0.5
        print(f"Max speed learning rate  - {df['std'].idxmin():.1e}")
        df.plot(logx=True, figsize=(16, 8))

In [30]:
class DecayingLR(callbacks.Callback):
    """Learning-rate schedule with warm-up, trend-driven decay and early stopping.

    * Warm-up: the rate grows linearly with the iteration count until
      ``iterations / steps * warm_up`` exceeds 1, reaching ``max_lr``.
    * Decay: after warm-up, an exponentially weighted linear regression of the
      batch loss on the iteration number gives a slope ("speed", expressed per
      epoch). While the slope is positive the rate is multiplied by ``decay``
      after every batch; otherwise it is left unchanged.
    * Early stopping: training stops once the validation loss has changed by
      less than ``epsilon`` for ``wait`` consecutive epochs.

    Args:
        max_lr: learning rate reached at the end of warm-up.
        warm_up: warm-up speed; warm-up lasts ``steps / warm_up`` batches.
        decay_per_epoch: exponent control for the per-batch factor
            ``(1 - 1 / steps) ** (1 / decay_per_epoch)``.
        epsilon: smallest validation-loss change that counts as progress.
        wait: number of consecutive stalled epochs that triggers the stop.
    """

    def __init__(self, max_lr=0.1, warm_up=1, decay_per_epoch=1, epsilon=0.0001, wait=3):
        super().__init__()

        self.max_lr = max_lr
        self.warm_up = warm_up
        self.decay_per_epoch = decay_per_epoch
        self.epsilon = epsilon
        self.wait = wait

        # Everything below is (re)initialised in on_train_begin.
        self.iterations = None
        self.history = None

        # Exponentially weighted accumulators for the loss-vs-iteration regression.
        self.d_loss_iterations = None
        self.d_loss = None
        self.d_iterations = None
        self.d_iterations2 = None
        self.d_1 = None

        self.steps = None
        self.decay = None

        self.wait_count = None
        self.prev_loss = None

    def on_train_begin(self, logs=None):
        """Reset all state and set the first warm-up learning rate."""
        self.iterations = 1
        self.history = {}

        self.d_loss_iterations = 0.0
        self.d_loss = 0.0
        self.d_iterations = 0.0
        self.d_iterations2 = 0.0
        self.d_1 = 0.0

        # Batches per epoch, as reported by Keras through the callback params.
        self.steps = self.params["steps"]
        # The same factor is used for the moving averages and for the LR decay.
        self.decay = (1 - 1 / self.steps) ** (1 / self.decay_per_epoch)

        self.wait_count = 0
        self.prev_loss = None

        K.set_value(self.model.optimizer.lr, self.lr())

    def make_decay(self, old, new):
        """Blend ``new`` into the running value ``old`` with weight ``1 - decay``."""
        decay = self.decay
        return old * decay + new * (1 - decay)

    def lr(self):
        """Return the learning rate to use for the next batch."""
        warm_up = self.iterations / self.steps * self.warm_up
        if warm_up <= 1:
            return self.max_lr * warm_up

        lr = self.history["lr"][-1]
        speed = self.history["speed"][-1]

        # A positive slope means the smoothed loss is rising: shrink the rate.
        if speed > 0:
            return lr * self.decay

        return lr

    def on_batch_end(self, batch, logs=None):
        """Record the batch, update the loss trend and set the next learning rate.

        ``logs`` must contain a ``"loss"`` entry.
        """
        self.history.setdefault("iterations", []).append(self.iterations)
        self.history.setdefault("lr", []).append(K.get_value(self.model.optimizer.lr))

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        self.d_loss_iterations = self.make_decay(self.d_loss_iterations, logs["loss"] * self.iterations)
        self.d_loss = self.make_decay(self.d_loss, logs["loss"])
        self.d_iterations = self.make_decay(self.d_iterations, self.iterations)
        self.d_iterations2 = self.make_decay(self.d_iterations2, self.iterations ** 2)
        self.d_1 = self.make_decay(self.d_1, 1)

        cov = self.d_loss_iterations - self.d_loss * self.d_iterations / self.d_1
        var = self.d_iterations2 - self.d_iterations ** 2 / self.d_1

        # With a single observation the weighted variance of the iteration
        # number is zero and the slope is undefined (0 / 0); report no trend.
        speed = self.steps * cov / var if var else 0.0
        self.history.setdefault("speed", []).append(speed)

        self.iterations += 1
        K.set_value(self.model.optimizer.lr, self.lr())

    def on_epoch_end(self, epoch, logs=None):
        """Print the schedule state and apply the early-stopping rule."""
        logs = logs or {}

        lr = self.history["lr"][-1]
        speed = self.history["speed"][-1]
        print(f"Learning rate: {lr:.1e}")
        print(f"Speed per epoch: {speed:.4f}")

        val_loss = logs.get("val_loss")
        if val_loss is None:
            # No validation data was supplied, so there is nothing to base the
            # early-stopping decision on; leave the counters untouched.
            return

        if self.prev_loss is not None and abs(self.prev_loss - val_loss) < self.epsilon:
            self.wait_count += 1
        else:
            self.wait_count = 0
        self.prev_loss = val_loss

        print(f"Wait to stop: {self.wait - self.wait_count}\n")
        if self.wait == self.wait_count:
            self.model.stop_training = True

    def plot(self, smooth=None):
        """Plot rolling means of loss and learning rate against the iteration number.

        Args:
            smooth: rolling-window length in batches; defaults to one epoch.
        """
        smooth = int(smooth or self.steps)
        df = pd.DataFrame(self.history).set_index("iterations")[["loss", "lr"]].rolling(smooth).mean()
        df.plot(figsize=(16, 8), secondary_y="lr")

In [24]:
def train_model(data_train, data_val, units, test=False):
    """Train one model in two stages and return the best checkpoint.

    Stage 1 runs a single epoch with MAE loss at ``max_lr / 100``. Stage 2
    recompiles the same model with the negative-IoU loss at ``max_lr`` and
    trains under a learning-rate callback, saving the weights with the best
    validation loss to ``../processed/model.h5``.

    Args:
        data_train: ``(votes, answers)`` pair used for training batches.
        data_val: ``(votes, answers)`` pair used for validation; one validation
            step is run per row of its answers frame.
        units: base layer width passed to ``make_model``.
        test: when true, use ``MaxLRTest`` (learning-rate range test) instead
            of ``DecayingLR``.

    Returns:
        ``(history, lr_callback, model)`` where ``model`` is reloaded from the
        saved checkpoint.
    """
    max_lr = 3.8e-04

    total_steps = 100000
    steps_per_epoch = 1000
    checkpoint_path = "../processed/model.h5"

    def nadam(learning_rate):
        # Both stages use the same optimizer settings apart from the rate.
        return optimizers.Nadam(
            lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004
        )

    n_val_items = len(data_val[1].index)

    model = make_model(units)

    # Stage 1: one epoch of MAE regression, with IoU only reported as a metric.
    model.compile(
        optimizer=nadam(max_lr / 100),
        loss="mae",
        metrics=[intersection_over_union],
    )
    model.fit_generator(
        yield_batch(data_train),
        steps_per_epoch=steps_per_epoch,
        epochs=1,
        callbacks=None,
        validation_data=yield_batch_val(data_val),
        validation_steps=n_val_items,
    )

    # Stage 2: optimise the negative IoU directly under the LR callback.
    model.compile(
        optimizer=nadam(max_lr),
        loss=intersection_over_union,
        metrics=None,
    )
    if test:
        cycle = MaxLRTest()
    else:
        cycle = DecayingLR(max_lr=max_lr)
    checkpoint = callbacks.ModelCheckpoint(
        checkpoint_path, monitor="val_loss", verbose=1, save_best_only=True
    )
    rez = model.fit_generator(
        yield_batch(data_train),
        steps_per_epoch=steps_per_epoch,
        epochs=total_steps // steps_per_epoch,
        callbacks=[checkpoint, cycle],
        validation_data=yield_batch_val(data_val),
        validation_steps=n_val_items,
    )

    # The custom loss has to be supplied by name for the saved model to load.
    best_model = models.load_model(
        checkpoint_path,
        custom_objects={"intersection_over_union": intersection_over_union},
    )

    return rez, cycle, best_model

In [22]:
FOLDS = 10


def _predict_boxes(model, votes):
    """Predict one box per item in ``votes`` and map it back to original units."""
    feat = yield_batch_test(votes)
    scaled = model.predict_generator(feat, steps=len(votes.index.unique()))
    return scaler.inverse_transform(scaled)


def train_oof(train_set, test_set, units=16):
    """Run K-fold training and save out-of-fold and test predictions.

    For each of ``FOLDS`` shuffled folds a model is trained on the remaining
    items, its training curves are plotted, its best validation loss is
    recorded, the held-out items are predicted (out-of-fold), and the test
    items are predicted and averaged over folds. Predictions are converted
    back with the module-level ``scaler`` and written as two header-less CSV
    files into a time-stamped sub-directory of ``../processed``.

    Args:
        train_set: ``(votes, answers)`` pair; both frames are indexed by item id.
        test_set: votes frame for the test items, indexed by item id.
        units: base layer width passed to ``train_model``.
    """
    x_train, y_train = train_set
    x_test = test_set

    # Float zeros: the frames are filled with / accumulate float predictions.
    y_oof = pd.DataFrame(0.0, index=y_train.index, columns=COORDINATES)
    y_pred = pd.DataFrame(0.0, index=x_test.index.unique(), columns=COORDINATES)
    scores = []
    folds = model_selection.KFold(n_splits=FOLDS, shuffle=True)

    for n, (index_train, index_valid) in enumerate(folds.split(y_train), 1):
        print(f"\nFold - {n} / {FOLDS}")

        ids_train = y_train.index[index_train]
        ids_valid = y_train.index[index_valid]

        data_train = (x_train.loc[ids_train], y_train.loc[ids_train])
        data_val = (x_train.loc[ids_valid], y_train.loc[ids_valid])

        rez, cycle, model = train_model(data_train, data_val, units)

        cycle.plot()
        pd.DataFrame(rez.history)[["loss", "val_loss"]].plot(figsize=(16, 8))
        scores.append(min(rez.history["val_loss"]))

        # Rows come back in the order items first appear in the validation
        # votes, which were selected with `ids_valid`.
        y_oof.loc[ids_valid] = _predict_boxes(model, data_val[0])

        # Each fold contributes an equal share to the averaged test prediction.
        y_pred += _predict_boxes(model, x_test) / FOLDS

    # The loss is the negative IoU, so scores are negated for reporting.
    print("IOU на кроссвалидации: " + str(-np.round(sorted(scores), 5)))
    # Two standard errors of the mean: std / sqrt(number of folds).
    print(f"IOU среднее: {-np.mean(scores):0.5f} +/- {2 * np.std(scores) / len(scores) ** 0.5:0.5f}")

    subdir = time.strftime('%Y-%m-%d_%H-%M')
    path = pathlib.Path(f"../processed/{subdir}")
    # parents=True so a missing ../processed directory does not abort the
    # save after all folds have been trained.
    path.mkdir(parents=True, exist_ok=True)
    y_oof.to_csv(path / f"oof-{-np.mean(scores):0.5f}.csv", header=False)
    y_pred.to_csv(path / f"sub-{-np.mean(scores):0.5f}.csv", header=False)

In [None]:
# Run the full cross-validated training on the scaled training votes/answers;
# train_oof writes the out-of-fold and averaged test predictions to disk.
train_oof((votes, answers), votes_test)


Fold - 1 / 10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 4)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, None, 64)          832       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, None, 64)          12352     
_________________________________________________________________
conv1d_3 (Conv1D)            (None, None, 64)          12352     
_________________________________________________________________
conv1d_4 (Conv1D)            (None, None, 64)          12352     
_________________________________________________________________
conv1d_5 (Conv1D)            (None, None, 64)          12352     
_________________________________________________________________
conv1d_6 (Conv1D)            (None, None, 64)          12352 




Epoch 00001: val_loss improved from inf to -0.30271, saving model to ../processed/model.h5
Learning rate: 3.8e-04
Speed per epoch: -0.0993
Wait to stop: 3

Epoch 2/100

Epoch 00002: val_loss improved from -0.30271 to -0.41082, saving model to ../processed/model.h5
Learning rate: 3.8e-04
Speed per epoch: -0.0555
Wait to stop: 3

Epoch 3/100

Epoch 00003: val_loss improved from -0.41082 to -0.46968, saving model to ../processed/model.h5
Learning rate: 3.8e-04
Speed per epoch: -0.0326
Wait to stop: 3

Epoch 4/100

Epoch 00004: val_loss did not improve from -0.46968
Learning rate: 3.8e-04
Speed per epoch: -0.0241
Wait to stop: 3

Epoch 5/100

Epoch 00005: val_loss did not improve from -0.46968
Learning rate: 3.8e-04
Speed per epoch: -0.0109
Wait to stop: 3

Epoch 6/100

Epoch 00006: val_loss did not improve from -0.46968
Learning rate: 3.8e-04
Speed per epoch: -0.0118
Wait to stop: 3

Epoch 7/100

Epoch 00007: val_loss improved from -0.46968 to -0.48978, saving model to ../processed/model


Epoch 00035: val_loss improved from -0.56277 to -0.56351, saving model to ../processed/model.h5
Learning rate: 6.0e-06
Speed per epoch: -0.0034
Wait to stop: 3

Epoch 36/100

Epoch 00036: val_loss did not improve from -0.56351
Learning rate: 4.6e-06
Speed per epoch: -0.0044
Wait to stop: 3

Epoch 37/100

Epoch 00037: val_loss improved from -0.56351 to -0.56395, saving model to ../processed/model.h5
Learning rate: 4.2e-06
Speed per epoch: -0.0036
Wait to stop: 3

Epoch 38/100

Epoch 00038: val_loss did not improve from -0.56395
Learning rate: 4.2e-06
Speed per epoch: -0.0011
Wait to stop: 3

Epoch 39/100

Epoch 00039: val_loss did not improve from -0.56395
Learning rate: 3.7e-06
Speed per epoch: 0.0006
Wait to stop: 3

Epoch 40/100

Epoch 00040: val_loss improved from -0.56395 to -0.56550, saving model to ../processed/model.h5
Learning rate: 3.0e-06
Speed per epoch: -0.0050
Wait to stop: 3

Epoch 41/100

Epoch 00041: val_loss did not improve from -0.56550
Learning rate: 1.6e-06
Speed p




Epoch 00001: val_loss improved from inf to -0.32005, saving model to ../processed/model.h5
Learning rate: 3.8e-04
Speed per epoch: -0.2416
Wait to stop: 3

Epoch 2/100

Epoch 00002: val_loss improved from -0.32005 to -0.42390, saving model to ../processed/model.h5
Learning rate: 3.8e-04
Speed per epoch: -0.0753
Wait to stop: 3

Epoch 3/100

Epoch 00003: val_loss improved from -0.42390 to -0.46492, saving model to ../processed/model.h5
Learning rate: 3.8e-04
Speed per epoch: -0.0316
Wait to stop: 3

Epoch 4/100

Epoch 00004: val_loss did not improve from -0.46492
Learning rate: 3.8e-04
Speed per epoch: -0.0203
Wait to stop: 3

Epoch 5/100

Epoch 00005: val_loss did not improve from -0.46492
Learning rate: 3.8e-04
Speed per epoch: -0.0246
Wait to stop: 3

Epoch 6/100

Epoch 00006: val_loss did not improve from -0.46492
Learning rate: 3.8e-04
Speed per epoch: -0.0073
Wait to stop: 3

Epoch 7/100

Epoch 00007: val_loss did not improve from -0.46492
Learning rate: 3.8e-04
Speed per epoch: 


Epoch 00035: val_loss did not improve from -0.55516
Learning rate: 1.1e-05
Speed per epoch: -0.0059
Wait to stop: 3

Epoch 36/100

Epoch 00036: val_loss improved from -0.55516 to -0.55522, saving model to ../processed/model.h5
Learning rate: 1.1e-05
Speed per epoch: -0.0029
Wait to stop: 3

Epoch 37/100

Epoch 00037: val_loss did not improve from -0.55522
Learning rate: 7.0e-06
Speed per epoch: 0.0011
Wait to stop: 2

Epoch 38/100

Epoch 00038: val_loss did not improve from -0.55522
Learning rate: 5.8e-06
Speed per epoch: -0.0028
Wait to stop: 3

Epoch 39/100

Epoch 00039: val_loss did not improve from -0.55522
Learning rate: 3.1e-06
Speed per epoch: -0.0011
Wait to stop: 3

Epoch 40/100

Epoch 00040: val_loss did not improve from -0.55522
Learning rate: 3.1e-06
Speed per epoch: -0.0033
Wait to stop: 3

Epoch 41/100

Epoch 00041: val_loss did not improve from -0.55522
Learning rate: 1.9e-06
Speed per epoch: 0.0022
Wait to stop: 3

Epoch 42/100

Epoch 00042: val_loss improved from -0.5