In [1]:
import numpy as np
import pandas as pd 
import tensorflow as tf

In [2]:
# Lookup table label_id -> label_name, read from a CSV that is indexed by its
# 'label_id' column.
label_names = pd.read_csv('data/label_names.csv', index_col='label_id').label_name.to_dict()

In [3]:
len(label_names)

4716

In [4]:
from glob import glob
from tqdm import tqdm_notebook as tqdm

In [5]:
# Paths of the video-level TFRecord shards for each split, selected by filename
# prefix. sorted() fixes the order, since glob() itself gives no ordering guarantee.
train_recs = sorted(glob('data/video_level/train*.tfrecord'))
val_recs = sorted(glob('data/video_level/validate*.tfrecord'))
test_recs = sorted(glob('data/video_level/test*.tfrecord'))

In [6]:
def read_file(fine_name):
    """Parse every record of a single TFRecord file into Python/numpy objects.

    Unlike ``read_data`` this function does no error handling: any exception
    raised while parsing a record propagates to the caller.

    Parameters
    ----------
    fine_name : str
        Path of the TFRecord file to read (the parameter name is kept as-is
        for compatibility with existing callers).

    Returns
    -------
    tuple of four lists, all in record order:
        vid_ids    -- 'video_id' decoded from UTF-8 bytes to ``str``
        labels     -- 'labels' as ``uint16`` arrays
        mean_rgb   -- 'mean_rgb' as ``float32`` arrays
        mean_audio -- 'mean_audio' as ``float32`` arrays
    """
    parsed = []

    for raw_record in tf.python_io.tf_record_iterator(fine_name):
        feats = tf.train.Example.FromString(raw_record).features.feature

        # One tuple per record; the field order matches the returned lists.
        parsed.append((
            feats['video_id'].bytes_list.value[0].decode(encoding='UTF-8'),
            np.array(feats['labels'].int64_list.value, dtype='uint16'),
            np.array(feats['mean_rgb'].float_list.value, dtype='float32'),
            np.array(feats['mean_audio'].float_list.value, dtype='float32'),
        ))

    # Split the per-record tuples back into four parallel lists.
    vid_ids = [rec[0] for rec in parsed]
    labels = [rec[1] for rec in parsed]
    mean_rgb = [rec[2] for rec in parsed]
    mean_audio = [rec[3] for rec in parsed]

    return vid_ids, labels, mean_rgb, mean_audio

In [7]:
import traceback

In [8]:
def read_data(files_list):
    """Parse all records from several TFRecord files, skipping unparsable records.

    A record that raises an ordinary exception while being decoded is reported
    (message plus traceback) and skipped; the remaining records are still read.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT swallowed, so
    a long read can still be interrupted from the notebook.

    Parameters
    ----------
    files_list : iterable of str
        Paths of the TFRecord files to read, processed in the given order.

    Returns
    -------
    tuple of four lists, all in read order:
        vid_ids    -- 'video_id' decoded from UTF-8 bytes to ``str``
        labels     -- 'labels' as ``uint16`` arrays
        mean_rgb   -- 'mean_rgb' as ``float32`` arrays
        mean_audio -- 'mean_audio' as ``float32`` arrays
    """
    vid_ids = []
    labels = []
    mean_rgb = []
    mean_audio = []

    for f in files_list:
        for example in tf.python_io.tf_record_iterator(f):
            try:
                tf_example = tf.train.Example.FromString(example)

                features = tf_example.features.feature

                video_id = features['video_id'].bytes_list.value[0].decode(encoding='UTF-8')
                label = np.array(features['labels'].int64_list.value, dtype='uint16')
                rgb = np.array(features['mean_rgb'].float_list.value, dtype='float32')
                audio = np.array(features['mean_audio'].float_list.value, dtype='float32')
            except Exception:
                # Best-effort read: report the bad record and move on.
                # `Exception` (not a bare `except`) keeps Ctrl-C working.
                print('exception ocurred while reading, swallowing it')
                traceback.print_exc()
                continue

            # Only reached when every field of the record was decoded, so the
            # four lists always stay aligned.
            vid_ids.append(video_id)
            labels.append(label)
            mean_rgb.append(rgb)
            mean_audio.append(audio)

    return vid_ids, labels, mean_rgb, mean_audio

In [9]:
# Parse only the first 10 training shards: a small in-memory sample, not the
# full training set.
train_ids, train_labs, train_rgb, train_audio = read_data(train_recs[:10])

In [10]:
# Put the RGB and audio feature vectors side by side: one row per video.
X_10_concat = np.hstack([train_rgb, train_audio])

In [11]:
from sklearn.preprocessing import StandardScaler
# Fit the standardisation on the 10-shard sample only. The same fitted scaler is
# applied to the validation data, the training generator and the test data.
scaler = StandardScaler().fit(X_10_concat)

In [12]:
# Hold-out data used for monitoring: the first 100 validation shards.
val_ids, val_labs, val_rgb, val_audio = read_data(val_recs[:100])

In [13]:
# Same feature layout as the training sample (RGB columns, then audio columns),
# standardised with the scaler that was fitted on the training sample.
X_100_val = np.hstack([val_rgb, val_audio])
X_100_val = scaler.transform(X_100_val)

In [14]:
from keras.models import Sequential, Input, Model
from keras.layers import Concatenate, concatenate
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.regularizers import l1, l2
from keras.callbacks import EarlyStopping
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU, ELU, LeakyReLU
from keras.callbacks import EarlyStopping, Callback

Using TensorFlow backend.


In [15]:
# Network sizes: one input per concatenated feature column, one output per
# known label. Both are module-level globals read by create_model/to_dense_lab.
input_dim = X_100_val.shape[1]
output_dim = len(label_names)

In [16]:
input_dim

1152

In [17]:
output_dim

4716

In [18]:
def to_dense_lab(labs):
    """Turn one video's label ids into a multi-hot indicator vector.

    Parameters
    ----------
    labs : array-like of int
        Label ids used as indices into the output vector.

    Returns
    -------
    numpy.ndarray
        ``uint8`` vector of length ``output_dim`` (module-level global) holding
        1 at every position listed in ``labs`` and 0 elsewhere.
    """
    indicator = np.zeros(output_dim, dtype='uint8')
    indicator[labs] = 1
    return indicator

def to_dense_labs(list_labs):
    """Convert a sequence of per-video label-id arrays into one indicator matrix.

    Each element is converted with ``to_dense_lab`` and the resulting vectors
    are stacked with ``np.array``, giving one row per video.
    """
    rows = []
    for video_labels in list_labs:
        rows.append(to_dense_lab(video_labels))
    return np.array(rows)

In [19]:
# Multi-hot label matrix for the validation hold-out (one row per video).
val_labs_dense = to_dense_labs(val_labs)

In [20]:
from sklearn.metrics import log_loss, f1_score

In [21]:
class WatchlistCallback(Callback):
    """Keras callback that reports log loss on a fixed hold-out set after each epoch.

    After every epoch the current model predicts on the hold-out features and
    the score is printed to the notebook and appended to ``log2.txt``.

    Parameters
    ----------
    watchlist : tuple (X, y)
        Hold-out features and the matching target matrix.

    Notes
    -----
    The log file is opened in append mode and the handle is kept open for the
    lifetime of the instance, because one instance is reused across several
    ``fit_generator`` calls. Every write is flushed immediately.

    NOTE(review): ``sklearn.metrics.log_loss`` is applied to a multi-label
    indicator matrix here -- confirm this is the intended metric.
    """

    def __init__(self, watchlist):
        # Must name *this* class: ``super(Callback, self)`` starts the lookup
        # after ``Callback`` and therefore never ran ``Callback.__init__``.
        super(WatchlistCallback, self).__init__()
        self.X, self.y = watchlist
        self.log = open('log2.txt', 'a')

    def on_epoch_end(self, epoch, logs=None):
        """Keras hook: score the hold-out set once the epoch has finished.

        ``logs`` is accepted for API compatibility and not used.
        """
        self.print_score(epoch)

    def print_score(self, epoch):
        """Predict on the hold-out set, then print and log the resulting log loss."""
        y_pred = self.model.predict(self.X, verbose=0)

        # The epoch number is printed before the (potentially slow) scoring so
        # that progress is visible even if scoring is interrupted.
        print("epoch no %d" % (epoch), end=', ')
        ll = log_loss(self.y, y_pred)
        print('logloss=%.4f' % ll, end=', ')

        self.log.write("epoch no %d, " % epoch)
        self.log.write('model: %.4f, ' % ll)
        self.log.write('\n')
        self.log.flush()
        print()

In [22]:
from sklearn.utils import shuffle

In [23]:
X_100_val.shape

(34339, 1152)

In [24]:
# Monitor the standardised validation hold-out after every epoch.
# (An earlier variant passed PCA-reduced features, X_100_val_pca, instead.)
watchlist = WatchlistCallback(watchlist=(X_100_val, val_labs_dense))

In [25]:
def prepare_batches(seq, n):
    """Lazily yield consecutive slices of ``seq`` holding at most ``n`` items.

    The final slice is shorter when ``len(seq)`` is not a multiple of ``n``;
    an empty ``seq`` yields nothing.
    """
    yield from (seq[start:start + n] for start in range(0, len(seq), n))

def image_gen(file_list, n=2, seed=0):
    """Endless generator of ``(X, y)`` training batches built from TFRecord files.

    On every pass the file list is shuffled, cut into groups of ``n`` files,
    and each group is read, standardised with the module-level ``scaler`` and
    yielded together with its multi-hot label matrix.

    Parameters
    ----------
    file_list : list of str
        TFRecord file paths to cycle through.
    n : int
        Number of files combined into one yielded batch.
    seed : int
        Shuffle seed for the first pass; each later pass uses the next integer,
        so every pass gets a different but reproducible file order.

    Yields
    ------
    (X, y)
        ``X``: RGB and audio features stacked column-wise and scaled.
        ``y``: output of ``to_dense_labs`` for the same records.
    """
    pass_no = 0

    while True:
        shuffled = shuffle(file_list, random_state=seed + pass_no)

        for shard_group in prepare_batches(shuffled, n):
            _ids, labs, rgb, audio = read_data(shard_group)
            X = scaler.transform(np.hstack([rgb, audio]))
            yield X, to_dense_labs(labs)

        pass_no += 1

In [26]:
from keras_tqdm import TQDMNotebookCallback

In [27]:
import keras.backend as K

In [28]:
# Layout actually built below ("block" = Dense -> sigmoid -> Dropout):
#   wide = BatchNorm(block(input, 4000 units, dropout 0.5))
#   deep = BatchNorm(block(wide,  1000 units, dropout 0.2))
#   deep = BatchNorm(block(deep,  1000 units, dropout 0.2))
#   deep = block(deep, 4000 units, dropout 0.5)
#   out  = sigmoid(Dense(concat(wide, deep), output_dim))
def create_model():
    """Build and compile the multi-label classifier.

    The network is a stack of sigmoid dense blocks. The output of the first
    block (``wide``) is fed through three further blocks (``deep``) and is also
    concatenated with their result before the final sigmoid output layer.

    Reads the module-level globals ``input_dim`` (number of input features) and
    ``output_dim`` (number of labels).

    Returns
    -------
    keras Model
        Compiled with binary cross-entropy loss and Adam at learning rate 0.01.
    """
    def dense_block(tensor, units, drop_rate, normalize=True):
        # Dense -> sigmoid -> Dropout, optionally followed by BatchNormalization.
        # No `input_dim` is passed to Dense: the layer infers its input size
        # from the tensor it is called on.
        tensor = Dense(units=units, kernel_initializer='glorot_uniform')(tensor)
        tensor = Activation('sigmoid')(tensor)
        tensor = Dropout(drop_rate)(tensor)
        if normalize:
            tensor = BatchNormalization()(tensor)
        return tensor

    inputs = Input(shape=(input_dim, ))

    wide = dense_block(inputs, 4000, 0.5)

    deep = dense_block(wide, 1000, 0.2)
    deep = dense_block(deep, 1000, 0.2)
    # The last deep block is not batch-normalised before the concatenation.
    deep = dense_block(deep, 4000, 0.5, normalize=False)

    merged = concatenate([wide, deep])
    scores = Dense(units=output_dim, kernel_initializer='glorot_uniform')(merged)
    # Independent sigmoid per label, matching the binary cross-entropy loss.
    scores = Activation('sigmoid')(scores)

    model = Model(inputs=inputs, outputs=scores)

    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.01))

    return model

In [29]:
model = create_model()

In [30]:
# Endless training-batch generator over all training shards; every yielded
# batch is built from 8 shard files. The same generator object is passed to
# every fit_generator call in this notebook.
gen = image_gen(train_recs, n=8, seed=2)


In [None]:
#273
# NOTE(review): the meaning of the "273" marker above is not evident from this
# notebook -- confirm or remove.
# One "epoch" here is 32 generator batches (32 x 8 = 256 shard files), not a
# full pass over the training set. Keras' own progress output is disabled
# (verbose=0); progress comes from the two callbacks instead.
model.fit_generator(gen, steps_per_epoch=32, epochs=300, verbose=0, 
                    callbacks=[watchlist, TQDMNotebookCallback()])

epoch no 0, logloss=39.2281, 
epoch no 1, logloss=59.9097, 
epoch no 2, logloss=52.0797, 
epoch no 3, logloss=49.1388, 
epoch no 4, logloss=41.1664, 
epoch no 5, logloss=33.7037, 
epoch no 6, logloss=28.0021, 
epoch no 7, logloss=23.7866, 
epoch no 8, logloss=19.9821, 
epoch no 9, logloss=17.3367, 
epoch no 10, logloss=15.4236, 
epoch no 11, logloss=13.9567, 
epoch no 12, logloss=12.9591, 
epoch no 13, logloss=12.3136, 
epoch no 14, logloss=11.7894, 
epoch no 15, logloss=11.3657, 
epoch no 16, logloss=11.0584, 
epoch no 17, logloss=10.8080, 
epoch no 18, logloss=10.6068, 
epoch no 19, logloss=10.4226, 
epoch no 20, logloss=10.3121, 
epoch no 21, logloss=10.1784, 
epoch no 22, logloss=10.0961, 
epoch no 23, logloss=10.0253, 
epoch no 24, logloss=9.9523, 
epoch no 25, logloss=9.8819, 
epoch no 26, logloss=9.8335, 
epoch no 27, logloss=9.7775, 
epoch no 28, logloss=9.7102, 
epoch no 29, logloss=9.7133, 
epoch no 30, logloss=9.6682, 
epoch no 31, logloss=9.6232, 
epoch no 32, logloss=9.616

KeyboardInterrupt: 

In [54]:
model.save_weights('mt-01.bin')

In [60]:
model.load_weights('mt-01.bin')

In [62]:
# Lower the optimizer's learning rate in place (the model was compiled with
# 0.01) before continuing training.
K.set_value(model.optimizer.lr, 0.002)

In [None]:
#273
# Continue training with the same generator and callbacks: 30 epochs of
# 32 generator batches each.
model.fit_generator(gen, steps_per_epoch=32, epochs=30, verbose=0, 
                    callbacks=[watchlist, TQDMNotebookCallback()])

epoch no 0, logloss=8.7558, 
epoch no 1, logloss=8.7478, 
epoch no 2, logloss=8.7435, 
epoch no 3, logloss=8.7415, 
epoch no 4, logloss=8.7365, 
epoch no 5, logloss=8.7345, 
epoch no 6, logloss=8.7343, 
epoch no 7, logloss=8.7325, 
epoch no 8, logloss=8.7291, 
epoch no 9, logloss=8.7279, 
epoch no 10, logloss=8.7274, 
epoch no 11, logloss=8.7272, 
epoch no 12, logloss=8.7273, 
epoch no 13, logloss=8.7240, 
epoch no 14, logloss=8.7234, 
epoch no 15, logloss=8.7246, 
epoch no 16, logloss=8.7235, 
epoch no 20, 

In [64]:
#273
# Same call as the previous training cell, run again: another 30 epochs of
# 32 generator batches each.
model.fit_generator(gen, steps_per_epoch=32, epochs=30, verbose=0, 
                    callbacks=[watchlist, TQDMNotebookCallback()])

epoch no 0, logloss=8.7176, 
epoch no 1, logloss=8.7175, 
epoch no 2, logloss=8.7171, 
epoch no 3, logloss=8.7164, 
epoch no 4, logloss=8.7160, 
epoch no 5, logloss=8.7133, 
epoch no 6, logloss=8.7138, 
epoch no 7, logloss=8.7130, 
epoch no 8, logloss=8.7134, 
epoch no 9, logloss=8.7150, 
epoch no 10, logloss=8.7128, 
epoch no 11, logloss=8.7127, 
epoch no 12, logloss=8.7125, 
epoch no 13, logloss=8.7113, 
epoch no 14, logloss=8.7121, 
epoch no 15, logloss=8.7121, 
epoch no 16, logloss=8.7097, 
epoch no 17, logloss=8.7089, 
epoch no 18, logloss=8.7095, 
epoch no 19, logloss=8.7063, 
epoch no 20, logloss=8.7070, 
epoch no 21, logloss=8.7045, 
epoch no 22, logloss=8.7061, 
epoch no 23, logloss=8.7062, 
epoch no 24, logloss=8.7056, 
epoch no 25, logloss=8.7071, 
epoch no 26, logloss=8.7054, 
epoch no 27, logloss=8.7054, 
epoch no 28, logloss=8.7066, 
epoch no 29, logloss=8.7044, 


<keras.callbacks.History at 0x7f58057c22b0>

In [65]:
model.save_weights('mt-02.bin')

In [66]:
# Next step of the manual learning-rate schedule: 0.002 -> 0.001.
K.set_value(model.optimizer.lr, 0.001)

In [67]:
# Continue training at the reduced learning rate: up to 32 epochs of
# 32 generator batches each.
model.fit_generator(gen, steps_per_epoch=32, epochs=32, verbose=0, 
                    callbacks=[watchlist, TQDMNotebookCallback()])

epoch no 0, logloss=8.7012, 
epoch no 1, logloss=8.7002, 
epoch no 2, logloss=8.6999, 
epoch no 3, logloss=8.6988, 
epoch no 4, logloss=8.6987, 
epoch no 5, logloss=8.6971, 
epoch no 6, logloss=8.6968, 
epoch no 7, logloss=8.6961, 
epoch no 8, logloss=8.6971, 
epoch no 9, logloss=8.6970, 
epoch no 10, logloss=8.6982, 
epoch no 11, logloss=8.6983, 
epoch no 12, logloss=8.6969, 
epoch no 13, logloss=8.6968, 
epoch no 14, logloss=8.6951, 
epoch no 15, logloss=8.6958, 
epoch no 16, logloss=8.6951, 
epoch no 17, logloss=8.6943, 
epoch no 18, logloss=8.6952, 
epoch no 19, logloss=8.6947, 


KeyboardInterrupt: 

In [68]:
model.save_weights('mt-03.bin')

In [69]:
# Final step of the manual learning-rate schedule (0.001 -> 0.0001), followed
# by up to 32 more epochs of 32 generator batches each.
K.set_value(model.optimizer.lr, 0.0001)
model.fit_generator(gen, steps_per_epoch=32, epochs=32, verbose=0, 
                    callbacks=[watchlist, TQDMNotebookCallback()])

epoch no 0, logloss=8.6929, 
epoch no 1, logloss=8.6924, 
epoch no 2, logloss=8.6927, 
epoch no 3, logloss=8.6922, 
epoch no 4, logloss=8.6918, 
epoch no 5, logloss=8.6919, 
epoch no 6, logloss=8.6919, 
epoch no 7, logloss=8.6916, 
epoch no 8, logloss=8.6917, 
epoch no 9, logloss=8.6914, 
epoch no 10, logloss=8.6916, 
epoch no 11, logloss=8.6916, 


KeyboardInterrupt: 

In [70]:
model.save_weights('mt-04.bin')

In [31]:
model.load_weights('mt-04.bin')

In [36]:
def prepare_pred_row(prow, top_k=20):
    """Format the highest-scoring classes of one prediction row for the submission file.

    Parameters
    ----------
    prow : numpy.ndarray
        1-D array of per-class scores; the array index is the class id.
    top_k : int, optional
        Number of best classes to keep (default 20, the previously hard-coded
        value). Fewer are returned when the row has fewer entries.

    Returns
    -------
    str
        Space-separated ``"<class> <score>"`` pairs ordered by descending
        score, with scores printed to three decimals.
    """
    # Sorting the negated scores ascending yields class ids by descending score.
    classes = (-prow).argsort()[:top_k]
    scores = prow[classes]
    return ' '.join('%d %0.3f' % (c, s) for (c, s) in zip(classes, scores))

In [37]:
# Write the submission file: one line per video with its formatted
# "<class> <score>" pairs from prepare_pred_row.
# NOTE(review): the output file is named "..._val" but the loop iterates over
# the test shards (test_recs) -- confirm the file name is intended.
with open('subm10_val.csv', 'w') as f:
    f.write('VideoId,LabelConfidencePairs\n')

    # Process one shard at a time so only a single shard is held in memory.
    for fn in tqdm(test_recs):
        # The labels returned by read_file are ignored here.
        ids, _, rgb, audio = read_file(fn)
        # Same feature layout and scaling as used for training.
        X = np.hstack([rgb, audio])
        X = scaler.transform(X)

        pred = model.predict(X)

        # NOTE: the loop variable `id` shadows the Python builtin of that name.
        for id, prow in zip(ids, pred):
            lab_conf = prepare_pred_row(prow)
            f.write('%s,%s\n' % (id, lab_conf))

        #break




In [38]:
!gzip subm10_val.csv

In [77]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 1152)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 4000)              4612000   
_________________________________________________________________
activation_13 (Activation)   (None, 4000)              0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 4000)              0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 4000)              16000     
_________________________________________________________________
dense_14 (Dense)             (None, 1000)              4001000   
_________________________________________________________________
activation_14 (Activation)   (None, 1000)              0         
__________