In [1]:
import gc

import numpy as np
import scipy.sparse as sp

from sklearn.utils import shuffle
from tqdm import tqdm_notebook as tqdm

In [2]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU

Using TensorFlow backend.


In [3]:
# from http://stackoverflow.com/a/8980156/861423

def save_sparse_csr(filename, array):
    """Store the CSR components of `array` in a NumPy ``.npz`` archive.

    The archive holds four entries -- ``data``, ``indices``, ``indptr`` and
    ``shape`` -- which is exactly what `load_sparse_csr` reads back.
    """
    components = {
        'data': array.data,
        'indices': array.indices,
        'indptr': array.indptr,
        'shape': array.shape,
    }
    np.savez(filename, **components)

def load_sparse_csr(filename):
    """Rebuild a CSR matrix from an ``.npz`` archive.

    The archive must contain the entries ``data``, ``indices``, ``indptr``
    and ``shape`` (the layout written by `save_sparse_csr`).

    Returns a ``scipy.sparse.csr_matrix``.
    """
    # np.load on an .npz returns a lazily-read archive that keeps the file
    # open; the context manager closes it once the arrays are materialised.
    with np.load(filename) as loader:
        data = loader['data']
        indices = loader['indices']
        indptr = loader['indptr']
        # Pass the shape as a plain tuple of ints instead of a NumPy array.
        shape = tuple(int(dim) for dim in loader['shape'])
    return sp.csr_matrix((data, indices, indptr), shape=shape)

In [4]:
def nnet(input_dim=1152, hidden_units=5500, n_classes=4716, dropout=0.5, lr=0.01):
    """Build and compile the one-hidden-layer multi-label network.

    Layers, in order: Dense(hidden_units) -> PReLU -> Dropout ->
    BatchNormalization -> Dense(n_classes) -> sigmoid. The model is compiled
    with binary cross-entropy loss and the Adam optimizer.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    hidden_units : int
        Width of the hidden Dense layer.
    n_classes : int
        Number of sigmoid outputs.
    dropout : float
        Dropout rate applied after the PReLU activation.
    lr : float
        Learning rate passed to Adam.

    The defaults are the values that used to be hard-coded, so a bare
    ``nnet()`` call builds the same model as before.
    """
    model = Sequential()

    model.add(Dense(input_dim=input_dim, units=hidden_units, kernel_initializer='glorot_uniform'))
    model.add(PReLU())
    model.add(Dropout(dropout))

    model.add(BatchNormalization())

    model.add(Dense(units=n_classes, kernel_initializer='glorot_uniform'))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr))

    return model

In [5]:
# Build the network with nnet() and load its weights from 'ololo.bin'.
# NOTE(review): the weights file must have been saved for this exact
# architecture -- confirm where 'ololo.bin' comes from.
nnet_model = nnet()
nnet_model.load_weights('ololo.bin')

In [6]:
import pickle

In [7]:
# Unpickle the feature scaler; later cells call scaler.transform(...) on the
# raw features before feeding them to the network.
# SECURITY: pickle.load can execute arbitrary code -- only load trusted files.
with open('scaler.bin', 'rb') as f:
    scaler = pickle.load(f)

In [8]:
# X_val: dense array read from 'X_val.npy'.
# y_val: sparse CSR matrix read from 'y_val.npz'; its rows are used below as
# the targets for the stacking model and for the GAP evaluation.
X_val = np.load('X_val.npy')
y_val = load_sparse_csr('y_val.npz')

In [9]:
# Sparse CSR matrices, presumably precomputed kNN predictions for the
# validation and test sets (inferred from the file names -- TODO confirm).
knn_val = load_sparse_csr('knn_pred_val.npz')
knn_test = load_sparse_csr('knn_pred_test.npz')

In [11]:
# Sample sizes drawn from the shuffled rows of X_val:
# the first n rows fit the stacking model, the next n_val rows evaluate it.
n = 10000
n_val = 20000

In [12]:
import xgboost as xgb

In [13]:
# Load one pre-trained XGBoost booster per class index (0..4715), keyed by
# class index. Classes whose model file cannot be loaded are simply absent
# from `xgbs`; later cells test `i not in xgbs` to skip them.
xgbs = {}

for i in range(4716):
    try:
        xgbs[i] = xgb.Booster(model_file='xgb_models/model_%d.xgb' % i)
    except Exception:
        # Deliberate best-effort: a class without a loadable model is skipped.
        # Catching Exception (not a bare `except:`) means Ctrl-C and
        # SystemExit still interrupt this long loop.
        pass

In [15]:
# One row per (sample, class) pair, d feature columns per row.
# Later cells fill X_train_reshaped columns 0-3 with: xgb score,
# missing-model flag, scaled kNN score, neural-net score.
# NOTE(review): X_val_reshaped is allocated here but never written or read
# in the visible cells -- confirm it is still needed (it is the larger buffer).
d = 4
X_train_reshaped = np.zeros((n * 4716, d), dtype='float16')
X_val_reshaped = np.zeros((n_val * 4716, d), dtype='float16')

In [16]:
# Deterministic (random_state=1) permutation of the row indices of X_val.
idx_shuffled = shuffle(np.arange(X_val.shape[0]), random_state=1)

# Disjoint slices of the permutation: first n rows for fitting the stacker,
# the following n_val rows for evaluating it.
# NOTE(review): if X_val has fewer than n + n_val rows these slices silently
# come out shorter than expected.
train_sample_idx = idx_shuffled[:n]
val_sample_idx = idx_shuffled[n:(n+n_val)]

In [17]:
# Feature rows of X_val for the stacker-train and stacker-validation samples.
X_sample_train = X_val[train_sample_idx]
X_sample_val = X_val[val_sample_idx]

In [18]:
# Wrap both samples as DMatrix objects, the input passed to Booster.predict below.
dtrain = xgb.DMatrix(X_sample_train)
dval = xgb.DMatrix(X_sample_val)

In [19]:
# Per-class xgb score (default 0) and "model missing" flag (default 1) for
# each sample. The prediction loop overwrites both for every class that has
# a loaded booster, so the defaults only survive for classes without one.
X_xgb_train = np.full((n, 4716), 0, dtype='float16')
X_miss_train = np.full((n, 4716), 1, dtype='float16')

X_xgb_val = np.full((n_val, 4716), 0, dtype='float16')
X_miss_val = np.full((n_val, 4716), 1, dtype='float16')

In [20]:
# For every class with a loaded booster, store its predictions for both
# samples in column i and clear the "model missing" flag for that class.
for i in tqdm(range(4716)):
    if i not in xgbs:
        # No model for this class: keep score 0 and missing flag 1.
        continue
    X_xgb_train[:, i] = xgbs[i].predict(dtrain)
    X_miss_train[:, i] = 0.0

    X_xgb_val[:, i] = xgbs[i].predict(dval)
    X_miss_val[:, i] = 0.0

  def _keys_default(self):





In [21]:
# Flatten the (n, 4716) tables row by row so that each (sample, class) pair
# becomes one row: column 0 = xgb score, column 1 = missing-model flag.
X_train_reshaped[:, 0] = X_xgb_train.flatten()
X_train_reshaped[:, 1] = X_miss_train.flatten()

In [22]:
# Column 2: kNN values for the train sample, flattened in the same
# (sample, class) order and divided by 48.531.
# NOTE(review): 48.531 is a hard-coded scale whose origin is not shown in this
# file; the same constant is reused for the validation and test kNN values.
knn_d = knn_val[train_sample_idx].toarray().reshape(-1)
X_train_reshaped[:, 2] = knn_d / 48.531

In [48]:
# kNN values for the validation sample, kept 2-D (one row per sample) and
# divided by the same hard-coded 48.531 used for the train sample.
knn_sample_val = knn_val[val_sample_idx].toarray() / 48.531

In [28]:
# Column 3: network predictions on the scaler-transformed train sample,
# flattened to one value per (sample, class) row.
nnet_d = nnet_model.predict(scaler.transform(X_sample_train))
X_train_reshaped[:, 3] = nnet_d.reshape(-1)
# Drop the reference to the prediction matrix once it has been copied.
del nnet_d

In [23]:
# Targets for the stacker: the y_val rows of the train sample, densified and
# flattened to 1-D in the same (sample, class) order as X_train_reshaped.
y_train = np.asarray(y_val[train_sample_idx].todense()).reshape(-1)

In [24]:
# Force a garbage-collection pass after the large intermediates above.
gc.collect()

6

In [25]:
from sklearn.linear_model import LogisticRegression

In [30]:
# Stacking model: logistic regression (liblinear solver, C=1.0, fixed seed).
lr = LogisticRegression(C=1.0, random_state=1, solver='liblinear')

In [46]:
# Fit on columns 2 and 3 only (scaled kNN score and neural-net score);
# columns 0 and 1 (xgb score, missing flag) are left out of this fit.
%time lr.fit(X_train_reshaped[:,[2,3]], y_train, )

CPU times: user 1min 1s, sys: 1.3 s, total: 1min 2s
Wall time: 1min 2s


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=1, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [47]:
# Coefficients of the two-column fit (columns [2, 3]: kNN, neural net).
lr.coef_[0]

array([  7.32869425,  13.87249349])

In [32]:
# NOTE(review): stale cell -- its execution count (32) predates the fit cell
# (46), and the four coefficients shown do not match the current two-column
# fit. Presumably from an earlier fit on all four columns; re-running it now
# would print two values.
lr.coef_[0]

array([ -0.29514885,  -2.0692742 ,   6.93966496,  13.42163642])

In [26]:
# NOTE(review): stale cell -- its execution count (26) predates the fit cell
# (46), and the three coefficients shown come from an earlier fit whose
# feature subset is not shown in this file.
lr.coef_[0]

array([  8.00144297,  -2.22362558,  19.1382344 ])

In [36]:
# Network predictions on the scaler-transformed validation sample.
nn_val = nnet_model.predict(scaler.transform(X_sample_val))

In [51]:
# Four-feature blend (xgb, missing flag, kNN, neural net). The weights are
# literal copies of the four-coefficient lr.coef_[0] output shown earlier;
# the model intercept is not applied.
# NOTE(review): in file order this pred_val is overwritten by the two-feature
# blend in the next cell before it is evaluated.
pred_val = -0.29514885 * X_xgb_val + \
           -2.0692742 * X_miss_val + \
            6.93966496 * knn_sample_val + \
            13.42163642 * nn_val

In [53]:
# Two-feature blend (kNN, neural net) used for the evaluation below. The
# weights are hard-coded from the two-coefficient lr.coef_[0] output; the
# model intercept is not applied.
# NOTE(review): the second weight carries one more digit than the printed
# coefficient (13.872493494 vs 13.87249349) -- confirm which is intended.
pred_val = 7.32869425 * knn_sample_val + \
           13.872493494 * nn_val

In [41]:
def ap_at_n(predictions, actuals, n=20):
    """Average precision over the top-`n` ranked predictions of one example.

    Parameters
    ----------
    predictions : 1-D numpy array of scores (higher ranks first).
    actuals : 1-D numpy array of the same length; entries > 0 are positives.
    n : int
        Number of top-ranked predictions to inspect.

    Returns
    -------
    0 when `actuals` has no positive entry; otherwise the sum, over each
    positive found within the top `n`, of precision-at-that-rank divided by
    min(number of positives, n).
    """
    sortidx = (-predictions).argsort()
    numpos = (actuals > 0).sum()
    if numpos == 0:
        return 0

    numpos = min(numpos, n)
    delta_recall = 1.0 / numpos

    ap = 0.0
    poscount = 0.0

    # Slice rather than range(n): with fewer than n predictions the fixed
    # range indexed past the end of sortidx and raised IndexError.
    for rank, idx in enumerate(sortidx[:n], start=1):
        if actuals[idx] > 0:
            poscount += 1
            ap += poscount / rank * delta_recall
    return ap

def gap(pred, actual, n=20):
    """Mean of `ap_at_n` over all rows of `pred` / `actual`.

    Parameters
    ----------
    pred : indexable of per-example score arrays.
    actual : indexable of per-example label arrays, aligned with `pred`.
    n : int
        Cut-off forwarded to `ap_at_n`; the default of 20 is the value that
        used to be hard-coded.

    Raises ZeroDivisionError when `pred` is empty (unchanged behaviour).
    """
    # `total` rather than `all`, which shadowed the builtin.
    total = 0.0
    count = 0
    for i in range(len(pred)):
        total = total + ap_at_n(pred[i], actual[i], n=n)
        count = count + 1
    return total / count

In [54]:
# Score the blended validation predictions against the dense y_val rows of
# the validation sample.
gap(pred_val, y_val[val_sample_idx].toarray())

0.78568446192758623

In [55]:
# Test-set features, read from 'X_test.npy'.
X_test = np.load('X_test.npy')

In [56]:
# Network predictions on the scaler-transformed test features.
nn_test = nnet_model.predict(scaler.transform(X_test))

In [59]:
# Test-set blend with the same hard-coded weights and the same 48.531 kNN
# scale as the two-feature validation blend; the intercept is not applied.
# NOTE(review): knn_test.toarray() densifies the whole test matrix at once.
y_pred = 7.32869425 * (knn_test.toarray() / 48.531) + \
           13.872493494 * nn_test

In [61]:
def prepare_pred_row(prow):
    """Format the 20 highest-scoring classes of one prediction row.

    Returns a single space-separated string of "<class index> <score>" pairs,
    ordered by descending score, with scores printed to three decimals.
    """
    top_classes = (-prow).argsort()[:20]
    pairs = []
    for cls in top_classes:
        pairs.append('%d %0.3f' % (cls, prow[cls]))
    return ' '.join(pairs)

In [62]:
# Unpickle the test-set identifiers; they are zipped with the rows of y_pred
# when the submission file is written.
# SECURITY: pickle.load can execute arbitrary code -- only load trusted files.
with open('test_ids.bin', 'rb') as f:
    test_ids = pickle.load(f)

In [63]:
# Write the submission CSV: a header line, then one
# "<video id>,<label/score pairs>" line per test row.

# zip() would silently stop at the shorter input and produce a truncated
# submission, so fail loudly on a mismatch before touching the file.
if len(test_ids) != len(y_pred):
    raise ValueError('test_ids and y_pred differ in length: %d vs %d'
                     % (len(test_ids), len(y_pred)))

with open('subm_stack.csv', 'w') as f:
    f.write('VideoId,LabelConfidencePairs\n')

    # `video_id`, not `id`: at notebook top level the loop variable is a
    # global and would replace the builtin id() for the rest of the session.
    for video_id, prow in tqdm(zip(test_ids, y_pred), total=len(test_ids)):
        lab_conf = prepare_pred_row(prow)
        f.write('%s,%s\n' % (video_id, lab_conf))




In [64]:
# Compress the submission with the shell's gzip.
# NOTE(review): this presumably replaces subm_stack.csv with subm_stack.csv.gz,
# and re-running may stall or fail if the .gz already exists -- confirm.
!gzip subm_stack.csv