In [None]:
import os
import pickle
import random
import shutil

import more_itertools
import numpy as np
import scipy.sparse as sp
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tqdm

from prepare_dataset.config import SPARSE_DIR, TMP_DIR, TENSORBOARD_DIR

%pylab inline

In [None]:
def IP_score(y, p, ctr):
    """Return a scaled inverse-propensity score.

    Args:
        y: array of per-sample numerators (the callers in this notebook pass
            the policy's probability for each logged sample).
        p: array of per-sample divisors (callers pass clipped logged
            propensities); its length is used as the sample count.
        ctr: non-zero scalar used to reweight the sample count in the
            denominator (callers pass a click-through rate).

    Returns:
        ``sum(y / p) / (len(p) * (1 + (1 - ctr) / ctr * 10)) * 10000``.
    """
    n_samples = len(p)
    reweight = (1 - ctr) / ctr * 10
    normaliser = n_samples * (1 + reweight)
    weighted_total = np.sum(y / p)
    return weighted_total / normaliser * 10000

In [None]:
# Load the pickled holdout pack, validation pack and training batches from
# TMP_DIR. Each pack unpacks into five objects; judging by how they are fed to
# the model below they are features (X), gather indices (I), costs (C),
# logged propensities (P) and a click-through rate (CTR) — TODO confirm
# against the preprocessing code.
# `with` closes every file handle deterministically (the bare open() calls
# left that to the garbage collector).
# NOTE(review): pickle.load can execute arbitrary code; only load files
# produced by this project's own preprocessing.
with open(TMP_DIR + '/holdout_pack.pickled', 'rb') as pack_file:
    hX, hI, hC, hP, hCTR = pickle.load(pack_file)
with open(TMP_DIR + '/valid_pack.pickled', 'rb') as pack_file:
    vX, vI, vC, vP, vCTR = pickle.load(pack_file)
with open(TMP_DIR + '/train_batches.pickled', 'rb') as pack_file:
    batches = pickle.load(pack_file)

In [None]:
# Baseline: score a policy that assigns probability 1/11 to every logged
# validation sample, with the logged propensities clipped to [1e-3, 1].
uniform_policy_probs = np.full(len(vP), 1.0) / 11
clipped_valid_propensity = np.clip(vP, 1e-3, 1)
uniform_policy_score = IP_score(uniform_policy_probs, clipped_valid_propensity, vCTR)
print(uniform_policy_score)
# > 41.9452974882

In [None]:
class LinearModel():
    """Linear softmax policy trained on an inverse-propensity-weighted cost.

    One weight per input feature produces one score per input row. Rows are
    grouped into blocks of ``n_candidates`` and soft-maxed within each block.
    The constructor builds the TF1 graph, opens a session, initialises all
    variables and creates the TensorBoard writer.
    """

    def __init__(self, n_features=74000, n_candidates=11, seed=42):
        """Build the graph, start a session and initialise all variables.

        Args:
            n_features: width of the sparse input matrix, i.e. the number of
                weights. Defaults to the previously hard-coded 74000.
            n_candidates: number of rows per softmax group; the row count fed
                to ``place_X`` must be a multiple of it. Defaults to the
                previously hard-coded 11.
            seed: graph-level random seed. Defaults to the previously
                hard-coded 42.
        """
        self.n_features = n_features
        self.n_candidates = n_candidates
        # Global step counter; the training loop increments it and
        # dump_summary uses it as the x-axis of the TensorBoard curves.
        self.step = 0
        self.graph = tf.Graph()
        self.graph.seed = seed
        # Make sure the log directory exists before build_model() creates the
        # summary FileWriter that points at it.
        os.makedirs(TENSORBOARD_DIR, exist_ok=True)
        self.build_model()
        self.create_session()

    def create_session(self):
        """Open a session on this model's graph and run the variable initialiser."""
        self.session = tf.Session(config=None, graph=self.graph)
        self.session.run(self.init_all_vars)

    def close_session(self):
        """Close the summary writer and the session, then drop the graph reference."""
        # Closing the writer flushes pending summaries and releases the event
        # file; previously it was left open.
        self.log_writer.close()
        self.session.close()
        self.graph = None

    def dump_summary(self, fd):
        """Evaluate all merged summaries on ``fd`` and log them at ``self.step``.

        Args:
            fd: feed dict that supplies every placeholder the summaries depend
                on (features, indices, costs and propensities).
        """
        summary = self.session.run(self.summary_op, feed_dict=fd)
        self.log_writer.add_summary(summary, self.step)

    def build_model(self):
        """Define placeholders, the linear-softmax policy, the loss and the train ops."""
        with self.graph.as_default():
            # Sparse feature matrix: one row per candidate.
            self.place_X = tf.sparse_placeholder(dtype=tf.float32, shape=(None, self.n_features), name="input_X")
            # Index pairs into `probs` (consumed by gather_nd below).
            self.place_I = tf.placeholder(dtype=tf.int32, shape=(None,2), name="input_Indices")
            self.place_C = tf.placeholder(dtype=tf.float32, shape=(None,), name="input_Cost")
            self.place_P = tf.placeholder(dtype=tf.float32, shape=(None,), name="input_Propensity")

            self.W = tf.Variable(tf.random_normal(shape=(self.n_features, 1)), name="weights")

            # One raw score per input row, shape (rows, 1).
            self.o_linear = tf.sparse_tensor_dense_matmul(self.place_X, self.W)
            # Regroup the scores into blocks of n_candidates and normalise each
            # block into a probability distribution.
            x = tf.reshape(self.o_linear, shape=[-1, self.n_candidates], name='reshape')
            self.probs = tf.nn.softmax(x)

            # Policy probability at each requested index pair, divided by the
            # fed propensity and weighted by the fed cost.
            self.sliced_probs = tf.gather_nd(self.probs, self.place_I)
            self.ratio = self.sliced_probs / self.place_P
            self.r = self.place_C * self.ratio

            r_mean, r_var = tf.nn.moments(self.r, axes=[0])
            tf.summary.scalar('r_mean', r_mean)
            # Standard deviation is the square ROOT of the variance; the
            # previous tf.square(r_var) logged variance squared under 'r_std'.
            tf.summary.scalar('r_std', tf.sqrt(r_var))

            # Mean weighted cost plus a small penalty on the magnitude of the
            # raw scores.
            self.loss = tf.reduce_mean(self.r) + 0.0001*tf.sqrt(tf.nn.l2_loss(self.o_linear))
            tf.summary.scalar('loss', self.loss)

            # Two interchangeable train ops that differ only in learning rate.
            self.adam3 = tf.train.AdamOptimizer(learning_rate=0.003).minimize(self.loss)
            self.adam1 = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.loss)

            self.init_all_vars = tf.global_variables_initializer()
            self.summary_op = tf.summary.merge_all()
            self.saver = tf.train.Saver()
            self.log_writer = tf.summary.FileWriter(TENSORBOARD_DIR)

In [None]:
# Start from an empty TensorBoard log directory so curves from earlier runs do
# not mix with this one. Pure Python avoids interpolating the path into a
# shell command; ignore_errors=True mirrors `rm -rf`, which also succeeds when
# the directory does not exist yet.
shutil.rmtree(TENSORBOARD_DIR, ignore_errors=True)

In [None]:
# Instantiate the model: the constructor builds the graph, opens a session and
# runs the variable initialiser.
model = LinearModel()

In [None]:
# Remind the reader how to follow the training curves.
tensorboard_hint = 'use `tensorboard --logdir={}` to see learning progress'
print(tensorboard_hint.format(TENSORBOARD_DIR))

In [None]:
# Training schedule (previously inlined as magic numbers).
N_EPOCHS = 40
BATCHES_PER_EPOCH = 3001
SUMMARY_EVERY = 50      # steps between TensorBoard summary dumps
VALIDATE_EVERY = 1000   # steps between validation-score printouts

for ep in range(N_EPOCHS):
    # Re-seed per epoch so each shuffle is reproducible given the epoch number
    # and the incoming batch order. The shuffle is in place.
    np.random.seed(ep)
    np.random.shuffle(batches)
    # Iterate over a slice rather than indexing range(3001): same batches when
    # enough are available, but no IndexError when there are fewer.
    for i, (tX, tI, tC, tP) in enumerate(batches[:BATCHES_PER_EPOCH]):
        # Samples whose logged cost is below 0.5 are treated as clicks.
        click_mask = tC < 0.5

        # Per the original author's note, the propensities of non-clicked
        # samples were divided by 10 upstream: undo that, then clip to [0.3, 1].
        tP_ = tP.copy()
        tP_[~click_mask] *= 10
        tP_ = np.clip(tP_, 0.3, 1)

        # Replace the logged costs with hand-set ones: -1 for clicks, -0.1 otherwise.
        tC_ = tC.copy()
        tC_[click_mask] = -1
        tC_[~click_mask] = -0.1

        # One optimisation step.
        fd = {model.place_X: tX, model.place_I: tI, model.place_C: tC_, model.place_P: tP_}
        _ = model.session.run(model.adam3, feed_dict=fd)
        model.step += 1

        if i % SUMMARY_EVERY == 0:
            model.dump_summary(fd)

        if i % VALIDATE_EVERY == 0:
            # Separate name so the training feed dict is never clobbered.
            valid_fd = {model.place_X: vX, model.place_I: vI}
            valid_probs = model.session.run(model.sliced_probs, feed_dict=valid_fd)
            # NOTE(review): propensities are clipped at 1e-3 here but at 1e-7
            # in the final evaluation cells — confirm which one is intended.
            valid_score = IP_score(valid_probs, np.clip(vP, 1e-3, 1), vCTR)
            print(ep, i, valid_score)

In [None]:
# Score the trained policy on the validation pack.
# NOTE(review): propensities are clipped at 1e-7 here but at 1e-3 in the
# training-loop printouts, so the numbers may not be directly comparable.
fd = {
    model.place_X: vX,
    model.place_I: vI,
}
valid_probs = model.session.run(model.sliced_probs, feed_dict=fd)
valid_propensity = np.clip(vP, 1e-7, 1)
valid_score = IP_score(valid_probs, valid_propensity, vCTR)
print(valid_score)
# > 56.0573730533

In [None]:
# Score the trained policy on the holdout pack (propensities clipped to [1e-7, 1]).
fd = {
    model.place_X: hX,
    model.place_I: hI,
}
holdout_probs = model.session.run(model.sliced_probs, feed_dict=fd)
holdout_propensity = np.clip(hP, 1e-7, 1)
holdout_score = IP_score(holdout_probs, holdout_propensity, hCTR)
print(holdout_score)
# > 55.062495803

In [None]:
# Create the local models directory. exist_ok=True keeps the cell re-runnable,
# whereas a plain `mkdir` reports an error once the directory exists.
# NOTE(review): the checkpoint in the next cell is written under
# ./checkpoints, not ./models — confirm which directory is intended.
os.makedirs('./models', exist_ok=True)

In [None]:
# Save the trained variables. The saver does not create missing parent
# directories, and no earlier cell creates ./checkpoints, so make sure the
# folder exists before writing.
checkpoint_path = './checkpoints/base.model'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
model.saver.save(model.session, checkpoint_path)

# Predict for test

In [None]:
import more_itertools

def pack_samples_test(samples):
    """Stack per-sample sparse matrices into one sparse feed value.

    Args:
        samples: non-empty sequence of dicts, each holding a scipy sparse
            matrix under the key ``'mat'``. The matrices are stacked
            vertically in the given order.

    Returns:
        tf.SparseTensorValue whose indices are the (row, col) coordinates of
        the stored entries, whose values are float32 ``1.0`` for every stored
        entry (the stored magnitudes are discarded) and whose shape is the
        shape of the stacked matrix.
    """
    # Ask for COO explicitly: `.row` / `.col` exist only on COO matrices, and
    # scipy may otherwise hand back CSR when every input block is CSR.
    X_coo = sp.vstack([x['mat'] for x in samples], format='coo')
    # Plain (nnz, 2) integer array; np.mat is removed in NumPy 2.0 and
    # np.matrix is discouraged. int64 matches TF's sparse index dtype.
    indices = np.column_stack((X_coo.row, X_coo.col)).astype(np.int64)
    values = np.ones(len(X_coo.data), dtype=np.float32)
    return tf.SparseTensorValue(indices, values, X_coo.shape)

In [None]:
# Score every test shard chunk by chunk and collect one
# (sample id, per-candidate raw scores) pair per sample.
result = []
for shard_idx in range(8):
    # NOTE(review): pickle.load can execute arbitrary code; only load shards
    # produced by this project's own preprocessing.
    with open(SPARSE_DIR + '/test_{}.pickled'.format(shard_idx), 'rb') as shard_file:
        ds = pickle.load(shard_file)
    for ds_chunk in more_itertools.chunked(tqdm.tqdm_notebook(ds), 50000):
        tX = pack_samples_test(ds_chunk)
        fd = {model.place_X: tX}
        # o_linear is the raw linear output (before the softmax), one row per
        # stacked input row.
        logits = model.session.run(model.o_linear, feed_dict=fd)
        # Row offsets of each sample inside the stacked chunk; assumes every
        # sample's 'mat' has exactly n_candidates rows — TODO confirm.
        borders = np.cumsum(([0] + [x['n_candidates'] for x in ds_chunk]))
        # Distinct index name: the original reused `i`, shadowing the shard index.
        for j, sample in enumerate(ds_chunk):
            result.append((sample['id'], logits[borders[j]:borders[j+1], 0][:]))
        del tX
    del ds
# Close the dump deterministically so the file is complete on disk before any
# later cell reads it back.
with open(TMP_DIR + '/pre_submit.pickled', 'wb') as dump_file:
    pickle.dump(result, dump_file)

In [None]:
fout = open(TMP_DIR + '/submit', 'wt')
for pred in tqdm.tqdm_notebook(result):
    id = pred[0]
    logits = pred[1]
    line = id + ';' + ','.join(['{}:{:.4f}'.format(i, v) for i, v in enumerate(logits)])
    fout.write(line + '\n')
fout.close()
! gzip {TMP_DIR}/submit
# > IPS : 54.3729997773

In [None]:
result = pickle.load(open(TMP_DIR + '/pre_submit.pickled', 'rb'))
fout = open(TMP_DIR + '/submit_scaled', 'wt')
for pred in tqdm.tqdm_notebook(result):
    id = pred[0]
    logits = pred[1] * 20
    line = id + ';' + ','.join(['{}:{:.4f}'.format(i, v) for i, v in enumerate(logits)])
    fout.write(line + '\n')
fout.close()
! gzip {TMP_DIR}/submit_scaled
# > IPS: 54.5564084314