In [3]:
import time
import csv
import pickle
import math
import os
import operator
import pandas
from __future__ import print_function

from collections import Counter, OrderedDict, defaultdict, namedtuple
import sys
import time

import numpy as np
import theano
from theano import config
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams


In [4]:
sample = pandas.read_pickle("./data/processedData.pkl")
userList = sample["USERID"].unique()
productList = sample["PRODUCTID"].unique()

In [5]:
userBase = sample.groupby(['USERID', 'SESSION'])['PRODUCTID'].apply(list).groupby('USERID').apply(list)

In [6]:
# Here we first define a class that can map a product to an ID (p2i)
# and back (i2p).

class OrderedCounter(Counter, OrderedDict):
    """Counter that remembers the order elements are first seen"""
    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__,
                      OrderedDict(self))
    def __reduce__(self):
        return self.__class__, (OrderedDict(self),)


class Vocabulary:
    """A vocabulary, assigns IDs to tokens"""
    def __init__(self):
        self.freqs = OrderedCounter()
        self.users = []
        self.u2i = {}
        self.i2u = []
        self.p2i = {}
        self.i2p = []
        self.p2e = {}
        self.u2e = {}

    def count_product(self, t):
        self.freqs[t] += 1
    
    def count_user(self, t):
        self.users.append(t)

    def add_product(self, t):
        self.p2i[t] = str(len(self.p2i))
        self.i2p.append(t) 
        
    def add_user(self, t):
        self.u2i[t] = str(len(self.u2i))
        self.i2u.append(t)

    def build(self, min_freq=0):
#         self.add_product("<unk>")  # reserve 0 for <unk> (unknown products (products only occuring in test set))
#         self.add_user("<unk>")
        tok_freq = list(self.freqs.items())
        tok_freq.sort(key=lambda x: x[1], reverse=True)
        for tok, freq in tok_freq:
            if freq >= min_freq:
                self.add_product(tok)
        for user in self.users:
            self.add_user(user)
            
            
# This process should be deterministic and should have the same result 
# if run multiple times on the same data set.

def build_voc(userList, productList):
    v = Vocabulary()
    for product in productList:
        v.count_product(product)
    for user in userList:
        v.count_user(user)
    v.build()
    return v

v = build_voc(userList, productList)
print("Vocabulary size:", len(v.p2i))



Vocabulary size: 67172


In [17]:
# More efficient create examples function
# A simple way to define a class is using namedtuple.
Example = namedtuple("Example", ["inputs", "target"])


def f(userid, sessions, train):
    #print(sessions)
    sessions = [[int(v.p2i.get(t,0)) for t in ses] for ses in sessions if len(ses) > 1]
#     if userid == 11905:
#         print(sessions)
    if train:
        object_train = Example(inputs = sessions[-2], target = sessions[-2][1:])
        return object_train
    else:
        return Example(
                       inputs = sessions[-1], 
                       target = sessions[-1][1:])

def createExamples(userBase):
    ''' Create training and testing set '''
    userBase = pandas.DataFrame(userBase)
    userBase.reset_index(level = 0, inplace = True)
    trainData = [x for x in 
                 userBase.apply(lambda x: f(x['USERID'], x['PRODUCTID'], True), axis = 1).tolist() 
                 if x is not None]
    testData = [x for x in 
                userBase.apply(lambda x: f(x['USERID'], x['PRODUCTID'], False), axis = 1).tolist() 
                if x is not None]
    return trainData, testData

trainData, testData = createExamples(userBase)


narmTrain = ([example.inputs for example in trainData],[example.target for example in trainData]) 
narmTest = ([example.inputs for example in testData],[example.target for example in testData])

print(narmTrain[0][0])
print('')
print(narmTest[1][1])
print(narmTest[0][1])

[14, 15, 14]

[11561, 21435, 15043, 15084, 10699]
[2661, 11561, 21435, 15043, 15084, 10699]


In [18]:
print(narmTrain[1][25])
print(narmTrain[0][25])

[708]
[13588, 708]


In [21]:

def process_seqs(iseqs):
    out_seqs = []
    out_dates = []
    labs = []
    for seq in iseqs:
        for i in range(1, len(seq)):
            tar = seq[-i]
            labs += [tar]
            out_seqs += [seq[:-i]]
            out_dates += [0]

    return out_seqs, out_dates, labs


tr_seqs, tr_dates, tr_labs = process_seqs(narmTrain[0])
te_seqs, te_dates, te_labs = process_seqs(narmTest[0])

trainData = (tr_seqs, tr_labs)
testData = (te_seqs, te_labs)


In [22]:
#Atrain, Btrain = trainData

print(te_seqs[0:10])
print(te_labs[0:10])

[[13, 16], [13], [2661, 11561, 21435, 15043, 15084], [2661, 11561, 21435, 15043], [2661, 11561, 21435], [2661, 11561], [2661], [34909, 16183, 15406, 20815, 21377, 956, 3218, 17640, 34910, 826, 34911, 34912, 28019, 6030, 1895, 3139, 25705, 7144, 21379, 15023], [34909, 16183, 15406, 20815, 21377, 956, 3218, 17640, 34910, 826, 34911, 34912, 28019, 6030, 1895, 3139, 25705, 7144, 21379], [34909, 16183, 15406, 20815, 21377, 956, 3218, 17640, 34910, 826, 34911, 34912, 28019, 6030, 1895, 3139, 25705, 7144]]
[17, 16, 10699, 15084, 15043, 21435, 11561, 2772, 15023, 21379]


In [23]:

np.random.seed(42)


def prepare_data(seqs, labels):
    """Create the matrices from the datasets.

    This pad each sequence to the same lenght: the lenght of the
    longuest sequence or maxlen.

    if maxlen is set, we will cut all sequence to this maximum
    lenght.

    This swap the axis!
    """
    # x: a list of sentences

    lengths = [len(s) for s in seqs]
    n_samples = len(seqs)
    maxlen = np.max(lengths)

    x = np.zeros((maxlen, n_samples)).astype('int64')
    x_mask = np.ones((maxlen, n_samples)).astype(theano.config.floatX)
    for idx, s in enumerate(seqs):
        x[:lengths[idx], idx] = s

    x_mask *= (1 - (x == 0))

    return x, x_mask, labels


def load_data(valid_portion=0.1, maxlen=19, sort_by_len=False):
    '''Loads the dataset

    :type path: String
    :param path: The path to the dataset (here RSC2015)
    :type n_items: int
    :param n_items: The number of items.
    :type valid_portion: float
    :param valid_portion: The proportion of the full train set used for
        the validation set.
    :type maxlen: None or positive int
    :param maxlen: the max sequence length we use in the train/valid set.
    :type sort_by_len: bool
    :name sort_by_len: Sort by the sequence lenght for the train,
        valid and test set. This allow faster execution as it cause
        less padding per minibatch. Another mechanism must be used to
        shuffle the train set at each epoch.

    '''

    #############
    # LOAD DATA #
    #############

    # Load the dataset
    #path_train_data = './data/processedData.pkl'
    #path_test_data = './data/processedData.pkl'

    #f1 = open(path_train_data, 'rb')
    train_set = trainData
    #f1.close()

    #f2 = open(path_test_data, 'rb')
    test_set = testData
    #f2.close()

    if maxlen:
        new_train_set_x = []
        new_train_set_y = []
        for x, y in zip(train_set[0], train_set[1]):
            if len(x) < maxlen:
                new_train_set_x.append(x)
                new_train_set_y.append(y)
            else:
                new_train_set_x.append(x[:maxlen])
                new_train_set_y.append(y)
        train_set = (new_train_set_x, new_train_set_y)
        del new_train_set_x, new_train_set_y

        new_test_set_x = []
        new_test_set_y = []
        for xx, yy in zip(test_set[0], test_set[1]):
            if len(xx) < maxlen:
                new_test_set_x.append(xx)
                new_test_set_y.append(yy)
            else:
                new_test_set_x.append(xx[:maxlen])
                new_test_set_y.append(yy)
        test_set = (new_test_set_x, new_test_set_y)
        del new_test_set_x, new_test_set_y

    # split training set into validation set
    train_set_x, train_set_y = train_set
    n_samples = len(train_set_x)
    sidx = np.arange(n_samples, dtype='int32')
    np.random.shuffle(sidx)
    n_train = int(np.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

#     def len_argsort(seq):
#         return sorted(range(len(seq)), key=lambda x: len(seq[x]))

#     if sort_by_len:
#         sorted_index = len_argsort(test_set_x)
#         test_set_x = [test_set_x[i] for i in sorted_index]
#         test_set_y = [test_set_y[i] for i in sorted_index]

#         sorted_index = len_argsort(valid_set_x)
#         valid_set_x = [valid_set_x[i] for i in sorted_index]
#         valid_set_y = [valid_set_y[i] for i in sorted_index]

    train = (train_set_x, train_set_y)
    valid = (valid_set_x, valid_set_y)
    test = (test_set_x, test_set_y)

    return train, valid, test

In [24]:
'''
Build NARM model
'''


datasets = {'rsc2015': (load_data, prepare_data)}

#datasets = {'rsc2015': (narmTrain, narmTest)}

# Set the random number generators' seeds for consistency
SEED = 42
np.random.seed(SEED)


def numpy_floatX(data):
    return np.asarray(data, dtype=config.floatX)


def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.
    """

    idx_list = np.arange(n, dtype="int32")

    if shuffle:
        np.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:
                                    minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if minibatch_start != n:
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)


def get_dataset(name):
    return datasets[name][0], datasets[name][1]


def zipp(params, tparams):
    """
    When we reload the model. Needed for the GPU stuff.
    """
    for kk, vv in params.items():
        tparams[kk].set_value(vv)


def unzip(zipped):
    """
    When we pickle the model. Needed for the GPU stuff.
    """
    new_params = OrderedDict()
    for kk, vv in zipped.items():
        new_params[kk] = vv.get_value()
    return new_params


def dropout_layer(state_before, use_noise, trng, drop_p=0.5):
    retain = 1. - drop_p
    proj = T.switch(use_noise, (state_before * trng.binomial(state_before.shape,
                                                             p=retain, n=1,
                                                             dtype=state_before.dtype)), state_before * retain)
    return proj


def _p(pp, name):
    return '%s_%s' % (pp, name)


def init_params(options):
    """
    Global (not GRU) parameter. For the embeding and the classifier.
    """
    params = OrderedDict()
    # embedding
    params['Wemb'] = init_weights((options['n_items'], options['dim_proj']))
    params = get_layer(options['encoder'])[0](options,
                                              params,
                                              prefix=options['encoder'])
    # attention
    params['W_encoder'] = init_weights((options['hidden_units'], options['hidden_units']))
    params['W_decoder'] = init_weights((options['hidden_units'], options['hidden_units']))
    params['bl_vector'] = init_weights((1, options['hidden_units']))
    # classifier
    # params['U'] = init_weights((2*options['hidden_units'], options['n_items']))
    # params['b'] = np.zeros((options['n_items'],)).astype(config.floatX)
    params['bili'] = init_weights((options['dim_proj'], 2 * options['hidden_units']))

    return params


def load_params(path, params):
    pp = np.load(path)
    for kk, vv in params.items():
        if kk not in pp:
            raise Warning('%s is not in the archive' % kk)
        params[kk] = pp[kk]

    return params


def init_tparams(params):
    tparams = OrderedDict()
    for kk, pp in params.items():
        tparams[kk] = theano.shared(params[kk], name=kk)
    return tparams


def get_layer(name):
    fns = layers[name]
    return fns


def init_weights(shape):
    sigma = np.sqrt(2. / shape[0])
    return numpy_floatX(np.random.randn(*shape) * sigma)


def ortho_weight(ndim):
    W = np.random.randn(ndim, ndim)
    u, s, v = np.linalg.svd(W)
    return u.astype(config.floatX)


def param_init_gru(options, params, prefix='gru'):
    """
    Init the GRU parameter:

    :see: init_params
    """
    Wxrz = np.concatenate([init_weights((options['dim_proj'], options['hidden_units'])),
                           init_weights((options['dim_proj'], options['hidden_units'])),
                           init_weights((options['dim_proj'], options['hidden_units']))], axis=1)
    params[_p(prefix, 'Wxrz')] = Wxrz

    Urz = np.concatenate([ortho_weight(options['hidden_units']),
                          ortho_weight(options['hidden_units'])], axis=1)
    params[_p(prefix, 'Urz')] = Urz

    Uh = ortho_weight(options['hidden_units'])
    params[_p(prefix, 'Uh')] = Uh

    b = np.zeros((3 * options['hidden_units'],))
    params[_p(prefix, 'b')] = b.astype(config.floatX)
    return params


def gru_layer(tparams, state_below, options, prefix='gru', mask=None):
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    assert mask is not None

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    def _step(m_, x_, h_):
        preact = T.dot(h_, tparams[_p(prefix, 'Urz')])
        preact += x_[:, 0:2 * options['hidden_units']]

        z = T.nnet.hard_sigmoid(_slice(preact, 0, options['hidden_units']))
        r = T.nnet.hard_sigmoid(_slice(preact, 1, options['hidden_units']))
        h = T.tanh(T.dot((h_ * r), tparams[_p(prefix, 'Uh')]) + _slice(x_, 2, options['hidden_units']))

        h = (1.0 - z) * h_ + z * h
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h

    state_below = (T.dot(state_below, tparams[_p(prefix, 'Wxrz')]) +
                   tparams[_p(prefix, 'b')])

    hidden_units = options['hidden_units']
    rval, updates = theano.scan(_step,
                                sequences=[mask, state_below],
                                outputs_info=T.alloc(numpy_floatX(0.), n_samples, hidden_units),
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps)
    return rval

layers = {'gru': (param_init_gru, gru_layer)}


def adam(loss, all_params, learning_rate=0.001, b1=0.9, b2=0.999, e=1e-8, gamma=1-1e-8):
    """
    ADAM update rules
    Default values are taken from [Kingma2014]

    References:
    [Kingma2014] Kingma, Diederik, and Jimmy Ba.
    "Adam: A Method for Stochastic Optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    http://arxiv.org/pdf/1412.6980v4.pdf
    """

    updates = OrderedDict()
    all_grads = theano.grad(loss, all_params)
    alpha = learning_rate
    t = theano.shared(np.float32(1))
    b1_t = b1*gamma**(t-1)   #(Decay the first moment running average coefficient)

    for theta_previous, g in zip(all_params, all_grads):
        m_previous = theano.shared(np.zeros(theta_previous.get_value().shape, dtype=config.floatX))
        v_previous = theano.shared(np.zeros(theta_previous.get_value().shape, dtype=config.floatX))

        m = b1_t*m_previous + (1 - b1_t)*g  # (Update biased first moment estimate)
        v = b2*v_previous + (1 - b2)*g**2   # (Update biased second raw moment estimate)
        m_hat = m / (1-b1**t)               # (Compute bias-corrected first moment estimate)
        v_hat = v / (1-b2**t)               # (Compute bias-corrected second raw moment estimate)
        theta = theta_previous - (alpha * m_hat) / (T.sqrt(v_hat) + e) #(Update parameters)

        updates[m_previous] = m
        updates[v_previous] = v
        updates[theta_previous] = theta
    updates[t] = t + 1.

    return updates


def build_model(tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x = T.matrix('x', dtype='int64')
    mask = T.matrix('mask', dtype=config.floatX)
    y = T.vector('y', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps,
                                                n_samples,
                                                options['dim_proj']])
    if options['use_dropout']:
        emb = dropout_layer(emb, use_noise, trng, drop_p=0.25)

    proj = get_layer(options['encoder'])[1](tparams, emb, options,
                                            prefix=options['encoder'],
                                            mask=mask)

    def compute_alpha(state1, state2):
        tmp = T.nnet.hard_sigmoid(T.dot(tparams['W_encoder'], state1.T) + T.dot(tparams['W_decoder'], state2.T))
        alpha = T.dot(tparams['bl_vector'], tmp)
        res = T.sum(alpha, axis=0)
        return res

    last_h = proj[-1]

    sim_matrix, _ = theano.scan(
        fn=compute_alpha,
        sequences=proj,
        non_sequences=proj[-1]
    )
    att = T.nnet.softmax(sim_matrix.T * mask.T) * mask.T
    p = att.sum(axis=1)[:, None]
    weight = att / p
    atttention_proj = (proj * weight.T[:, :, None]).sum(axis=0)

    proj = T.concatenate([atttention_proj, last_h], axis=1)

    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng, drop_p=0.5)

    ytem = T.dot(tparams['Wemb'], tparams['bili'])
    pred = T.nnet.softmax(T.dot(proj, ytem.T))
    # pred = T.nnet.softmax(T.dot(proj, tparams['U']) + tparams['b'])

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    # f_weight = theano.function([x, mask], weight, name='f_weight')

    off = 1e-8
    if pred.dtype == 'float16':
        off = 1e-6

    cost = -T.log(pred[T.arange(n_samples), y] + off).mean()

    return use_noise, x, mask, y, f_pred_prob, cost


def pred_evaluation(f_pred_prob, prepare_data, data, iterator):
    """
    Compute recall@20 and mrr@20
    f_pred_prob: Theano fct computing the prediction
    prepare_data: usual prepare_data for that dataset.
    """
    recall = 0.0
    mrr = 0.0
    evalutation_point_count = 0
    ranks = "never entered for-loop"
    # pred_res = []
    # att = []

    for _, valid_index in iterator:
        x, mask, y = prepare_data([data[0][t] for t in valid_index], [data[1][t] for t in valid_index])
        preds = f_pred_prob(x, mask)
        # weights = f_weight(x, mask)
        targets = y

        ranks = (preds.T > np.diag(preds.T[targets])).sum(axis=0) + 1
#         if math.isnan(ranks):
#             print(preds)
#             print(targets)
        rank_ok = (ranks <= 20)
        # pred_res += list(rank_ok)
        recall += rank_ok.sum()
        mrr += (1.0 / ranks[rank_ok]).sum()
        evalutation_point_count += len(ranks)
        # att.append(weights)

    recall = numpy_floatX(recall) / evalutation_point_count
    mrr = numpy_floatX(mrr) / evalutation_point_count
    eval_score = (recall, mrr)

    # ff = open('/storage/lijing/mydataset/res_attention_correct.pkl', 'wb')
    # pickle.dump(pred_res, ff)
    # ff.close()
    # ff2 = open('/storage/lijing/mydataset/attention_weights.pkl', 'wb')
    # pickle.dump(att, ff2)
    # ff2.close()

    return eval_score


def train_gru(
    dim_proj=50,  # word embeding dimension
    hidden_units=100,  # GRU number of hidden units.
    patience=100,  # Number of epoch to wait before early stop if no progress
    max_epochs=30,  # The maximum number of epoch to run
    dispFreq=100,  # Display to stdout the training progress every N updates
    lrate=0.001,  # Learning rate
    n_items=37484,  # Vocabulary size
    encoder='gru',  # TODO: can be removed must be gru.
    saveto='narm_model.npz',  # The best model will be saved there
    is_valid=True,  # Compute the validation error after this number of update.
    is_save=False,  # Save the parameters after every saveFreq updates
    batch_size=512,  # The batch size during training.
    valid_batch_size=512,  # The batch size used for validation/test set.
    dataset='rsc2015',

    # Parameter for extra option
    use_dropout=True,  # if False slightly faster, but worst test error
                       # This frequently need a bigger model.
    reload_model=None,  # Path to a saved model we want to start from.
    test_size=-1,  # If >0, we keep only this number of test example.
):

    # Model options
    model_options = locals().copy()
    print("model options", model_options)

    load_data, prepare_data = get_dataset(dataset)

    print('Loading data')
    train, valid, test = load_data()

    print('Building model')
    # This create the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params('gru_model.npz', params)

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, mask,
     y, f_pred_prob, cost) = build_model(tparams, model_options)

    all_params = list(tparams.values())

    updates = adam(cost, all_params, lrate)

    train_function = theano.function(inputs=[x, mask, y], outputs=cost, updates=updates)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    history_errs = []
    history_vali = []
    best_p = None
    bad_count = 0

    uidx = 0  # the number of update done
    estop = False  # early stop

    try:
        for eidx in range(max_epochs):
            start_time = time.time()
            n_samples = 0
            epoch_loss = []

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t]for t in train_index]
                
                
                # Get the data in numpy.ndarray format
                # This swap the axis!
                # Return something of shape (minibatch maxlen, n samples)
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                loss = train_function(x, mask, y)
                epoch_loss.append(loss)

                if np.isnan(loss) or np.isinf(loss):
                    print('bad loss detected: ', loss)
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Loss ', np.mean(epoch_loss))

            if saveto and is_save:
                print('Saving...')

                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                np.savez(saveto, history_errs=history_errs, **params)
                print('Saving done')

            if is_valid:
                use_noise.set_value(0.)
                kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
                kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

                valid_evaluation = pred_evaluation(f_pred_prob, prepare_data, valid, kf_valid)
                test_evaluation = pred_evaluation(f_pred_prob, prepare_data, test, kf_test)
                history_errs.append([valid_evaluation, test_evaluation])

                if best_p is None or valid_evaluation[0] >= np.array(history_vali).max():

                    best_p = unzip(tparams)
                    print('Best perfomance updated!')
                    bad_count = 0

                print('Valid Recall@20:', valid_evaluation[0], '   Valid Mrr@20:', valid_evaluation[1],
                      '\nTest Recall@20', test_evaluation[0], '   Test Mrr@20:', test_evaluation[1])

                if len(history_vali) > 10 and valid_evaluation[0] <= np.array(history_vali).max():
                    bad_count += 1
                    print('===========================>Bad counter: ' + str(bad_count))
                    print('current validation recall: ' + str(valid_evaluation[0]) +
                          '      history max recall:' + str(np.array(history_vali).max()))
                    if bad_count > patience:
                        print('Early Stop!')
                        estop = True

                history_vali.append(valid_evaluation[0])

            end_time = time.time()
            print('Seen %d samples' % n_samples)
            print(('This epoch took %.1fs' % (end_time - start_time)), file=sys.stderr)

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")

    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)
    valid_evaluation = pred_evaluation(f_pred_prob, prepare_data, valid, kf_valid)
    test_evaluation = pred_evaluation(f_pred_prob,  prepare_data, test, kf_test)

    print('=================Best performance=================')
    print('Valid Recall@20:', valid_evaluation[0], '   Valid Mrr@20:', valid_evaluation[1],
          '\nTest Recall@20', test_evaluation[0], '   Test Mrr@20:', test_evaluation[1])
    print('==================================================')
    if saveto and is_save:
        np.savez('Best_performance', valid_evaluation=valid_evaluation, test_evaluation=test_evaluation, history_errs=history_errs,
                 **best_p)

    return valid_evaluation, test_evaluation


# if __name__ == '__main__':
#     # See function train for all possible parameter and there definition.
#     eval_valid, eval_test = train_gru(max_epochs=10,test_size=-1)
    
    
    
#    dim_proj=50,  # word embeding dimension
#     hidden_units=100,  # GRU number of hidden units.
#     patience=100,  # Number of epoch to wait before early stop if no progress
#     max_epochs=30,  # The maximum number of epoch to run
#     dispFreq=100,  # Display to stdout the training progress every N updates
#     lrate=0.001,  # Learning rate
#     n_items=37484,  # Vocabulary size
#     encoder='gru',  # TODO: can be removed must be gru.
#     saveto='gru_model.npz',  # The best model will be saved there
#     is_valid=True,  # Compute the validation error after this number of update.
#     is_save=False,  # Save the parameters after every saveFreq updates
#     batch_size=512,  # The batch size during training.
#     valid_batch_size=512,  # The batch size used for validation/test set.
#     dataset='rsc2015',


In [25]:
# if __name__ == '__main__':
#     # See function train for all possible parameter and there definition.
#     eval_valid, eval_test = train_gru(max_epochs=10,test_size=-1, reload_model="./narm_model.npz")

In [16]:
dim_projL = [10,20,30,40,50,60]
lrateL = [0.001, 0.002, 0.003, 0.004]
n_itemsL = 67172

def gridsearch():
    best_option = ()
    best_eval_test = 0
    for dim_proj in dim_projL:
        for lrate in lrateL:
            eval_valid, eval_test = train_gru(dim_proj=dim_proj, hidden_units=dim_proj*2, max_epochs=1, lrate=lrate, n_items=n_itemsL)
            if eval_test[0] > best_eval_test:
                best_option = (dim_proj, lrate)
                best_eval_test = eval_test[0]
    
    best_dim_proj, best_lrate = best_option
    eval_valid, eval_test = train_gru(dim_proj=best_dim_proj,
                                      hidden_units=dim_proj*2, 
                                      max_epochs=10, 
                                      lrate = best_lrate, 
                                      n_items=n_itemsL, 
                                      is_save=True)
            
            
    return best_option

In [17]:
gridsearch = False
if gridsearch:
    gridsearch()
else
    dim_proj= 50
    lrate = 0.004
    eval_valid, eval_test = train_gru(dim_proj=best_dim_proj,
                                      hidden_units=dim_proj*2, 
                                      max_epochs=10, 
                                      lrate = best_lrate, 
                                      n_items=n_itemsL, 
                                      is_save=True)

model options {'dim_proj': 10, 'hidden_units': 20, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.001, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples


  rval = inputs[0].__getitem__(inputs[1:])


Epoch  0 Update  100 Loss  9.11084519515994
Best perfomance updated!
Valid Recall@20: 0.8087126253216219    Valid Mrr@20: 0.20863360841508957 
Test Recall@20 0.8287562960853813    Test Mrr@20: 0.21677733511516148
Seen 101436 samples


This epoch took 579.2s


Valid Recall@20: 0.8087126253216219    Valid Mrr@20: 0.20863360841508957 
Test Recall@20 0.8287562960853813    Test Mrr@20: 0.21677733511516148
model options {'dim_proj': 10, 'hidden_units': 20, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.002, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  7.338937913147305
Best perfomance updated!
Valid Recall@20: 0.8103096442196788    Valid Mrr@20: 0.2312097127660913 
Test Recall@20 0.8301406984850122    Test Mrr@20: 0.2483260562597549
Seen 101436 samples


This epoch took 344.3s


Valid Recall@20: 0.8103096442196788    Valid Mrr@20: 0.2312097127660913 
Test Recall@20 0.8301406984850122    Test Mrr@20: 0.2483260562597549
model options {'dim_proj': 10, 'hidden_units': 20, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.003, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  6.158822769421386
Best perfomance updated!
Valid Recall@20: 0.8119066631177357    Valid Mrr@20: 0.2443780627923273 
Test Recall@20 0.8323400327936455    Test Mrr@20: 0.2564530361351625
Seen 101436 samples


This epoch took 362.7s


Valid Recall@20: 0.8119066631177357    Valid Mrr@20: 0.2443780627923273 
Test Recall@20 0.8323400327936455    Test Mrr@20: 0.2564530361351625
model options {'dim_proj': 10, 'hidden_units': 20, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  5.741115854714622
Best perfomance updated!
Valid Recall@20: 0.8113743234850501    Valid Mrr@20: 0.2928358507698126 
Test Recall@20 0.8301406984850122    Test Mrr@20: 0.3101031345735014
Seen 101436 samples


This epoch took 343.9s


Valid Recall@20: 0.8113743234850501    Valid Mrr@20: 0.2928358507698126 
Test Recall@20 0.8301406984850122    Test Mrr@20: 0.3101031345735014
model options {'dim_proj': 20, 'hidden_units': 40, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.001, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  7.545156663403876
Best perfomance updated!
Valid Recall@20: 0.8192706947032207    Valid Mrr@20: 0.24492059742600322 
Test Recall@20 0.8323400327936455    Test Mrr@20: 0.25524626546428314
Seen 101436 samples


This epoch took 376.2s


Valid Recall@20: 0.8192706947032207    Valid Mrr@20: 0.24492059742600322 
Test Recall@20 0.8323400327936455    Test Mrr@20: 0.25524626546428314
model options {'dim_proj': 20, 'hidden_units': 40, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.002, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  6.0516006578227
Best perfomance updated!
Valid Recall@20: 0.8129713423831071    Valid Mrr@20: 0.3636351342572484 
Test Recall@20 0.8236114247562568    Test Mrr@20: 0.3902690590192089
Seen 101436 samples


This epoch took 376.6s


Valid Recall@20: 0.8129713423831071    Valid Mrr@20: 0.3636351342572484 
Test Recall@20 0.8236114247562568    Test Mrr@20: 0.3902690590192089
model options {'dim_proj': 20, 'hidden_units': 40, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.003, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  5.291391805280939
Best perfomance updated!
Valid Recall@20: 0.8528968148345311    Valid Mrr@20: 0.4243507765559893 
Test Recall@20 0.8673722864240199    Test Mrr@20: 0.4463108482769604
Seen 101436 samples


This epoch took 373.3s


Valid Recall@20: 0.8528968148345311    Valid Mrr@20: 0.4243507765559893 
Test Recall@20 0.8673722864240199    Test Mrr@20: 0.4463108482769604
model options {'dim_proj': 20, 'hidden_units': 40, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.922423156146292
Best perfomance updated!
Valid Recall@20: 0.8820867713601278    Valid Mrr@20: 0.5489041552151814 
Test Recall@20 0.8933813783149565    Test Mrr@20: 0.5708053918408655
Seen 101436 samples


This epoch took 375.7s


Valid Recall@20: 0.8820867713601278    Valid Mrr@20: 0.5489041552151814 
Test Recall@20 0.8933813783149565    Test Mrr@20: 0.5708053918408655
model options {'dim_proj': 30, 'hidden_units': 60, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.001, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  6.839676670007075
Best perfomance updated!
Valid Recall@20: 0.8253038772069914    Valid Mrr@20: 0.3410031684004788 
Test Recall@20 0.8430814244617031    Test Mrr@20: 0.3617258497311919
Seen 101436 samples


This epoch took 408.1s


Valid Recall@20: 0.8253038772069914    Valid Mrr@20: 0.3410031684004788 
Test Recall@20 0.8430814244617031    Test Mrr@20: 0.3617258497311919
model options {'dim_proj': 30, 'hidden_units': 60, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.002, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  5.513074505623614
Best perfomance updated!
Valid Recall@20: 0.8654955194747582    Valid Mrr@20: 0.4856914538238501 
Test Recall@20 0.8797533603668175    Test Mrr@20: 0.5094566483391051
Seen 101436 samples


This epoch took 405.3s


Valid Recall@20: 0.8654955194747582    Valid Mrr@20: 0.4856914538238501 
Test Recall@20 0.8797533603668175    Test Mrr@20: 0.5094566483391051
model options {'dim_proj': 30, 'hidden_units': 60, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.003, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.953907090781563
Best perfomance updated!
Valid Recall@20: 0.8874988909590986    Valid Mrr@20: 0.583577044586916 
Test Recall@20 0.8996553721686026    Test Mrr@20: 0.6066501320278418
Seen 101436 samples


This epoch took 406.8s


Valid Recall@20: 0.8874988909590986    Valid Mrr@20: 0.583577044586916 
Test Recall@20 0.8996553721686026    Test Mrr@20: 0.6066501320278418
model options {'dim_proj': 30, 'hidden_units': 60, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.622795745672263
Best perfomance updated!
Valid Recall@20: 0.8953952621772691    Valid Mrr@20: 0.5655958743500801 
Test Recall@20 0.901501242034777    Test Mrr@20: 0.5845206482562955
Seen 101436 samples


This epoch took 409.4s


Valid Recall@20: 0.8953952621772691    Valid Mrr@20: 0.5655958743500801 
Test Recall@20 0.901501242034777    Test Mrr@20: 0.5845206482562955
model options {'dim_proj': 40, 'hidden_units': 80, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.001, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  6.33902526142972
Best perfomance updated!
Valid Recall@20: 0.8431372549019608    Valid Mrr@20: 0.29636131314421615 
Test Recall@20 0.8546279295820283    Test Mrr@20: 0.317541068572835
Seen 101436 samples


This epoch took 442.3s


Valid Recall@20: 0.8431372549019608    Valid Mrr@20: 0.29636131314421615 
Test Recall@20 0.8546279295820283    Test Mrr@20: 0.317541068572835
model options {'dim_proj': 40, 'hidden_units': 80, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.002, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  5.204226946192684
Best perfomance updated!
Valid Recall@20: 0.8649631798420726    Valid Mrr@20: 0.5177946403192198 
Test Recall@20 0.8790267945684297    Test Mrr@20: 0.5448227244807731
Seen 101436 samples


This epoch took 442.0s


Valid Recall@20: 0.8649631798420726    Valid Mrr@20: 0.5177946403192198 
Test Recall@20 0.8790267945684297    Test Mrr@20: 0.5448227244807731
model options {'dim_proj': 40, 'hidden_units': 80, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.003, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.660415403994305
Best perfomance updated!
Valid Recall@20: 0.9035578032117825    Valid Mrr@20: 0.5922665797454736 
Test Recall@20 0.9125862796885585    Test Mrr@20: 0.6131315708835294
Seen 101436 samples


This epoch took 434.8s


Valid Recall@20: 0.9035578032117825    Valid Mrr@20: 0.5922665797454736 
Test Recall@20 0.9125862796885585    Test Mrr@20: 0.6131315708835294
model options {'dim_proj': 40, 'hidden_units': 80, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.320554585518765
Best perfomance updated!
Valid Recall@20: 0.9073729039126963    Valid Mrr@20: 0.6668605823185181 
Test Recall@20 0.9173973234886941    Test Mrr@20: 0.6903361008934936
Seen 101436 samples


This epoch took 442.6s


Valid Recall@20: 0.9073729039126963    Valid Mrr@20: 0.6668605823185181 
Test Recall@20 0.9173973234886941    Test Mrr@20: 0.6903361008934936
model options {'dim_proj': 50, 'hidden_units': 100, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.001, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  6.065315064595074
Best perfomance updated!
Valid Recall@20: 0.8382574749356756    Valid Mrr@20: 0.44316845660016424 
Test Recall@20 0.8568076269771917    Test Mrr@20: 0.46688773986927035
Seen 101436 samples


This epoch took 472.8s


Valid Recall@20: 0.8382574749356756    Valid Mrr@20: 0.44316845660016424 
Test Recall@20 0.8568076269771917    Test Mrr@20: 0.46688773986927035
model options {'dim_proj': 50, 'hidden_units': 100, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.002, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  5.064693825136162
Best perfomance updated!
Valid Recall@20: 0.8859905953331559    Valid Mrr@20: 0.5533061530517465 
Test Recall@20 0.8977800469322232    Test Mrr@20: 0.5769769806230828
Seen 101436 samples


This epoch took 469.8s


Valid Recall@20: 0.8859905953331559    Valid Mrr@20: 0.5533061530517465 
Test Recall@20 0.8977800469322232    Test Mrr@20: 0.5769769806230828
model options {'dim_proj': 50, 'hidden_units': 100, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.003, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.397752366104653
Best perfomance updated!
Valid Recall@20: 0.9110105580693816    Valid Mrr@20: 0.6429706302006697 
Test Recall@20 0.9181828000274916    Test Mrr@20: 0.6666781523659057
Seen 101436 samples


This epoch took 469.1s


Valid Recall@20: 0.9110105580693816    Valid Mrr@20: 0.6429706302006697 
Test Recall@20 0.9181828000274916    Test Mrr@20: 0.6666781523659057
model options {'dim_proj': 50, 'hidden_units': 100, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.108811932573534
Best perfomance updated!
Valid Recall@20: 0.9264484074172655    Valid Mrr@20: 0.6752767545456179 
Test Recall@20 0.9289536470657542    Test Mrr@20: 0.6942919718119104
Seen 101436 samples


This epoch took 467.3s


Valid Recall@20: 0.9264484074172655    Valid Mrr@20: 0.6752767545456179 
Test Recall@20 0.9289536470657542    Test Mrr@20: 0.6942919718119104
model options {'dim_proj': 60, 'hidden_units': 120, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.001, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model


INFO (theano.gof.compilelock): Refreshing lock /home/sietze/.theano/compiledir_Linux-4.15--generic-x86_64-with-debian-buster-sid-x86_64-3.7.0-64/lock_dir/lock


Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  5.881110345955649
Best perfomance updated!
Valid Recall@20: 0.8561795759027593    Valid Mrr@20: 0.40215700837917073 
Test Recall@20 0.8668519082170665    Test Mrr@20: 0.4162397329721049
Seen 101436 samples


This epoch took 505.6s


Valid Recall@20: 0.8561795759027593    Valid Mrr@20: 0.40215700837917073 
Test Recall@20 0.8668519082170665    Test Mrr@20: 0.4162397329721049
model options {'dim_proj': 60, 'hidden_units': 120, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.002, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.793136317595246
Best perfomance updated!
Valid Recall@20: 0.9074616271848106    Valid Mrr@20: 0.565727121400978 
Test Recall@20 0.9143437834441183    Test Mrr@20: 0.5826348507533523
Seen 101436 samples


This epoch took 510.9s


Valid Recall@20: 0.9074616271848106    Valid Mrr@20: 0.565727121400978 
Test Recall@20 0.9143437834441183    Test Mrr@20: 0.5826348507533523
model options {'dim_proj': 60, 'hidden_units': 120, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.003, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  4.287125480577496
Best perfomance updated!
Valid Recall@20: 0.916600124212581    Valid Mrr@20: 0.6897883126835624 
Test Recall@20 0.9241720586358236    Test Mrr@20: 0.7112752317159501
Seen 101436 samples


This epoch took 509.9s


Valid Recall@20: 0.916600124212581    Valid Mrr@20: 0.6897883126835624 
Test Recall@20 0.9241720586358236    Test Mrr@20: 0.7112752317159501
model options {'dim_proj': 60, 'hidden_units': 120, 'patience': 100, 'max_epochs': 1, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': False, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  3.922542044396758
Best perfomance updated!
Valid Recall@20: 0.9181084198385236    Valid Mrr@20: 0.6717328041372179 
Test Recall@20 0.9267444943003859    Test Mrr@20: 0.695090738082771
Seen 101436 samples


This epoch took 506.2s


Valid Recall@20: 0.9181084198385236    Valid Mrr@20: 0.6717328041372179 
Test Recall@20 0.9267444943003859    Test Mrr@20: 0.695090738082771
model options {'dim_proj': 50, 'hidden_units': 120, 'patience': 100, 'max_epochs': 10, 'dispFreq': 100, 'lrate': 0.004, 'n_items': 67172, 'encoder': 'gru', 'saveto': 'narm_model.npz', 'is_valid': True, 'is_save': True, 'batch_size': 512, 'valid_batch_size': 512, 'dataset': 'rsc2015', 'use_dropout': True, 'reload_model': None, 'test_size': -1}
Loading data
Building model
Optimization
101436 train examples
11271 valid examples
101849 test examples
Epoch  0 Update  100 Loss  3.9340501427066736
Saving...
Saving done
Best perfomance updated!
Valid Recall@20: 0.904711205749268    Valid Mrr@20: 0.5932955307928526 
Test Recall@20 0.916111105656413    Test Mrr@20: 0.6217663952735066
Seen 101436 samples


This epoch took 504.9s


Epoch  1 Update  200 Loss  1.759842010769674
Epoch  1 Update  300 Loss  1.5257248014014553
Saving...
Saving done
Best perfomance updated!
Valid Recall@20: 0.9329252062816077    Valid Mrr@20: 0.7881395965355543 
Test Recall@20 0.9393808481182928    Test Mrr@20: 0.8083240421390502
Seen 101436 samples


This epoch took 510.1s


Epoch  2 Update  400 Loss  1.256207239735788
Epoch  2 Update  500 Loss  1.1706680303309793
Saving...
Saving done
Valid Recall@20: 0.9315943571998936    Valid Mrr@20: 0.8233949964802014 
Test Recall@20 0.9393513927480879    Test Mrr@20: 0.8391687338951187
Seen 101436 samples


This epoch took 502.0s


Epoch  3 Update  600 Loss  1.1059156172737892
Epoch  3 Update  700 Loss  1.049296732240904
Saving...
Saving done
Best perfomance updated!
Valid Recall@20: 0.9366515837104072    Valid Mrr@20: 0.8254117571645597 
Test Recall@20 0.9433867784661607    Test Mrr@20: 0.8413880222943411
Seen 101436 samples


This epoch took 505.3s


Epoch  4 Update  800 Loss  0.9738303728447849
Epoch  4 Update  900 Loss  1.009934612092863
Saving...
Saving done
Best perfomance updated!
Valid Recall@20: 0.9417975334930352    Valid Mrr@20: 0.8267518732754624 
Test Recall@20 0.9471276104821844    Test Mrr@20: 0.842106364860305
Seen 101436 samples


This epoch took 506.5s


Epoch  5 Update  1000 Loss  0.9810501059260306
Epoch  5 Update  1100 Loss  0.9913024746743462
Saving...
Saving done
Valid Recall@20: 0.9409103007718924    Valid Mrr@20: 0.8178872307297905 
Test Recall@20 0.9471668843091243    Test Mrr@20: 0.8337047285425787
Seen 101436 samples


This epoch took 505.9s


Epoch  6 Update  1200 Loss  1.0015472837151378
Epoch  6 Update  1300 Loss  0.9784964393607578
Saving...
Saving done
Valid Recall@20: 0.9385147724248071    Valid Mrr@20: 0.8208740974698803 
Test Recall@20 0.9447711808657915    Test Mrr@20: 0.8374316740664245
Seen 101436 samples


This epoch took 507.7s


Epoch  7 Update  1400 Loss  0.9746375096317096
Epoch  7 Update  1500 Loss  0.964134390258779
Saving...
Saving done
Valid Recall@20: 0.9403779611392068    Valid Mrr@20: 0.822723524159368 
Test Recall@20 0.9446533593849719    Test Mrr@20: 0.8406432242056049
Seen 101436 samples


This epoch took 504.2s


Epoch  8 Update  1600 Loss  0.9655943790128871
Epoch  8 Update  1700 Loss  0.964616491385202
Saving...
Saving done
Valid Recall@20: 0.9390471120574927    Valid Mrr@20: 0.8213549597527267 
Test Recall@20 0.9438875197596441    Test Mrr@20: 0.8378535795737015
Seen 101436 samples


This epoch took 501.2s


Epoch  9 Update  1800 Loss  0.9264114008613722
Epoch  9 Update  1900 Loss  0.9566487137688621
Saving...
Saving done
Valid Recall@20: 0.941087747316121    Valid Mrr@20: 0.8265488464272713 
Test Recall@20 0.947618533318933    Test Mrr@20: 0.8420211106458694
Seen 101436 samples


This epoch took 504.2s


Valid Recall@20: 0.9417975334930352    Valid Mrr@20: 0.8267518732754624 
Test Recall@20 0.9471276104821844    Test Mrr@20: 0.842106364860305


(50, 0.004)