In [1]:
import os, sys, time, re
import numpy as np
import pandas as pd
from tqdm import tqdm
import cPickle as pk
np.random.seed(1) 

In [2]:
sys.path.insert(0, '../2-DRMM/') # use what is contained in the file `../2-DRMM/DRMM.py`

In [3]:
# paths
PK_FPATH = '../data/processed_data_sidhid.pk'
MODEL_FPATH = '../models/1124_model_2embed_2conv1d_2FC.h5' # path of best trained model 
NOTES_DIR = '/local/XW/DATA/MIMIC/noteevents_by_sid_hid/'
TOKENIZER_FPATH = '../data/tokenizer.pk'
# constants
MAX_NB_WORDS = 20000 # top 20k most freq words
MAX_SEQ_LEN = 1000
N_LABELS = 50
N_SIDHID = 58328

In [7]:
# load pickled data
pk_data = pk.load(open(PK_FPATH, 'rb'))
X_train = pk_data['X_train']
print pk_data['description']

This file contains the prepared data for note2vec training, 
* sidhids:     list of the 58361 unique (sid,hid) pairs
* sidhid2icds: mapping from (sid,hid) pair --> set of icd codes
* sidhid2khot: mapping from (sid,hid) pair --> khot-encoding correponding to this sidhid pair
* sidhid2seq:  mapping from (sid,hid) pair --> fix-length sequences (len=1000) of word ids
* word2idx:    mapping from a word to its id used in the sequence
* embedding_w2v／embedding_glove: matrices for the embedding layer (used as the weights parameter)
* train_sidhids/val_sidhids: list of (sid,hid) pairs used as training/validation set
* X_train/Y_train/X_val/Y_val: ndarray generated for training/validation

And here are 2 useful functions' source code: 

def to_khot(sidhid2icds, K=N_LABELS): # generate khot encoding (useful if want to change the K)
    icds = zip( *icd_ctr.most_common(N_LABELS-1) )[0] + ('other',)
    sidhid2khot = {} # map subject_id to k-hot vector
    for sid,hid in sidhid2icds.keys():
       

## Load note2vec model

In [8]:
print X_train[:10].shape
X_train[:10]

(10, 1000)


array([[    0,     0,     0, ...,     4,   415,  3867],
       [ 9846,    48,  3477, ...,  1159,   269,  7250],
       [ 4008,    11,  5635, ...,   121,    69,  6624],
       ..., 
       [10310,   421,  1747, ...,   680,    57,   475],
       [  785,   486,  1517, ...,  3268,   470,   682],
       [ 4917,    20,  2463, ...,  1062,    97,   116]], dtype=int32)

In [4]:
# ***NOTE***
# To load models from file, we have to modify metrics.py at: 
# `/local/XW/SOFT/anaconda2/envs/thesis_nb/lib/python2.7/site-packages/keras/` 
# to add the custom metric function, otherwise `load_model` throws exception ! 
# cf issue: https://github.com/fchollet/keras/issues/3911
from keras.models import load_model
model = load_model(MODEL_FPATH)

Using TensorFlow backend.


In [11]:
print model.layers[0].input
print model.layers[11].output

Tensor("main_input:0", shape=(?, 1000), dtype=int32)
Tensor("Relu_3:0", shape=(?, 500), dtype=float32)


In [12]:
# use K.function to construct a model that outputs embedding vector
from keras import backend as K
get_embedvec = K.function([model.layers[0].input, K.learning_phase()],
                                  [model.layers[11].output])
embedvec = lambda X: get_embedvec([X,0])[0]

In [13]:
# output in test mode = 0
layer_output = embedvec(X_train[:10])
print layer_output.shape

(10, 500)


## Turn a paragraph into 500-dimensional input vector

In [14]:
# sidhids = []
# texts = [] # text bodies
# for fname in tqdm(os.listdir(NOTES_DIR)): # the data is 3.7G in size, can hold in memory...
#     sid,hid = map( int, fname[:-4].split('_') )
#     sidhids.append( (sid,hid) )
#     fpath = os.path.join(NOTES_DIR, fname)
#     df = pd.read_csv(fpath)
#     texts.append( '\n=======\n\n\n'.join(df['text']) )
# print('found %d texts' % len(texts))

# tokenizer = Tokenizer(nb_words=MAX_NB_WORDS, # filter out numbers, otherwise lots of numbers
#                      filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'+'0123456789') 
# print 'fitting on whole text corpus...',
# tokenizer.fit_on_texts(texts) # this might take some time
# print 'done. '

# pk.dump(tokenizer, open('data/tokenizer.pk', 'wb'), pk.HIGHEST_PROTOCOL)



In [104]:
def paragraph2vec(paragraph):
    seqs = tokenizer.texts_to_sequences([paragraph.encode('utf-8')])
    seqs_padded = pad_sequences(seqs, maxlen=MAX_SEQ_LEN)
    return embedvec(seqs_padded)

In [16]:
paragraph_sample = ''' The imaged portions of the abdomen show a few [**Last Name (un) 36399**]-filled loops of bowel
   within the left abdomen.  No abnormal soft tissue mass or calcifications.  No
   free interperitoneal air.  The imaged bony structures are unremarkable.'''
paragraph2vec(paragraph_sample).shape

(1, 500)

In [27]:
from lxml import etree
topic_tree = etree.parse('../data/topics2016.xml')
pat = re.compile('\W*\n\W*\n')
def get_query_paragraphs(i): # returns the paragraphs in topic i 
    text = '\n=====\n'.join( topic_tree.xpath('//topic[@number="%d"]/*/text()'%i) )
    paras = pat.split(text.lower())
    return [p.strip() for p in paras]

In [18]:
QUERIES = [get_query_paragraphs(i) for i in xrange(1,31)]
print map(len, QUERIES)

[4, 12, 3, 4, 4, 4, 7, 6, 11, 5, 4, 3, 7, 4, 3, 4, 5, 9, 7, 5, 3, 6, 8, 4, 4, 5, 8, 6, 6, 5]


In [20]:
MAX_QLEN = max(map(len, QUERIES))
print MAX_QLEN

12


In [21]:
# padding queries to the same length N
PARA_PLACEHOLDER = '</s>'
def pad_query(q, SZ=N):
    return q + [PARA_PLACEHOLDER]*(SZ-len(q))
QUERIES = map(pad_query, QUERIES)

In [67]:
QUERIES = {i+1:QUERIES[i] for i in xrange(30)}

## Helper functions to extract histvec from query/article

In [22]:
PMC_PATH = '/local/XW/DATA/TREC/PMCs/'
pmcid2fpath = {}

for subdir1 in os.listdir(PMC_PATH):
    for subdir2 in os.listdir(os.path.join(PMC_PATH, subdir1)):
        diry = os.path.join(PMC_PATH, subdir1, subdir2)
        for fn in os.listdir(diry):
            pmcid = fn[:-5]
            fpath = os.path.join(diry, fn)
            pmcid2fpath[pmcid] = fpath

In [30]:
for p in get_query_paragraphs(1):
    print '---'
    print p

---
78 m w/ pmh of cabg in early [**month (only) 3**] at [**hospital6 4406**]
   (transferred to nursing home for rehab on [**12-8**] after several falls out
   of bed.) he was then readmitted to [**hospital6 1749**] on
   [**3120-12-11**] after developing acute pulmonary edema/chf/unresponsiveness?.
   there was a question whether he had a small mi; he reportedly had a
   small nqwmi. he improved with diuresis and was not intubated
---
yesterday, he was noted to have a melanotic stool earlier this evening
   and then approximately 9 loose bm w/ some melena and some frank blood
   just prior to transfer, unclear quantity
---
78 m transferred to nursing home for rehab after cabg. reportedly readmitted with a small nqwmi. yesterday, he was noted to have a melanotic stool and then today he had approximately 9 loose bm w/ some melena and some frank blood just prior to transfer, unclear quantity
---
a 78 year old male presents with frequent stools and melena.


In [32]:
def get_article_paragraphs(pmcid):
    'returns a list of texts, each as a paragraph'
    fpath = pmcid2fpath[pmcid]
    tree = etree.parse(fpath)
    ret = []
    body = tree.xpath('//body')[0]
    for p in body.xpath('.//p'):
        ret.append( p.xpath('string(.)').strip() )
    return ret

In [1]:
# get_article_paragraphs('107838')

In [34]:
PARA_PLACEHOLDER = '</s>'
from numpy.linalg import norm

In [105]:
def get_histvec(query_para, pmcid):
    if query_para == PARA_PLACEHOLDER: 
        return np.zeros(30)
    qvec = paragraph2vec(query_para)
    dvecs = np.vstack( [ paragraph2vec(p.encode('ascii','ignore')) for p in get_article_paragraphs(pmcid)] )
    cossims = np.dot(dvecs, qvec.T) / norm(qvec) / norm(dvecs, axis=1)
    hist, _ = np.histogram( cossims, bins=30, range=(0,1) )
    ret = np.log(hist+1)
    return ret 

In [36]:
get_histvec(get_query_paragraphs(1)[1], '107838')

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  3.04452244,
        4.4308168 ,  4.33073334,  3.52636052,  3.61091791,  4.21950771,
        4.26267988,  4.76217393,  4.49980967,  5.01727984,  5.27811466])

In [37]:
query_para = get_query_paragraphs(1)[1] 
pmcid = '107838'
qvec = paragraph2vec(query_para)

In [106]:
def get_query_doc_feature(qid, pmcid): # query: list of paragraphs
    query = QUERIES[qid]
    return np.array([ get_histvec(p, pmcid) for p in query])

In [70]:
get_query_doc_feature(1, '107838').shape

(12, 30)

## Prepare data: padded queries, positive and negative histograms

In [62]:
from collections import defaultdict
candidates = defaultdict(list) # dict[int, list<str>] mapping qid to list of its candidate docids (that appeared in the qrel)
n_pos = defaultdict(int) # dict[int, int] mapping qid to the number of positive documents in qrel
relevance = {} # dict[(int,str), int] mapping (qid,docid) pairs to its relevance (0,1,2)
with open('../data/qrels.txt') as f:
    for line in tqdm(f, total=37707): 
        qid, _, pmcid, rel = line.split()
        qid = int(qid); rel = int(rel)
        try: 
            if len( get_article_paragraphs(pmcid) ) <= 3: 
                continue
            relevance[(qid,pmcid)] =rel
            candidates[qid].append(pmcid)
            if rel>0: n_pos[qid] += 1
        except: pass

100%|██████████| 37707/37707 [01:36<00:00, 390.42it/s]


In [97]:
def idf(para):
    return -10 if para==PARA_PLACEHOLDER else 1.0 

In [98]:
IDFs = {}
for qid in QUERIES.keys(): 
    IDFs[qid] = np.array([idf(para) for para in QUERIES[qid]])

---

### prepare training pairs

In [63]:
print sorted(n_pos.items(), key=lambda (k,v): v)
all_pos = sorted( n_pos.values() ) 
print all_pos

[(22, 8), (27, 12), (4, 18), (10, 19), (2, 34), (30, 39), (18, 67), (15, 69), (7, 71), (21, 73), (5, 95), (23, 106), (12, 108), (26, 112), (14, 117), (29, 117), (9, 121), (1, 128), (6, 141), (13, 148), (3, 150), (17, 175), (16, 182), (28, 211), (25, 216), (19, 218), (11, 364), (20, 631), (24, 757), (8, 831)]
[8, 12, 18, 19, 34, 39, 67, 69, 71, 73, 95, 106, 108, 112, 117, 117, 121, 128, 141, 148, 150, 175, 182, 211, 216, 218, 364, 631, 757, 831]


In [64]:
avg_pos_80 = all_pos[len(all_pos) * 9 / 10 - 1] # x1.5
avg_pos_50 = all_pos[len(all_pos) * 5 / 10 - 2] # x3
avg_pos_10 = all_pos[len(all_pos) * 5 / 30] # x10
print avg_pos_10, avg_pos_50, avg_pos_80 # quantiles of posid numbers

39 112 364


In [68]:
instances = {} # mapping qid to list, instances[qid] = list (pos_docid, neg_docid) pairs for qid, 
# use pairs in instances for training
np.random.seed(1)
for qid in QUERIES.keys():
    
    pernegative = 20 # number of limited pairs per positive sample
    num_of_instances = 8000 # number limit of pairs per query
    
    num_pos_currquery = n_pos[qid]
    curr_pernegative = pernegative
    curr_num_of_instance = num_of_instances # -- their trick: gen less pairs for queries with more pos docs
    if(num_pos_currquery <= avg_pos_10): 
        curr_pernegative *= 10; curr_num_of_instance *= 10
    elif(num_pos_currquery <= avg_pos_50): 
        curr_pernegative *= 3; curr_num_of_instance *= 3; 
    elif(num_pos_currquery <= avg_pos_80): 
        curr_pernegative *= 1.5; curr_num_of_instance *= 1.5; 
    
    rel_scores = defaultdict(list) # mapping a rel score to list of docids
    for docid in candidates[qid]:
        rel = relevance[(qid,docid)]
        rel_scores[rel].append(docid)
    scores = sorted( rel_scores.keys(), reverse=True ) # scores are sorted in desc order
    print 'scores =',scores, 
    total_instance = 0
    for i in xrange(len(scores)): # scores[i] = pos score
        for j in xrange(i+1, len(scores)): # scores[j] = neg score
            total_instance += len(rel_scores[scores[i]]) * len(rel_scores[scores[j]])
    print 'total=', total_instance, 
    total_instance = min(total_instance, curr_num_of_instance)
    from numpy.random import choice 
    instances_for_q = []
    for i in xrange(len(scores)):# scores are sorted in desc order
        pos_score = scores[i]
        cur_pos_ids = rel_scores[pos_score] # mapping a rel score to list of docids
        cur_neg_ids = []
        for j in xrange(i+1, len(scores)):
            neg_score = scores[j]
            cur_neg_ids += rel_scores[neg_score]# FOUND A BUG HERE
        if len(cur_neg_ids)==0: break
        for posid in cur_pos_ids:
            for negid in choice(cur_neg_ids, min(len(cur_neg_ids),int(curr_pernegative)), replace=False):
                instances_for_q.append( (posid,negid) )
            if len(instances_for_q)>=total_instance: break
        if len(instances_for_q)>=total_instance: break
    print 'got %d instances for query %d' % (len(instances_for_q), qid)
    instances[qid] = instances_for_q

scores = [2, 1, 0] total= 163263 got 3840 instances for query 1
scores = [2, 1, 0] total= 42372 got 6800 instances for query 2
scores = [2, 1, 0] total= 192356 got 4500 instances for query 3
scores = [2, 1, 0] total= 24345 got 3600 instances for query 4
scores = [2, 1, 0] total= 126691 got 5700 instances for query 5
scores = [2, 1, 0] total= 97817 got 4230 instances for query 6
scores = [2, 1, 0] total= 72479 got 4260 instances for query 7
scores = [2, 1, 0] total= 435854 got 8000 instances for query 8
scores = [2, 1, 0] total= 126334 got 3630 instances for query 9
scores = [2, 0] total= 21318 got 3800 instances for query 10
scores = [2, 1, 0] total= 255176 got 10920 instances for query 11
scores = [2, 1, 0] total= 117927 got 6480 instances for query 12
scores = [2, 1, 0] total= 189428 got 4440 instances for query 13
scores = [2, 1, 0] total= 124466 got 3510 instances for query 14
scores = [2, 1, 0] total= 68651 got 4140 instances for query 15
scores = [2, 1, 0] total= 250340 got 5460 

In [75]:
print map(len, candidates.values())

[1380, 1272, 1404, 1368, 1414, 810, 1080, 1175, 1135, 1141, 986, 1177, 1397, 1167, 1062, 1512, 1054, 1125, 1185, 1039, 943, 1170, 1606, 1400, 1119, 1005, 962, 1623, 746, 1307]


In [108]:
qid_docid2histvec = {} # mapping from (qid, docid) to histvec
for qid in QUERIES.keys():
    for docid in tqdm(candidates[qid]):
        _hist = get_query_doc_feature(qid, docid).reshape(1,MAX_QLEN,30)
        qid_docid2histvec[(qid, docid)] = _hist

100%|██████████| 986/986 [36:11<00:00,  1.46s/it]
100%|██████████| 1177/1177 [1:13:16<00:00,  1.49s/it]
100%|██████████| 1397/1397 [2:57:28<00:00,  2.54s/it]
100%|██████████| 1167/1167 [1:24:52<00:00,  1.72s/it]
100%|██████████| 1062/1062 [1:03:48<00:00,  1.31s/it]
100%|██████████| 1512/1512 [1:51:10<00:00,  2.32s/it]
100%|██████████| 1054/1054 [1:55:07<00:00,  1.73s/it]
100%|██████████| 1125/1125 [3:08:06<00:00,  3.75s/it]
100%|██████████| 1185/1185 [2:49:27<00:00,  2.69s/it]
100%|██████████| 1039/1039 [1:55:01<00:00,  1.68s/it]
100%|██████████| 943/943 [1:01:51<00:00,  1.24it/s]
100%|██████████| 1170/1170 [2:09:17<00:00,  2.26s/it]
100%|██████████| 1606/1606 [3:27:05<00:00,  5.68s/it]
100%|██████████| 1400/1400 [1:40:39<00:00,  2.07s/it]
100%|██████████| 1119/1119 [1:17:22<00:00,  1.14s/it]
100%|██████████| 1005/1005 [1:50:01<00:00,  2.14s/it]
100%|██████████| 962/962 [3:02:28<00:00,  3.48s/it]
100%|██████████| 1623/1623 [2:19:34<00:00,  2.74s/it]
100%|██████████| 746/746 [1:51:36<00

In [127]:
len(qid_docid2histvec)

35764

In [128]:
data_to_pickle = {
    'QUERIES': QUERIES,
    'candidates': candidates,# mapping qid to list of docids that corresponds to qid in the qrel file 
    'n_pos': n_pos, # n_pos[qid] = number of positive 
    'relevance': relevance,  # mapping (qid,docid) pairs to relevance (0,1,2)
    'qid_docid2histvec': qid_docid2histvec, # mapping (qid, docid) to histvec
    'instances': instances,  # instances[qid] = list (pos_docid, neg_docid) pairs for qid
}
PK_FOUT = '../data/DRMM+embedding_processed.pk'
with open(PK_FOUT, 'wb') as f:
    pk.dump(data_to_pickle, f, pk.HIGHEST_PROTOCOL)

----

# training

In [111]:
from DRMM import gen_DRMM_model

scoring_model, ranking_model = gen_DRMM_model(MAX_QLEN)

In [113]:
VALDATION_SPLIT = 0.2
BATCH_SZ = 64
NB_EPOCH = 50
logdir = '../logs/relevance_matching_0131'
from keras.callbacks import EarlyStopping, TensorBoard
_callbacks = [ EarlyStopping(monitor='val_loss', patience=2),
               TensorBoard(log_dir=logdir, histogram_freq=0, write_graph=False) ]

In [114]:
def batch_generator(idx_pairs, batch_size=BATCH_SZ): 
    # ** parameter `idx_pairs` is list of tuple (qid, pos_docid, neg_docid)**
    np.random.shuffle(idx_pairs)
    batches_pre_epoch = len(idx_pairs) // batch_size
    samples_per_epoch = batches_pre_epoch * batch_size # make samples_per_epoch a multiple of batch size
    counter = 0
    y_true_batch_dummy = np.ones((batch_size))
    while 1:
        idx_batch = idx_pairs[batch_size*counter: min(samples_per_epoch, batch_size*(counter+1))]
        idfs_batch, pos_batch, neg_batch = [], [], []
        for qid, pos_docid, neg_docid in idx_batch:
            idfs_batch.append(IDFs[qid])
            pos_batch.append(qid_docid2histvec[(qid,pos_docid)].reshape(MAX_QLEN,30))
            neg_batch.append(qid_docid2histvec[(qid,neg_docid)].reshape(MAX_QLEN,30))
        idfs_batch, pos_batch, neg_batch = map(np.array, [idfs_batch, pos_batch, neg_batch])
#         print idfs_batch.shape, pos_batch.shape, neg_batch.shape
        counter += 1
        if (counter >= batches_pre_epoch):
            np.random.shuffle(idx_pairs)
            counter=0
        yield [idfs_batch, pos_batch, neg_batch], y_true_batch_dummy

In [115]:
def get_idx_pairs(qids):
    idx_pairs = []
    for qid in qids:
        for posid, negid in instances[qid]:
            idx_pairs.append( (qid,posid, negid) )
    return idx_pairs

In [116]:
initial_weights = ranking_model.get_weights()

def shuffle_weights(model, weights=None):
    """Randomly permute the weights in `model`, or the given `weights`.
    This is a fast approximation of re-initializing the weights of a model.
    Assumes weights are distributed independently of the dimensions of the weight tensors
      (i.e., the weights have the same distribution along each dimension).
    :param Model model: Modify the weights of the given model.
    :param list(ndarray) weights: The model's weights will be replaced by a random permutation of these weights.
      If `None`, permute the model's current weights.
    """
    if weights is None:
        weights = model.get_weights()
    weights = [np.random.permutation(w.flat).reshape(w.shape) for w in weights]
    model.set_weights(weights)

In [117]:
def TREC_output(qid, run_name = 'my_run', fpath = None):
    res = [] # list of (score, pmcid) tuples
    for docid in candidates[qid]:
        input_idf = IDFs[qid].reshape((-1,MAX_QLEN))
        input_hist = qid_docid2histvec[(qid,docid)]
        score = scoring_model.predict([input_idf, input_hist])[0]
        res.append( (score, docid) )
    res = sorted(res, reverse=True)
    fout = sys.stdout if fpath==None else open(fpath, 'a')
    for rank, (score, docid) in enumerate(res[:2000]):
        print >>fout, '%d  Q0  %s  %d  %f  %s' % (qid, docid, rank, score, run_name)

In [118]:
def KFold(fpath, K = 5, run_name = 'my_run',  batch_size=BATCH_SZ, qids = sorted( QUERIES.keys() )):
    open(fpath,'w').close() # clear previous content in file 
    np.random.seed(0)
    np.random.shuffle(qids)
    fold_sz = len(qids) / K
    for fold in xrange(K):
        print 'fold %d' % fold, 
        val_start, val_end = fold*fold_sz, (fold+1)*fold_sz
        qids_val = qids[val_start:val_end] # train/val queries for each fold 
        qids_train = qids[:val_start] + qids[val_end:]
        print qids_val
        idx_pairs_train = get_idx_pairs(qids_train)
        idx_pairs_val = get_idx_pairs(qids_val)
        
        shuffle_weights(ranking_model, initial_weights) # reset model parameters
        ranking_model.fit_generator( batch_generator(idx_pairs_train, batch_size=batch_size), # train model 
                    samples_per_epoch = len(idx_pairs_train)//batch_size*batch_size,
                    nb_epoch=10,
                    validation_data=batch_generator(idx_pairs_val, batch_size=batch_size),
                    nb_val_samples=len(idx_pairs_val)//batch_size*batch_size, 
                    callbacks = _callbacks)
        print 'fold %d complete, outputting to %s...' % (fold, fpath)
        for qid in qids_val:
            TREC_output(qid, run_name = run_name, fpath = fpath)

In [None]:
KFold('../data/trec-output/0203_DRMM-embedding_LOO_10epoch.rankedlist', K=30)

fold 0 [15]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
fold 0 complete, outputting to ../data/trec-output/0203_DRMM-embedding_LOO_10epoch.rankedlist...
fold 1 [21]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
fold 1 complete, outputting to ../data/trec-output/0203_DRMM-embedding_LOO_10epoch.rankedlist...
fold 2 [24]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
fold 2 complete, outputting to ../data/trec-output/0203_DRMM-embedding_LOO_10epoch.rankedlist...
fold 3 [25]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
fold 3 complete, outputting to ../data/trec-output/0203_DRMM-embedding_LOO_10epoch.rankedlist...
fold 4 [3]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
fold 4 complete, outputting to ../data/trec-