In [1]:
import pandas as pd
import numpy as np
import matchzoo as mz
import tensorflow as tf
import sys
path = '/Users/GovindShukla/tensorflow-env/DRMM_Data_Preprocessed/'
fold = int(5)

In [2]:
print("loading embedding ...")
glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=300)
print("embedding loaded as `glove_embedding`")

loading embedding ...
embedding loaded as `glove_embedding`


In [3]:
print("Loading fold:  ",fold)
preprocessor = mz.load_preprocessor(path + "robust_preprocessor_fold_"+str(fold))

Loading fold:   5


In [4]:
print("preprocessor context:   ", preprocessor.context)

preprocessor context:    {'filter_unit': <matchzoo.preprocessors.units.frequency_filter.FrequencyFilter object at 0x106456d10>, 'vocab_unit': <matchzoo.preprocessors.units.vocabulary.Vocabulary object at 0x106456710>, 'vocab_size': 535342, 'embedding_input_dim': 535342, 'input_shapes': [(10,), (100,)]}


In [5]:
ranking_task = mz.tasks.Ranking(loss=mz.losses.RankHingeLoss(num_neg=1))
ranking_task.metrics = [
    mz.metrics.NormalizedDiscountedCumulativeGain(k=20),
    mz.metrics.MeanAveragePrecision(),
    mz.metrics.Precision(k=20)
]

print("ranking task ok")

ranking task ok


In [6]:
bin_size = 30
model = mz.models.DRMM()
model.params.update(preprocessor.context)
model.params['input_shapes'] = [[10,], [10, bin_size,]]
model.params['task'] = ranking_task
model.params['mask_value'] = -1
model.params['embedding_output_dim'] = glove_embedding.output_dim
model.params['mlp_num_layers'] = 2
model.params['mlp_num_units'] = 5
model.params['mlp_num_fan_out'] = 1
model.params['mlp_activation_func'] = 'tanh'
model.params['optimizer'] = 'adadelta'
model.build()
model.compile()
model.backend.summary()

print("model params set")

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text_left (InputLayer)         [(None, 10)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 10, 300)      160602600   ['text_left[0][0]']              
                                                                                                  
 match_histogram (InputLayer)   [(None, 10, 30)]     0           []                               
                                                                                                  
 dense (Dense)                  (None, 10, 1)        300         ['embedding[0][0]']              
                                                                                              

In [7]:
train_pack_processed = mz.load_data_pack(path + "robust_train_fold_"+str(fold))

dev_pack_processed = mz.load_data_pack(path + "robust_dev_fold_"+str(fold))

print("datapacks OK")

datapacks OK


In [8]:
embedding_matrix = glove_embedding.build_matrix(preprocessor.context['vocab_unit'].state['term_index'])
#normalize the word embedding for fast histogram generating.
l2_norm = np.sqrt((embedding_matrix*embedding_matrix).sum(axis=1))
embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]

In [9]:
model.load_embedding_matrix(embedding_matrix)

print("embedding matrix loaded")

2022-11-08 22:51:39.610043: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2022-11-08 22:51:39.613821: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


embedding matrix loaded


In [10]:
hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix, bin_size=30, hist_mode='LCH')

In [11]:
pred_generator = mz.DataGenerator(dev_pack_processed, mode='point', callbacks=[hist_callback])

print("pred generator")

pred generator


In [17]:
pred_x, pred_y = pred_generator[:]
evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                           x=pred_x,
                                           y=pred_y,
                                           once_every=1,
                                           batch_size=len(pred_y),
                                           model_save_path='./pretrained_models/drmm_pretrained_model_fold'+str(fold)+'/'
                                          )

train_generator = mz.DataGenerator(train_pack_processed, mode='pair', num_dup=2, num_neg=1, batch_size=20,
                                   callbacks=[hist_callback])
print('num batches:', len(train_generator))

num batches: 532


In [21]:
history = model.fit_generator(train_generator, epochs=50, callbacks=[evaluate], workers=8)

Epoch 1/50


  return self._backend.fit_generator(




TypeError: '<' not supported between instances of 'NormalizedDiscountedCumulativeGain' and 'str'

In [1]:
# Install required libraries
!pip install rank-bm25

Collecting rank-bm25
  Using cached rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank-bm25
Successfully installed rank-bm25-0.2.2


In [9]:
!pip install tf-models-official

Collecting tf-models-official
  Using cached tf_models_official-2.10.1-py2.py3-none-any.whl (2.2 MB)
Collecting oauth2client
  Using cached oauth2client-4.1.3-py2.py3-none-any.whl (98 kB)
Collecting sentencepiece
  Using cached sentencepiece-0.1.97-cp310-cp310-macosx_11_0_arm64.whl (1.1 MB)
Collecting kaggle>=1.3.9
  Using cached kaggle-1.5.12-py3-none-any.whl
Collecting opencv-python-headless
  Using cached opencv_python_headless-4.6.0.66-cp37-abi3-macosx_11_0_arm64.whl (30.0 MB)
Collecting tensorflow-addons
  Using cached tensorflow_addons-0.18.0-cp310-cp310-macosx_11_0_arm64.whl (10.9 MB)
Collecting gin-config
  Using cached gin_config-0.5.0-py3-none-any.whl (61 kB)
Collecting py-cpuinfo>=3.3.0
  Using cached py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Collecting seqeval
  Using cached seqeval-1.2.2-py3-none-any.whl
Collecting tf-slim>=1.1.0
  Using cached tf_slim-1.1.0-py2.py3-none-any.whl (352 kB)
Collecting pyyaml<6.0,>=5.1
  Using cached PyYAML-5.4.1.tar.gz (1

In [3]:
!pip install qtconsole

Collecting qtconsole
  Downloading qtconsole-5.4.0-py3-none-any.whl (121 kB)
[K     |████████████████████████████████| 121 kB 4.4 MB/s eta 0:00:01
Collecting qtpy>=2.0.1
  Downloading QtPy-2.3.0-py3-none-any.whl (83 kB)
[K     |████████████████████████████████| 83 kB 5.7 MB/s eta 0:00:011
Installing collected packages: qtpy, qtconsole
Successfully installed qtconsole-5.4.0 qtpy-2.3.0
