In [1]:
# rank batch logic
import pickle
import argparse
import boto3
import os
import numpy as np
import itertools
import tarfile
import pandas as pd
from tqdm import tqdm
from tensorflow.python.keras.models import Model, save_model, load_model
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, roc_auc_score
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
from deepctr.estimator.models import DeepFMEstimator




In [2]:
########################################
# 从s3同步数据
########################################
def sync_s3(file_name_list, s3_folder, local_folder):
    for f in file_name_list:
        print("file preparation: download src key {} to dst key {}".format(os.path.join(
            s3_folder, f), os.path.join(local_folder, f)))
        s3client.download_file(bucket, os.path.join(
            s3_folder, f), os.path.join(local_folder, f))
        
def write_to_s3(filename, bucket, key):
    with open(filename, 'rb') as f:  # Read in binary mode
        return s3client.upload_fileobj(f, bucket, key)

default_bucket = 'sagemaker-us-east-1-002224604296'
default_mk_region = '1'
level_1 = 'recommender-system-film-mk'

parser = argparse.ArgumentParser()
parser.add_argument('--bucket', type=str, default=default_bucket)
parser.add_argument('--mk-region', type=str, default=default_mk_region)

args, _ = parser.parse_known_args()
bucket = args.bucket
mk_region = args.mk_region

prefix = f"{level_1}/{mk_region}"

print("bucket={}".format(bucket))
print("prefix='{}'".format(prefix))

s3client = boto3.client('s3')
bucket = 'sagemaker-us-east-1-002224604296'
local_folder = 'info'
if not os.path.exists(local_folder):
    os.makedirs(local_folder)

# recall batch 结果记载
file_name_list = ['recall_batch_result.pickle']
s3_folder = '{}/feature/recommend-list/movie'.format(prefix)
sync_s3(file_name_list, s3_folder, local_folder)
# 用户画像数据加载
file_name_list = ['portrait.pickle']
s3_folder = '{}/feature/recommend-list/portrait'.format(prefix)
sync_s3(file_name_list, s3_folder, local_folder)
# 倒排列表的pickle文件
file_name_list = ['movie_id_movie_feature_dict.pickle']
s3_folder = '{}/feature/content/inverted-list/'.format(prefix)
sync_s3(file_name_list, s3_folder, local_folder)
# deepfm模型文件下载
file_name_list = ['model.tar.gz']
s3_folder = '{}/model/rank/action/deepfm/latest/'.format(prefix)
sync_s3(file_name_list, s3_folder, local_folder)

# 加载pickle文件
file_to_load = open("info/recall_batch_result.pickle", "rb")
recall_batch_result = pickle.load(file_to_load)
file_to_load = open("info/portrait.pickle", "rb")
user_portrait = pickle.load(file_to_load)
file_to_load = open("info/movie_id_movie_feature_dict.pickle", "rb")
dict_id_feature_pddf = pd.read_pickle(file_to_load)
print("length of movie_id v.s. movie_property {}".format(len(dict_id_feature_pddf)))
# 解压缩deepfm模型
tar = tarfile.open("info/model.tar.gz","r:gz")
file_names = tar.getnames()
for file_name in file_names:
    tar.extract(file_name, "info/")
tar.close
# 加载recall结果
file_to_load = open("info/recall_batch_result.pickle", "rb")
dict_recall_result = pickle.load(file_to_load)
embed_dim = 32

bucket=sagemaker-us-east-1-002224604296
prefix='recommender-system-film-mk/1'
file preparation: download src key recommender-system-film-mk/1/feature/recommend-list/movie/recall_batch_result.pickle to dst key info/recall_batch_result.pickle
file preparation: download src key recommender-system-film-mk/1/feature/recommend-list/portrait/portrait.pickle to dst key info/portrait.pickle
file preparation: download src key recommender-system-film-mk/1/feature/content/inverted-list/movie_id_movie_feature_dict.pickle to dst key info/movie_id_movie_feature_dict.pickle
file preparation: download src key recommender-system-film-mk/1/model/rank/action/deepfm/latest/model.tar.gz to dst key info/model.tar.gz
length of movie_id v.s. movie_property 33767


In [3]:
from tensorflow import keras
from deepctr.layers import custom_objects
deepfm_model = keras.models.load_model("info/DeepFM", custom_objects)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [4]:
from keras import backend as K
import tensorflow as tf

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.
    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        # Graph -> GraphDef ProtoBuf
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        return frozen_graph

Using TensorFlow backend.


In [27]:
frozen_graph = freeze_session(K.get_session(),
                              output_names=[out.op.name for out in deepfm_model.outputs])





Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


FailedPreconditionError: 2 root error(s) found.
  (0) Failed precondition: Error while reading resource variable training/Adam/sparse_emb_C4/embeddings/v from Container: localhost. This could mean that the variable was uninitialized. Not found: Container localhost does not exist. (Could not find resource: localhost/training/Adam/sparse_emb_C4/embeddings/v)
	 [[node training/Adam/sparse_emb_C4/embeddings/v/Read/ReadVariableOp (defined at /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
	 [[dnn/bias0/Read/ReadVariableOp/_5]]
  (1) Failed precondition: Error while reading resource variable training/Adam/sparse_emb_C4/embeddings/v from Container: localhost. This could mean that the variable was uninitialized. Not found: Container localhost does not exist. (Could not find resource: localhost/training/Adam/sparse_emb_C4/embeddings/v)
	 [[node training/Adam/sparse_emb_C4/embeddings/v/Read/ReadVariableOp (defined at /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.

Original stack trace for 'training/Adam/sparse_emb_C4/embeddings/v/Read/ReadVariableOp':
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 583, in start
    self.io_loop.start()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/asyncio/base_events.py", line 442, in run_forever
    self._run_once()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/asyncio/base_events.py", line 1462, in _run_once
    handle._run()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 361, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 541, in execute_request
    user_expressions, allow_stdin,
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-2da09e192205>", line 3, in <module>
    deepfm_model = keras.models.load_model("info/DeepFM", custom_objects)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/saving/save.py", line 143, in load_model
    return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 187, in load_model_from_hdf5
    model._make_train_function()
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 2133, in _make_train_function
    params=self._collected_trainable_weights, loss=self.total_loss)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 513, in get_updates
    return [self.apply_gradients(grads_and_vars)]
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 442, in apply_gradients
    self._create_slots(var_list)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/adam.py", line 151, in _create_slots
    self.add_slot(var, 'v')
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 594, in add_slot
    initial_value=initial_value)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py", line 260, in __call__
    return cls._variable_v2_call(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py", line 254, in _variable_v2_call
    shape=shape)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py", line 235, in <lambda>
    previous_getter = lambda **kws: default_variable_creator_v2(None, **kws)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/variable_scope.py", line 2552, in default_variable_creator_v2
    shape=shape)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py", line 262, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py", line 1406, in __init__
    distribute_strategy=distribute_strategy)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py", line 1587, in _init_from_args
    value = gen_resource_variable_ops.read_variable_op(handle, dtype)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_resource_variable_ops.py", line 587, in read_variable_op
    "ReadVariableOp", resource=resource, dtype=dtype, name=name)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
    op_def=op_def)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
    attrs, op_def, compute_device)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
    op_def=op_def)
  File "/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()


In [5]:
# 整理recall结果
data_input_pddf_dict = {}
data_input_pddf_dict['userId'] = []
data_input_pddf_dict['programId'] = []
for user_k,result_v in dict_recall_result.items():
    for item_v in result_v.keys():
        data_input_pddf_dict['userId'].append(str(user_k))
        data_input_pddf_dict['programId'].append(str(item_v))
data_input_pddf = pd.DataFrame.from_dict(data_input_pddf_dict)

In [6]:
data_input_pddf['programId'] = data_input_pddf['programId'].astype(int)

In [7]:
dict_id_feature_pddf['programId'] = dict_id_feature_pddf['programId'].astype(int)

In [8]:
data_input_pddf = pd.merge(left=data_input_pddf, right=dict_id_feature_pddf.drop_duplicates(), how='left', left_on='programId', right_on='programId')

In [9]:
def user_embed(x, user_portrait):    
    if x in user_portrait.keys():
#         print(user_portrait[x])
        return user_portrait[x]['ub_embeddding'][0]
    else:
        return [0]*32
    
def user_id_feat(x, i):
    return x[i]
#     return pd.Series(f_dict)
    
# def item_embed(x, raw_embed_item_mapping, ub_item_embeddings):
#     if str(x) not in raw_embed_item_mapping.keys():
#         return [0]*32
#     embed_item_idx = raw_embed_item_mapping[str(x)]
#     if  int(embed_item_idx) < len(ub_item_embeddings):
# #         print(user_portrait[x])
#         return ub_item_embeddings[int(embed_item_idx)]
#     else:
#         return [0]*32
    
# def item_id_feat(x, i):
#     return x[i]
# #     return pd.Series(f_dict)

# def sparse_item_id_feat(x, mt, dict_id_content=dict_id_content):
#     result = dict_id_content[str(x)][mt]
#     if result[0] is None:
#         return None
#     else:
#         return '|'.join(result)

In [10]:
# user id feature - user embedding
data_input_pddf['userid_feat'] = data_input_pddf['userId'].apply(lambda x: user_embed(x, user_portrait))
for i in range(32):
    data_input_pddf['user_feature_{}'.format(i)] = data_input_pddf['userid_feat'].apply(lambda x: user_id_feat(x, i))
# # item id feature - item embedding
# data_input_pddf['itemid_feat'] = data_input_pddf['programId'].apply(lambda x: item_embed(x, raw_embed_item_mapping, ub_item_embeddings))
# for i in range(32):
#     data_input_pddf['item_feature_{}'.format(i)] = data_input_pddf['itemid_feat'].apply(lambda x: item_id_feat(x, i))
# # sparse feature
# popularity_method_list = ['category', 'director',
#                           'actor', 'language', 'level', 'year']
# feature_num = 6
# for i, mt in enumerate(popularity_method_list):
#     data_input_pddf['sparse_feature_{}'.format(i)] = data_input_pddf['programId'].apply(lambda x: sparse_item_id_feat(x, mt))

In [11]:
data_input_pddf.head()

Unnamed: 0,userId,programId,C1,C2,C3,C4,C5,C6,I32,I33,...,user_feature_22,user_feature_23,user_feature_24,user_feature_25,user_feature_26,user_feature_27,user_feature_28,user_feature_29,user_feature_30,user_feature_31
0,10541,16895,429,0,1,0,0,146,0.495762,0.898616,...,-1.720443,-4.557661,-1.123465,2.442297,-6.94726,3.610927,0.544282,0.056881,2.668141,3.372865
1,10541,1689808,792,0,1,1,0,146,0.482548,0.834998,...,-1.720443,-4.557661,-1.123465,2.442297,-6.94726,3.610927,0.544282,0.056881,2.668141,3.372865
2,10541,1688432,445,1850,3631,16,0,146,0.581059,0.595511,...,-1.720443,-4.557661,-1.123465,2.442297,-6.94726,3.610927,0.544282,0.056881,2.668141,3.372865
3,10541,1679249,737,3553,4501,12,69,144,0.3659,0.369701,...,-1.720443,-4.557661,-1.123465,2.442297,-6.94726,3.610927,0.544282,0.056881,2.668141,3.372865
4,10541,1688144,792,0,1,1,0,146,0.600024,0.800118,...,-1.720443,-4.557661,-1.123465,2.442297,-6.94726,3.610927,0.544282,0.056881,2.668141,3.372865


In [12]:
mk_test_data = data_input_pddf
dense_feature_size = embed_dim
sparse_feature_size = 6
for i in range(dense_feature_size):
    if i < embed_dim:
        mk_test_data['I{}'.format(i+1)] = mk_test_data['user_feature_{}'.format(i)]
        mk_test_data.drop(['user_feature_{}'.format(i)], axis=1)

mk_test_data.drop(['userid_feat'],axis=1)
mk_sparse_features = ['C' + str(i)for i in range(1, sparse_feature_size+1)]
mk_dense_features = ['I'+str(i) for i in range(1, dense_feature_size+1)]
mk_test_data[mk_sparse_features] = mk_test_data[mk_sparse_features].fillna('-1', )
mk_test_data[mk_dense_features] = mk_test_data[mk_dense_features].fillna(0,)

In [13]:
mk_fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=mk_test_data[feat].nunique(), embedding_dim=4) for i,feat in enumerate(mk_sparse_features)] + [DenseFeat(feat, 1) for feat in mk_dense_features]

In [14]:
mk_dnn_feature_columns = mk_fixlen_feature_columns
mk_linear_feature_columns = mk_fixlen_feature_columns
mk_feature_names = get_feature_names(mk_linear_feature_columns + mk_dnn_feature_columns)

In [15]:
mk_model_input = {name:mk_test_data[name].values for name in mk_feature_names}
# deepfm_model = DeepFM(mk_linear_feature_columns,mk_dnn_feature_columns,task='binary')
# deepfm_model.load_weights('info/DeepFM_w.h5')

In [16]:
mk_pred_ans = deepfm_model.predict(mk_model_input, batch_size=256)

In [17]:
import tensorflow as tf
print(tf.__version__)
import deepctr
print(deepctr.__version__)

1.15.2
0.8.5


In [19]:
deepfm_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
C1 (InputLayer)                 [(None, 1)]          0                                            
__________________________________________________________________________________________________
C2 (InputLayer)                 [(None, 1)]          0                                            
__________________________________________________________________________________________________
C3 (InputLayer)                 [(None, 1)]          0                                            
__________________________________________________________________________________________________
C4 (InputLayer)                 [(None, 1)]          0                                            
______________________________________________________________________________________________

In [20]:
print(mk_model_input)

{'C1': array([429, 792, 445, ..., 282, 282, 282]), 'C2': array([   0,    0, 1850, ..., 4107, 4107, 4107]), 'C3': array([   1,    1, 3631, ..., 4885, 4885, 4885]), 'C4': array([ 0,  1, 16, ...,  5,  5,  5]), 'C5': array([ 0,  0,  0, ..., 49, 49, 49]), 'C6': array([146, 146, 146, ..., 146, 146, 146]), 'I1': array([ 2.3436749 ,  2.3436749 ,  2.3436749 , ..., -1.61636293,
       -1.61636293, -1.61636293]), 'I2': array([-1.53368735, -1.53368735, -1.53368735, ...,  1.67615747,
        1.67615747,  1.67615747]), 'I3': array([-1.85156298, -1.85156298, -1.85156298, ...,  1.72999907,
        1.72999907,  1.72999907]), 'I4': array([ 0.27060929,  0.27060929,  0.27060929, ..., -0.62270951,
       -0.62270951, -0.62270951]), 'I5': array([ 3.48064399,  3.48064399,  3.48064399, ..., -2.08019042,
       -2.08019042, -2.08019042]), 'I6': array([ 1.98770738,  1.98770738,  1.98770738, ..., -2.0286653 ,
       -2.0286653 , -2.0286653 ]), 'I7': array([-1.00328565, -1.00328565, -1.00328565, ...,  1.72169757,

In [None]:
mk_test_data['rank_score'] = [v[0] for v in list(mk_pred_ans)]

In [None]:
rank_result = {}
for reviewerID, hist in tqdm(mk_test_data.groupby('userId')):
    candidate_list = hist['programId'].tolist()
    score_list = hist['rank_score'].tolist()
    id_score_dict = dict(zip(candidate_list, score_list))
    sort_id_score_dict = {k: v for k, v in sorted(id_score_dict.items(), key=lambda item: item[1], reverse=True)}
    rank_result[reviewerID] = sort_id_score_dict

In [1]:
!rm -r info/*
!python rank-batch.py


bucket=sagemaker-us-east-1-002224604296
prefix='recommender-system-film-mk/1'
file preparation: download src key recommender-system-film-mk/1/feature/recommend-list/movie/recall_batch_result.pickle to dst key info/recall_batch_result.pickle
file preparation: download src key recommender-system-film-mk/1/feature/recommend-list/portrait/portrait.pickle to dst key info/portrait.pickle
file preparation: download src key recommender-system-film-mk/1/feature/content/inverted-list/movie_id_movie_feature_dict.pickle to dst key info/movie_id_movie_feature_dict.pickle
file preparation: download src key recommender-system-film-mk/1/model/rank/action/deepfm/latest/model.tar.gz to dst key info/model.tar.gz
length of movie_id v.s. movie_property 33767
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions fo

In [2]:
import pandas as pd
print(pd.__version__)

1.1.5
