### Wide & Deep Recommendation System with Movie Lens
출처 : [Microsoft Github] (https://github.com/microsoft/recommenders)

In [67]:
import os
from tempfile import TemporaryDirectory

import tensorflow as tf
import pandas as pd
import sklearn.preprocessing
import papermill as pm

from tensorflow.python.client import device_lib
from python_splitters import python_random_split
import wide_deep_utils as wide_deep
import tf_utils
from pandas_df_utils import user_item_pairs
import python_evaluation

print("Tensorflow Version:", tf.VERSION)
devices = device_lib.list_local_devices()
print([x.name for x in devices])

num_cpus = os.cpu_count()
print("Num CPUs:", num_cpus)

Tensorflow Version:

 

1.14.0




['/device:CPU:0']




Num CPUs:

 

4




In [60]:
####################
# 파라미터 세팅
####################
#Recommend top k items
TOP_K = 10
# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'
# Metrics to use for evaluation. reco_utils.evaluation.python_evaluation function names
RANKING_METRICS = ['map_at_k', 'ndcg_at_k', 'precision_at_k', 'recall_at_k']
RATING_METRICS = ['rmse', 'mae', 'rsquared', 'exp_var']
# Use session hook to evaluate model while training
EVALUATE_WHILE_TRAINING = True

# Data column names
USER_COL = 'UserId'
ITEM_COL = 'MovieId'
RATING_COL = 'Rating'
ITEM_FEAT_COL = 'Genres'

# Train and test set pickle file paths. If None, download and split the dataset.
DATA_DIR = None
TRAIN_PICKLE_PATH = None
TEST_PICKLE_PATH = None
EXPORT_DIR_BASE = './outputs/model'

#### Hyperparameters
MODEL_TYPE = 'wide_deep'
EPOCHS = 50  # if 0, only 1 batch will be processed
BATCH_SIZE = 64
# Wide (linear) model hyperparameters
LINEAR_OPTIMIZER = 'Ftrl'
LINEAR_OPTIMIZER_LR =0.0029   # Learning rate
LINEAR_L1_REG = 0.0           # L1 Regularization rate for FtrlOptimizer
LINEAR_MOMENTUM = 0.9         # Momentum for MomentumOptimizer or RMSPropOptimizer
# DNN model hyperparameters
DNN_OPTIMIZER = 'Adagrad'
DNN_OPTIMIZER_LR = 0.1
DNN_L1_REG = 0.0           # L1 Regularization rate for FtrlOptimizer
DNN_MOMENTUM = 0.9         # Momentum for MomentumOptimizer or RMSPropOptimizer
# Layer dimensions are defined separately to make this work with AzureML Hyperdrive
DNN_HIDDEN_LAYER_1 = 0     # Set 0 to not use this layer
DNN_HIDDEN_LAYER_2 = 128   # Set 0 to not use this layer
DNN_HIDDEN_LAYER_3 = 256   # Set 0 to not use this layer
DNN_HIDDEN_LAYER_4 = 32    # With this setting, DNN hidden units will be = [512, 256, 128, 128]
DNN_USER_DIM = 4
DNN_ITEM_DIM = 4
DNN_DROPOUT = 0.4
DNN_BATCH_NORM = 1         # 1 to use batch normalization, 0 if not.
# Set cache directory path if want to keep the model checkpoints
MODEL_DIR = None

In [45]:
###############################
# 데이터 전처리
# 1. Rating Data & Genres Data
###############################
df_rating = pd.read_csv('./data/100K_Latest/ratings.csv', 
                        sep=",", skiprows=1, header=None, 
                        names=[USER_COL, ITEM_COL, RATING_COL, 'timestamp'], engine='python')
df_movie = pd.read_csv('./data/100K_Latest/movies.csv', 
                       sep=",", skiprows=1, header=None, 
                       names=[ITEM_COL, 'MovieName', 'Genres_string'], engine='python')

# print('df_ratings \n', df_rating.head())
# print('df_movie \n', df_movie.head())

df_data = pd.merge(df_rating, df_movie)

print('df_data \n', df_data.head())


###############################
# 데이터 전처리
# 2. Feature 인코딩
###############################
# Encode 'genres' into int array (multi-hot representation) to use as item features
genres_encoder = sklearn.preprocessing.MultiLabelBinarizer()
df_data[ITEM_FEAT_COL] = genres_encoder.fit_transform(
    df_data['Genres_string'].apply(lambda s: s.split("|"))
).tolist()
print("Genres:", genres_encoder.classes_)
print(df_data.drop_duplicates(ITEM_COL)[[ITEM_COL, 'Genres_string', ITEM_FEAT_COL]].head())

df_data 
    UserId  MovieId  Rating   timestamp         MovieName  \
0       1        1     4.0   964982703  Toy Story (1995)   
1       5        1     4.0   847434962  Toy Story (1995)   
2       7        1     4.5  1106635946  Toy Story (1995)   
3      15        1     2.5  1510577970  Toy Story (1995)   
4      17        1     4.5  1305696483  Toy Story (1995)   

                                 Genres_string  
0  Adventure|Animation|Children|Comedy|Fantasy  
1  Adventure|Animation|Children|Comedy|Fantasy  
2  Adventure|Animation|Children|Comedy|Fantasy  
3  Adventure|Animation|Children|Comedy|Fantasy  
4  Adventure|Animation|Children|Comedy|Fantasy  


Genres: ['(no genres listed)' 'Action' 'Adventure' 'Animation' 'Children' 'Comedy'
 'Crime' 'Documentary' 'Drama' 'Fantasy' 'Film-Noir' 'Horror' 'IMAX'
 'Musical' 'Mystery' 'Romance' 'Sci-Fi' 'Thriller' 'War' 'Western']
     MovieId                                Genres_string  \
0          1  Adventure|Animation|Children|Comedy|Fantasy   
215        3                               Comedy|Romance   
267        6                        Action|Crime|Thriller   
369       47                             Mystery|Thriller   
572       50                       Crime|Mystery|Thriller   

                                                Genres  
0    [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...  
215  [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
267  [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...  
369  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...  
572  [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, ...  


In [46]:
###############################
# Train, Test 데이터 나누기
###############################
train, test = python_random_split(
    df_data.drop('Genres_string', axis=1),  # We don't need Genres original string column
    ratio=0.75,
    seed=42
)

print("Train = {}, test = {}".format(len(train), len(test)))

Train = 75627, test = 25209


In [47]:
###############################
# item, user 수 확인
###############################
# Unique items in the dataset
if ITEM_FEAT_COL is None:
    items = df_data.drop_duplicates(ITEM_COL)[[ITEM_COL]].reset_index(drop=True)
    item_feat_shape = None
else:
    items = df_data.drop_duplicates(ITEM_COL)[[ITEM_COL, ITEM_FEAT_COL]].reset_index(drop=True)
    item_feat_shape = len(items[ITEM_FEAT_COL][0])
# Unique users in the dataset
users = df_data.drop_duplicates(USER_COL)[[USER_COL]].reset_index(drop=True)

print("Num items = {}, num users = {}".format(len(items), len(users)))

Num items = 9724, num users = 610


In [48]:
# Train at least one batch; store checkpoints at least once
train_steps = max(1, EPOCHS * len(train) // BATCH_SIZE)
save_checkpoints_steps = max(1, train_steps // 5)

# Note, if there exists model files in MODEL_DIR, the existing model in the dir will be re-trained and
# could throw an error if the model architecture is different.
if MODEL_DIR is None:
    tmp_dir = TemporaryDirectory()
    MODEL_DIR = tmp_dir.name

DNN_HIDDEN_UNITS = [DNN_HIDDEN_LAYER_1, DNN_HIDDEN_LAYER_2, DNN_HIDDEN_LAYER_3, DNN_HIDDEN_LAYER_4]
DNN_HIDDEN_UNITS = [h for h in DNN_HIDDEN_UNITS if h > 0] 
if MODEL_TYPE is 'deep' or MODEL_TYPE is 'wide_deep':
    print("DNN hidden units =", DNN_HIDDEN_UNITS)
    print("Embedding {} users to {}-dim vector".format(len(users), DNN_USER_DIM))
    print("Embedding {} items to {}-dim vector".format(len(items), DNN_ITEM_DIM))
    
# Optimizer specific parameters
linear_params = {}
if LINEAR_OPTIMIZER == 'Ftrl':
    linear_params['l1_regularization_strength'] = LINEAR_L1_REG
elif LINEAR_OPTIMIZER == 'Momentum' or LINEAR_OPTIMIZER == 'RMSProp':
    linear_params['momentum'] = LINEAR_MOMENTUM

dnn_params = {}
if DNN_OPTIMIZER == 'Ftrl':
    dnn_params['l1_regularization_strength'] = DNN_L1_REG
elif DNN_OPTIMIZER == 'Momentum' or DNN_OPTIMIZER == 'RMSProp':
    dnn_params['momentum'] = DNN_MOMENTUM

print("\n", linear_params, dnn_params)

DNN hidden units = [128, 256, 32]
Embedding 610 users to 4-dim vector
Embedding 9724 items to 4-dim vector

 {'l1_regularization_strength': 0.0} {}


In [49]:
###############################
# Model Feature 세팅
###############################

# Define wide (linear) and deep (dnn) features
wide_columns, deep_columns = wide_deep.build_feature_columns(
    users=users[USER_COL].values,
    items=items[ITEM_COL].values,
    user_col=USER_COL,
    item_col=ITEM_COL,
    item_feat_col=ITEM_FEAT_COL,
    user_dim=DNN_USER_DIM,
    item_dim=DNN_ITEM_DIM,
    item_feat_shape=item_feat_shape,
    model_type=MODEL_TYPE,
)

print("\nFeature specs:")
for c in wide_columns + deep_columns:
    print(str(c)[:100], "...")


Feature specs:
CrossedColumn(keys=(VocabularyListCategoricalColumn(key='UserId', vocabulary_list=(1, 5, 7, 15, 17,  ...
EmbeddingColumn(categorical_column=VocabularyListCategoricalColumn(key='UserId', vocabulary_list=(1, ...
EmbeddingColumn(categorical_column=VocabularyListCategoricalColumn(key='MovieId', vocabulary_list=(1 ...
NumericColumn(key='Genres', shape=(20,), default_value=None, dtype=tf.float32, normalizer_fn=None) ...


In [50]:
###############################
# 모델 빌드
###############################
# Build a model based on the parameters
model = wide_deep.build_model(
    model_dir=MODEL_DIR,
    wide_columns=wide_columns,
    deep_columns=deep_columns,
    linear_optimizer=tf_utils.build_optimizer(LINEAR_OPTIMIZER, LINEAR_OPTIMIZER_LR, **linear_params),
    dnn_optimizer=tf_utils.build_optimizer(DNN_OPTIMIZER, DNN_OPTIMIZER_LR, **dnn_params),
    dnn_hidden_units=DNN_HIDDEN_UNITS,
    dnn_dropout=DNN_DROPOUT,
    dnn_batch_norm=(DNN_BATCH_NORM==1),
    log_every_n_iter=max(1, train_steps//20),  # log 20 times
    save_checkpoints_steps=save_checkpoints_steps
)

I0720 23:47:07.306102 17056 estimator.py:209] Using config: {'_model_dir': 'C:\\Users\\alsoj\\AppData\\Local\\Temp\\tmpy4ch14vb', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 11816, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 2954, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001A4501F6EF0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [52]:
cols = {
    'col_user': USER_COL,
    'col_item': ITEM_COL,
    'col_rating': RATING_COL,
    'col_prediction': 'prediction'
}

# Prepare ranking evaluation set, i.e. get the cross join of all user-item pairs
ranking_pool = user_item_pairs(
    user_df=users,
    item_df=items,
    user_col=USER_COL,
    item_col=ITEM_COL,
    user_item_filter_df=train,  # Remove seen items
    shuffle=True
)

In [53]:
# Define training hooks to track performance while training
hooks = []
if EVALUATE_WHILE_TRAINING:
    evaluation_logger = tf_utils.MetricsLogger()
    metrics = (m for m in (RANKING_METRICS, RATING_METRICS) if len(m) > 0)
    for ms in metrics:
        hooks.append(
            tf_utils.evaluation_log_hook(
                model,
                logger=evaluation_logger,
                true_df=test,
                y_col=RATING_COL,
                eval_df=ranking_pool if ms==RANKING_METRICS else test.drop(RATING_COL, axis=1),
                every_n_iter=save_checkpoints_steps,
                model_dir=MODEL_DIR,
                eval_fns=[getattr(python_evaluation, m) for m in ms],
                **({**cols, 'k': TOP_K} if ms==RANKING_METRICS else cols)
            )
        )

# Define training input (sample feeding) function
train_fn = tf_utils.pandas_input_fn(
    df=train,
    y_col=RATING_COL,
    batch_size=BATCH_SIZE,
    num_epochs=None,  # None == run forever. We use steps=TRAIN_STEPS instead.
    shuffle=True,
    num_threads=num_cpus-1
)

In [54]:
print("Training steps = {}, Batch size = {} (num epochs = {})".format(train_steps, BATCH_SIZE, EPOCHS))
tf.logging.set_verbosity(tf.logging.INFO)

try:
    model.train(
        input_fn=train_fn,
        hooks=hooks,
        steps=train_steps
    )
except tf.train.NanLossDuringTrainingError:
    raise ValueError(
        """Training stopped with NanLossDuringTrainingError.
        Try other optimizers, smaller batch size and/or smaller learning rate."""
    )

I0720 23:57:57.222687 17056 estimator.py:1145] Calling model_fn.


Training steps = 59083, Batch size = 64 (num epochs = 50)


I0720 23:57:59.929419 17056 estimator.py:1147] Done calling model_fn.


I0720 23:57:59.932409 17056 basic_session_run_hooks.py:541] Create CheckpointSaverHook.


I0720 23:58:01.070366 17056 monitored_session.py:240] Graph was finalized.


I0720 23:58:01.095301 17056 saver.py:1280] Restoring parameters from C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt-0


W0720 23:58:01.318702 17056 deprecation.py:323] From D:\01.Programming\PycharmProjects\Recommenders-movielens\venv\lib\site-packages\tensorflow\python\training\saver.py:1066: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.


I0720 23:58:01.461324 17056 session_manager.py:500] Running local_init_op.


I0720 23:58:01.724618 17056 session_manager.py:502] Done running local_init_op.


I0720 23:58:03.845945 17056 basic_session_run_hooks.py:606] Saving checkpoints for 0 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


I0720 23:58:07.156094 17056 basic_session_run_hooks.py:262] loss = 1080.0378, step = 1


W0720 23:58:11.885449 17056 basic_session_run_hooks.py:724] It seems that global step (tf.train.get_global_step) has not been increased. Current value (could be stable): 714 vs previous value: 714. You could increase the global step by passing tf.train.get_global_step() to Optimizer.apply_gradients or Optimizer.minimize.


I0720 23:58:24.282303 17056 basic_session_run_hooks.py:692] global_step/sec: 172.484


I0720 23:58:24.284297 17056 basic_session_run_hooks.py:260] loss = 58.952923, step = 2955 (17.128 sec)


I0720 23:58:42.575903 17056 basic_session_run_hooks.py:692] global_step/sec: 161.468


I0720 23:58:42.578894 17056 basic_session_run_hooks.py:260] loss = 43.936214, step = 5909 (18.295 sec)


I0720 23:58:59.691139 17056 basic_session_run_hooks.py:692] global_step/sec: 172.595


I0720 23:58:59.693134 17056 basic_session_run_hooks.py:260] loss = 56.104057, step = 8863 (17.114 sec)


I0720 23:59:15.619550 17056 basic_session_run_hooks.py:606] Saving checkpoints for 11816 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


I0721 00:03:11.009167 17056 basic_session_run_hooks.py:692] global_step/sec: 11.7541


I0721 00:03:11.082006 17056 basic_session_run_hooks.py:260] loss = 62.466927, step = 11817 (251.389 sec)


I0721 00:03:24.616781 17056 basic_session_run_hooks.py:692] global_step/sec: 217.069


I0721 00:03:24.618777 17056 basic_session_run_hooks.py:260] loss = 44.291084, step = 14771 (13.537 sec)


I0721 00:03:37.740692 17056 basic_session_run_hooks.py:692] global_step/sec: 225.085


I0721 00:03:37.742687 17056 basic_session_run_hooks.py:260] loss = 44.970993, step = 17725 (13.124 sec)


I0721 00:03:50.380893 17056 basic_session_run_hooks.py:692] global_step/sec: 233.699


I0721 00:03:50.388872 17056 basic_session_run_hooks.py:260] loss = 36.26291, step = 20679 (12.646 sec)


I0721 00:04:03.252478 17056 basic_session_run_hooks.py:606] Saving checkpoints for 23632 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


I0721 00:07:52.138703 17056 basic_session_run_hooks.py:692] global_step/sec: 12.2188


I0721 00:07:52.179594 17056 basic_session_run_hooks.py:260] loss = 28.733704, step = 23633 (241.791 sec)


I0721 00:08:05.066138 17056 basic_session_run_hooks.py:692] global_step/sec: 228.506


I0721 00:08:05.068133 17056 basic_session_run_hooks.py:260] loss = 47.60215, step = 26587 (12.889 sec)


I0721 00:08:18.077348 17056 basic_session_run_hooks.py:692] global_step/sec: 227.052


I0721 00:08:18.078345 17056 basic_session_run_hooks.py:260] loss = 37.181858, step = 29541 (13.010 sec)


I0721 00:08:31.149397 17056 basic_session_run_hooks.py:692] global_step/sec: 225.961


I0721 00:08:31.152388 17056 basic_session_run_hooks.py:260] loss = 33.337105, step = 32495 (13.074 sec)


I0721 00:08:43.737737 17056 basic_session_run_hooks.py:606] Saving checkpoints for 35448 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


I0721 00:12:21.414740 17056 basic_session_run_hooks.py:692] global_step/sec: 12.8287


I0721 00:12:21.465603 17056 basic_session_run_hooks.py:260] loss = 42.7266, step = 35449 (230.311 sec)


I0721 00:12:37.935080 17056 basic_session_run_hooks.py:692] global_step/sec: 178.81


I0721 00:12:37.937073 17056 basic_session_run_hooks.py:260] loss = 39.980446, step = 38403 (16.473 sec)


I0721 00:12:56.432620 17056 basic_session_run_hooks.py:692] global_step/sec: 159.697


I0721 00:12:56.435612 17056 basic_session_run_hooks.py:260] loss = 40.145027, step = 41357 (18.499 sec)


I0721 00:13:14.436000 17056 basic_session_run_hooks.py:692] global_step/sec: 164.08


I0721 00:13:14.438991 17056 basic_session_run_hooks.py:260] loss = 34.644535, step = 44311 (18.003 sec)


I0721 00:13:34.511323 17056 basic_session_run_hooks.py:606] Saving checkpoints for 47264 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


I0721 00:20:48.325701 17056 basic_session_run_hooks.py:692] global_step/sec: 6.50819


I0721 00:20:48.481593 17056 basic_session_run_hooks.py:260] loss = 33.67912, step = 47265 (454.037 sec)


I0721 00:21:03.478185 17056 basic_session_run_hooks.py:692] global_step/sec: 194.964


I0721 00:21:03.480181 17056 basic_session_run_hooks.py:260] loss = 56.471146, step = 50219 (15.005 sec)


I0721 00:21:19.715315 17056 basic_session_run_hooks.py:692] global_step/sec: 181.918


I0721 00:21:19.717310 17056 basic_session_run_hooks.py:260] loss = 42.30977, step = 53173 (16.237 sec)


I0721 00:21:37.673298 17056 basic_session_run_hooks.py:692] global_step/sec: 164.495


I0721 00:21:37.676291 17056 basic_session_run_hooks.py:260] loss = 42.55323, step = 56127 (17.959 sec)


I0721 00:21:53.500979 17056 basic_session_run_hooks.py:606] Saving checkpoints for 59080 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


W0721 00:21:53.819128 17056 deprecation.py:323] From D:\01.Programming\PycharmProjects\Recommenders-movielens\venv\lib\site-packages\tensorflow\python\training\saver.py:960: remove_checkpoint (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to delete files with this prefix.


I0721 00:27:01.299547 17056 basic_session_run_hooks.py:692] global_step/sec: 9.12781


I0721 00:27:01.393595 17056 basic_session_run_hooks.py:260] loss = 47.4302, step = 59081 (323.711 sec)


I0721 00:27:01.406261 17056 basic_session_run_hooks.py:606] Saving checkpoints for 59083 into C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt.


I0721 00:27:03.059840 17056 estimator.py:368] Loss for final step: 46.8546.


In [68]:
if EVALUATE_WHILE_TRAINING:
    for m, v in evaluation_logger.get_log().items():
        pm.record("eval_{}".format(m), v)

AttributeError: module 'papermill' has no attribute 'record'

In [56]:
if len(RATING_METRICS) > 0:
    predictions = list(model.predict(input_fn=tf_utils.pandas_input_fn(df=test)))
    prediction_df = test.drop(RATING_COL, axis=1)
    prediction_df['prediction'] = [p['predictions'][0] for p in predictions]
    prediction_df['prediction'].describe()
    
    rating_results = {}
    for m in RATING_METRICS:
        fn = getattr(python_evaluation, m)
        result = fn(test, prediction_df, **cols)
        # pm.record(m, result)
        rating_results[m] = result
    print(rating_results)

I0721 00:29:30.605366 17056 estimator.py:1145] Calling model_fn.


I0721 00:29:31.941791 17056 estimator.py:1147] Done calling model_fn.


I0721 00:29:32.328760 17056 monitored_session.py:240] Graph was finalized.


I0721 00:29:32.335739 17056 saver.py:1280] Restoring parameters from C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt-59083


I0721 00:29:32.474368 17056 session_manager.py:500] Running local_init_op.


I0721 00:29:32.529221 17056 session_manager.py:502] Done running local_init_op.


{'rmse': 0.8884535023240029, 'mae': 0.6773814275281725, 'rsquared': 0.26773644397128427, 'exp_var': 0.26930611003890714}


In [57]:
if len(RANKING_METRICS) > 0:
    predictions = list(model.predict(input_fn=tf_utils.pandas_input_fn(df=ranking_pool)))
    prediction_df = ranking_pool.copy()
    prediction_df['prediction'] = [p['predictions'][0] for p in predictions]

    ranking_results = {}
    for m in RANKING_METRICS:
        fn = getattr(python_evaluation, m)
        result = fn(test, prediction_df, **{**cols, 'k': TOP_K})
        # pm.record(m, result)
        ranking_results[m] = result
    print(ranking_results)

I0721 00:30:15.908235 17056 estimator.py:1145] Calling model_fn.


I0721 00:30:17.013307 17056 estimator.py:1147] Done calling model_fn.


I0721 00:30:17.262639 17056 monitored_session.py:240] Graph was finalized.


I0721 00:30:17.268598 17056 saver.py:1280] Restoring parameters from C:\Users\alsoj\AppData\Local\Temp\tmpy4ch14vb\model.ckpt-59083


I0721 00:30:17.371322 17056 session_manager.py:500] Running local_init_op.


I0721 00:30:17.416238 17056 session_manager.py:502] Done running local_init_op.


{'map_at_k': 3.2144005143040823e-06, 'ndcg_at_k': 0.0003608061742591478, 'precision_at_k': 0.0001639344262295082, 'recall_at_k': 3.2144005143040823e-06}


In [62]:
os.makedirs(EXPORT_DIR_BASE, exist_ok=True)

In [65]:
tf.logging.set_verbosity(tf.logging.ERROR)

train_rcvr_fn = tf.contrib.estimator.build_supervised_input_receiver_fn_from_input_fn(
    train_fn
)
eval_rcvr_fn = tf.contrib.estimator.build_supervised_input_receiver_fn_from_input_fn(
    tf_utils.pandas_input_fn(df=test, y_col=RATING_COL)
)
serve_rcvr_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
    tf.feature_column.make_parse_example_spec(wide_columns+deep_columns)
)
rcvr_fn_map = {
    tf.estimator.ModeKeys.TRAIN: train_rcvr_fn,
    tf.estimator.ModeKeys.EVAL: eval_rcvr_fn,
    tf.estimator.ModeKeys.PREDICT: serve_rcvr_fn
}

export_dir = tf.contrib.estimator.export_all_saved_models(
    model,
    export_dir_base=EXPORT_DIR_BASE,
    input_receiver_fn_map=rcvr_fn_map
)
# pm.record('saved_model_dir', str(export_dir))
print("Model exported to", str(export_dir))

Model exported to b'./outputs/model\\1563665171'
