### Wide & Deep 튜토리얼 버전

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from python_splitters import python_random_split


In [32]:
#######################
# Dataset 100K 용 세팅
#######################

# Load each data set (users, movies, and ratings).
users_cols = ['userid', 'age', 'gender', 'occupation', 'zip_code']
users = pd.read_csv('./data/100K/u.user', sep='|', names=users_cols, encoding='latin-1')

ratings_cols = ['userid', 'movieid', 'rating', 'timestamp']
ratings = pd.read_csv('./data/100K/u.data', sep='\t', names=ratings_cols, encoding='latin-1')

# The movies file contains a binary feature for each genre.
genre_cols = [
    "genre_unknown", "action", "adventure", "animation", "children", "comedy",
    "crime", "documentary", "drama", "fantasy", "film-noir", "horror",
    "musical", "mystery", "romance", "sci-fi", "thriller", "war", "western"
]
movies_cols = [
    'movieid', 'title', 'release_date', "video_release_date", "imdb_url"
] + genre_cols
movies = pd.read_csv(
    './data/100K/u.item', sep='|', names=movies_cols, encoding='latin-1')

movies = movies.dropna(subset=['release_date'])

users["userid"] = users['userid'].apply(lambda x: str(x))
users["zip_code"] = users['zip_code'].apply(lambda x: str(x))
ratings["userid"] = ratings['userid'].apply(lambda x: str(x))
ratings["movieid"] = ratings['movieid'].apply(lambda x: str(x))
movies["movieid"] = movies['movieid'].apply(lambda x: str(x))
movies["release_date"] = movies['release_date'].apply(lambda x: int(str(x).split('-')[-1]))

# Create one merged DataFrame containing all the movielens data.
df_data = ratings.merge(movies, on='movieid').merge(users, on='userid')

# df_data["release_date"] = df_data['release_date'].apply(lambda x: int(x))

df_data = df_data.drop('title', axis=1)
df_data = df_data.drop('video_release_date', axis=1)
df_data = df_data.drop('imdb_url', axis=1) 

print("df_data::: \n", df_data.head())

df_data::: 
   userid movieid  rating  timestamp  release_date  genre_unknown  action  \
0    196     242       3  881250949          1997              0       0   
1    196     257       2  881251577          1997              0       1   
2    196     111       4  881251793          1996              0       0   
3    196      25       4  881251955          1996              0       0   
4    196     382       4  881251843          1994              0       0   

   adventure  animation  children  ...  mystery  romance  sci-fi  thriller  \
0          0          0         0  ...        0        0       0         0   
1          1          0         0  ...        0        0       1         0   
2          0          0         0  ...        0        1       0         0   
3          0          0         0  ...        0        0       0         0   
4          0          0         0  ...        0        0       0         0   

   war  western  age  gender  occupation  zip_code  
0    0  

In [64]:
# Categorical base columns.
userid = tf.contrib.layers.sparse_column_with_hash_bucket("userid", hash_bucket_size=1000)
movieid = tf.contrib.layers.sparse_column_with_hash_bucket("movieid", hash_bucket_size=1000)
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["M", "W"])
# genre = tf.contrib.layers.sparse_column_with_keys(column_name="genre", keys=[
#   "genre_unknown", "action", "adventure", "animation", "children", "comedy",
#     "crime", "documentary", "drama", "fantasy", "film-noir", "horror",
#     "musical", "mystery", "romance", "sci-fi", "thriller", "war", "western"])
occupation = tf.contrib.layers.sparse_column_with_hash_bucket("occupation", hash_bucket_size=1000)
zip_code = tf.contrib.layers.sparse_column_with_hash_bucket("zip_code", hash_bucket_size=1000)

# Continuous base columns.
age = tf.contrib.layers.real_valued_column("age")
age_buckets = tf.contrib.layers.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
release_date = tf.contrib.layers.real_valued_column("release_date")

genre_unknown = tf.contrib.layers.real_valued_column("genre_unknown")
action = tf.contrib.layers.real_valued_column("action")
adventure = tf.contrib.layers.real_valued_column("adventure")
animation = tf.contrib.layers.real_valued_column("animation")
children = tf.contrib.layers.real_valued_column("children")
comedy = tf.contrib.layers.real_valued_column("comedy")
crime = tf.contrib.layers.real_valued_column("crime")
documentary = tf.contrib.layers.real_valued_column("documentary")
drama = tf.contrib.layers.real_valued_column("drama")
fantasy = tf.contrib.layers.real_valued_column("fantasy")
filmnoir = tf.contrib.layers.real_valued_column("film-noir")
horror = tf.contrib.layers.real_valued_column("horror")
musical = tf.contrib.layers.real_valued_column("musical")
mystery = tf.contrib.layers.real_valued_column("mystery")
romance = tf.contrib.layers.real_valued_column("romance")
scifi = tf.contrib.layers.real_valued_column("sci-fi")
thriller = tf.contrib.layers.real_valued_column("thriller")
war = tf.contrib.layers.real_valued_column("war")
western = tf.contrib.layers.real_valued_column("western")

In [65]:
wide_columns = [userid, movieid, gender, occupation, zip_code, release_date, age_buckets, 
                genre_unknown, action, adventure, animation, children, comedy, crime, documentary, drama, 
                fantasy, filmnoir, horror, musical, mystery, romance, scifi, thriller, war, western,  
    tf.contrib.layers.crossed_column([userid, movieid], hash_bucket_size=int(1e4))
]

In [10]:
deep_columns = [
  tf.contrib.layers.embedding_column(userid, dimension=8),
  tf.contrib.layers.embedding_column(movieid, dimension=8),
  # tf.contrib.layers.embedding_column(genre, dimension=8),
  tf.contrib.layers.embedding_column(gender, dimension=8),
  tf.contrib.layers.embedding_column(occupation, dimension=8),
  tf.contrib.layers.embedding_column(zip_code, dimension=8),
  age, release_date, genre_unknown, action, adventure, animation, children, comedy, crime, documentary, drama, 
  fantasy, filmnoir, horror, musical, mystery, romance, scifi, thriller, war, western
]

W0820 20:10:21.762389 24848 feature_column.py:1091] The default stddev value of initializer was changed from "1/sqrt(vocab_size)" to "1/sqrt(dimension)" in core implementation (tf.feature_column.embedding_column).


W0820 20:10:21.764377 24848 feature_column.py:1091] The default stddev value of initializer was changed from "1/sqrt(vocab_size)" to "1/sqrt(dimension)" in core implementation (tf.feature_column.embedding_column).


W0820 20:10:21.765374 24848 feature_column.py:1091] The default stddev value of initializer was changed from "1/sqrt(vocab_size)" to "1/sqrt(dimension)" in core implementation (tf.feature_column.embedding_column).


W0820 20:10:21.767370 24848 feature_column.py:1091] The default stddev value of initializer was changed from "1/sqrt(vocab_size)" to "1/sqrt(dimension)" in core implementation (tf.feature_column.embedding_column).


W0820 20:10:21.774351 24848 feature_column.py:1091] The default stddev value of initializer was changed from "1/sqrt(vocab_size)" to "1/sqrt(dimension)" in core implementation (tf.feature_column.embedding_column).


In [57]:
import tempfile
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.DNNLinearCombinedClassifier(
    model_dir=model_dir,
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[1024, 512])

In [48]:
import pandas as pd
import urllib

# Define the column names for the data sets.
COLUMNS = ['userid','movieid','occupation','gender','zip_code','age', 'release_date', "genre_unknown", "action", "adventure", "animation", "children", "comedy",
    "crime", "documentary", "drama", "fantasy", "film-noir", "horror",
    "musical", "mystery", "romance", "sci-fi", "thriller", "war", "western"]
LABEL_COLUMN = 'rating'
CATEGORICAL_COLUMNS = ['userid','movieid','occupation','gender','zip_code']
CONTINUOUS_COLUMNS = ['age', 'release_date', "genre_unknown", "action", "adventure", "animation", "children", "comedy",
    "crime", "documentary", "drama", "fantasy", "film-noir", "horror",
    "musical", "mystery", "romance", "sci-fi", "thriller", "war", "western"]


# Read the training and test data sets into Pandas dataframe.
###############################
# Train, Test 데이터 나누기
###############################
df_train, df_test = python_random_split(
    df_data,
    ratio=0.75,
    seed=42
)

print("Train = {}, test = {}".format(len(df_train), len(df_test)))

# df_train = pd.read_csv(train_file, names=COLUMNS, skipinitialspace=True)
# df_test = pd.read_csv(test_file, names=COLUMNS, skipinitialspace=True, skiprows=1)
df_train[LABEL_COLUMN] = df_train['rating']
df_test[LABEL_COLUMN] = df_test['rating']


def input_fn(df):
  # Creates a dictionary mapping from each continuous feature column name (k) to
  # the values of that column stored in a constant Tensor.

  continuous_cols = {k: tf.constant(df[k].values)
                     for k in CONTINUOUS_COLUMNS}

  # Creates a dictionary mapping from each categorical feature column name (k)
  # to the values of that column stored in a tf.SparseTensor.
  categorical_cols = {k: tf.SparseTensor(
      indices=[[i, 0] for i in range(df[k].size)],
      values=df[k].values,
      dense_shape=[df[k].size, 1])
                      for k in CATEGORICAL_COLUMNS}
  
  # print("continuous_cols.items() ::: ", continuous_cols.items())
  # print("categorical_cols.items() ::: ", categorical_cols.items())
  
  # Merges the two dictionaries into one.
  
  # feature_cols = dict(continuous_cols.items() + categorical_cols.items())
  feature_cols = {**continuous_cols, **categorical_cols}
  
  # Converts the label column into a constant Tensor.
  label = tf.constant(df[LABEL_COLUMN].values)
  
  # Returns the feature columns and the label.
  return feature_cols, label

def train_input_fn():
  return input_fn(df_train)

def eval_input_fn():
  return input_fn(df_test)

print("train_input_fn ::: \n", train_input_fn())

Train = 74993, test = 24998


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


train_input_fn :::  ({'age': <tf.Tensor 'Const_308:0' shape=(74993,) dtype=int64>, 'release_date': <tf.Tensor 'Const_309:0' shape=(74993,) dtype=int64>, 'genre_unknown': <tf.Tensor 'Const_310:0' shape=(74993,) dtype=int64>, 'action': <tf.Tensor 'Const_311:0' shape=(74993,) dtype=int64>, 'adventure': <tf.Tensor 'Const_312:0' shape=(74993,) dtype=int64>, 'animation': <tf.Tensor 'Const_313:0' shape=(74993,) dtype=int64>, 'children': <tf.Tensor 'Const_314:0' shape=(74993,) dtype=int64>, 'comedy': <tf.Tensor 'Const_315:0' shape=(74993,) dtype=int64>, 'crime': <tf.Tensor 'Const_316:0' shape=(74993,) dtype=int64>, 'documentary': <tf.Tensor 'Const_317:0' shape=(74993,) dtype=int64>, 'drama': <tf.Tensor 'Const_318:0' shape=(74993,) dtype=int64>, 'fantasy': <tf.Tensor 'Const_319:0' shape=(74993,) dtype=int64>, 'film-noir': <tf.Tensor 'Const_320:0' shape=(74993,) dtype=int64>, 'horror': <tf.Tensor 'Const_321:0' shape=(74993,) dtype=int64>, 'musical': <tf.Tensor 'Const_322:0' shape=(74993,) dtype=

In [49]:
m.fit(input_fn=train_input_fn, steps=200)
results = m.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
  print("%s: %s" % (key, results[key]))

W0820 20:39:48.480861 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.487843 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.494824 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.502804 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.511780 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.519759 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.528278 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.535258 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.544234 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.551214 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.562185 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.569167 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.579141 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.586123 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.603078 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.613049 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.620033 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.628527 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.635510 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.645483 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:48.652467 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:39:53.581818 24848 head.py:2027] Casting <dtype: 'int64'> labels to bool.


W0820 20:39:53.883501 24848 head.py:2027] Casting <dtype: 'int64'> labels to bool.


W0820 20:39:54.015152 24848 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.


W0820 20:39:54.059034 24848 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.


W0820 20:39:55.553510 24848 deprecation.py:323] From D:\01.Programming\PycharmProjects\Recommenders-movielens\venv\lib\site-packages\tensorflow\python\training\saver.py:1066: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.


W0820 20:41:23.483433 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.488421 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.494405 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.499393 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.506376 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.511361 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.516346 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.521333 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.527318 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.533300 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.538295 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.545271 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.551253 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.556241 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.562227 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.569206 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.574193 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.580177 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.592144 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.600123 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:23.606107 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 20:41:24.912611 24848 head.py:2027] Casting <dtype: 'int64'> labels to bool.


W0820 20:41:25.120057 24848 head.py:2027] Casting <dtype: 'int64'> labels to bool.


W0820 20:41:25.254697 24848 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.


W0820 20:41:25.295588 24848 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.


accuracy: 0.0600048
accuracy/baseline_label_mean: 3.5290024
accuracy/threshold_0.500000_mean: 0.0600048
auc: 0.0
auc_precision_recall: 1.0
global_step: 404
labels/actual_label_mean: 3.5290024
labels/prediction_mean: 1.0
loss: -47701804.0
precision/positive_threshold_0.500000_mean: 1.0
recall/positive_threshold_0.500000_mean: 1.0


In [50]:
###########################################
# 내가 매긴 평가를 바탕으로 추천
# my_ratings.csv에 각 영화에 대한 평점 입력
# UserId : 999
###########################################
# 데이터 불러오기
my_ratings = pd.read_csv('./data/100K/u.my_rating.csv',
                           sep=",", skiprows=1, names=['userid', 'movieid', 'rating', 'timestamp', 'movieName']
                         , engine='python')

my_ratings["userid"] = my_ratings['userid'].apply(lambda x: str(x))
my_ratings["movieid"] = my_ratings['movieid'].apply(lambda x: str(x))

# print("my_ratings ::: \n", my_ratings)

# Create one merged DataFrame containing my data.
df_my_data = my_ratings.merge(movies, on='movieid').merge(users, on='userid')

# df_data["release_date"] = df_data['release_date'].apply(lambda x: int(x))

df_my_data = df_my_data.drop('movieName', axis=1)
df_my_data = df_my_data.drop('title', axis=1)
df_my_data = df_my_data.drop('video_release_date', axis=1)
df_my_data = df_my_data.drop('imdb_url', axis=1) 

# TEST용 dataset
df_my_test = df_my_data[df_my_data['rating'].isnull()]

# TRAIN용 dataset (평가하지 않은 영화 삭제)
df_my_train = df_my_data.dropna(axis=0)


print("df_my_train::: \n", df_my_train.head())
print("df_my_test::: \n", df_my_test.head())

df_my_train::: 
    userid movieid  rating  timestamp  release_date  genre_unknown  action  \
0     999       1     3.0  892079237          1995              0       0   
8     999       9     4.0  892079239          1995              0       0   
11    999      12     5.0  892079239          1995              0       0   
28    999      29     4.0  892079239          1995              0       1   
49    999      50     4.0  892079239          1977              0       1   

    adventure  animation  children  ...  mystery  romance  sci-fi  thriller  \
0           0          1         1  ...        0        0       0         0   
8           0          0         0  ...        0        0       0         0   
11          0          0         0  ...        0        0       0         1   
28          1          0         0  ...        0        0       0         0   
49          1          0         0  ...        0        1       1         0   

    war  western  age  gender  occupation  zi

In [54]:
##########################
# 모델 추가 학습 학습 시작
##########################
df_my_train[LABEL_COLUMN] = df_my_train['rating']
df_my_test[LABEL_COLUMN] = df_my_test['rating']

def my_train_input_fn():
  return input_fn(df_my_train)

def my_test_input_fn():
  return input_fn(df_my_test)

# my_train_fn = input_fn(df_my_train)
# my_test_fn = input_fn(df_my_test)
# print("my_train_fn ::: ", my_train_fn)

m.fit(input_fn=my_train_input_fn, steps=200)
results = m.evaluate(input_fn=my_test_input_fn, steps=1)
for key in sorted(results):
  print("%s: %s" % (key, results[key]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


ValueError: Labels are incompatible with given information. Given labels: Tensor("Const_21:0", shape=(41,), dtype=float64), required signatures: TensorSignature(dtype=tf.int64, shape=TensorShape([Dimension(74993)]), is_sparse=False).

In [56]:
##############################
# 사용자에게 추천할 영화 Top-k
##############################
# 어떤 영화를 추천했는지 보기 위해 pandas 옵션 세팅
pd.set_option('display.max_columns', None)

predictions = list(m.predict(input_fn=my_test_input_fn))

print("predictions ::: ", predictions)

prediction_df = df_my_test.drop(LABEL_COLUMN, axis=1)
df_my_test['prediction'] = [p['predictions'][0] for p in predictions]    
print(df_my_test.sort_values(['prediction'], ascending=False).head())   


# tsne_movie_embeddings(model)

W0820 21:10:30.715903 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.722881 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.730861 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.751805 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.760784 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.769760 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.779733 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.800675 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.808653 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.818625 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.833585 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.853532 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.873481 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.881457 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.891434 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.899409 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.908387 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.920353 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.927335 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.937307 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.


W0820 21:10:30.944291 24848 feature_column.py:1674] Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.




predictions :::  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

IndexError: invalid index to scalar variable.