In [141]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split

# Read data

In [142]:
course = pd.read_csv('this2.csv', index_col= 0)
obj = ['place', 'education_code', 'gender', 'consumption_level']
for i in obj:
    course[i] = course[i].map(lambda x:str(x))

print course.dtypes
course.head()

customer_id            int64
place                 object
utilization_rate       int64
education_code        object
year_salary          float64
gender                object
age                    int64
consumption_level     object
buy                    int64
dtype: object


Unnamed: 0,customer_id,place,utilization_rate,education_code,year_salary,gender,age,consumption_level,buy
0,1,A,1,3,436283.8862,1,65,5,0
1,2,A,20,3,436283.8862,1,35,5,1
2,3,A,73,3,436283.8862,1,30,4,0
3,4,A,52,3,436283.8862,1,37,2,0
4,5,A,28,3,436283.8862,1,53,4,1


In [143]:
course_train, course_test = train_test_split(course, test_size = 0.3)

# Categorical 轉 sparse

In [145]:
gender =  tf.contrib.layers.sparse_column_with_hash_bucket("gender", hash_bucket_size=2)
education_code =  tf.contrib.layers.sparse_column_with_hash_bucket("education_code", hash_bucket_size=10)
consumption_level =  tf.contrib.layers.sparse_column_with_hash_bucket("consumption_level", hash_bucket_size=10)
place =  tf.contrib.layers.sparse_column_with_hash_bucket("place", hash_bucket_size=30)
age_buckets = tf.contrib.layers.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

# Continuous 

In [146]:
def tensor_real(x):
    return tf.contrib.layers.real_valued_column(x)

In [147]:
year_salary = tensor_real("year_salary")
utilization_rate = tensor_real("utilization_rate")
age = tensor_real("age")

# Wide

In [148]:
wide_columns = [
  gender, education_code, consumption_level, place, age_buckets,
  tf.contrib.layers.crossed_column([education_code, age_buckets], hash_bucket_size=int(1e4)),
  tf.contrib.layers.crossed_column([place, age_buckets], hash_bucket_size=int(1e4)),
  tf.contrib.layers.crossed_column([gender, place, consumption_level, age_buckets], hash_bucket_size=int(1e6))]

# Deep

In [149]:
deep_columns = [
  tf.contrib.layers.embedding_column(consumption_level, dimension=8),
  tf.contrib.layers.embedding_column(education_code, dimension=8),
  tf.contrib.layers.embedding_column(gender, dimension=8),
  tf.contrib.layers.embedding_column(place, dimension=8),
  age, utilization_rate, year_salary]



# 設計Model

In [150]:
import tempfile
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.DNNLinearCombinedClassifier(
    model_dir=model_dir,
    linear_feature_columns=wide_columns,
    linear_optimizer=tf.train.FtrlOptimizer(
                                        learning_rate=0.5,
                                        l1_regularization_strength=0.001,
                                        l2_regularization_strength=0.001),
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[256, 128, 64],
    dnn_optimizer=tf.train.AdamOptimizer()
    )

Instructions for updating:
Please set fix_global_step_increment_bug=True and update training steps in your pipeline. See pydoc for details.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': None, '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_type': None, '_environment': 'local', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x123cb97d0>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_master': ''}


# 訓練 + 預測

In [164]:
import pandas as pd
import urllib

# Define the column names for the data sets.
CATEGORICAL_COLUMNS = ["place", "consumption_level", "gender", "education_code"]
CONTINUOUS_COLUMNS = ["age", "utilization_rate", "year_salary"]


def input_fn(df):
  # Creates a dictionary mapping from each continuous feature column name (k) to
  # the values of that column stored in a constant Tensor.
    continuous_cols = {k: tf.constant(df[k].values)
                     for k in CONTINUOUS_COLUMNS}
  # Creates a dictionary mapping from each categorical feature column name (k)
  # to the values of that column stored in a tf.SparseTensor.
    categorical_cols = {k: tf.SparseTensor(
      indices=[[i, 0] for i in range(df[k].size)],
      values=df[k].values,
      dense_shape=[df[k].size, 1])
                      for k in CATEGORICAL_COLUMNS}
  # Merges the two dictionaries into one.
    feature_cols = dict(continuous_cols.items() + categorical_cols.items())
  # Converts the label column into a constant Tensor.
    label = tf.constant(df['buy'].values)
    #label = tf.constant([list (x) for x in this['all_item']])
  # Returns the feature columns and the label.
    return feature_cols, label

def train_input_fn():
    return input_fn(course_train)

def eval_input_fn():
    return input_fn(course_test)
m.fit(input_fn=train_input_fn, steps=200)
pred =  m.predict(input_fn=lambda: input_fn(course_test))
results = m.evaluate(input_fn=eval_input_fn, steps=1)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
Instructions for updating:
Please switch to tf.summary.histogram. Note that tf.summary.histogram uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
Instructions for updating:
Please switch to tf.summary.histogram. Note that tf.summary.histog

In [165]:
course_test.loc[:, 'pred_item'] = list(pred)

In [168]:
course_test.head()

Unnamed: 0,customer_id,place,utilization_rate,education_code,year_salary,gender,age,consumption_level,buy,pred_item
1423,1424,B,8,3,436283.8862,0,36,2,0,0
5086,5087,J,1,3,436283.8862,0,34,4,0,0
4546,4547,H,-30,3,436283.8862,0,24,2,0,0
1902,1903,D,20,3,436283.8862,0,39,5,1,0
3375,3376,F,8,3,436283.8862,0,27,4,0,0


In [163]:
results

{'accuracy': 0.47965217,
 'accuracy/baseline_label_mean': 0.47965217,
 'accuracy/threshold_0.500000_mean': 0.47965217,
 'auc': 0.5,
 'global_step': 404,
 'labels/actual_label_mean': 0.47965217,
 'labels/prediction_mean': 1.0,
 'loss': 436.0657,
 'precision/positive_threshold_0.500000_mean': 0.47965217,
 'recall/positive_threshold_0.500000_mean': 1.0}