In [1]:
import tensorflow as tf

# Categorical base columns with a fixed, explicitly listed vocabulary.
gender = tf.contrib.layers.sparse_column_with_keys(
    column_name="gender",
    keys=["Female", "Male"],
)
race = tf.contrib.layers.sparse_column_with_keys(
    column_name="race",
    keys=["Amer-Indian-Eskimo", "Asian-Pac-Islander", "Black", "Other", "White"],
)

# Categorical base columns without a listed vocabulary: each string value is
# hashed into one of `hash_bucket_size` buckets.
education = tf.contrib.layers.sparse_column_with_hash_bucket(
    "education", hash_bucket_size=1000)
marital_status = tf.contrib.layers.sparse_column_with_hash_bucket(
    "marital_status", hash_bucket_size=100)
relationship = tf.contrib.layers.sparse_column_with_hash_bucket(
    "relationship", hash_bucket_size=100)
workclass = tf.contrib.layers.sparse_column_with_hash_bucket(
    "workclass", hash_bucket_size=100)
occupation = tf.contrib.layers.sparse_column_with_hash_bucket(
    "occupation", hash_bucket_size=1000)
native_country = tf.contrib.layers.sparse_column_with_hash_bucket(
    "native_country", hash_bucket_size=1000)

# Continuous base columns.
age = tf.contrib.layers.real_valued_column("age")
education_num = tf.contrib.layers.real_valued_column("education_num")
capital_gain = tf.contrib.layers.real_valued_column("capital_gain")
capital_loss = tf.contrib.layers.real_valued_column("capital_loss")
hours_per_week = tf.contrib.layers.real_valued_column("hours_per_week")

# Age discretised at the listed boundaries so it can be used as a categorical
# feature (and inside feature crosses).
age_buckets = tf.contrib.layers.bucketized_column(
    age,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65],
)

  from ._conv import register_converters as _register_converters


In [2]:
# Feature columns fed to the linear ("wide") half of the model.
wide_columns = [
    # Sparse base columns used as-is.
    gender,
    native_country,
    education,
    occupation,
    workclass,
    marital_status,
    relationship,
    age_buckets,
    # Feature crosses, each hashed into a fixed number of buckets.
    tf.contrib.layers.crossed_column(
        [education, occupation], hash_bucket_size=10 ** 4),
    tf.contrib.layers.crossed_column(
        [native_country, occupation], hash_bucket_size=10 ** 4),
    tf.contrib.layers.crossed_column(
        [age_buckets, race, occupation], hash_bucket_size=10 ** 6),
]

In [3]:
# Feature columns fed to the DNN ("deep") half of the model: every sparse
# categorical column is wrapped in an 8-dimensional embedding, followed by the
# continuous columns unchanged.
deep_columns = [
    tf.contrib.layers.embedding_column(sparse_column, dimension=8)
    for sparse_column in (
        workclass,
        education,
        marital_status,
        gender,
        relationship,
        race,
        native_country,
        occupation,
    )
] + [age, education_num, capital_gain, capital_loss, hours_per_week]



In [4]:
import tempfile

# Checkpoints are written to a freshly created temporary directory.
model_dir = tempfile.mkdtemp()

# Combined linear + DNN classifier: the linear part consumes `wide_columns`,
# the DNN part consumes `deep_columns` through hidden layers of 100 and 50 units.
m = tf.contrib.learn.DNNLinearCombinedClassifier(
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100, 50],
    model_dir=model_dir,
)

Instructions for updating:
Please set fix_global_step_increment_bug=True and update training steps in your pipeline. See pydoc for details.
Instructions for updating:
Please switch to tf.contrib.estimator.*_head.
Instructions for updating:
Please replace uses of any Estimator from tf.contrib.learn with an Estimator from tf.estimator.*
Instructions for updating:
When switching to tf.estimator.Estimator, use tf.estimator.RunConfig instead.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001A16C6B68D0>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_train_distribute': None, '_eval_distribute': None, '_device_fn': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs

In [None]:
import pandas as pd
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` has been loaded, so `urllib.request.urlretrieve` could
# raise AttributeError on a fresh kernel.
import urllib.request

# Column names for the data sets (the downloaded CSV files carry no header row).
COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
  "marital_status", "occupation", "relationship", "race", "gender",
  "capital_gain", "capital_loss", "hours_per_week", "native_country", "income_bracket"]
# Name of the derived binary label column added to both frames below.
LABEL_COLUMN = 'label'
CATEGORICAL_COLUMNS = ["workclass", "education", "marital_status", "occupation",
                       "relationship", "race", "gender", "native_country"]
CONTINUOUS_COLUMNS = ["age", "education_num", "capital_gain", "capital_loss",
                      "hours_per_week"]

# Download the training and test data into the current working directory.
# NOTE: despite the `_iris` suffix, these files hold the UCI "adult" data
# fetched from the URLs below. Alternatively, download the files yourself and
# point train_file and test_file at your own paths.
train_file = 'train_iris'
test_file = 'test_iris'
urllib.request.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file)
urllib.request.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", test_file)

# Read the training and test data sets into pandas DataFrames.
# skipinitialspace strips the blank that follows each comma; the first line of
# the test file is skipped via skiprows=1.
df_train = pd.read_csv(train_file, names=COLUMNS, skipinitialspace=True)
df_test = pd.read_csv(test_file, names=COLUMNS, skipinitialspace=True, skiprows=1)

# Binary label: 1 when the income bracket text contains '>50K', else 0.
# A substring match is used rather than equality, so any extra characters
# around '>50K' in the raw value do not matter.
df_train[LABEL_COLUMN] = df_train['income_bracket'].str.contains('>50K', regex=False).astype(int)
df_test[LABEL_COLUMN] = df_test['income_bracket'].str.contains('>50K', regex=False).astype(int)

In [15]:

def merge_two_dicts(x, y):
    """Return a shallow copy of `x` updated with the entries of `y`.

    Neither input is modified. When a key is present in both, the value
    from `y` wins.
    """
    merged = x.copy()
    merged.update(y)
    return merged

def input_fn(df):
  """Convert a DataFrame into the (features, label) pair used by the estimator.

  Args:
    df: DataFrame containing every column named in CONTINUOUS_COLUMNS and
      CATEGORICAL_COLUMNS, plus the LABEL_COLUMN column.

  Returns:
    A tuple `(feature_cols, label)`: `feature_cols` maps each feature column
    name to a constant Tensor (continuous columns) or a tf.SparseTensor with
    one value per row (categorical columns); `label` is a constant Tensor
    holding the LABEL_COLUMN values.
  """
  # Every categorical column has one value per row, so the sparse indices
  # ([row, 0]) and the dense shape ([rows, 1]) are identical for all of them.
  # Build them once instead of once per column.
  num_rows = len(df)
  sparse_indices = [[row, 0] for row in range(num_rows)]
  sparse_shape = [num_rows, 1]

  # Continuous feature name -> column values stored in a constant Tensor.
  continuous_cols = {k: tf.constant(df[k].values)
                     for k in CONTINUOUS_COLUMNS}
  # Categorical feature name -> column values stored in a tf.SparseTensor.
  categorical_cols = {k: tf.SparseTensor(
      indices=sparse_indices,
      values=df[k].values,
      dense_shape=sparse_shape)
                      for k in CATEGORICAL_COLUMNS}
  # Merges the two dictionaries into one.
  feature_cols = merge_two_dicts(continuous_cols, categorical_cols)
  # Converts the label column into a constant Tensor.
  label = tf.constant(df[LABEL_COLUMN].values)
  # Returns the feature columns and the label.
  return feature_cols, label

def train_input_fn():
  """Return the (features, label) tensors built from the training frame."""
  features, label = input_fn(df_train)
  return features, label

def eval_input_fn():
  """Return the (features, label) tensors built from the test frame."""
  features, label = input_fn(df_test)
  return features, label

In [17]:
# Train on the training frame, then run a single evaluation step on the test frame.
m.fit(input_fn=train_input_fn, steps=20000)
results = m.evaluate(input_fn=eval_input_fn, steps=1)

# Print every reported metric, ordered by metric name.
for key, value in sorted(results.items()):
    print("%s: %s" % (key, value))

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\25472\AppData\Local\Temp\tmpqhq7vphi\model.ckpt-202
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 202 into C:\Users\25472\AppData\Local\Temp\tmpqhq7vphi\model.ckpt.
INFO:tensorflow:loss = 0.6355717, step = 202
INFO:tensorflow:global_step/sec: 7.24834
INFO:tensorflow:loss = 0.59581727, step = 402 (23.175 sec)
INFO:tensorflow:global_step/sec: 10.8434
INFO:tensorflow:global_step/sec: 22.5269
INFO:tensorflow:loss = 0.41993442, step = 602 (9.019 sec)
INFO:tensorflow:global_step/sec: 21.8533
INFO:tensorflow:global_step/sec: 22.5172
INFO:tensorflow:loss = 0.36740306, step = 802 (8.912 sec)
INFO:tensorflow:global_step/sec: 22.3988
INFO:tensorflow:global_step/sec: 22.3157
INFO:tensorflow:loss = 0.3686035, step = 1002 (8.944 sec)
INFO:tensorflow:global_step/sec: 22.4332
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 19.3406
INFO:tensorflow:global_step/sec: 20.216
INFO:tensorflow:loss = 0.30532143, step = 9602 (10.109 sec)
INFO:tensorflow:global_step/sec: 20.2681
INFO:tensorflow:global_step/sec: 20.6195
INFO:tensorflow:loss = 0.30507264, step = 9802 (9.791 sec)
INFO:tensorflow:global_step/sec: 20.703
INFO:tensorflow:global_step/sec: 21.2891
INFO:tensorflow:loss = 0.3048206, step = 10002 (9.534 sec)
INFO:tensorflow:global_step/sec: 21.3024
INFO:tensorflow:global_step/sec: 21.5902
INFO:tensorflow:loss = 0.30458426, step = 10202 (9.311 sec)
INFO:tensorflow:global_step/sec: 21.9894
INFO:tensorflow:global_step/sec: 21.9376
INFO:tensorflow:loss = 0.3043545, step = 10402 (9.106 sec)
INFO:tensorflow:global_step/sec: 21.9376
INFO:tensorflow:loss = 0.3041301, step = 10602 (9.077 sec)
INFO:tensorflow:global_step/sec: 21.5584
INFO:tensorflow:global_step/sec: 20.7872
INFO:tensorflow:loss = 0.30391645, step = 10802 (9.514 sec)
INFO:tensorflow:global_step/sec: 21.2493
INFO:tensorf

KeyboardInterrupt: 

In [10]:
import tensorflow as tf

# Synthetic table: three identical integer columns counting 0..2999.
data = pd.DataFrame(
    {column_name: range(3000) for column_name in ('col1', 'col2', 'label')}
)
# Split the target off; `data` keeps only the feature columns afterwards.
label = data.pop('label')
def input_fn_train():
    """Build the training input function passed to the estimator.

    Returns:
        A zero-argument callable that, when invoked, builds a dataset from
        the module-level `data` (features) and `label` (targets) and returns
        the next `(features, label)` batch tensors from a one-shot iterator.
    """
    def input_fn():
        # Slice row-wise so each dataset element is one example.
        # `from_tensors` would wrap the whole table into a single element,
        # which cannot be batched into 256-row batches.
        dataset = tf.data.Dataset.from_tensor_slices((dict(data), label))
        # Dataset transformations return a NEW dataset; the result must be
        # reassigned or the repeat/batch calls have no effect.
        dataset = dataset.repeat()
        dataset = dataset.batch(256)
        return dataset.make_one_shot_iterator().get_next()
    return input_fn

# Input function handed to the estimator below.
train_input_f = input_fn_train()

# One numeric feature column per input column.
feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in ("col1", "col2")
]

# DNN regressor with two hidden layers of 20 units each.
model = tf.estimator.DNNRegressor(
    feature_columns=feature_columns,
    hidden_units=[20, 20],
)
model.train(input_fn=train_input_f, steps=20000)



INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\25472\\AppData\\Local\\Temp\\tmp880ivbik', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001CB189A7828>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Checkp

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x1cb14307da0>

In [6]:
import pandas as pd 

# Small four-row example frame, written row by row as (col1, col2, label).
data = pd.DataFrame(
    [
        [1, 2, 0],
        [2, 3, 1],
        [3, 4, 2],
        [4, 5, 3],
    ],
    columns=['col1', 'col2', 'label'],
)

In [7]:
# Bare last expression: the notebook renders the frame as the cell's output.
data

Unnamed: 0,col1,col2,label
0,1,2,0
1,2,3,1
2,3,4,2
3,4,5,3
