In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import numpy as np
import pandas as pd
import tensorflow as tf

URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"

# Order is important for the csv-readers, so we use an OrderedDict here.
COLUMN_TYPES = collections.OrderedDict([
    ("symboling", int),
    ("normalized-losses", float),
    ("make", str),
    ("fuel-type", str),
    ("aspiration", str),
    ("num-of-doors", str),
    ("body-style", str),
    ("drive-wheels", str),
    ("engine-location", str),
    ("wheel-base", float),
    ("length", float),
    ("width", float),
    ("height", float),
    ("curb-weight", float),
    ("engine-type", str),
    ("num-of-cylinders", str),
    ("engine-size", float),
    ("fuel-system", str),
    ("bore", float),
    ("stroke", float),
    ("compression-ratio", float),
    ("horsepower", float),
    ("peak-rpm", float),
    ("city-mpg", float),
    ("highway-mpg", float),
    ("price", float)
])


def raw_dataframe():
  """Load the automobile data set as a pd.DataFrame."""
  # Download and cache the data
  path = tf.keras.utils.get_file(URL.split("/")[-1], URL)

  # Load it into a pandas DataFrame
  df = pd.read_csv(path, names=COLUMN_TYPES.keys(),
                   dtype=COLUMN_TYPES, na_values="?")

  return df


def load_data(y_name="price", train_fraction=0.7, seed=None):
  """Load the automobile data set and split it train/test and features/label.

  A description of the data is available at:
    https://archive.ics.uci.edu/ml/datasets/automobile

  The data itself can be found at:
    https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data

  Args:
    y_name: the column to return as the label.
    train_fraction: the fraction of the data set to use for training.
    seed: The random seed to use when shuffling the data. `None` generates a
      unique shuffle every run.
  Returns:
    a pair of pairs where the first pair is the training data, and the second
    is the test data:
    `(x_train, y_train), (x_test, y_test) = load_data(...)`
    `x` contains a pandas DataFrame of features, while `y` contains the label
    array.
  """
  # Load the raw data columns.
  data = raw_dataframe()

  # Delete rows with unknowns
  data = data.dropna()

  # Shuffle the data
  np.random.seed(seed)

  # Split the data into train/test subsets.
  x_train = data.sample(frac=train_fraction, random_state=seed)
  x_test = data.drop(x_train.index)

  # Extract the label from the features DataFrame.
  y_train = x_train.pop(y_name)
  y_test = x_test.pop(y_name)

  return (x_train, y_train), (x_test, y_test)


def make_dataset(batch_sz, x, y=None, shuffle=False, shuffle_buffer_size=1000):
    """Create a slice Dataset from a pandas DataFrame and labels"""
    def input_fn():
        if y is not None:
            dataset = tf.data.Dataset.from_tensor_slices((dict(x), y))
        else:
            dataset = tf.data.Dataset.from_tensor_slices(dict(x))
        if shuffle:
            dataset = dataset.shuffle(shuffle_buffer_size).batch(batch_sz).repeat()
        else:
            dataset = dataset.batch(batch_sz).repeat()
        print(dataset)
        return dataset.make_one_shot_iterator().get_next()

    return input_fn


In [7]:
raw_dataframe()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130.0,mpfi,3.47,2.68,9.00,111.0,5000.0,21.0,27.0,13495.0
1,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130.0,mpfi,3.47,2.68,9.00,111.0,5000.0,21.0,27.0,16500.0
2,1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152.0,mpfi,2.68,3.47,9.00,154.0,5000.0,19.0,26.0,16500.0
3,2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,...,109.0,mpfi,3.19,3.40,10.00,102.0,5500.0,24.0,30.0,13950.0
4,2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,...,136.0,mpfi,3.19,3.40,8.00,115.0,5500.0,18.0,22.0,17450.0
5,2,,audi,gas,std,two,sedan,fwd,front,99.8,...,136.0,mpfi,3.19,3.40,8.50,110.0,5500.0,19.0,25.0,15250.0
6,1,158.0,audi,gas,std,four,sedan,fwd,front,105.8,...,136.0,mpfi,3.19,3.40,8.50,110.0,5500.0,19.0,25.0,17710.0
7,1,,audi,gas,std,four,wagon,fwd,front,105.8,...,136.0,mpfi,3.19,3.40,8.50,110.0,5500.0,19.0,25.0,18920.0
8,1,158.0,audi,gas,turbo,four,sedan,fwd,front,105.8,...,131.0,mpfi,3.13,3.40,8.30,140.0,5500.0,17.0,20.0,23875.0
9,0,,audi,gas,turbo,two,hatchback,4wd,front,99.5,...,131.0,mpfi,3.13,3.40,7.00,160.0,5500.0,16.0,22.0,


In [8]:

(train_x,train_y), (test_x, test_y) = load_data()


# Provide the training input dataset.
train_input_fn = make_dataset(5000, train_x, train_y, False, 1000)

# Provide the validation input dataset.
test_input_fn = make_dataset(5000, test_x, test_y)

# Use the same categorical columns as in `linear_regression_categorical`
body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
  key="body-style", vocabulary_list=body_style_vocab)
make_column = tf.feature_column.categorical_column_with_hash_bucket(
  key="make", hash_bucket_size=50)





In [9]:
train_x

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,num-of-cylinders,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg
159,0,91.0,toyota,diesel,std,four,hatchback,fwd,front,95.7,...,four,110.0,idi,3.27,3.35,22.50,56.0,4500.0,38.0,47.0
78,2,161.0,mitsubishi,gas,std,two,hatchback,fwd,front,93.7,...,four,92.0,2bbl,2.97,3.23,9.40,68.0,5500.0,31.0,38.0
170,2,134.0,toyota,gas,std,two,hardtop,rwd,front,98.4,...,four,146.0,mpfi,3.62,3.50,9.30,116.0,4800.0,24.0,30.0
156,0,91.0,toyota,gas,std,four,sedan,fwd,front,95.7,...,four,98.0,2bbl,3.19,3.03,9.00,70.0,4800.0,30.0,37.0
92,1,122.0,nissan,gas,std,four,sedan,fwd,front,94.5,...,four,97.0,2bbl,3.15,3.29,9.40,69.0,5200.0,31.0,37.0
158,0,91.0,toyota,diesel,std,four,sedan,fwd,front,95.7,...,four,110.0,idi,3.27,3.35,22.50,56.0,4500.0,34.0,36.0
115,0,161.0,peugot,gas,std,four,sedan,rwd,front,107.9,...,four,120.0,mpfi,3.46,3.19,8.40,97.0,5000.0,19.0,24.0
160,0,91.0,toyota,gas,std,four,sedan,fwd,front,95.7,...,four,98.0,2bbl,3.19,3.03,9.00,70.0,4800.0,38.0,47.0
175,-1,65.0,toyota,gas,std,four,hatchback,fwd,front,102.4,...,four,122.0,mpfi,3.31,3.54,8.70,92.0,4200.0,27.0,32.0
86,1,125.0,mitsubishi,gas,std,four,sedan,fwd,front,96.3,...,four,122.0,2bbl,3.35,3.46,8.50,88.0,5000.0,25.0,32.0


In [None]:
feature_columns = [
  tf.feature_column.numeric_column(key="curb-weight"),
  tf.feature_column.numeric_column(key="highway-mpg"),
  # Since this is a DNN model, categorical columns must be converted from
  # sparse to dense.
  # Wrap them in an `indicator_column` to create a
  # one-hot vector from the input.
  tf.feature_column.indicator_column(body_style_column),
  # Or use an `embedding_column` to create a trainable vector for each
  # index.
  tf.feature_column.embedding_column(make_column, dimension=3),
]

# Build a DNNRegressor, with 2x20-unit hidden layers, with the feature columns
# defined above as input.
model = tf.estimator.DNNRegressor(
  hidden_units=[20, 20], feature_columns=feature_columns)

# Train the model.
# By default, the Estimators log output every 100 steps.
model.train(input_fn=train_input_fn, steps=2000)

# Evaluate how the model performs on data it has not yet seen.
eval_result = model.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\25472\\AppData\\Local\\Temp\\tmp5wts7was', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000026764A01A90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
<RepeatDataset shapes: ({symboling: (?,), normalized-losses: (?,), make: (?,), fuel-type: (?,), aspira

In [34]:
import tensorflow as tf

data = pd.DataFrame({'col1': [1, 2, 3], 'col2': [1, 2, 3], 'label': [0, 1, 2]})
label = data.pop('label')

print(label)

def input_fn_train():
    def input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((dict(data), label))
        dataset = dataset.batch(256).repeat()
        print(dataset)
        return dataset.make_one_shot_iterator().get_next()
    return input_fn

train_input_f = input_fn_train()

feature_columns = [
    tf.feature_column.numeric_column(key="col1"),
    tf.feature_column.numeric_column(key="col2")
]
model = tf.estimator.DNNRegressor(hidden_units=[20, 20], feature_columns=feature_columns)
model.train(input_fn=train_input_f, steps=20000)


0    0
1    1
2    2
Name: label, dtype: int64
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\25472\\AppData\\Local\\Temp\\tmp6m6keiem', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000013FDE63DBA8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
<RepeatDataset shapes: ({col1: (?,), col2: (?,)}, (?,))

INFO:tensorflow:global_step/sec: 296.649
INFO:tensorflow:loss = 1.1033396e-12, step = 6600 (0.336 sec)
INFO:tensorflow:global_step/sec: 300.203
INFO:tensorflow:loss = 1.1033396e-12, step = 6700 (0.334 sec)
INFO:tensorflow:global_step/sec: 300.202
INFO:tensorflow:loss = 1.1033396e-12, step = 6800 (0.332 sec)
INFO:tensorflow:global_step/sec: 303.841
INFO:tensorflow:loss = 1.1033396e-12, step = 6900 (0.329 sec)
INFO:tensorflow:global_step/sec: 302.011
INFO:tensorflow:loss = 1.1033396e-12, step = 7000 (0.331 sec)
INFO:tensorflow:global_step/sec: 300.202
INFO:tensorflow:loss = 1.1033396e-12, step = 7100 (0.333 sec)
INFO:tensorflow:global_step/sec: 297.53
INFO:tensorflow:loss = 1.1033396e-12, step = 7200 (0.336 sec)
INFO:tensorflow:global_step/sec: 303.841
INFO:tensorflow:loss = 1.1033396e-12, step = 7300 (0.329 sec)
INFO:tensorflow:global_step/sec: 301.104
INFO:tensorflow:loss = 1.1033396e-12, step = 7400 (0.332 sec)
INFO:tensorflow:global_step/sec: 300.202
INFO:tensorflow:loss = 1.1033396e

INFO:tensorflow:loss = 1.1033396e-12, step = 14500 (0.341 sec)
INFO:tensorflow:global_step/sec: 299.306
INFO:tensorflow:loss = 1.1033396e-12, step = 14600 (0.334 sec)
INFO:tensorflow:global_step/sec: 302.923
INFO:tensorflow:loss = 1.1033396e-12, step = 14700 (0.330 sec)
INFO:tensorflow:global_step/sec: 300.202
INFO:tensorflow:loss = 1.1033396e-12, step = 14800 (0.333 sec)
INFO:tensorflow:global_step/sec: 300.203
INFO:tensorflow:loss = 1.1033396e-12, step = 14900 (0.333 sec)
INFO:tensorflow:global_step/sec: 301.104
INFO:tensorflow:loss = 1.1033396e-12, step = 15000 (0.332 sec)
INFO:tensorflow:global_step/sec: 297.53
INFO:tensorflow:loss = 1.1033396e-12, step = 15100 (0.336 sec)
INFO:tensorflow:global_step/sec: 303.841
INFO:tensorflow:loss = 1.1033396e-12, step = 15200 (0.329 sec)
INFO:tensorflow:global_step/sec: 301.104
INFO:tensorflow:loss = 1.1033396e-12, step = 15300 (0.332 sec)
INFO:tensorflow:global_step/sec: 299.306
INFO:tensorflow:loss = 1.1033396e-12, step = 15400 (0.334 sec)
IN

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x13fde748518>