[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/price_rooms_estimator.ipynb)

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [0]:
tf.logging.set_verbosity(tf.logging.INFO)

In [9]:
df = pd.read_csv("https://dl.dropbox.com/s/e2tcsgbcp2in5h5/price_rooms_area.tsv", sep="\t")
df = df[pd.notnull(df["PRICE"])]
df = df[pd.notnull(df["ROOMS"])]
df = df[pd.notnull(df["AREA"])]
df = df.drop(["OBJECTCAT"], axis=1)
df = df[~df["MARKETINGTYPE"].isin(["Miete"])]
df.head()

Unnamed: 0,PRICE,ROOMS,AREA,MARKETINGTYPE
1,595000,9,440,Kauf
2,199000,6,13575,Kauf
3,269000,9,20357,Kauf
5,299000,5,140,Kauf
6,685000,8,380,Kauf


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name):
  """This method will prepare dataset, labels for train, evaluation, test and classes.
     Args:
      df: DataFrame format of datasource.
      label_name: The name of column in datasource which will be as target for train.
     Return: 
       Tuple of (ds_train, ds_eval, ds_test, label_train, label_eval, label_test, classes)
      
  """
  target_label_col = "label" #New column name in original table.
  
  encoder = LabelEncoder()
  label = encoder.fit_transform(df[label_name])
  df.insert(8, target_label_col, label) 
  result_fit = encoder.fit(df[label_name])
  
  random_seed = None
  np.random.seed(random_seed)
  
  ds_train = df.sample(frac=0.9, random_state=random_seed)
  label_train = ds_train[target_label_col]
  
  ds_rest = df.drop(ds_train.index)
  
  ds_eval = ds_rest.sample(frac=0.8, random_state=random_seed)
  label_eval = ds_eval[target_label_col]
  
  ds_test = ds_rest.drop(ds_eval.index)
  label_test = ds_test[target_label_col]
  
  return ds_train[["FTHG", "FTAG"]], ds_eval[["FTHG", "FTAG"]], ds_test[["FTHG", "FTAG"]], label_train, label_eval, label_test, result_fit.classes_

In [0]:
x_train, x_eval, x_test, y_train, y_eval, y_test, result_classes = make_dataset_and_labels_and_class_num(df, "FTR")

In [0]:
result_classes

array(['A', 'D', 'H'], dtype=object)

In [0]:
x_train.describe()

Unnamed: 0,FTHG,FTAG
count,6885.0,6885.0
mean,1.664052,1.21801
std,1.335278,1.157785
min,0.0,0.0
25%,1.0,0.0
50%,1.0,1.0
75%,2.0,2.0
max,9.0,9.0


In [0]:
x_eval.describe()

Unnamed: 0,FTHG,FTAG
count,612.0,612.0
mean,1.596405,1.151961
std,1.307984,1.146308
min,0.0,0.0
25%,1.0,0.0
50%,1.0,1.0
75%,2.0,2.0
max,7.0,6.0


In [0]:
x_test.describe()

Unnamed: 0,FTHG,FTAG
count,153.0,153.0
mean,1.732026,1.320261
std,1.292725,1.222883
min,0.0,0.0
25%,1.0,0.0
50%,2.0,1.0
75%,2.0,2.0
max,6.0,5.0


In [0]:
x_train.head()

Unnamed: 0,FTHG,FTAG
1286,3,0
4384,0,0
1833,4,5
7303,1,0
5365,5,1


In [0]:
x_eval.head()

Unnamed: 0,FTHG,FTAG
2946,2,0
3899,1,0
744,0,2
3389,0,2
5859,2,2


In [0]:
x_test.head()

Unnamed: 0,FTHG,FTAG
70,1,2
92,2,1
97,1,3
318,1,1
334,1,3


In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Trains a linear regression model of one feature.
  
    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
 
    # Construct a dataset, and configure batching/repeating
    features = {key:np.array(value) for key,value in dict(features).items()} 
  
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    
    # Shuffle the data, if specified
    if shuffle:
      ds = ds.shuffle(buffer_size=10000)
    
    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [0]:
train_input_fn = lambda: input_fn(x_train, y_train)

In [0]:
train_perdict_input_fn = lambda: input_fn(x_eval, y_eval, num_epochs=1, shuffle=False)

In [0]:
eval_perdict_input_fn = lambda: input_fn(x_eval, y_eval, num_epochs=1, shuffle=False)

In [0]:
test_perdict_input_fun = lambda: input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
STEPS = 5000  # Steps of train loop.
HIDDEN = [1000, 1000, 1000, 1000]
PERIODS = 10
STEPS_PER_PERIOD = STEPS / PERIODS

In [0]:
feature_cols = [
  tf.feature_column.numeric_column("FTHG"),
  tf.feature_column.numeric_column("FTAG")
]

In [0]:
model = tf.estimator.DNNClassifier(
    feature_columns = feature_cols,
    hidden_units = HIDDEN,
    n_classes = len(result_classes)
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp6kfudqxm', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fbd9dc44518>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [0]:
for period in range(0, PERIODS):
  model.train(input_fn=train_input_fn, steps=STEPS)
  train_predict = model.predict(input_fn=train_perdict_input_fn)
  eval_predict = model.predict(input_fn=eval_perdict_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp6kfudqxm/model.ckpt.
INFO:tensorflow:loss = 1.094349, step = 0
INFO:tensorflow:global_step/sec: 170.772
INFO:tensorflow:loss = 0.024890399, step = 100 (0.593 sec)
INFO:tensorflow:global_step/sec: 191.328
INFO:tensorflow:loss = 0.008236253, step = 200 (0.517 sec)
INFO:tensorflow:global_step/sec: 186.963
INFO:tensorflow:loss = 1.2636105e-05, step = 300 (0.538 sec)
INFO:tensorflow:global_step/sec: 190.378
INFO:tensorflow:loss = 0.0019343253, step = 400 (0.529 sec)
INFO:tensorflow:global_step/sec: 195.558
INFO:tensorflow:loss = 0.0012399612, step = 500 (0.508 sec)
INFO:tensorflow:global_step/sec: 198.526
INFO:tensorflow:loss = 0.0, step = 600 (0.503 sec)
INFO:tensorflow:global_step/sec: 201.234
INFO:tens

INFO:tensorflow:loss = 0.0, step = 2800 (0.524 sec)
INFO:tensorflow:global_step/sec: 182.276
INFO:tensorflow:loss = 6.6158966e-05, step = 2900 (0.548 sec)
INFO:tensorflow:global_step/sec: 191.565
INFO:tensorflow:loss = 4.9709037e-05, step = 3000 (0.517 sec)
INFO:tensorflow:global_step/sec: 186.838
INFO:tensorflow:loss = 4.8636208e-05, step = 3100 (0.536 sec)
INFO:tensorflow:global_step/sec: 195.556
INFO:tensorflow:loss = 1.1682442e-05, step = 3200 (0.516 sec)
INFO:tensorflow:global_step/sec: 188.53
INFO:tensorflow:loss = 9.417489e-06, step = 3300 (0.530 sec)
INFO:tensorflow:global_step/sec: 195.091
INFO:tensorflow:loss = 0.0, step = 3400 (0.510 sec)
INFO:tensorflow:global_step/sec: 187.928
INFO:tensorflow:loss = 0.00011789104, step = 3500 (0.536 sec)
INFO:tensorflow:global_step/sec: 188.696
INFO:tensorflow:loss = 4.6252135e-05, step = 3600 (0.524 sec)
INFO:tensorflow:global_step/sec: 187.343
INFO:tensorflow:loss = 9.7751135e-06, step = 3700 (0.536 sec)
INFO:tensorflow:global_step/sec: 

INFO:tensorflow:global_step/sec: 200.791
INFO:tensorflow:loss = 0.0, step = 5500 (0.498 sec)
INFO:tensorflow:global_step/sec: 192.661
INFO:tensorflow:loss = 6.413254e-05, step = 5600 (0.515 sec)
INFO:tensorflow:global_step/sec: 198.571
INFO:tensorflow:loss = 2.396078e-05, step = 5700 (0.503 sec)
INFO:tensorflow:global_step/sec: 195.975
INFO:tensorflow:loss = 0.0, step = 5800 (0.512 sec)
INFO:tensorflow:global_step/sec: 193.5
INFO:tensorflow:loss = 4.6491514e-06, step = 5900 (0.520 sec)
INFO:tensorflow:global_step/sec: 195.217
INFO:tensorflow:loss = 0.0, step = 6000 (0.508 sec)
INFO:tensorflow:global_step/sec: 182.515
INFO:tensorflow:loss = 0.00018249277, step = 6100 (0.548 sec)
INFO:tensorflow:global_step/sec: 191.352
INFO:tensorflow:loss = 0.00018058576, step = 6200 (0.527 sec)
INFO:tensorflow:global_step/sec: 191.905
INFO:tensorflow:loss = 0.0, step = 6300 (0.521 sec)
INFO:tensorflow:global_step/sec: 188.264
INFO:tensorflow:loss = 2.324554e-05, step = 6400 (0.529 sec)
INFO:tensorflow

In [0]:
test_predict = model.predict(input_fn=test_perdict_input_fun)

In [0]:
test_predict = np.array([item['classes'][0] for item in test_predict])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpcvz6rcu9/model.ckpt-50000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [0]:
for clazz in test_predict:
  result = result_classes[int(clazz)]
  print(result)

H
A
D
D
H
D
H
H
A
A
H
D
H
D
A
H
H
H
D
H
H
D
A
H
H
H
A
D
H
H
H
H
D
A
A
A
H
H
D
A
D
D
H
A
H
A
H
D
H
H
A
H
H
A
A
A
D
D
H
A
H
D
H
D
H
A
A
D
H
H
D
H
H
H
H
H
H
D
H
D
H
H
H
H
H
H
D
H
D
H
H
H
H
D
D
H
H
H
A
D
D
H
H
A
H
H
D
A
H
A
H
H
D
A
D
H
A
A
H
D
A
D
H
H
H
D
A
A
H
A
A
H
A
H
D
H
A
A
A
H
H
A
H
H
H
D
H
H
H
A
D
H
A
