[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/price_rooms_estimator.ipynb)

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [0]:
STEPS = 5000  # Steps of train loop.
HIDDEN = [1000, 1000, 1000, 1000]
PERIODS = 10
STEPS_PER_PERIOD = STEPS / PERIODS
LEARNING_RATE = 0.000001

In [0]:
tf.logging.set_verbosity(tf.logging.INFO)

In [466]:
source = pd.read_csv("https://dl.dropbox.com/s/e2tcsgbcp2in5h5/price_rooms_area.tsv", sep="\t")
source.head()

Unnamed: 0,PRICE,ROOMS,AREA,OBJECTCAT,MARKETINGTYPE
0,5,0,249,Sonstiges Büro-/Praxisobjekt,Miete
1,595000,9,440,Besondere Immobilie,Kauf
2,199000,6,13575,Stadthaus,Kauf
3,269000,9,20357,Stadthaus,Kauf
4,562,3,6315,Etagenwohnung,Miete


In [0]:
def clean_data(source):
  df = source
  df = df[pd.notnull(df["PRICE"])]
  df = df[pd.notnull(df["ROOMS"])]
  df = df[pd.notnull(df["AREA"])]
  df = df.drop(["OBJECTCAT"], axis=1)
  df = df[~df["MARKETINGTYPE"].isin(["Miete"])]
  df = df.drop(["MARKETINGTYPE"], axis=1)
  
  df["ROOMS"] = df["ROOMS"].str.replace(',','.').astype(float)
  df["PRICE"] = df["PRICE"].str.replace(',','.').astype(float)
  df["AREA"] = df["AREA"].str.replace(',','.').astype(float)
  df = df[~(df[["PRICE", "ROOMS","AREA"]] == 0)]
  
  df = df[pd.notna(df["PRICE"])]
  df = df[pd.notna(df["ROOMS"])]
  df = df[pd.notna(df["AREA"])]

  return df

In [0]:
df = clean_data(source)

In [469]:
df.head()

Unnamed: 0,PRICE,ROOMS,AREA
1,595000.0,9.0,440.0
2,199000.0,6.0,135.75
3,269000.0,9.0,203.57
5,299000.0,5.0,140.0
6,685000.0,8.0,380.0


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name):
  """This method will prepare dataset, labels for train, evaluation, test.
     Args:
      df: DataFrame format of datasource.
      label_name: The name of column in datasource which will be as target for train.
     Return: 
       Tuple of (ds_train, ds_eval, ds_test, label_train, label_eval, label_test)
      
  """
  target_label_col = label_name
  
  random_seed = None
  np.random.seed(random_seed)
  
  ds_train = df.sample(frac=0.9, random_state=random_seed)
  label_train = ds_train[target_label_col]
  
  ds_rest = df.drop(ds_train.index)
  
  ds_eval = ds_rest.sample(frac=0.8, random_state=random_seed)
  label_eval = ds_eval[target_label_col]
  
  ds_test = ds_rest.drop(ds_eval.index)
  label_test = ds_test[target_label_col]
  
  return ds_train[["ROOMS", "AREA"]], ds_eval[["ROOMS", "AREA"]], ds_test[["ROOMS", "AREA"]], label_train, label_eval, label_test

In [0]:
x_train, x_eval, x_test, y_train, y_eval, y_test = make_dataset_and_labels_and_class_num(df, "PRICE")

In [472]:
x_train.describe()

Unnamed: 0,ROOMS,AREA
count,146844.0,146844.0
mean,5.164263,191.375524
std,83.591309,2848.104817
min,1.0,0.01
25%,3.0,80.3875
50%,4.0,122.0
75%,6.0,180.6175
max,31950.0,710000.0


In [473]:
x_eval.describe()

Unnamed: 0,ROOMS,AREA
count,13053.0,13053.0
mean,4.895082,172.201754
std,5.273202,725.926773
min,1.0,1.0
25%,3.0,80.82
50%,4.0,123.0
75%,6.0,180.0
max,400.0,75000.0


In [474]:
x_test.describe()

Unnamed: 0,ROOMS,AREA
count,3263.0,3263.0
mean,4.906531,162.744986
std,5.169354,191.794803
min,0.01,1.0
25%,3.0,79.825
50%,4.0,121.0
75%,5.5,180.0
max,183.0,4324.0


In [475]:
x_train.head()

Unnamed: 0,ROOMS,AREA
178660,3.0,66.0
48509,3.0,109.84
54125,5.0,134.0
98691,5.0,134.0
9447,3.0,118.0


In [476]:
x_eval.head()

Unnamed: 0,ROOMS,AREA
92752,4.0,85.0
188652,22.0,625.0
19358,4.0,206.0
196155,6.0,267.0
264569,5.0,133.0


In [477]:
x_test.head()

Unnamed: 0,ROOMS,AREA
43,2.0,66.0
123,1.0,30.54
249,14.0,180.0
258,4.0,125.0
277,3.0,74.0


In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Trains a linear regression model of one feature.
  
    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
 
    # Construct a dataset, and configure batching/repeating
    features = {key:np.array(value) for key,value in dict(features).items()} 
  
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    
    # Shuffle the data, if specified
    if shuffle:
      ds = ds.shuffle(buffer_size=10000)
    
    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [0]:
train_input_fn = lambda: input_fn(x_train, y_train)

In [0]:
train_perdict_input_fn = lambda: input_fn(x_eval, y_eval, num_epochs=1, shuffle=False)

In [0]:
eval_perdict_input_fn = lambda: input_fn(x_eval, y_eval, num_epochs=1, shuffle=False)

In [0]:
test_perdict_input_fun = lambda: input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
feature_cols = [
  tf.feature_column.numeric_column("ROOMS"),
  tf.feature_column.numeric_column("AREA")
]

In [0]:
train_optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
train_optimizer = tf.contrib.estimator.clip_gradients_by_norm(train_optimizer, 5.0)

In [485]:
model = tf.estimator.DNNRegressor(
    feature_columns = feature_cols,
    hidden_units = HIDDEN,
    optimizer = train_optimizer
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmppiit59xr', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f17ea069a20>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [486]:
for period in range(0, PERIODS):
  model.train(input_fn=train_input_fn, steps=STEPS)
  train_predict = model.predict(input_fn=train_perdict_input_fn)
  eval_predict = model.predict(input_fn=eval_perdict_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmppiit59xr/model.ckpt.
INFO:tensorflow:loss = 16641044000.0, step = 0
INFO:tensorflow:global_step/sec: 124.637
INFO:tensorflow:loss = 14161316000.0, step = 100 (0.817 sec)
INFO:tensorflow:global_step/sec: 183.307
INFO:tensorflow:loss = 421206850000.0, step = 200 (0.534 sec)
INFO:tensorflow:global_step/sec: 173.541
INFO:tensorflow:loss = 2102505100000.0, step = 300 (0.578 sec)
INFO:tensorflow:global_step/sec: 188.566
INFO:tensorflow:loss = 249003380000.0, step = 400 (0.536 sec)
INFO:tensorflow:global_step/sec: 159.061
INFO:tensorflow:loss = 14400103000.0, step = 500 (0.625 sec)
INFO:tensorflow:global_step/sec: 200.345
INFO:tensorflow:loss = 232324810000.0, step = 600 (0.494 sec)
INFO:tensorflow:global_s

INFO:tensorflow:global_step/sec: 188.233
INFO:tensorflow:loss = 39203870000.0, step = 2800 (0.531 sec)
INFO:tensorflow:global_step/sec: 173.833
INFO:tensorflow:loss = 50624782000.0, step = 2900 (0.576 sec)
INFO:tensorflow:global_step/sec: 155.43
INFO:tensorflow:loss = 476095000000.0, step = 3000 (0.643 sec)
INFO:tensorflow:global_step/sec: 141.892
INFO:tensorflow:loss = 62499725000.0, step = 3100 (0.713 sec)
INFO:tensorflow:global_step/sec: 123.334
INFO:tensorflow:loss = 16640891000.0, step = 3200 (0.805 sec)
INFO:tensorflow:global_step/sec: 177.025
INFO:tensorflow:loss = 35720757000.0, step = 3300 (0.565 sec)
INFO:tensorflow:global_step/sec: 196.063
INFO:tensorflow:loss = 62424506000.0, step = 3400 (0.509 sec)
INFO:tensorflow:global_step/sec: 204.642
INFO:tensorflow:loss = 2915970600.0, step = 3500 (0.489 sec)
INFO:tensorflow:global_step/sec: 193.759
INFO:tensorflow:loss = 476097150000.0, step = 3600 (0.518 sec)
INFO:tensorflow:global_step/sec: 198.564
INFO:tensorflow:loss = 960995000

INFO:tensorflow:global_step/sec: 184.888
INFO:tensorflow:loss = 3135855600.0, step = 5400 (0.542 sec)
INFO:tensorflow:global_step/sec: 193.54
INFO:tensorflow:loss = 32399320000.0, step = 5500 (0.519 sec)
INFO:tensorflow:global_step/sec: 192.865
INFO:tensorflow:loss = 72844660000.0, step = 5600 (0.517 sec)
INFO:tensorflow:global_step/sec: 180.107
INFO:tensorflow:loss = 8648766000.0, step = 5700 (0.557 sec)
INFO:tensorflow:global_step/sec: 193.587
INFO:tensorflow:loss = 121799360000.0, step = 5800 (0.516 sec)
INFO:tensorflow:global_step/sec: 203.903
INFO:tensorflow:loss = 720795300000.0, step = 5900 (0.488 sec)
INFO:tensorflow:global_step/sec: 199.962
INFO:tensorflow:loss = 30624540000.0, step = 6000 (0.504 sec)
INFO:tensorflow:global_step/sec: 153.89
INFO:tensorflow:loss = 9800669000.0, step = 6100 (0.651 sec)
INFO:tensorflow:global_step/sec: 134.638
INFO:tensorflow:loss = 518383700000.0, step = 6200 (0.751 sec)
INFO:tensorflow:global_step/sec: 152.743
INFO:tensorflow:loss = 10239742000

INFO:tensorflow:loss = 4760645000.0, step = 8400 (0.486 sec)
INFO:tensorflow:global_step/sec: 219.394
INFO:tensorflow:loss = 202496150000.0, step = 8500 (0.453 sec)
INFO:tensorflow:global_step/sec: 222.395
INFO:tensorflow:loss = 340463780000.0, step = 8600 (0.454 sec)
INFO:tensorflow:global_step/sec: 218.388
INFO:tensorflow:loss = 5062377600000.0, step = 8700 (0.454 sec)
INFO:tensorflow:global_step/sec: 208.806
INFO:tensorflow:loss = 178080240000.0, step = 8800 (0.484 sec)
INFO:tensorflow:global_step/sec: 220.216
INFO:tensorflow:loss = 3843837700.0, step = 8900 (0.453 sec)
INFO:tensorflow:global_step/sec: 221.517
INFO:tensorflow:loss = 19599309000.0, step = 9000 (0.455 sec)
INFO:tensorflow:global_step/sec: 214.568
INFO:tensorflow:loss = 121797796000.0, step = 9100 (0.465 sec)
INFO:tensorflow:global_step/sec: 210.315
INFO:tensorflow:loss = 29755818000.0, step = 9200 (0.473 sec)
INFO:tensorflow:global_step/sec: 208.697
INFO:tensorflow:loss = 4224681700.0, step = 9300 (0.479 sec)
INFO:ten

INFO:tensorflow:loss = 302495800000.0, step = 11000 (0.474 sec)
INFO:tensorflow:global_step/sec: 210.181
INFO:tensorflow:loss = 53218087000.0, step = 11100 (0.474 sec)
INFO:tensorflow:global_step/sec: 234.616
INFO:tensorflow:loss = 30621774000.0, step = 11200 (0.422 sec)
INFO:tensorflow:global_step/sec: 240.653
INFO:tensorflow:loss = 12098134000.0, step = 11300 (0.418 sec)
INFO:tensorflow:global_step/sec: 245.411
INFO:tensorflow:loss = 96035340000.0, step = 11400 (0.406 sec)
INFO:tensorflow:global_step/sec: 241.685
INFO:tensorflow:loss = 46223600000.0, step = 11500 (0.413 sec)
INFO:tensorflow:global_step/sec: 238.673
INFO:tensorflow:loss = 159515360000.0, step = 11600 (0.420 sec)
INFO:tensorflow:global_step/sec: 234.109
INFO:tensorflow:loss = 72895900000.0, step = 11700 (0.427 sec)
INFO:tensorflow:global_step/sec: 229.54
INFO:tensorflow:loss = 89390610000.0, step = 11800 (0.434 sec)
INFO:tensorflow:global_step/sec: 220.245
INFO:tensorflow:loss = 1520771200.0, step = 11900 (0.454 sec)
I

INFO:tensorflow:global_step/sec: 235.475
INFO:tensorflow:loss = 359978240000.0, step = 14100 (0.424 sec)
INFO:tensorflow:global_step/sec: 243.096
INFO:tensorflow:loss = 7743267300.0, step = 14200 (0.407 sec)
INFO:tensorflow:global_step/sec: 230.167
INFO:tensorflow:loss = 13223992000.0, step = 14300 (0.439 sec)
INFO:tensorflow:global_step/sec: 244.027
INFO:tensorflow:loss = 4760584700.0, step = 14400 (0.407 sec)
INFO:tensorflow:global_step/sec: 231.417
INFO:tensorflow:loss = 1177201900000.0, step = 14500 (0.430 sec)
INFO:tensorflow:global_step/sec: 198.634
INFO:tensorflow:loss = 82939860000.0, step = 14600 (0.504 sec)
INFO:tensorflow:global_step/sec: 215.064
INFO:tensorflow:loss = 487197280000.0, step = 14700 (0.469 sec)
INFO:tensorflow:global_step/sec: 222.098
INFO:tensorflow:loss = 108877170000.0, step = 14800 (0.446 sec)
INFO:tensorflow:global_step/sec: 220.667
INFO:tensorflow:loss = 71280820000.0, step = 14900 (0.455 sec)
INFO:tensorflow:Saving checkpoints for 15000 into /tmp/tmppii

INFO:tensorflow:global_step/sec: 236.278
INFO:tensorflow:loss = 77054340000.0, step = 16700 (0.425 sec)
INFO:tensorflow:global_step/sec: 236.341
INFO:tensorflow:loss = 47127327000.0, step = 16800 (0.424 sec)
INFO:tensorflow:global_step/sec: 236.944
INFO:tensorflow:loss = 31677910000.0, step = 16900 (0.419 sec)
INFO:tensorflow:global_step/sec: 223.612
INFO:tensorflow:loss = 14882835000.0, step = 17000 (0.451 sec)
INFO:tensorflow:global_step/sec: 218.092
INFO:tensorflow:loss = 35716310000.0, step = 17100 (0.453 sec)
INFO:tensorflow:global_step/sec: 233.175
INFO:tensorflow:loss = 1144882100000.0, step = 17200 (0.430 sec)
INFO:tensorflow:global_step/sec: 242.839
INFO:tensorflow:loss = 7289842000000.0, step = 17300 (0.414 sec)
INFO:tensorflow:global_step/sec: 232.181
INFO:tensorflow:loss = 1689897300000.0, step = 17400 (0.436 sec)
INFO:tensorflow:global_step/sec: 217.053
INFO:tensorflow:loss = 95473580000.0, step = 17500 (0.458 sec)
INFO:tensorflow:global_step/sec: 200.491
INFO:tensorflow:l

INFO:tensorflow:loss = 89987250000.0, step = 19700 (0.406 sec)
INFO:tensorflow:global_step/sec: 240.039
INFO:tensorflow:loss = 31932846000.0, step = 19800 (0.421 sec)
INFO:tensorflow:global_step/sec: 236.983
INFO:tensorflow:loss = 65016603000.0, step = 19900 (0.418 sec)
INFO:tensorflow:Saving checkpoints for 20000 into /tmp/tmppiit59xr/model.ckpt.
INFO:tensorflow:Loss for final step: 202491050000.0.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmppiit59xr/model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 20000 into /tmp/tmppiit59xr/model.ckpt.
INFO:tensorflow:loss = 624829700.0, step = 20000
INFO:tensorflow:global_step/sec: 203.882
INFO:tensorflow:loss = 117372430000.0, step = 20100 (0.492 sec)
INFO:tensorflow:global_step/sec: 241.001
INFO:tenso

INFO:tensorflow:loss = 16637747000.0, step = 22300 (0.530 sec)


KeyboardInterrupt: ignored

In [0]:
test_predict = model.predict(input_fn=test_perdict_input_fun)

In [0]:
test_predict = np.array([item['prediction'][0] for item in test_predict])

In [0]:
for clazz in test_predict:
  result = result_classes[int(clazz)]
  print(result)