# Ejemplo de Regresión con TensorFlow

In [1]:
import pandas as pd
import tensorflow as tf

In [2]:
casas = pd.read_csv('precios_casas.csv')

In [3]:
casas.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [4]:
casas_x = casas.drop('median_house_value', axis=1)
casas_x.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925


In [5]:
casas_y = casas['median_house_value']
casas_y

0         66900.0
1         80100.0
2         85700.0
3         73400.0
4         65500.0
5         74000.0
6         82400.0
7         48500.0
8         58400.0
9         48100.0
10        86500.0
11        62000.0
12        48600.0
13        70400.0
14        45000.0
15        69100.0
16        94900.0
17        25000.0
18        44000.0
19        27500.0
20        44400.0
21        59200.0
22        50000.0
23        71300.0
24        53500.0
25       100000.0
26        71100.0
27        80900.0
28        68600.0
29        74300.0
           ...   
16970     75500.0
16971     62500.0
16972     70500.0
16973     68300.0
16974     81300.0
16975     82800.0
16976    116100.0
16977     86400.0
16978     70500.0
16979     70200.0
16980     67000.0
16981     72200.0
16982    107000.0
16983     74600.0
16984     70000.0
16985     69000.0
16986     90100.0
16987     68400.0
16988     66900.0
16989     58100.0
16990     78300.0
16991     73200.0
16992     50800.0
16993    106700.0
16994     

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(casas_x, casas_y, test_size = 0.3)

In [8]:
X_train.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
5179,-118.13,33.91,35.0,561.0,104.0,261.0,105.0,4.9375
4836,-118.09,33.84,23.0,4412.0,910.0,2380.0,825.0,4.54
13559,-121.97,37.53,35.0,2277.0,420.0,1353.0,413.0,4.75
16222,-122.48,37.76,50.0,2236.0,484.0,1171.0,467.0,4.0977
6391,-118.26,34.01,37.0,2451.0,668.0,2824.0,598.0,1.9074


In [9]:
from sklearn.preprocessing import MinMaxScaler

In [10]:
normalizador = MinMaxScaler()

In [11]:
normalizador.fit(X_train)

MinMaxScaler(copy=True, feature_range=(0, 1))

In [12]:
X_train = pd.DataFrame(data= normalizador.transform(X_train), columns=X_train.columns, index=X_train.index)

In [13]:
X_train

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
5179,0.619522,0.144681,0.666667,0.017134,0.015831,0.007231,0.016941,0.306037
4836,0.623506,0.137234,0.431373,0.135172,0.140928,0.066622,0.135362,0.278624
13559,0.237052,0.529787,0.666667,0.069732,0.064877,0.037837,0.067599,0.293106
16222,0.186255,0.554255,0.960784,0.068475,0.074810,0.032736,0.076480,0.248121
6391,0.606574,0.155319,0.705882,0.075065,0.103368,0.079066,0.098026,0.097068
13653,0.235060,0.575532,0.313725,0.084414,0.065342,0.034334,0.069737,0.350843
10961,0.343625,0.787234,0.372549,0.041287,0.036629,0.016761,0.033059,0.209776
8647,0.578685,0.173404,0.470588,0.059341,0.070619,0.035791,0.069572,0.238845
5913,0.611554,0.222340,0.294118,0.078805,0.065963,0.035595,0.069737,0.375919
1088,0.721116,0.020213,0.470588,0.020904,0.029179,0.018554,0.030428,0.134653


In [14]:
X_test = pd.DataFrame(data= normalizador.transform(X_test), columns=X_test.columns, index=X_test.index)

In [15]:
X_test.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
7766,0.594622,0.134043,0.372549,0.060475,0.047183,0.019619,0.047697,0.55511
4364,0.630478,0.148936,0.72549,0.065502,0.065808,0.031811,0.067434,0.2615
5134,0.619522,0.173404,1.0,0.094774,0.080242,0.036604,0.079934,0.408436
5097,0.620518,0.141489,0.686275,0.033134,0.033525,0.015527,0.034211,0.177901
7056,0.600598,0.169149,0.431373,0.140138,0.220549,0.045433,0.163322,0.248038


In [16]:
casas.columns

Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value'],
      dtype='object')

In [17]:
longitude = tf.feature_column.numeric_column('longitude')
latitude = tf.feature_column.numeric_column('latitude')
housing_median_age = tf.feature_column.numeric_column('housing_median_age')
total_rooms = tf.feature_column.numeric_column('total_rooms')
total_bedrooms = tf.feature_column.numeric_column('total_bedrooms')
population = tf.feature_column.numeric_column('population')
households = tf.feature_column.numeric_column('households')
median_income = tf.feature_column.numeric_column('median_income')

In [18]:
columnas = [longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income]

In [19]:
funcion_entrada = tf.estimator.inputs.pandas_input_fn(x=X_train, y=y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [20]:
modelo = tf.estimator.DNNRegressor(hidden_units=[10,10,10], feature_columns=columnas)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Cata\\AppData\\Local\\Temp\\tmpjet8is0v', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000141D5AA2550>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [21]:
modelo.train(input_fn = funcion_entrada, steps=8000) 

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\Cata\AppData\Local\Temp\tmpjet8is0v\model.ckpt.
INFO:tensorflow:loss = 457032070000.0, step = 1
INFO:tensorflow:global_step/sec: 203.79
INFO:tensorflow:loss = 617723400000.0, step = 101 (0.491 sec)
INFO:tensorflow:global_step/sec: 464.145
INFO:tensorflow:loss = 456007420000.0, step = 201 (0.212 sec)
INFO:tensorflow:global_step/sec: 470.033
INFO:tensorflow:loss = 352229600000.0, step = 301 (0.214 sec)
INFO:tensorflow:global_step/sec: 490.268
INFO:tensorflow:loss = 106991640000.0, step = 401 (0.202 sec)
INFO:tensorflow:global_step/sec: 484.386
INFO:tensorflow:l

INFO:tensorflow:global_step/sec: 328.745
INFO:tensorflow:loss = 104736326000.0, step = 6501 (0.306 sec)
INFO:tensorflow:global_step/sec: 282.643
INFO:tensorflow:loss = 88852890000.0, step = 6601 (0.356 sec)
INFO:tensorflow:global_step/sec: 277.753
INFO:tensorflow:loss = 29959377000.0, step = 6701 (0.357 sec)
INFO:tensorflow:global_step/sec: 326.964
INFO:tensorflow:loss = 140889750000.0, step = 6801 (0.307 sec)
INFO:tensorflow:global_step/sec: 189.541
INFO:tensorflow:loss = 174631830000.0, step = 6901 (0.528 sec)
INFO:tensorflow:global_step/sec: 253.843
INFO:tensorflow:loss = 137785340000.0, step = 7001 (0.393 sec)
INFO:tensorflow:global_step/sec: 234.269
INFO:tensorflow:loss = 47757330000.0, step = 7101 (0.429 sec)
INFO:tensorflow:global_step/sec: 248.097
INFO:tensorflow:loss = 51555918000.0, step = 7201 (0.401 sec)
INFO:tensorflow:global_step/sec: 262.37
INFO:tensorflow:loss = 67118227000.0, step = 7301 (0.381 sec)
INFO:tensorflow:global_step/sec: 274.705
INFO:tensorflow:loss = 416819

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x141d5ab5f98>

In [22]:
funcion_prediccion = tf.estimator.inputs.pandas_input_fn(x=X_test, batch_size=10, num_epochs=1, shuffle=False)

In [23]:
generador_predicciones = modelo.predict(input_fn = funcion_prediccion)

In [24]:
predicciones = list(generador_predicciones)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from C:\Users\Cata\AppData\Local\Temp\tmpjet8is0v\model.ckpt-8000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [25]:
predicciones

[{'predictions': array([258291.64], dtype=float32)},
 {'predictions': array([230837.17], dtype=float32)},
 {'predictions': array([285300.94], dtype=float32)},
 {'predictions': array([203279.55], dtype=float32)},
 {'predictions': array([226014.53], dtype=float32)},
 {'predictions': array([154002.11], dtype=float32)},
 {'predictions': array([221006.7], dtype=float32)},
 {'predictions': array([290041.28], dtype=float32)},
 {'predictions': array([221602.52], dtype=float32)},
 {'predictions': array([214383.03], dtype=float32)},
 {'predictions': array([222536.98], dtype=float32)},
 {'predictions': array([206161.3], dtype=float32)},
 {'predictions': array([233637.36], dtype=float32)},
 {'predictions': array([195279.61], dtype=float32)},
 {'predictions': array([191935.62], dtype=float32)},
 {'predictions': array([195264.08], dtype=float32)},
 {'predictions': array([227336.48], dtype=float32)},
 {'predictions': array([269795.2], dtype=float32)},
 {'predictions': array([224471.27], dtype=float32

In [26]:
predicciones_finales = []
for prediccion in predicciones:
    predicciones_finales.append(prediccion['predictions'])


In [27]:
predicciones_finales

[array([258291.64], dtype=float32),
 array([230837.17], dtype=float32),
 array([285300.94], dtype=float32),
 array([203279.55], dtype=float32),
 array([226014.53], dtype=float32),
 array([154002.11], dtype=float32),
 array([221006.7], dtype=float32),
 array([290041.28], dtype=float32),
 array([221602.52], dtype=float32),
 array([214383.03], dtype=float32),
 array([222536.98], dtype=float32),
 array([206161.3], dtype=float32),
 array([233637.36], dtype=float32),
 array([195279.61], dtype=float32),
 array([191935.62], dtype=float32),
 array([195264.08], dtype=float32),
 array([227336.48], dtype=float32),
 array([269795.2], dtype=float32),
 array([224471.27], dtype=float32),
 array([181052.94], dtype=float32),
 array([199143.11], dtype=float32),
 array([215778.45], dtype=float32),
 array([271416.94], dtype=float32),
 array([173641.86], dtype=float32),
 array([203106.1], dtype=float32),
 array([213064.12], dtype=float32),
 array([257415.3], dtype=float32),
 array([193231.88], dtype=float32

In [28]:
from sklearn.metrics import mean_squared_error

In [29]:
mean_squared_error(y_test, predicciones_finales)**0.5

99327.4860454822