## Regression Exercise

In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [3]:
# Load the housing dataset from a CSV file given by a relative path.
df = pd.read_csv('California_Housing_Data.csv')

In [4]:
# Preview the first five rows to check which columns were loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,HouseAge,AveRooms,AveBedrms,Population,AveOccup,MedInc,Latitude,Longitude,medianHouseValue
0,0,41.0,6.984127,1.02381,322.0,2.555556,8.3252,37.88,-122.23,4.526
1,1,21.0,6.238137,0.97188,2401.0,2.109842,8.3014,37.86,-122.22,3.585
2,2,52.0,8.288136,1.073446,496.0,2.80226,7.2574,37.85,-122.24,3.521
3,3,52.0,5.817352,1.073059,558.0,2.547945,5.6431,37.85,-122.25,3.413
4,4,52.0,6.281853,1.081081,565.0,2.181467,3.8462,37.85,-122.25,3.422


In [5]:
# Remove the 'Unnamed: 0' column (presumably a saved row index -- confirm
# against the CSV) and the geographic coordinates; none of them is used as a
# model feature later on. The frame is modified in place, so this cell raises
# a KeyError if it is run a second time on the same frame.
df.drop(columns=['Unnamed: 0', 'Longitude', 'Latitude'], inplace=True)

In [6]:
# Summary statistics, transposed so that each feature occupies one row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
HouseAge,20640.0,28.639486,12.585558,1.0,18.0,29.0,37.0,52.0
AveRooms,20640.0,5.429,2.474173,0.846154,4.440716,5.229129,6.052381,141.909091
AveBedrms,20640.0,1.096675,0.473911,0.333333,1.006079,1.04878,1.099526,34.066667
Population,20640.0,1425.476744,1132.462122,3.0,787.0,1166.0,1725.0,35682.0
AveOccup,20640.0,3.070655,10.38605,0.692308,2.429741,2.818116,3.282261,1243.333333
MedInc,20640.0,3.870671,1.899822,0.4999,2.5634,3.5348,4.74325,15.0001
medianHouseValue,20640.0,2.068558,1.153956,0.14999,1.196,1.797,2.64725,5.00001


In [7]:
# Regression target: the median house value column.
y_data = df.loc[:, 'medianHouseValue']

In [8]:
# Feature matrix: every remaining column except the target.
x_data = df.drop(columns=['medianHouseValue'])

In [9]:
# Hold out 30% of the rows for evaluation. `random_state` is pinned so the
# split -- and therefore the scaler fit and every downstream metric -- is the
# same after a kernel restart; without it each run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.3,
    random_state=101,
)

## Scale the Feature Data

In [10]:
# Min-max scaler created with its default settings.
scaler = MinMaxScaler()

In [11]:
# Fit the scaler on the training split only; the test split is transformed
# later with these same fitted parameters.
scaler.fit(X_train)

MinMaxScaler(copy=True, feature_range=(0, 1))

In [12]:
# Swap X_train for its scaled version, re-wrapped as a DataFrame that keeps
# the original column labels and row index.
# NOTE: X_train is overwritten, so re-running this cell would scale data that
# has already been scaled.
X_train = pd.DataFrame(
    data=scaler.transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)

In [13]:
# Apply the already-fitted scaler to the test split and re-wrap the result as
# a DataFrame with the original column labels and row index.
# NOTE: X_test is overwritten, so re-running this cell would scale data that
# has already been scaled.
X_test = pd.DataFrame(
    data=scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

## Create Feature Columns

In [14]:
# List the remaining column names; the feature columns defined next are keyed
# by these names.
df.columns

Index(['HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'MedInc',
       'medianHouseValue'],
      dtype='object')

In [15]:
# One numeric feature column per predictor, keyed by the DataFrame column name.
# NOTE: `households` wraps the 'AveOccup' column; the variable name is kept
# unchanged because a later cell refers to it.
(age, rooms, bedrooms,
 population, households, income) = (
    tf.feature_column.numeric_column(key)
    for key in ('HouseAge', 'AveRooms', 'AveBedrms',
                'Population', 'AveOccup', 'MedInc')
)

In [16]:
# Collect every feature column into the list handed to the estimator.
fcolumns = [
    age,
    rooms,
    bedrooms,
    population,
    households,
    income,
]

In [17]:
# Training input pipeline: shuffled mini-batches of 10 rows drawn from the
# scaled training frame, for up to 1000 passes over the data.
input_function = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [18]:
# Dense regressor with five hidden layers of 4, 6, 4, 6 and 4 units.
model = tf.estimator.DNNRegressor(
    hidden_units=[4, 6, 4, 6, 4],
    feature_columns=fcolumns,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\User\\AppData\\Local\\Temp\\tmpp660fo33', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000000E5F4ABB00>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [19]:
# Train for 1000 steps, pulling batches from the training input function.
# The call is the cell's last expression, so its return value (the estimator)
# is shown as the cell output.
model.train(input_fn=input_function,steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\User\AppData\Local\Temp\tmpp660fo33\model.ckpt.
INFO:tensorflow:loss = 41.621193, step = 1
INFO:tensorflow:global_step/sec: 156.39
INFO:tensorflow:loss = 7.915587, step = 101 (0.641 sec)
INFO:tensorflow:global_step/sec: 293.059
INFO:tensorflow:loss = 9.605822, step = 201 (0.341 sec)
INFO:tensorflow:global_step/sec: 272.298
INFO:tensorflow:loss = 3.1722503, step = 301 (0.366 sec)
INFO:tensorflow:global_step/sec: 272.298
INFO:tensorflow:loss = 13.925393, step = 401 (0.369 sec)
INFO:tensorflow:global_step/sec: 273.79
INFO:tensorflow:loss = 3.4853077, step = 501 (0.364 sec)
INFO:tensorflow:global_step/sec: 274.543
INFO:tensorflow:loss = 2.9765654, step = 601 (0.364 sec)
INFO:tensorflow:global_step/sec: 

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0xe5f4ab630>

In [20]:
# Prediction input pipeline: a single, unshuffled pass over the scaled test
# frame so that predictions stay aligned with the rows of X_test.
predict_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [21]:
# Request predictions for the test rows. The TensorFlow log lines only show
# up once the result is consumed in the following cell, so the work appears
# to be deferred until then.
preds = model.predict(input_fn=predict_input_func)

In [22]:
# Consume the prediction iterable into a concrete list.
predictions = [item for item in preds]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\User\AppData\Local\Temp\tmpp660fo33\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [23]:
# Keep only the 'predictions' entry from each per-row result.
final_pred = [row['predictions'] for row in predictions]

In [24]:
# Show the first 15 predicted values.
final_pred[:15]

[array([2.0245152], dtype=float32),
 array([1.8955797], dtype=float32),
 array([3.3947508], dtype=float32),
 array([3.0855725], dtype=float32),
 array([2.6384525], dtype=float32),
 array([1.4795125], dtype=float32),
 array([2.6175663], dtype=float32),
 array([2.3830543], dtype=float32),
 array([3.2079687], dtype=float32),
 array([1.4816821], dtype=float32),
 array([3.9559865], dtype=float32),
 array([3.2280762], dtype=float32),
 array([1.115987], dtype=float32),
 array([1.2571497], dtype=float32),
 array([2.3511052], dtype=float32)]

In [25]:
# Root-mean-squared error on the test split: compute the MSE first, then take
# its square root.
mse = mean_squared_error(y_test, final_pred)
result = mse ** 0.5
print('RMSE ', result)

RMSE  0.7857419123353376
