# Simple linear regression model using Tensorflow with low level API.

# 0 - IMPORTS

In [54]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import warnings

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler 

%matplotlib inline
warnings.filterwarnings('ignore')

# 1 - DATA LOAD

In [55]:
df1 = pd.read_csv('data\house_prices.csv')
df_shape = df1.shape 
print('Rows: {}\nCols: {}'.format(df_shape[0], df_shape[1]))
df1.head()

Rows: 21613
Cols: 21


Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


# 2 - DATA TRANSFORMATION

In [107]:
df2 = df1.copy()

In [91]:
scaler_x = StandardScaler()
df2[['bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long']] = scaler_x.fit_transform(df2[['bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long']])

In [108]:
scaler_y = StandardScaler()
df2[['price']] = scaler_y.fit_transform(df2[['price']])

# 3 - CREATE TRAIN & TEST DATASET

In [93]:
X = df2.drop('price',axis=1)
y = df2.price

In [94]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# 5 - MULTIPLE LINEAR REGRESSION MODEL

## 5.1 - Create Features

In [95]:
cols_predict = ['bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long']

In [96]:
columns = [ tf.feature_column.numeric_column(key = c) for c in cols_predict ]

## 5.2 - Create Regressor

In [None]:
regressor_model = tf.estimator.LinearRegressor(feature_columns=columns)

## 5.3 - Create Train and Test Function 

In [97]:
function_train = tf.estimator.inputs.pandas_input_fn( x=X_train, y=y_train, batch_size=32, num_epochs=None, shuffle=True )

In [98]:
function_test = tf.estimator.inputs.pandas_input_fn( x=X_test, y=y_test, batch_size=32, num_epochs=1000, shuffle=False )

## 5.4 - Train Regressor

In [None]:
regressor_model.train( input_fn=function_train, steps=10000 )

## 5.5 - Train Evaluation

In [None]:
train_metrics = regressor_model.evaluate( input_fn = function_train, steps=10000 )

## 5.6 - Test Evaluation

In [None]:
test_metrics = regressor_model.evaluate( input_fn = function_test, steps=10000 )

In [102]:
train_metrics, test_metrics

({'average_loss': 0.31333804,
  'label/mean': -0.00015695737,
  'loss': 10.026817,
  'prediction/mean': -0.0013197317,
  'global_step': 20000},
 {'average_loss': 0.28932217,
  'label/mean': 0.00052200485,
  'loss': 9.258309,
  'prediction/mean': 0.010321364,
  'global_step': 20000})

## 5.7 - Model Prediction

In [103]:
# Create prediction function
function_predict = tf.estimator.inputs.pandas_input_fn( x=X_test, shuffle=False )

In [104]:
predict = regressor_model.predict( input_fn = function_predict )

In [None]:
predict_values = []
for p in regressor_model.predict( input_fn = function_predict ):
    predict_values.append(p['predictions'])

In [109]:
predict_values = np.array(predict_values).reshape(-1, 1)
predict_values = scaler_y.inverse_transform(predict_values)

In [110]:
y_test_inverse = y_test.values.reshape(-1,1)
y_test_inverse = scaler_y.inverse_transform(y_test_inverse)

In [112]:
mae = mean_absolute_error(y_test_inverse, predict_values)
std = abs(y_test_inverse - predict_values).std()
mse = mean_squared_error(y_test_inverse, predict_values)

print(mae, std, mse)

129647.12314401446 148987.24400943294 39005575417.045555
