# Apply to Other Datasets

## [Task 4] Create a Model of House Prices

<br />

"House Prices: Advanced Regression Techniques" on Kaggle


https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [2]:
# Read the csv file

df = pd.read_csv("train.csv")

In [3]:
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [4]:
# Extract conditions

X = np.log(df[["GrLivArea", "YearBuilt"]])
y = np.log(df["SalePrice"])

y = np.array(y)
X = np.array(X)

y = y.astype(np.int)[:, np.newaxis]

In [5]:
# Split the dataframe into train and test datasets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [6]:
# Split the train dataset into train and validation datasets again

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [7]:
class GetMiniBatch:
    """
    Iterator to get mini-batches
    
    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Train data
    
    y : ndarray, shape (n_samples, 1)
        Correct values
    
    batch_size : int
        Size of a batch
    
    seed : int
        Seed of random values of NumPy
    """
    
    def __init__(self, X, y, batch_size=10, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
    
    
    def __len__(self):
        return self._stop
    
    
    def __getitem__(self,item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        
        return self.X[p0:p1], self.y[p0:p1]
    
    
    def __iter__(self):
        self._counter = 0
        
        return self
    
    
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        
        self._counter += 1
        
        return self.X[p0:p1], self.y[p0:p1]

In [8]:
# Set hyperparameters

learning_rate = 0.01
batch_size = 100
num_epochs = 100

In [9]:
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1

In [10]:
# Decide shapes of arguments given to a dataflow graph

X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [11]:
# Mini-batch interator of train dataset

get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

In [12]:
def example_net(x):
    """
    Simple neural network of 3 layers
    """
    
    # Declaration of weights and biases
    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }
    
    # 1st layer
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    
    # 2nd layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    
    # 3rd layer
    layer_output = tf.matmul(layer_2, weights['w3']) + biases['b3']
    
    return layer_output

In [13]:
# Read network construction

logits = example_net(X)

In [14]:
# Objective function

loss_op = tf.reduce_mean(tf.losses.mean_squared_error(labels=Y, predictions=logits))

In [15]:
# Optimization method

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [16]:
# Initialize variables

init = tf.global_variables_initializer()

In [17]:
# Run the dataflow graph

with tf.Session() as sess:
    sess.run(init)
    
    # Loop per epoch
    for epoch in range(num_epochs):
        # Loop per mini-batch
        for mini_batch_x, mini_batch_y in get_mini_batch_train:
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss = sess.run(loss_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
        
        val_loss = sess.run(loss_op, feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}".format(epoch, loss, val_loss))

Epoch 0, loss : 10968.9199, val_loss : 11005.5977
Epoch 1, loss : 4816.8013, val_loss : 4809.9321
Epoch 2, loss : 2016.2709, val_loss : 2030.7031
Epoch 3, loss : 798.6362, val_loss : 796.0389
Epoch 4, loss : 304.6792, val_loss : 311.2219
Epoch 5, loss : 122.6680, val_loss : 123.4213
Epoch 6, loss : 58.7671, val_loss : 62.0462
Epoch 7, loss : 44.7531, val_loss : 46.9819
Epoch 8, loss : 43.5897, val_loss : 45.5079
Epoch 9, loss : 41.1772, val_loss : 43.4616
Epoch 10, loss : 37.8930, val_loss : 39.4873
Epoch 11, loss : 34.5992, val_loss : 36.1622
Epoch 12, loss : 31.9043, val_loss : 33.2464
Epoch 13, loss : 29.2446, val_loss : 30.2027
Epoch 14, loss : 26.3579, val_loss : 27.1030
Epoch 15, loss : 23.0749, val_loss : 23.7653
Epoch 16, loss : 19.1026, val_loss : 19.8356
Epoch 17, loss : 14.4634, val_loss : 15.2764
Epoch 18, loss : 10.0364, val_loss : 10.6936
Epoch 19, loss : 6.3160, val_loss : 6.6783
Epoch 20, loss : 3.3895, val_loss : 3.7833
Epoch 21, loss : 1.1584, val_loss : 1.2494
Epoch 