In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential, layers
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from time import time
from keras.models import load_model

In [2]:
# GPU memory growth fix: switch memory growth on for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')

try:
    # The memory-growth setting has to be identical on all GPUs, so apply it
    # to each one. With no GPU present the loop body never runs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as err:
    # Memory growth can only be configured before the GPUs are initialized;
    # report the problem instead of aborting the notebook.
    print(err)

1 Physical GPUs, 1 Logical GPUs


In [3]:
# Loading data
# Single place to change where the CSV files live.
DATA_DIR = 'E:/Python/data/House prices'
train_data = pd.read_csv(DATA_DIR + '/train.csv')
test_data = pd.read_csv(DATA_DIR + '/test.csv')

# Fill missing values.
# Text (object) columns: a missing entry becomes the literal category 'None'.
str_cols = train_data.select_dtypes(include=['object']).columns
train_data.loc[:, str_cols] = train_data.loc[:, str_cols].fillna('None')
str_cols = test_data.select_dtypes(include=['object']).columns
test_data.loc[:, str_cols] = test_data.loc[:, str_cols].fillna('None')
# Numeric columns: impute with the per-column median of the TRAINING data.
# numeric_only=True is required because the frames still contain object
# columns (newer pandas raises TypeError otherwise). The same training medians
# are reused for the test set so both sets are preprocessed identically;
# median entries for columns that test_data lacks are simply ignored.
numeric_medians = train_data.median(axis=0, numeric_only=True)
train_data = train_data.fillna(numeric_medians)
test_data = test_data.fillna(numeric_medians)

# Split features from target by position: the first column is dropped from
# both frames and the last training column is used as the target.
# NOTE(review): presumably first column = row id, last = sale price - confirm.
train_X, train_y = train_data.values[:, 1:-1], train_data.values[:, -1]
test_X = test_data.values[:, 1:]

# One hot encoding
# NOTE(review): every column, numeric ones included, is encoded as categorical,
# so a numeric value in the test set that never occurs in training becomes an
# all-zero group (handle_unknown='ignore'). Consider encoding only the text
# columns and scaling the numeric ones.
# The encoder's default sparse output is densified with .toarray() instead of
# passing sparse=False, a keyword that newer scikit-learn releases removed.
enc = OneHotEncoder(handle_unknown='ignore')
train_X_enc = enc.fit_transform(train_X).toarray()
test_X_enc = enc.transform(test_X).toarray()

# Cast everything to float32, the dtype fed to the neural network.
train_X_enc = np.asarray(train_X_enc).astype(np.float32)
train_y_enc = np.asarray(train_y).astype(np.float32)
test_X_enc = np.asarray(test_X_enc).astype(np.float32)

In [6]:
from sklearn.model_selection import train_test_split, KFold
import scipy

N_SPLITS = 7                        # number of cross-validation folds / ensemble members
BEST_MODEL_PATH = 'best_model.h5'   # checkpoint file, overwritten by each fold


def build_model(n_features):
    """Build and compile the regression network trained on each fold.

    Two hidden blocks (Dense -> BatchNormalization -> ELU -> Dropout(0.5)) with
    512 and 128 units feed a single linear output unit. The model is compiled
    with Adam and the mean squared logarithmic error loss ('msle').

    Args:
        n_features: number of columns in the input feature matrix.

    Returns:
        A compiled, untrained Sequential model.
    """
    model = Sequential()
    model.add(layers.Dense(512, kernel_initializer='he_normal', input_shape=(n_features,)))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(128, kernel_initializer='he_normal'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1))
    # `learning_rate` replaces the deprecated `lr` keyword; the value is unchanged.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.5,
                                        beta_1=0.9, beta_2=0.999, epsilon=1e-08),
        loss='msle')
    return model


# The feature count is fixed by the encoded matrix, so compute it once.
n_features = train_X_enc.shape[1]
# One flat vector of test-set predictions per fold.
fold_predictions = []

cross_fold = KFold(n_splits=N_SPLITS, shuffle=True)
for train_index, test_index in cross_fold.split(train_X_enc):
    # NOTE: train_X / train_y are rebound here to the current fold's subset of
    # the encoded data, replacing the raw arrays created in the loading cell.
    validation_X, validation_y = train_X_enc[test_index], train_y_enc[test_index]
    train_X, train_y = train_X_enc[train_index], train_y_enc[train_index]

    # A fresh model per fold so no weights carry over between folds.
    model = build_model(n_features)

    # Callbacks
    # Stop when val_loss has not improved by at least 1e-4 for 20 epochs.
    es = EarlyStopping(monitor='val_loss', mode='min', min_delta=0.0001, verbose=1, patience=20)
    # Keep only the weights of the epoch with the lowest val_loss.
    mc = ModelCheckpoint(BEST_MODEL_PATH, monitor='val_loss', mode='min', verbose=1, save_best_only=True)
    # Separate TensorBoard run directory per fold, named by the current timestamp.
    tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
    # Multiply the learning rate by 0.2 after 5 epochs without training-loss improvement.
    rlrop = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, verbose=1)

    # Fit the model; the checkpoint callback saves the best epoch to disk.
    history = model.fit(x=train_X, y=train_y,
                        batch_size=32,
                        epochs=1000,
                        validation_data=(validation_X, validation_y),
                        shuffle=True,
                        callbacks=[tensorboard, es, mc, rlrop]
                       )

    # Reload the best checkpoint with tensorflow.keras (the `keras` alias), the
    # same API that built and saved the model, rather than standalone keras.
    saved_model = keras.models.load_model(BEST_MODEL_PATH)

    # predict() returns one column; flatten it to a vector for stacking.
    predict = saved_model.predict(test_X_enc)
    fold_predictions.append(np.ravel(predict))

# Ensemble: stack the folds as columns (n_test_rows, n_folds) and average across
# folds, giving one prediction per test row. The previous np.insert-based
# accumulation concatenated all folds into a single row, so the mean collapsed
# every prediction into one number.
pr_values = np.mean(np.stack(fold_predictions, axis=1), axis=1)

Epoch 1/1000
Epoch 00001: val_loss improved from inf to 15.94839, saving model to best_model.h5
Epoch 2/1000
Epoch 00002: val_loss improved from 15.94839 to 12.39519, saving model to best_model.h5
Epoch 3/1000
Epoch 00003: val_loss improved from 12.39519 to 9.43003, saving model to best_model.h5
Epoch 4/1000
Epoch 00004: val_loss improved from 9.43003 to 7.42318, saving model to best_model.h5
Epoch 5/1000
Epoch 00005: val_loss improved from 7.42318 to 6.00949, saving model to best_model.h5
Epoch 6/1000
Epoch 00006: val_loss improved from 6.00949 to 4.98356, saving model to best_model.h5
Epoch 7/1000
Epoch 00007: val_loss improved from 4.98356 to 4.16356, saving model to best_model.h5
Epoch 8/1000
Epoch 00008: val_loss improved from 4.16356 to 3.54082, saving model to best_model.h5
Epoch 9/1000
Epoch 00009: val_loss improved from 3.54082 to 3.04133, saving model to best_model.h5
Epoch 10/1000
Epoch 00010: val_loss improved from 3.04133 to 2.63157, saving model to best_model.h5
Epoch 11/

Epoch 33/1000
Epoch 00033: val_loss improved from 0.25189 to 0.23184, saving model to best_model.h5
Epoch 34/1000
Epoch 00034: val_loss improved from 0.23184 to 0.21474, saving model to best_model.h5
Epoch 35/1000
Epoch 00035: val_loss improved from 0.21474 to 0.18757, saving model to best_model.h5
Epoch 36/1000
Epoch 00036: val_loss improved from 0.18757 to 0.17617, saving model to best_model.h5
Epoch 37/1000
Epoch 00037: val_loss improved from 0.17617 to 0.16415, saving model to best_model.h5
Epoch 38/1000
Epoch 00038: val_loss improved from 0.16415 to 0.15238, saving model to best_model.h5
Epoch 39/1000
Epoch 00039: val_loss improved from 0.15238 to 0.14140, saving model to best_model.h5
Epoch 40/1000
Epoch 00040: val_loss improved from 0.14140 to 0.14101, saving model to best_model.h5
Epoch 41/1000
Epoch 00041: val_loss improved from 0.14101 to 0.12750, saving model to best_model.h5
Epoch 42/1000
Epoch 00042: val_loss improved from 0.12750 to 0.11336, saving model to best_model.h5


Epoch 66/1000
Epoch 00066: val_loss improved from 0.04948 to 0.04896, saving model to best_model.h5
Epoch 67/1000
Epoch 00067: val_loss improved from 0.04896 to 0.04882, saving model to best_model.h5
Epoch 68/1000
Epoch 00068: val_loss improved from 0.04882 to 0.04878, saving model to best_model.h5
Epoch 69/1000
Epoch 00069: val_loss improved from 0.04878 to 0.04814, saving model to best_model.h5


KeyboardInterrupt: 