In [None]:
import os
import pickle
import statistics
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam, SGD, RMSprop
from keras import regularizers
from keras.wrappers.scikit_learn import KerasRegressor

import tensorflow as tf
from tensorflow.compat.v1.keras.backend import set_session

warnings.filterwarnings("ignore")

## Housing Prices

In [None]:
# Path of the training CSV. It can be overridden through the HOUSING_TRAIN_CSV
# environment variable so the notebook is not tied to one user's machine; the
# default is the location that was previously hard-coded.
TRAIN_CSV = os.environ.get('HOUSING_TRAIN_CSV', r'C://Users/Marco/Downloads/train.csv')
df = pd.read_csv(TRAIN_CSV)

In [None]:
# Text label -> integer code for each categorical column that is recoded.
# Labels that are not listed (and missing values) are left untouched.
ORDINAL_CODES = {
    'ExterQual': {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2},
    'BsmtFinType1': {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3, 'LwQ': 2, 'Unf': 1},
    'BsmtQual': {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
    'Functional': {'Typ': 7, 'Min1': 6, 'Min2': 5, 'Mod': 4, 'Maj1': 3, 'Maj2': 2, 'Sev': 1},
    'HeatingQC': {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
    'FireplaceQu': {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
}

# Same masked .loc assignments as before, driven by the table above instead of
# one hand-written line per label.
for column, codes in ORDINAL_CODES.items():
    for label, code in codes.items():
        df.loc[df[column] == label, column] = code

df1 = df.select_dtypes(exclude=['object'])

# The recoded columns are one-hot encoded (first level dropped), so the model
# sees indicator columns named "<column>_<code>" rather than the codes themselves.
DUMMY_COLUMNS = ['ExterQual', 'BsmtQual', 'HeatingQC', 'BsmtFinType1', 'Functional', 'FireplaceQu']
df_dummies = pd.concat(
    [pd.get_dummies(df[column], prefix=column, drop_first=True) for column in DUMMY_COLUMNS],
    axis=1,
)

# Numeric predictors plus the target.
METRIC_COLUMNS = 'LotFrontage LotArea YearBuilt GrLivArea FullBath Fireplaces GarageArea SalePrice'.split()
df_metric = df1[METRIC_COLUMNS]

df_final = pd.concat([df_dummies, df_metric], axis=1)
# Keep rows with GrLivArea below 4000 and drop rows with any missing value.
# Chaining .dropna() avoids an in-place drop on a filtered slice.
df_final = df_final[df_final['GrLivArea'] < 4000].dropna()

In [None]:
X = df_final.drop(['SalePrice'], axis=1)
y_level = df_final['SalePrice']
y_log = np.log(df_final['SalePrice'])

# A single call splits the features and both targets with the same shuffle
# (same random_state as before), replacing two identical back-to-back splits.
x_train, x_test, y_level_train, y_level_test, y_log_train, y_log_test = train_test_split(
    X, y_level, y_log, test_size=0.3, random_state=77
)

# Work on explicit copies so the scaled columns are written into independent
# frames rather than into objects derived from X.
x_train = x_train.copy()
x_test = x_test.copy()

# Only the numeric predictors are standardized; the dummy columns are left as is.
SCALED_COLUMNS = 'LotFrontage LotArea YearBuilt GrLivArea FullBath Fireplaces GarageArea'.split()

# Fit on the training split only, then apply the same transform to both splits.
SC = StandardScaler().fit(x_train[SCALED_COLUMNS])
x_train[SCALED_COLUMNS] = SC.transform(x_train[SCALED_COLUMNS])
x_test[SCALED_COLUMNS] = SC.transform(x_test[SCALED_COLUMNS])

In [None]:
# Display the training features after the numeric columns were standardized.
x_train

In [None]:
# Report how many GPUs TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# TF1-style session: log device placement and let GPU memory grow on demand,
# then register the session with the Keras backend.
config = tf.compat.v1.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)

set_session(sess)

In [None]:
def run_model(model, batch_size=256, epochs=500, verbose=1):
    """Fit ``model`` on the notebook-level training split and return the fit result.

    Reads ``x_train`` and ``y_level_train`` from the notebook namespace.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        batch_size: Mini-batch size passed to ``fit`` (default 256).
        epochs: Number of epochs passed to ``fit`` (default 500).
        verbose: Verbosity flag passed to ``fit`` (default 1).

    Returns:
        Whatever ``model.fit`` returns. This was previously bound to a local
        and discarded; it is now returned so callers can inspect it.
    """
    return model.fit(x_train, y_level_train, batch_size=batch_size, verbose=verbose, epochs=epochs)

def zero_layer(n1, dropout, opt, learning_rate, regularizer1, reg_rate1, *, input_dim=32):
    """Build and compile a regression network with one hidden Dense layer.

    Despite the name, the model has a single ReLU hidden layer, followed by
    dropout and a one-unit linear output. It is compiled with mean absolute
    error as the loss and mean squared error as a tracked metric.

    Args:
        n1: Number of units in the hidden layer.
        dropout: Dropout rate applied after the hidden layer.
        opt: Optimizer factory (e.g. ``RMSprop``), called with ``learning_rate``.
        learning_rate: Learning rate handed to ``opt``.
        regularizer1: Regularizer factory (e.g. ``regularizers.l1``), called
            with ``reg_rate1`` to build the kernel regularizer.
        reg_rate1: Regularization factor for the hidden layer.
        input_dim: Number of input features (keyword-only, default 32, the
            value that used to be hard-coded).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(n1, input_dim=input_dim, kernel_initializer='normal', activation='relu', use_bias=True,
                    kernel_regularizer=regularizer1(reg_rate1)))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='linear', use_bias=True))
    # `learning_rate` is the current optimizer keyword; `lr` is a deprecated alias.
    model.compile(optimizer=opt(learning_rate=learning_rate), loss='mean_absolute_error',
                  metrics=[tf.keras.metrics.MeanSquaredError()])
    return model

def one_layer(n1, n2, dropout1, dropout2, opt, learning_rate, regularizer1, regularizer2, reg_rate1, reg_rate2,
              *, input_dim=32):
    """Build and compile a regression network with two hidden Dense layers.

    Each hidden layer uses ReLU and is followed by its own dropout layer; the
    output is a single linear unit. The model is compiled with mean absolute
    error as the loss and mean squared error as a tracked metric.

    Args:
        n1, n2: Units in the first and second hidden layer.
        dropout1, dropout2: Dropout rates applied after each hidden layer.
        opt: Optimizer factory (e.g. ``RMSprop``), called with ``learning_rate``.
        learning_rate: Learning rate handed to ``opt``.
        regularizer1, regularizer2: Regularizer factories, each called with the
            matching rate to build that layer's kernel regularizer.
        reg_rate1, reg_rate2: Regularization factors for the two hidden layers.
        input_dim: Number of input features (keyword-only, default 32, the
            value that used to be hard-coded).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(n1, input_dim=input_dim, kernel_initializer='normal', activation='relu', use_bias=True,
                    kernel_regularizer=regularizer1(reg_rate1)))
    model.add(Dropout(dropout1))
    model.add(Dense(n2, kernel_initializer='normal', activation='relu', use_bias=True,
                    kernel_regularizer=regularizer2(reg_rate2)))
    model.add(Dropout(dropout2))
    model.add(Dense(1, activation='linear', use_bias=True))
    # `learning_rate` is the current optimizer keyword; `lr` is a deprecated alias.
    model.compile(optimizer=opt(learning_rate=learning_rate), loss='mean_absolute_error',
                  metrics=[tf.keras.metrics.MeanSquaredError()])
    return model

def two_layer(n1, n2, n3, dropout1, dropout2, dropout3, opt, learning_rate,
              regularizer1, regularizer2, regularizer3,
              reg_rate1=None, reg_rate2=None, reg_rate3=None, *, input_dim=32):
    """Build and compile a regression network with three hidden Dense layers.

    Each hidden layer uses ReLU and is followed by its own dropout layer; the
    output is a single linear unit. The model is compiled with mean absolute
    error as the loss and mean squared error as a tracked metric.

    The regularizer arguments can be given in two ways:

    * as ready-made regularizer objects, with the matching ``reg_rate*`` left
      at ``None`` (the original calling convention), or
    * as regularizer factories (e.g. ``regularizers.l1``) together with a
      rate, mirroring ``zero_layer`` / ``one_layer``.

    Args:
        n1, n2, n3: Units in the three hidden layers.
        dropout1, dropout2, dropout3: Dropout rates after each hidden layer.
        opt: Optimizer factory (e.g. ``RMSprop``), called with ``learning_rate``.
        learning_rate: Learning rate handed to ``opt``.
        regularizer1, regularizer2, regularizer3: Regularizer object or factory
            for each hidden layer (see above).
        reg_rate1, reg_rate2, reg_rate3: Optional regularization factors; when
            given, the matching regularizer argument is called with the rate.
        input_dim: Number of input features (keyword-only, default 32, the
            value that used to be hard-coded).

    Returns:
        The compiled ``Sequential`` model.
    """
    def _resolve(regularizer, rate):
        # No rate supplied: the argument already is the regularizer to use.
        return regularizer if rate is None else regularizer(rate)

    model = Sequential()
    model.add(Dense(n1, input_dim=input_dim, kernel_initializer='normal', activation='relu', use_bias=True,
                    kernel_regularizer=_resolve(regularizer1, reg_rate1)))
    model.add(Dropout(dropout1))
    model.add(Dense(n2, kernel_initializer='normal', activation='relu', use_bias=True,
                    kernel_regularizer=_resolve(regularizer2, reg_rate2)))
    model.add(Dropout(dropout2))
    model.add(Dense(n3, kernel_initializer='normal', activation='relu', use_bias=True,
                    kernel_regularizer=_resolve(regularizer3, reg_rate3)))
    model.add(Dropout(dropout3))
    model.add(Dense(1, activation='linear', use_bias=True))
    # `learning_rate` is the current optimizer keyword; `lr` is a deprecated alias.
    model.compile(optimizer=opt(learning_rate=learning_rate), loss='mean_absolute_error',
                  metrics=[tf.keras.metrics.MeanSquaredError()])
    return model

#### 1. Wie viele Layer / Neuronen pro Layer?

In [None]:
# Hidden-layer widths to compare for the single-hidden-layer network.
nlist_0 = [82240 + 16448*i for i in range(5)]

trials_0 = 5

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
records_0 = []

for trial in range(trials_0):
    for n in nlist_0:
        model = zero_layer(n, 0, RMSprop, 0.006, regularizers.l1, 0.00)
        run_model(model)
        pred = model.predict(x_test)
        records_0.append({
            'model': str(n) + '_neurons',
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

results_0 = pd.DataFrame(records_0, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open("results_0.p", "wb") as fh:
    pickle.dump(results_0, fh)

In [None]:
results0_vals = results_0['model'].unique().tolist()

# Mean and sample standard deviation per configuration across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in results0_vals:
    scores = results_0.loc[results_0['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

In [None]:
# (first hidden layer, second hidden layer) widths to compare.
nlist_1 = [(2048, 1024), (1024, 512), (512, 256), (256, 128), (128, 64), (64, 32)]

trials_1 = 5

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
records_1 = []

for trial in range(trials_1):
    for (n1, n2) in nlist_1:
        model = one_layer(n1, n2, 0, 0, RMSprop, 0.006, regularizers.l1, regularizers.l1, 0.00, 0.00)
        run_model(model)
        pred = model.predict(x_test)
        records_1.append({
            'model': str(n1) + 'x' + str(n2) + '_neurons',
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

results_1 = pd.DataFrame(records_1, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('results_1', 'wb') as fh:
    pickle.dump(results_1, fh)

In [None]:
results1_vals = results_1['model'].unique().tolist()

# Mean and sample standard deviation per configuration across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in results1_vals:
    scores = results_1.loc[results_1['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

In [None]:
# (layer 1, layer 2, layer 3) widths to compare.
nlist_2 = [(1024, 512, 256), (512, 256, 128), (256, 128, 64), (128, 64, 32), (64, 32, 16)]

trials_2 = 5

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
records_2 = []

for trial in range(trials_2):
    for (n1, n2, n3) in nlist_2:
        # two_layer takes three regularizer arguments and no separate rates, so
        # pass ready-made l1(0.00) regularizers. The previous call supplied 14
        # positional arguments to an 11-parameter function and raised TypeError.
        model = two_layer(n1, n2, n3, 0, 0, 0, RMSprop, 0.006,
                          regularizers.l1(0.00), regularizers.l1(0.00), regularizers.l1(0.00))
        run_model(model)
        pred = model.predict(x_test)
        records_2.append({
            'model': str(n1) + 'x' + str(n2) + 'x' + str(n3) + '_neurons',
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

results_2 = pd.DataFrame(records_2, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('results_2.p', 'wb') as fh:
    pickle.dump(results_2, fh)

In [None]:
results2_vals = results_2['model'].unique().tolist()

# Mean and sample standard deviation per configuration across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in results2_vals:
    scores = results_2.loc[results_2['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

#### 2. Dropout

In [None]:
# Dropout rates to compare.
dropout_vals = [0.0, 0.1, 0.2, 0.3]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
dropout_records0 = []

for trial in range(5):
    for rate in dropout_vals:
        model = zero_layer(98688, rate, RMSprop, 0.006, regularizers.l1, 0.00)
        run_model(model)
        pred = model.predict(x_test)
        dropout_records0.append({
            'model': 'dropout_' + str(rate),
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

dropout_results0 = pd.DataFrame(dropout_records0, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('dropout_results0', 'wb') as fh:
    pickle.dump(dropout_results0, fh)

In [None]:
dropout0_vals = dropout_results0['model'].unique().tolist()

# Mean and sample standard deviation per dropout setting across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in dropout0_vals:
    scores = dropout_results0.loc[dropout_results0['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

In [None]:
# Every ordered (first layer, second layer) combination of the dropout rates.
# This yields the same set of pairs as the earlier nested loop with membership
# checks, now in plain row-major order.
dropout_pairs = [[first, second] for first in dropout_vals for second in dropout_vals]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
dropout_records1 = []

for trial in range(5):
    for first, second in dropout_pairs:
        model = one_layer(1024, 512, first, second, RMSprop, 0.006, regularizers.l1, regularizers.l1, 0.00, 0.00)
        run_model(model)
        pred = model.predict(x_test)
        dropout_records1.append({
            'model': 'dropout_' + str(first) + '_' + str(second),
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

dropout_results1 = pd.DataFrame(dropout_records1, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
# NOTE(review): the file name is spelled 'dropoute_results1'; kept unchanged in
# case something else loads it by that name.
with open('dropoute_results1', 'wb') as fh:
    pickle.dump(dropout_results1, fh)

In [None]:
dropout1_vals = dropout_results1['model'].unique().tolist()

# Mean and sample standard deviation per dropout pair across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in dropout1_vals:
    scores = dropout_results1.loc[dropout_results1['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

##### 3. Optimizer

In [None]:
optimizers = [Adam, RMSprop]
learning_rates = [0.004, 0.005, 0.006, 0.007]

# Every (optimizer class, learning rate) combination.
opt_list = [[i, j] for i in optimizers for j in learning_rates]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
opt_records0 = []

for trial in range(5):
    for optimizer_cls, rate in opt_list:
        model = zero_layer(98688, 0.1, optimizer_cls, rate, regularizers.l1, 0.00)
        run_model(model)
        pred = model.predict(x_test)
        opt_records0.append({
            # __name__ gives the bare class name without parsing the class repr.
            'model': 'zero_layer_' + optimizer_cls.__name__ + f'_lr_{rate}',
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

opt_results0 = pd.DataFrame(opt_records0, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('opt_results0', 'wb') as fh:
    pickle.dump(opt_results0, fh)

In [None]:
opt0_vals = opt_results0['model'].unique().tolist()

# Mean and sample standard deviation per optimizer setting across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in opt0_vals:
    scores = opt_results0.loc[opt_results0['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

In [None]:
# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
opt_records1 = []

# Reuses opt_list (optimizer class / learning rate pairs) from the previous sweep.
for trial in range(5):
    for optimizer_cls, rate in opt_list:
        model = one_layer(1024, 512, 0.1, 0.3, optimizer_cls, rate, regularizers.l1, regularizers.l1, 0.00, 0.00)
        run_model(model)
        pred = model.predict(x_test)
        opt_records1.append({
            # __name__ gives the bare class name without parsing the class repr.
            'model': 'one_layer_' + optimizer_cls.__name__ + f'_lr_{rate}',
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

opt_results1 = pd.DataFrame(opt_records1, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('opt_result1.p', 'wb') as fh:
    pickle.dump(opt_results1, fh)

In [None]:
opt1_vals = opt_results1['model'].unique().tolist()

# Mean and sample standard deviation per optimizer setting across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in opt1_vals:
    scores = opt_results1.loc[opt_results1['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, rmse: {mean}, std: {std}')

##### 4. Regularisierung

In [None]:
regul = [regularizers.l1, regularizers.l2]
reg_vals = [0.007, 0.008, 0.009, 0.01]

# Every (regularizer factory, rate) combination.
reg_list = [[r, j] for r in regul for j in reg_vals]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
reg_records0 = []

for trial in range(5):
    for regularizer, rate in reg_list:
        # zero_layer takes exactly one regularizer and one rate. The previous
        # call carried two leftover trailing arguments (regularizers.l1, 0.00)
        # and therefore raised TypeError.
        model = zero_layer(98688, 0.1, RMSprop, 0.007, regularizer, rate)
        run_model(model)
        pred = model.predict(x_test)
        reg_records0.append({
            # __name__ gives a stable label whether the regularizer is a
            # function or a class, without parsing its repr.
            'model': 'zero_layer_' + regularizer.__name__ + f'_({rate})',
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

reg_results0 = pd.DataFrame(reg_records0, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('reg_results0.p', 'wb') as fh:
    pickle.dump(reg_results0, fh)

In [None]:
reg_vals0 = reg_results0['model'].unique().tolist()

# Mean and sample standard deviation per regularizer setting across trials.
# The 'mse' column is filled with sqrt(MSE), so the printed mean is a mean RMSE.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in reg_vals0:
    scores = reg_results0.loc[reg_results0['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, mean: {mean}, std: {std}')

In [None]:
# Every (regularizer 1, regularizer 2, rate 1, rate 2) combination.
reg_list1 = [[r, s, j, i] for r in regul for s in regul for j in reg_vals for i in reg_vals]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
reg_records1 = []

for trial in range(5):
    for reg_first, reg_second, rate_first, rate_second in reg_list1:
        # one_layer takes two regularizers followed by two rates. The previous
        # call carried four leftover trailing arguments and raised TypeError.
        model = one_layer(1024, 512, 0.1, 0.3, RMSprop, 0.006,
                          reg_first, reg_second, rate_first, rate_second)
        run_model(model)
        pred = model.predict(x_test)
        reg_records1.append({
            # __name__ gives a stable label whether the regularizer is a
            # function or a class, without parsing its repr.
            'model': ('one_layer_' + reg_first.__name__ + f'({rate_first})_'
                      + reg_second.__name__ + f'({rate_second})'),
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

reg_results1 = pd.DataFrame(reg_records1, columns=['model', 'mse'])

# Context manager so the pickle file is flushed and closed deterministically.
with open('reg_results1.p', 'wb') as fh:
    pickle.dump(reg_results1, fh)

In [None]:
reg_vals1 = reg_results1['model'].unique().tolist()

# Mean and sample standard deviation per regularizer setting across trials.
# The 'mse' column is filled with sqrt(MSE), so the printed mean is a mean RMSE.
# The loop variable is not called `model`, so the Keras model name is not clobbered.
for label in reg_vals1:
    scores = reg_results1.loc[reg_results1['model'] == label, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'model: {label}, mean: {mean}, std: {std}')

##### 5. Epochs / Batchsize

In [None]:
def run_model2(model, batch_size, epochs):
    """Fit ``model`` silently on the notebook-level training split.

    Reads ``x_train`` and ``y_level_train`` from the notebook namespace and
    calls ``fit`` with ``verbose=0``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        batch_size: Mini-batch size passed to ``fit``.
        epochs: Number of epochs passed to ``fit``.

    Returns:
        Whatever ``model.fit`` returns. This was previously bound to a local
        and discarded; it is now returned so callers can inspect it.
    """
    return model.fit(x_train, y_level_train, batch_size=batch_size, verbose=0, epochs=epochs)

# Epoch counts to compare; every entry trains a fresh model from scratch.
ep = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
epoch_records1 = []

for j in range(5):
    print(f'full_iteration: {j+1} of 5')
    for n_epochs in ep:
        model = one_layer(1024, 512, 0.1, 0.3, RMSprop, 0.006, regularizers.l1, regularizers.l1, 0.01, 0.009)
        run_model2(model, 256, n_epochs)
        pred = model.predict(x_test)
        epoch_records1.append({
            'epochs': int(n_epochs),
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

epoch_results1 = pd.DataFrame(epoch_records1, columns=['epochs', 'mse'])

In [None]:
epoch_vals1 = epoch_results1['epochs'].unique().tolist()

# Mean and sample standard deviation per epoch count across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
for val in epoch_vals1:
    scores = epoch_results1.loc[epoch_results1['epochs'] == val, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'epochs: {val}, rmse: {mean}, std: {std} ')

In [None]:
# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
epoch_records0 = []

# Reuses the epoch grid `ep` from the previous sweep.
for j in range(5):
    print(f'full_iteration: {j+1} of 5')
    for n_epochs in ep:
        model = zero_layer(98688, 0.1, RMSprop, 0.007, regularizers.l1, 0)
        run_model2(model, 256, n_epochs)
        pred = model.predict(x_test)
        epoch_records0.append({
            'epochs': int(n_epochs),
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

epoch_results0 = pd.DataFrame(epoch_records0, columns=['epochs', 'mse'])

In [None]:
epoch_vals0 = epoch_results0['epochs'].unique().tolist()

# Mean and sample standard deviation per epoch count across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
for val in epoch_vals0:
    scores = epoch_results0.loc[epoch_results0['epochs'] == val, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'epochs: {val}, rmse: {mean}, std: {std} ')

In [None]:
# Mini-batch sizes to compare.
batch_sizes = [128, 256, 512, 1024]

# One record per fit; the frame is built once at the end. DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
batch_records = []

for j in range(15):
    print(f'full_iteration: {j+1} of 15')
    for size in batch_sizes:
        model = one_layer(1024, 512, 0.1, 0.3, RMSprop, 0.006, regularizers.l1, regularizers.l1, 0.01, 0.009)
        run_model2(model, size, 400)
        pred = model.predict(x_test)
        batch_records.append({
            'batch_size': int(size),
            # NOTE: the column is called 'mse' but stores sqrt(MSE), i.e. the RMSE.
            'mse': np.sqrt(mean_squared_error(y_level_test, pred)),
        })

batch_results = pd.DataFrame(batch_records, columns=['batch_size', 'mse'])

In [None]:
batch_vals = batch_results['batch_size'].unique().tolist()

# Mean and sample standard deviation per batch size across trials.
# The 'mse' column is filled with sqrt(MSE), so it is reported as RMSE here.
for val in batch_vals:
    scores = batch_results.loc[batch_results['batch_size'] == val, 'mse']
    mean = scores.mean()
    # statistics.stdev needs at least two samples; report NaN for a single trial.
    std = statistics.stdev(scores) if len(scores) > 1 else float('nan')
    print(f'batch_size: {val}, rmse: {mean}, std: {std} ')

##### 6. Ergebnis

In [None]:
# Final configuration: two hidden layers (1024 and 512 units), RMSprop, l1
# regularization on both hidden layers, trained for 400 epochs at batch size 256.
model = one_layer(
    n1=1024, n2=512,
    dropout1=0.1, dropout2=0.3,
    opt=RMSprop, learning_rate=0.006,
    regularizer1=regularizers.l1, regularizer2=regularizers.l1,
    reg_rate1=0.01, reg_rate2=0.009,
)
run_model2(model, batch_size=256, epochs=400)

# Score on the held-out split; `mse` holds the square root of the MSE.
pred = model.predict(x_test)
mse = np.sqrt(mean_squared_error(y_level_test, pred))

In [None]:
# `mse` is the square root of the test-set mean squared error computed in the
# previous cell, i.e. an RMSE despite its name.
print(mse)