In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from sklearn.pipeline import Pipeline 
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,losses
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation,Dense,Dropout,BatchNormalization,Input,Flatten
from tensorflow.keras.metrics import Recall

In [27]:
# Training data: features plus the target column.
df = pd.read_csv('Input//train.csv')
# Held-out test data. This previously re-read train.csv, so `test` was just a
# second copy of the training frame.
test = pd.read_csv('Input//test.csv')

In [29]:
# Categorical columns whose missing values are replaced by the sentinel -1.
cat_columns = [
    'Alley', 'MasVnrType', 'BsmtQual', 'BsmtCond', 'BsmtExposure',
    'BsmtFinType1', 'BsmtFinType2', 'Electrical', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
    'PoolQC', 'Fence', 'MiscFeature',
]
# Numeric columns that receive an explicit fill value below.
num_columns = ['LotFrontage', 'MasVnrArea', 'GarageYrBlt']

df[cat_columns] = df[cat_columns].fillna(-1)

# Mean imputation for the two continuous measurements.
for mean_filled in ('LotFrontage', 'MasVnrArea'):
    df[mean_filled] = df[mean_filled].fillna(df[mean_filled].mean())
# Fixed fill value for the garage build year.
# NOTE(review): 1979.0 looks like a precomputed average - confirm its origin.
df['GarageYrBlt'] = df['GarageYrBlt'].fillna(1979.0)

# Column names grouped by dtype, in frame order.
num_var = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
cat_var = df.select_dtypes(include=['object']).columns.tolist()

# Integer-encode every object column. The single encoder is re-fit per column,
# so after the loop it only remembers the classes of the last column.
le = LabelEncoder()
for col in cat_var:
    as_text = df[col].astype(str)
    df[col] = le.fit_transform(as_text)

In [30]:
#fig1, ax = plt.subplots(figsize= (35,35))
#sns.heatmap(df.corr(), annot=True)
#fig1.savefig("correlation_heatmap_before.png")
#df.corr().style.background_gradient(cmap='coolwarm')

In [31]:
# First batch of columns removed from the training frame.
column_corr = [
    'Id', 'MSSubClass', 'LandContour', 'Street', 'Utilities', 'LotConfig',
    'LandSlope', 'Condition2', 'OverallCond', 'MasVnrType', 'BsmtFinType1',
    'BsmtFinSF2', 'LowQualFinSF', 'BsmtHalfBath', '3SsnPorch', 'PoolQC',
    'MiscFeature', 'MiscVal', 'MoSold', 'YrSold', 'SaleType',
]
# Drop them all in one call; raises KeyError if any column is already gone.
df.drop(columns=column_corr, inplace=True)
    
#fig, ax = plt.subplots(figsize= (35,35))
#sns.heatmap(X_train.corr(), annot=True)
#fig.savefig("correlation_heatmap_after1.png")
#X_train.corr().style.background_gradient(cmap='coolwarm')

In [32]:
# Second batch of columns removed from the training frame.
corr_column = [
    '1stFlrSF',
    'TotRmsAbvGrd',
    'FireplaceQu',
    'GarageArea',
    'GarageQual',
]
# Drop them all in one call; raises KeyError if any column is already gone.
df.drop(columns=corr_column, inplace=True)

#fig, ax = plt.subplots(figsize= (35,35))
#sns.heatmap(df.corr(), annot=True)
#fig.savefig("correlation_heatmap_after2.png")
#df.corr().style.background_gradient(cmap='coolwarm')

In [33]:
# Third batch of columns removed from the training frame.
colu = [
    'Alley', 'Condition1', 'BldgType', 'RoofMatl', 'Exterior1st',
    'Exterior2nd', 'ExterCond', 'Heating', 'Functional',
    'EnclosedPorch', 'ScreenPorch', 'PoolArea',
]
# Drop them all in one call; raises KeyError if any column is already gone.
df.drop(columns=colu, inplace=True)

#fig, ax = plt.subplots(figsize= (35,35))
#sns.heatmap(df.corr(), annot=True)
#fig.savefig("correlation_heatmap_after3.png")
#df.corr().style.background_gradient(cmap='coolwarm')

In [34]:
# Feature / target views taken BEFORE the shuffle below (first 42 columns are
# the features, column 42 is the target). They are not re-ordered afterwards.
X = df.iloc[:,0:42]
y = df.iloc[:,42]

# Fixed seed so the shuffle, and therefore the fold membership, is identical on
# every Restart & Run All. Previously the shuffle was unseeded.
KFOLD_SEED = 42

df.loc[:, 'kfold'] = -1
# Shuffle the rows, then reset to a 0..n-1 index so the positional indices
# returned by KFold.split can be used as labels in df.loc below.
df = df.sample(frac=1, random_state=KFOLD_SEED).reset_index(drop=True)
kf = KFold(n_splits=10)

# Tag every row with the index of the fold in which it is a validation row.
for fold, (trn_, val_) in enumerate(kf.split(X=df)):
    df.loc[val_, 'kfold'] = fold

In [35]:
# Fully connected regression network: 42 input features -> one output value.
# Every hidden Dense layer uses leaky ReLU and is followed by batch
# normalisation; all but the last hidden layer also apply 50% dropout.
model = Sequential([
    # The shape is passed as an explicit tuple: a bare int (`Input(42)`) is not
    # a valid shape in every Keras version.
    Input(shape=(42,)),
    layers.BatchNormalization(),
    Dense(42, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    Dense(64, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    Dense(32, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    Dense(16, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    Dense(8, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    Dense(4, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    Dense(2, activation=tf.nn.leaky_relu),
    layers.BatchNormalization(),
    # NOTE(review): a ReLU output clamps predictions at >= 0 and passes no
    # gradient while the pre-activation is negative - confirm this is intended
    # rather than a linear output.
    Dense(1, activation=tf.nn.relu),
])
model.compile(
    # NOTE(review): learning_rate=0.8 and beta_1=0.3 are far from the Adam
    # defaults (0.001 and 0.9); left unchanged here - confirm they are deliberate.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.8, beta_1=0.3),
    loss='mean_squared_error',
    # `metrics` is documented as a list; the default metric name
    # 'root_mean_squared_error' is what the early-stopping callback reads
    # (with the 'val_' prefix).
    metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [36]:
def function(arg):
    """Return ``arg`` converted to a ``tf.float32`` tensor."""
    return tf.convert_to_tensor(arg, dtype=tf.float32)

In [37]:
class CallBack(tf.keras.callbacks.Callback):
    """Stop training as soon as the validation RMSE drops below 27000."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's validation RMSE and request a stop if it is low enough.

        ``logs`` may be None, and the metric is absent when ``fit`` is called
        without validation data; in both cases the callback does nothing
        instead of raising on a ``None < float`` comparison.
        """
        val_rmse = (logs or {}).get('val_root_mean_squared_error')
        if val_rmse is not None and val_rmse < 27000.0000:
            self.model.stop_training = True
callback = CallBack()

In [52]:
def run(fold):
    """Fit the module-level ``model`` with fold ``fold`` held out for validation.

    Parameters
    ----------
    fold : int
        Value of the ``kfold`` column that marks the validation rows; every
        other row of the module-level ``df`` is used for training.

    Returns
    -------
    Whatever ``model.fit`` returns (the training history for a Keras model).

    Notes
    -----
    The same ``model`` object is fitted on every call, so weights learned in
    one call carry over to the next.
    """
    epoch = 1000  # upper bound; `callback` may end training earlier
    n_features = 42  # columns [0, 42) are features, column 42 is the target

    # Split on the requested fold. The previous code compared against the
    # literal 0, so every call trained and validated on the same split.
    df_train = df[df.kfold != fold]
    df_valid = df[df.kfold == fold]

    X_train = df_train.iloc[:, :n_features].values
    y_train = df_train.iloc[:, n_features].values
    X_valid = df_valid.iloc[:, :n_features].values
    y_valid = df_valid.iloc[:, n_features].values

    history = model.fit(
        X_train, y_train,
        verbose=0,
        epochs=epoch,
        validation_data=(X_valid, y_valid),
        batch_size=2048,
        callbacks=[callback],
    )
    return history

In [53]:
%%time
# Call run() once for each of the 10 fold indices (0-9).
for fold in range(10):
    run(fold)

Wall time: 5.93 s


In [54]:
# Load the test set. A forward slash is used because the earlier
# backslash-separated path only resolves on Windows.
df_test = pd.read_csv('Input/test.csv')
# Keep the row identifiers for the submission file; the 'Id' column itself is
# dropped from the feature frame later.
Id = df_test['Id']

In [55]:
# Every column that was removed from the training frame, removed here from the
# test frame as well.
column_corr = [
    'Id', 'MSSubClass', 'LandContour', 'Street', 'Utilities', 'LotConfig',
    'LandSlope', 'Condition2', 'OverallCond', 'MasVnrType', 'BsmtFinType1',
    'BsmtFinSF2', 'LowQualFinSF', 'BsmtHalfBath', '3SsnPorch', 'PoolQC',
    'MiscFeature', 'MiscVal', 'MoSold', 'YrSold', 'SaleType',
    '1stFlrSF', 'TotRmsAbvGrd', 'FireplaceQu', 'GarageArea', 'GarageQual',
    'Alley', 'Condition1', 'BldgType', 'RoofMatl', 'Exterior1st',
    'Exterior2nd', 'ExterCond', 'Heating', 'Functional',
    'EnclosedPorch', 'ScreenPorch', 'PoolArea',
]
# Drop them all in one call; raises KeyError if any column is already gone.
df_test.drop(columns=column_corr, inplace=True)

In [56]:
# Column names of the test frame grouped by dtype, in frame order.
num_var = df_test.select_dtypes(include=['float64', 'int64']).columns.tolist()
cat_var = df_test.select_dtypes(include=['object']).columns.tolist()
# Missing categorical values become the sentinel -1.
df_test[cat_var] = df_test[cat_var].fillna(-1)

In [57]:
# Fill missing garage build years with 1979, the same constant used for the training frame.
df_test['GarageYrBlt']=df_test['GarageYrBlt'].fillna(1979)

In [58]:
# Numeric test columns, in frame order.
num_var = df_test.select_dtypes(include=['float64', 'int64']).columns.tolist()
# Replace remaining missing numeric values with that column's own mean,
# computed on the test frame.
for num_ in num_var:
    numeric_column = df_test[num_]
    df_test[num_] = numeric_column.fillna(numeric_column.mean())

In [59]:
# Object-dtype (categorical) test columns, in frame order.
cat_var = df_test.select_dtypes(include=['object']).columns.tolist()

In [60]:
# Integer-encode each categorical test column after casting its values to str.
# NOTE(review): the encoder is re-fit on the test data for every column,
# independently of the fit on the training frame, so the same category can map
# to a different integer here than it did during training - confirm this is
# acceptable or reuse encoders fitted on the training data.
label_encoder = LabelEncoder()
for col in cat_var:
    df_test[col] = label_encoder.fit_transform(df_test[col].astype(str))

In [61]:
# Predict with the trained model on the preprocessed test frame.
# NOTE(review): assumes df_test now holds exactly the 42 feature columns, in the
# same order as the training features - confirm.
test_preds = model.predict(df_test)

In [62]:
# Convert the prediction array into nested Python lists.
test_preds=np.array(test_preds).tolist()
output=[]  # placeholder; `output` is reassigned to a DataFrame in a later cell
import itertools

def oneDArray(x):
    """Flatten one level of nesting: a sequence of iterables becomes one flat list."""
    return [item for row in x for item in row]
# Flatten the nested prediction lists into a single flat list.
test_preds=oneDArray(test_preds)

In [63]:
# Submission table: one row per test Id with its predicted sale price.
output = pd.DataFrame({'Id': Id, 'SalePrice': test_preds})

In [64]:
# Write the submission file. The directory is created if missing, and a forward
# slash is used because the earlier backslash-separated path only resolves to
# a file inside 'Output' on Windows.
os.makedirs('Output', exist_ok=True)
output.to_csv('Output/submission.csv', index=False)