# House Price Prediction

### Import Libraries

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

### Load Train Dataset

In [None]:
# Read the training CSV from a fixed local Windows path into a DataFrame.
# NOTE(review): the absolute path only resolves on the original author's machine.
dataset = pd.read_csv(r"E:\DEEP LEARNING\Projects\House Price Predictions\train.csv")
# Preview the first rows to sanity-check the load.
dataset.head()

### Load Test Dataset

In [None]:
# Read the test CSV (the rows to predict for the submission) from a fixed local path.
# NOTE(review): the absolute path only resolves on the original author's machine.
test_data = pd.read_csv(r"E:\DEEP LEARNING\Projects\House Price Predictions\test.csv")
# Preview the first rows to sanity-check the load.
test_data.head()

In [None]:
# Show the (rows, columns) shape of the raw test set.
test_data.shape

### Assign Predictors and Targets

In [None]:
# Predictors: every training column except the target and the row identifier.
X = dataset.drop(columns=['SalePrice', 'Id'])
# Target: the sale price the model is trained to predict.
y = dataset['SalePrice']

In [None]:
# Preview the first values of the target column.
y.head()

### Handling Missing Data on Train Dataset

In [None]:
# Impute missing values in the training predictors.
#
# Each column is filled and then assigned back explicitly. The previous form,
# `X[col].fillna(..., inplace=True)`, modifies a temporary Series: pandas
# deprecates it and, with Copy-on-Write enabled, it leaves X unchanged.

# Numeric column: replace gaps with the column mean.
X['LotFrontage'] = X['LotFrontage'].fillna(X['LotFrontage'].mean())

# Columns whose gaps are replaced by the explicit label 'Not Available'.
# NOTE(review): 'GarageYrBlt' looks numeric, so filling it with a string makes
# the column mixed-type; it is kept here only to match the existing behaviour
# (the column is dropped in a later cell).
for column in ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
               'BsmtFinType2', 'GarageType', 'GarageYrBlt', 'GarageFinish',
               'GarageQual', 'GarageCond', 'PoolQC']:
    X[column] = X[column].fillna('Not Available')

In [None]:
# Count the missing values per column, restricted to the columns of X that
# still contain at least one null after the imputation above.
X[X.columns[X.isnull().any()]].isnull().sum()

#### Drop the following columns: 'MSSubClass', 'LotFrontage', 'LotArea', 'Alley', 'LotShape', 'LandContour', 'LotConfig', 'Condition1', 'Condition2', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'Heating', 'HeatingQC', 'CentralAir', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'Fireplaces', 'FireplaceQu', 'GarageYrBlt', 'GarageFinish', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'Fence', 'MiscVal', 'SaleType', 'SaleCondition', 'MasVnrType', 'MasVnrArea', 'MiscFeature', 'Electrical'

In [None]:
# Columns excluded from the model; everything else in X is kept as a feature.
columns_to_drop = [
    'MSSubClass', 'LotFrontage', 'LotArea', 'Alley', 'LotShape', 'LandContour',
    'LotConfig', 'Condition1', 'Condition2', 'RoofStyle', 'RoofMatl',
    'Exterior1st', 'Exterior2nd', 'Heating', 'HeatingQC', 'CentralAir',
    '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'BsmtFullBath', 'BsmtHalfBath',
    'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'Fireplaces',
    'FireplaceQu', 'GarageYrBlt', 'GarageFinish', 'GarageArea', 'WoodDeckSF',
    'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
    'Fence', 'MiscVal', 'SaleType', 'SaleCondition', 'MasVnrType',
    'MasVnrArea', 'MiscFeature', 'Electrical',
]
# Reduced training predictors used for the rest of the notebook.
X_cleaned = X.drop(columns=columns_to_drop)

In [None]:
# List the columns that remain in the reduced training predictors.
X_cleaned.columns

In [None]:
# Visual check for missing data: heatmap of the boolean null mask of the
# reduced training predictors (rows x columns).
sns.heatmap(X_cleaned.isnull(), cmap='viridis')

In [None]:
# Show the (rows, columns) shape of the reduced training predictors.
X_cleaned.shape

### Handling Missing Data on Test Dataset

In [None]:
# Columns removed from the test set: the row identifier plus the same
# feature columns that are excluded from the training predictors.
test_columns_to_drop = [
    'Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'Alley', 'LotShape',
    'LandContour', 'LotConfig', 'Condition1', 'Condition2', 'RoofStyle',
    'RoofMatl', 'Exterior1st', 'Exterior2nd', 'Heating', 'HeatingQC',
    'CentralAir', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'BsmtFullBath',
    'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
    'Fireplaces', 'FireplaceQu', 'GarageYrBlt', 'GarageFinish', 'GarageArea',
    'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch',
    'PoolArea', 'Fence', 'MiscVal', 'SaleType', 'SaleCondition', 'MasVnrType',
    'MasVnrArea', 'MiscFeature', 'Electrical',
]
# NOTE: despite its name, Y holds the test-set *features*, not a target.
Y = test_data.drop(columns=test_columns_to_drop)

In [None]:
# Preview the first rows of the reduced test features.
Y.head()

In [None]:
# Count the missing values per column, restricted to the columns of Y that
# contain at least one null.
Y[Y.columns[Y.isnull().any()]].isnull().sum()

In [None]:
# Heatmap of the boolean null mask of the test features.
sns.heatmap(Y.isnull(), cmap='viridis')

In [None]:
# Impute missing values in the test features.
#
# Each column is filled and then assigned back explicitly. The previous form,
# `Y[col].fillna(..., inplace=True)`, modifies a temporary Series: pandas
# deprecates it and, with Copy-on-Write enabled, it leaves Y unchanged.
# Every column uses only its own statistics, so the fill order is irrelevant.
# NOTE(review): the mode/mean values are computed on the test set itself
# rather than reused from the training set - confirm this is intended.

# Categorical columns: replace gaps with the most frequent value.
for column in ['MSZoning', 'Utilities', 'KitchenQual', 'Functional']:
    Y[column] = Y[column].fillna(Y[column].mode()[0])

# Numeric columns: replace gaps with the column mean.
for column in ['BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
               'GarageCars']:
    Y[column] = Y[column].fillna(Y[column].mean())

# Columns whose gaps are replaced by the explicit label 'Not Available'.
for column in ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
               'BsmtFinType2', 'GarageType', 'GarageQual', 'GarageCond',
               'PoolQC']:
    Y[column] = Y[column].fillna('Not Available')

In [None]:
# Re-draw the null-mask heatmap of the test features to verify the imputation.
sns.heatmap(Y.isnull(), cmap='viridis')

In [None]:
# Re-count missing values per column of Y for columns that still contain nulls.
Y[Y.columns[Y.isnull().any()]].isnull().sum()

In [None]:
# List the columns of the test features.
Y.columns

In [None]:
# Stack the training predictors on top of the test features (row-wise) so that
# the dummy variables created afterwards are derived from both datasets at once.
# Row labels are not reset by this call (ignore_index is left at its default).
combined_df = pd.concat([X_cleaned, Y], axis=0)
# Preview the first rows of the combined frame.
combined_df.head()

In [None]:
# Show the (rows, columns) shape of the combined frame.
combined_df.shape

In [None]:
# Categorical columns to expand into dummy (indicator) columns.
categorical_columns = [
    'MSZoning', 'Street', 'Utilities', 'LandSlope', 'Neighborhood',
    'BldgType', 'HouseStyle', 'ExterQual', 'ExterCond', 'Foundation',
    'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'Functional', 'KitchenQual', 'GarageType', 'GarageQual', 'GarageCond',
    'PavedDrive', 'PoolQC',
]
# drop_first=True omits the first level of every column, giving k-1 dummies
# for a column with k categories.
combined_df = pd.get_dummies(combined_df, columns=categorical_columns, drop_first=True)

In [None]:
# Show the (rows, columns) shape of the combined frame.
combined_df.shape

In [None]:
# Remove duplicated column names: columns.duplicated() flags every repeat of a
# name after its first occurrence, and the negated mask keeps the rest.
combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

In [None]:
# Split the combined frame back into its training and test parts.
# The training rows were placed first when combined_df was built, so the
# boundary is the number of training samples. It is taken from the target
# vector `y` instead of a hard-coded 1460, so the split stays correct if the
# size of the training file changes.
n_train_rows = len(y)
X_cleaned = combined_df.iloc[:n_train_rows, :]
Y_encoded = combined_df.iloc[n_train_rows:, :]

In [None]:
# Show the (rows, columns) shape of the encoded training predictors.
X_cleaned.shape

In [None]:
# Show the (rows, columns) shape of the encoded test features.
Y_encoded.shape

In [None]:
# Bind the target under the *_cleaned name; no transformation is applied here.
y_cleaned = y

In [None]:
# Display the target values.
y_cleaned

### Split data into Training and Dev set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the training rows as a dev set (named X_test / y_test here);
# random_state=0 is passed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X_cleaned, y_cleaned, test_size=.20, random_state = 0)

In [None]:
# Standardization: learn the scaling statistics on the training split only,
# then apply that same fitted scaler to every feature matrix.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)        # dev split
Y_encoded = scaler.transform(Y_encoded)  # test features used for the submission

In [None]:
# Number of input features (columns) fed to the network.
X_train.shape[1]

# ANN

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [None]:
# Feed-forward regression network: three ReLU hidden layers (20, 128, 32 units)
# with dropout after the second and third, and a single-unit output layer
# without an activation for the predicted price.
model = Sequential([
    Dense(20, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1),
])

In [None]:
# Print the model summary.
model.summary()

In [None]:
# Configure training: Adam optimizer, mean squared error as the loss, and the
# same mean squared error additionally tracked as a metric.
model.compile(optimizer = 'Adam', loss = 'mean_squared_error', metrics = ['mean_squared_error'])

In [None]:
from keras.callbacks import EarlyStopping
# Stop training once val_loss has not improved for 10 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when the callback stops training; without it the model keeps the
# weights of the final epoch, i.e. `patience` epochs after the best one.
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10,
                           restore_best_weights=True)

In [None]:
# Dev split passed to Keras as NumPy arrays; it is the validation data whose
# loss the early-stopping callback monitors.
dev_set = (X_test, y_test.to_numpy())
# Train for at most 250 epochs in mini-batches of 32.
history = model.fit(
    x=X_train,
    y=y_train.to_numpy(),
    batch_size=32,
    epochs=250,
    validation_data=dev_set,
    callbacks=[early_stop],
)

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
# Predict on the held-out dev split (X_test) for the error metrics below.
predictions = model.predict(X_test)

In [None]:
# Mean absolute error of the dev-split predictions.
mean_absolute_error(y_test,predictions)

In [None]:
# Root mean squared error of the dev-split predictions.
np.sqrt(mean_squared_error(y_test,predictions))

In [None]:
# Per-epoch training/validation curves as a DataFrame (one column per metric).
# The History object returned by model.fit() is used directly, consistent with
# the plotting cells further down, instead of the model.history attribute.
losses = pd.DataFrame(history.history)
losses.plot()

In [None]:
# Evaluate the compiled loss and metric on X_test / y_test.
# NOTE: these are the dev split produced by train_test_split, not the
# separately loaded test file, despite the printed message.
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=10)
results

In [None]:
# List the metric names recorded per epoch during training.
history.history.keys()

In [None]:
# Loss per epoch: training curve first, then the dev (validation) curve.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'dev'], loc='best')
plt.show()

In [None]:
# Mean squared error per epoch: training curve first, then the dev curve.
for curve_key in ('mean_squared_error', 'val_mean_squared_error'):
    plt.plot(history.history[curve_key])
plt.title('model mean_squared_error')
plt.xlabel('epoch')
plt.ylabel('mean_squared_error')
plt.legend(['train', 'dev'], loc='best')
plt.show()

# Predictions on given Test Set

In [None]:
# Predict sale prices for the encoded and scaled test features.
Test_set_predictions = model.predict(Y_encoded)

In [None]:
# Inspect the container type returned by model.predict().
type(Test_set_predictions)

In [None]:
# Display the raw test-set predictions.
Test_set_predictions

In [None]:
# Create the submission DataFrame: one row per test sample with its Id and
# predicted SalePrice.
# The Ids are taken from the loaded test set itself, so every prediction is
# paired with the row it was computed from. Previously they were read from a
# separate sample_submission.csv and matched purely by position, which also
# required one more hard-coded local file.
pred = pd.DataFrame(Test_set_predictions)
dataset_submit = pd.DataFrame({
    'Id': test_data['Id'].to_numpy(),
    # Column 0 holds the single output of the network for each row.
    'SalePrice': pred[0].to_numpy(),
})

In [None]:
# Name the two submission columns.
dataset_submit.columns = ['Id','SalePrice']
# Write the submission to the working directory; index=False omits the row index.
dataset_submit.to_csv('Submission_ANN.csv',index=False)