In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression 
from sklearn.model_selection import train_test_split

In [2]:
# Load the housing dataset; the path is resolved relative to the working directory.
DATA_FILE = 'All_Data.csv'
housing = pd.read_csv(DATA_FILE)

In [3]:
# Peek at the first two rows to sanity-check the load.
housing.head(n=2)

Unnamed: 0,PID,GrLivArea,SalePrice,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,Prop_Addr
0,526301100,1656,215000,20,RL,141.0,31770,Pave,,IR1,...,0,,,,0,5,2010,WD,Normal,3126 NORTHWOOD DR
1,526302030,1293,149900,20,RL,,11027,Pave,,IR1,...,0,,,,0,5,2006,WD,Normal,3115 NORTHWOOD DR


In [4]:
# Show the column labels of the loaded frame.
housing.columns

Index(['PID', 'GrLivArea', 'SalePrice', 'MSSubClass', 'MSZoning',
       'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour',
       'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1',
       'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond',
       'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
       'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF',
       '2ndFlrSF', 'LowQualFinSF', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond

In [5]:
# Remove columns that are not used in the rest of the notebook.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: re-running it after the columns are already gone is a
# no-op rather than a KeyError.
housing = housing.drop(columns=['PID', 'Prop_Addr'], errors='ignore')

In [6]:
# Report how many values are missing in each column that has at least one null.
has_missing = housing.isna().any()
null_columns = housing.columns[has_missing]
housing[null_columns].isna().sum()

LotFrontage      459
Alley           2411
MasVnrType        14
MasVnrArea        14
BsmtQual          69
BsmtCond          69
BsmtExposure      71
BsmtFinType1      69
BsmtFinSF1         1
BsmtFinType2      70
BsmtFinSF2         1
BsmtUnfSF          1
TotalBsmtSF        1
Electrical         1
BsmtFullBath       2
BsmtHalfBath       2
FireplaceQu     1241
GarageType       127
GarageYrBlt      129
GarageFinish     129
GarageCars         1
GarageArea         1
GarageQual       129
GarageCond       129
PoolQC          2570
Fence           2054
MiscFeature     2482
dtype: int64

## Fixing Null Values

### Changing Categorical Features

In [7]:
# Categorical columns where a missing value is treated as "the house does not
# have this feature" (no alley, no pool, no basement, no garage, ...), so the
# null is replaced by an explicit 'None' category.
# NOTE(review): this interpretation follows the usual reading of the Ames data
# dictionary -- confirm it matches the source of All_Data.csv.
absent_feature_cols = [
    'Alley', 'MiscFeature', 'Fence', 'PoolQC', 'MasVnrType', 'FireplaceQu',
    'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'BsmtQual',
    'GarageFinish', 'GarageType', 'GarageQual', 'GarageCond',
]
housing[absent_feature_cols] = housing[absent_feature_cols].fillna('None')

# 'Electrical' is different: every house has an electrical system, so a null
# is a genuinely missing observation rather than an absent feature. Filling it
# with 'None' would invent a category; use the most frequent value instead.
housing['Electrical'] = housing['Electrical'].fillna(housing['Electrical'].mode()[0])

### Changing Numeric Features

In [8]:
# Numeric columns where a missing value is filled with 0.
# NOTE(review): 0 is also used for a missing 'GarageYrBlt'; a year of 0 is far
# outside the real range and may distort a linear model -- consider an
# indicator column or another sentinel downstream.
zero_fill_cols = [
    'MasVnrArea',
    'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
    'BsmtHalfBath', 'BsmtFullBath',
    'GarageArea', 'GarageCars', 'GarageYrBlt',
]
housing[zero_fill_cols] = housing[zero_fill_cols].fillna(0)

# Fill missing lot frontage with the median of the same neighbourhood.
# If a neighbourhood has no observed frontage at all, its median is NaN, so
# fall back to the overall median to guarantee that no nulls are left behind.
neighborhood_median = housing.groupby('Neighborhood')['LotFrontage'].transform('median')
housing['LotFrontage'] = (
    housing['LotFrontage']
    .fillna(neighborhood_median)
    .fillna(housing['LotFrontage'].median())
)

In [9]:
# Re-run the null report after imputation; an empty result means every
# missing value has been handled.
null_columns = housing.columns[housing.isna().any(axis=0)]
housing.loc[:, null_columns].isna().sum(axis=0)

Series([], dtype: float64)

## Feature Engineering