In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import statsmodels.api as sm

from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn import metrics
from statsmodels.formula.api import ols

In [13]:
# Load the raw training data from CSV into the `train` DataFrame
train = pd.read_csv(filepath_or_buffer='datasets/train.csv')

In [16]:
# Normalise the column headers to snake_case: lowercase, with spaces replaced
# by underscores (e.g. 'Lot Frontage' -> 'lot_frontage').
# A single rename with a callable maps every header in one pass, instead of
# rebuilding the column index once per column with inplace=True.
train = train.rename(columns=lambda header: header.lower().replace(' ', '_'))

In [17]:
# Count the missing values in each column and show the 27 columns with the
# most nulls, largest count first
(
    train
    .isna()
    .sum()
    .sort_values(ascending=False)
    .head(27)
)

pool_qc           2042
misc_feature      1986
alley             1911
fence             1651
fireplace_qu      1000
lot_frontage       330
garage_finish      114
garage_cond        114
garage_qual        114
garage_yr_blt      114
garage_type        113
bsmt_exposure       58
bsmtfin_type_2      56
bsmtfin_type_1      55
bsmt_cond           55
bsmt_qual           55
mas_vnr_type        22
mas_vnr_area        22
bsmt_half_bath       2
bsmt_full_bath       2
garage_cars          1
garage_area          1
bsmt_unf_sf          1
bsmtfin_sf_2         1
total_bsmt_sf        1
bsmtfin_sf_1         1
overall_cond         0
dtype: int64

## Explanation of Null Values 

* For features where null values make up more than half of the total observations, I decided against using the feature in my model.
* I imputed 0 for every remaining feature except `garage_yr_blt`, which is imputed with the median year built (see the table below).

|Feature with Null Value|What Null Value Means|Imputed Value|
| --- | --- | --- |
|fireplace_qu|No fireplace|0|
|lot_frontage|House not on road|0|
|garage_finish|No garage|0|
|garage_cond|No garage|0|
|garage_qual|No garage|0|
|garage_yr_blt|No garage|Median year built|
|garage_type|No garage|0|
|bsmt_exposure|No basement|0|
|bsmtfin_type_2|No basement|0|
|bsmtfin_type_1|No basement|0|
|bsmt_cond|No basement|0|
|bsmt_qual|No basement|0|
|mas_vnr_type|No veneer|0|
|mas_vnr_area|No veneer|0|
|bsmt_half_bath|No basement|0|
|bsmt_full_bath|No basement|0|
|garage_cars|No garage|0|
|garage_area|No garage|0|
|bsmt_unf_sf|No basement|0|
|bsmtfin_sf_2|No basement|0|
|total_bsmt_sf|No basement|0|
|bsmtfin_sf_1|No basement|0|

