# House Hunters Linear Model

Linear regression model of human-interpretable features that will make recommendations for people looking to buy or sell a house.

## Pre-Processing

In [138]:
# import necessary packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# set display options
pd.set_option('display.max_columns', None)

In [139]:
# import dataset
housing = pd.read_csv('Ames_HousePrice.csv', index_col=0)
# real_estate = pd.read_csv('./data/Ames_Real_Estate_Data.csv')

#### Feature Cleaning

In [140]:
#Remove Outliers
housing = housing[np.logical_and(housing.SalePrice >= 40000, housing.SalePrice <= 750000)]

# #Remove Bad Classes
housing = housing[housing.Neighborhood != 'Landmrk']
housing.SaleType = housing.SaleType.astype('string')
housing. SaleType = housing.SaleType.str.strip()
housing = housing[housing.SaleType == 'WD']
housing = housing[housing.SaleCondition == 'Normal']


#Replace NAs
housing = housing.fillna(0)

#### Feature Engineering

In [141]:
#Log Transforms
housing['LogSalePrice'] = np.log(housing.SalePrice)
# housing['LogGrLivArea'] = np.log(housing.GrLivArea)

#Area Calculations
housing['PorchTotSF'] = housing.OpenPorchSF + housing.EnclosedPorch + housing['3SsnPorch'] + housing.ScreenPorch

#Binary HasBLANK Categories
housing['HasGarage'] = np.where(housing.GarageCars > 0, 1, 0)
housing['HasPool'] = np.where(housing.PoolArea > 0, 1, 0)
housing['HasPorch'] = np.where(housing.PorchTotSF > 0, 1, 0)
housing['HasDeck'] = np.where(housing.WoodDeckSF > 0, 1, 0)
housing['HasFinBsmt'] = np.where(housing.BsmtFinSF1 > 0, 1, 0)
housing['HasFireplace'] = np.where(housing.Fireplaces > 0, 1, 0)
housing['HasFence'] = np.where(housing.Fence.notna(), 1, 0)
housing.Neighborhood = housing.Neighborhood.replace({'MeadowV':1,'BrDale':2, 'IDOTRR':3, 'BrkSide':4, 'OldTown':5, 'Edwards':6, 'SWISU':7, 'Landmrk':8, 'Sawyer':9,\
                           'NPkVill':10, 'Blueste':11, 'NAmes':12, 'Mitchel':13, 'SawyerW':14, 'Gilbert':15, 'NWAmes':16, 'Greens':17, 'Blmngtn':18,\
                           'CollgCr':19, 'Crawfor':20, 'ClearCr':21, 'Somerst':22, 'Timber':23, 'Veenker':24, 'GrnHill':25, 'StoneBr':26,'NridgHt':27, 'NoRidge':28})

#Binary Quality/Cond Categories
housing['GarageFinish_Fin']= np.where(housing.GarageFinish == 'Unf', 0, 1)

#### Feature Selection

In [142]:
keep = ['LogSalePrice', 'GrLivArea', 'HasGarage', 'HasPool','HasFireplace','OverallQual','OverallCond','Neighborhood','GarageCars']
housing = housing[keep]


# , 'HasPorch','HasFence'

In [143]:
housing

Unnamed: 0,LogSalePrice,GrLivArea,HasGarage,HasPool,HasFireplace,OverallQual,OverallCond,Neighborhood,GarageCars
1,11.744037,856,1,0,1,6,6,7,2.0
2,11.845820,1049,1,0,0,5,5,6,1.0
3,11.735269,1001,1,0,0,5,9,3,1.0
4,11.643954,1039,1,0,0,4,8,5,1.0
5,12.332705,1665,1,0,0,8,6,16,2.0
...,...,...,...,...,...,...,...,...,...
759,11.703546,952,1,0,1,6,6,4,1.0
760,11.846536,1733,1,0,1,3,5,6,2.0
761,11.884489,2002,1,0,0,5,6,20,3.0
762,12.289954,1842,1,0,1,7,5,19,2.0


## Model Generation

In [145]:
from sklearn.linear_model import LinearRegression
from sklearn import model_selection

In [146]:
y = housing['LogSalePrice']
x = housing.drop('LogSalePrice', axis=1)

x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=.2, random_state=0)

lm = LinearRegression()
lm.fit(x_train, y_train)

print(lm.score(x_train, y_train))
print(lm.score(x_test, y_test))
print(lm.coef_)

0.8645743184439679
0.8520100375641695
[0.0002462  0.0105743  0.05976766 0.06164137 0.08697679 0.03761453
 0.01574873 0.0805717 ]


## Model Deployment

#### Buyer Profile:

In [248]:
Budget = 150000
LivArea = 1000
HasGarage = 1
HasPool = 0
HasPorch = 1
HasFireplace = 1
HasFence = 1
OverallQual = 6
OverallCond = 6
Neighborhood = 3
GarageCars =1

buyer_data = [[np.log(Budget), LivArea, HasGarage, HasPool, HasFireplace, OverallQual, OverallCond,Neighborhood,GarageCars]]

buyer = pd.DataFrame(data = buyer_data, columns = keep)

In [249]:
# buyer_x = buyer.drop('LogSalePrice', axis=1)
# base_value = np.exp(lm.predict(buyer_x)[0])

buyer

Unnamed: 0,LogSalePrice,GrLivArea,HasGarage,HasPool,HasFireplace,OverallQual,OverallCond,Neighborhood,GarageCars
0,11.918391,1000,1,0,1,6,6,3,1


#### Buyer Recomendation Tool:

In [252]:
from math import exp

def buyer_recommendation(buyer, model):
    
    #generate intial estimate
    budget = buyer['LogSalePrice']
    buyer_x = buyer.drop('LogSalePrice', axis=1)
    predicted_price = np.exp(lm.predict(buyer_x)[0])
    
    print('Your budget is: ', np.exp(budget[0]))
    print('Based on your profile, the house you want will cost: ', predicted_price)
    
    #search for ways to find a good deal
    recommendation = []
    discard_features = ['HasPool','HasGarage','HasFireplace']
    removed_feature = []

#     Overbudget!! Lower our cost
    if predicted_price>np.exp(budget[0]):
        print('')
        print('\nYou are over budget')
        for feature in discard_features:
            if buyer_x[feature][0]>0:
                buyer_x[feature]=0
                updated_price = np.exp(lm.predict(buyer_x)[0])
                difference = predicted_price-updated_price
                removed_feature.append(feature)
                print('\nIf you remove these features:',removed_feature)
                print('Saving: ',difference)
                print('Updated price: ',updated_price)
            

#   Underbudget!! Increase features
    if predicted_price<np.exp(budget[0]):
        print('\nYou are underbudget')
        for feature in discard_features:
            if buyer_x[feature][0]==0:
                buyer_x[feature]=1
                updated_price = np.exp(lm.predict(buyer_x)[0])
                difference = predicted_price-updated_price
                removed_feature.append(feature)
                print('\nIf you add these features:',removed_feature)
                print('Increased cost: ',difference)
                print('Updated price: ',updated_price)
        
    
buyer_recommendation(buyer,lm)

Your budget is:  149999.99999999994
Based on your profile, the house you want will cost:  121245.30727384084
you are underbudget

If you add these features: ['HasPool']
Increased cost:  -7467.48205302532
Updated price:  128712.78932686616


#### Seller Profile:

In [None]:
LivArea = 100000
HasGarage = 0
HasPool = 1
HasPorch = 1

seller_data = [np.log(LivArea), HasGarage, HasPool, HasPorch]
seller_cols = keep[1:]

buyer = pd.DataFrame(data = seller_data, columns = seller_cols)

#### Seller Recommedation Tool:

In [None]:
def seller_recommendation(seller, model):
    
    #generate intial estimate
    budget = buyer['LogSalePrice']
    buyer = buyer.drop('LogSalePrice', axis=1)
    base_value = exp(model.predict(buyer))
    
    print('Your budget is: ', budget)
    print('Based on your profile, the house you want will cost: ', base_value)
    
    #search for ways to find a good deal
    recommendation = []
    
    #if over budget, do a grid search for ways to reduce cost
        #for feature in list:
            #if buyer[feature] == 1:
                buyer[feature] = 0
                new_value = exp(model.predict(buyer))
                savings = base_value - new_value
                recommendation.append(savings, feature)
        print('If you are willing to compromise on these features, you could save this much money:')
        print(recommendation)
        
    #if under budget, do a grid search for way to optimize cost
        #for feature in list:
            #if buyer[feature] == 0:
                buyer[feature] = 1
                new_value = exp(model.predict(buyer))
                if new_value < budget:
                    stretch = new_value - base_value
                    recommendation.append(stretch, feature)
        print('If you want, you could add these features without going over budget:')
        print(recommendation)
        
    #neighborhood search
    
    #square footage search
    
    #month of year search