## Estimate Home Values in Zillow

Faith Kane
10/18/2019

### Import Modules

In [1]:
import csv
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.metrics import mean_squared_error,r2_score,explained_variance_score
from math import sqrt


import env
import util
import wrangle_zillow
import split_scale_zillow
import features_zillow
import model_zillow

#### Read In, Clean and Transform the Data Using wrangle_zillow()

In [2]:
# Build the working DataFrame with the project's wrangle_zillow helper, which
# (per the section heading) reads in, cleans and transforms the Zillow data.
# NOTE(review): the data source and any credentials are handled inside the
# helper and are not visible here — confirm before re-running.
df = wrangle_zillow.wrangle_zillow()

#### Summarize the Data Using tell_me_about(df)

In [3]:
# Print a quick summary of df; the recorded output shows the frame's shape,
# its info() listing and describe() statistics.
util.tell_me_about(df)

DataFrame Shape:

(15956, 7)

Info about:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15956 entries, 0 to 15955
Data columns (total 7 columns):
bedrooms               15956 non-null float64
bathrooms              15956 non-null float64
square_feet            15956 non-null int64
taxes                  15956 non-null float64
home_value             15956 non-null float64
propertylandusedesc    15956 non-null category
fips_number            15956 non-null int64
dtypes: category(1), float64(4), int64(2)
memory usage: 888.3 KB
None

Describe:

           bedrooms     bathrooms   square_feet          taxes    home_value  \
count  15956.000000  15956.000000  15956.000000   15956.000000  1.595600e+04   
mean       3.253384      2.215844   1837.197794    5681.839846  4.634078e+05   
std        0.949643      1.026662    984.838795    7466.645847  6.434229e+05   
min        1.000000      1.000000    320.000000      49.180000  1.216800e+04   
25%        3.000000      2.000000   1233.000000 

#### Split the Data Using split_my_data(df) and Assign Features (x) and Target (y)

In [4]:
# Split df into a train frame and a test frame with the project helper.
# NOTE(review): the split proportion and random seed are set inside
# split_my_data and are not visible here — confirm the split is seeded so the
# metrics below are reproducible.
train, test = split_scale_zillow.split_my_data(df)

In [5]:
# Column selectors shared by the train and test frames.
feature_cols = ["bedrooms", "bathrooms", "square_feet"]
target_cols = ["home_value"]  # list selector keeps y as a one-column DataFrame

# Same column subsets for both splits: features (x) and target (y).
x_train, y_train = train[feature_cols], train[target_cols]
x_test, y_test = test[feature_cols], test[target_cols]

#### Create Baseline Linear Regression Model Using modeling_function()

In [6]:
# Build the predictions frame from the training features and target. The
# recorded output shows three columns: actual, lm1 and baseline.
# NOTE(review): lm1 presumably holds the linear-regression predictions and
# baseline a single constant prediction — confirm in model_zillow.
predictions = model_zillow.modeling_function(x_train, y_train)
# Bare expression so the notebook renders the frame as the cell output.
predictions

Unnamed: 0,actual,lm1,baseline
0,475946.0,6.372533e+05,466158.614463
1,250799.0,2.482175e+05,466158.614463
2,199479.0,2.052506e+05,466158.614463
3,1252995.0,8.897366e+05,466158.614463
4,107124.0,4.586284e+05,466158.614463
5,53793.0,1.218262e+05,466158.614463
6,337404.0,8.732689e+05,466158.614463
7,172673.0,4.625023e+05,466158.614463
8,484848.0,4.171228e+05,466158.614463
9,372791.0,1.868394e+05,466158.614463


### Evaluate: Compare Model Performance to the Baseline

Baseline Error

In [7]:
# Error metrics for the `baseline` column of `predictions`, measured against
# the `actual` column.
baseline_truth = predictions.actual
baseline_guess = predictions.baseline

MSE_baseline = mean_squared_error(baseline_truth, baseline_guess)
RMSE_baseline = sqrt(MSE_baseline)
# MSE times the number of observations recovers the sum of squared errors.
SSE_baseline = MSE_baseline * len(baseline_truth)
r2_baseline = r2_score(baseline_truth, baseline_guess)

# Printed in the order: MSE, SSE, RMSE, R^2.
print(MSE_baseline, SSE_baseline, RMSE_baseline, r2_baseline)

434952376131.3264 5551732128940250.0 659509.1933637669 0.0


Model Error

In [9]:
# Error metrics for the `lm1` column of `predictions`, measured against the
# `actual` column.
# NOTE(review): `predictions` was built from x_train / y_train, so these are
# training-set metrics; x_test / y_test are not evaluated in this section.
lm1_truth = predictions.actual
lm1_guess = predictions.lm1

MSE_1 = mean_squared_error(lm1_truth, lm1_guess)
RMSE_1 = sqrt(MSE_1)
# MSE times the number of observations recovers the sum of squared errors.
SSE_1 = MSE_1 * len(lm1_truth)
r2_1 = r2_score(lm1_truth, lm1_guess)

# Printed in the order: MSE, SSE, RMSE, R^2.
print(MSE_1, SSE_1, RMSE_1, r2_1)

248861731613.72168 3176471142317543.5 498860.433000776 0.42784142524472113
