## Advanced Model

In [134]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
engine = create_engine("postgresql:///kc_housing")
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr

In [135]:
def pullsqldata():
    """This function pulls the necessary columns and rows from the PostGRES DB into a Pandas Dataframe in order 
    to continue with our EDA """
    
    engine = create_engine("postgresql:///kc_housing")
    query = """
                SELECT *
                FROM rpsale AS s
                INNER JOIN resbldg AS b ON CONCAT(s.Major,s.Minor) = CONCAT(b.Major, b.Minor)
                INNER JOIN parcel AS p ON CONCAT(s.Major,s.Minor) = CONCAT(p.Major,p.Minor)
                WHERE EXTRACT(YEAR FROM CAST(documentdate AS DATE)) = 2018
                    AND p.proptype = 'R'
                ;"""
    kc_df = pd.read_sql(sql = query, con = engine)
    return kc_df

def clean_data_intial(df):
    """ This function cleans the housing data by removing outliers, sale price == 0, and irrelevant columns. 
    """
    #We chose a minimum sale vale of 10000 and a maximium sale value of 2 sigma
    df_clean = df[(df['saleprice']>10000) & (df['saleprice'] <  (2*df['saleprice'].std())+df['saleprice'].mean())]
    #These are irrelevant or highly covariant columns
    columns_to_drop = ['documentdate',
                       'excisetaxnbr',
                       'recordingnbr',
                       'volume',
                       'page',
                       'platnbr',
                       'plattype',
                       'platlot',
                       'platblock',
                        'sellername',
                        'buyername',
                        'streetname',
                        'streettype',
                        'directionsuffix',
                        'buildingnumber',
                        'major',
                        'minor',
                        'bldggradevar',
                        'sqfthalffloor',
                        'sqft2ndfloor',
                        'sqftupperfloor',
                        'sqftunfinfull',
                        'sqftunfinhalf',
                        'sqfttotbasement',
                        'sqftfinbasement',
                        'brickstone',
                        'viewutilization',
                        'propname',
                        'platname',
                        'platlot',
                        'platblock',
                        'range',
                        'township',
                        'section',
                        'quartersection',
                        'area',
                        'subarea',
                        'specarea',
                        'specsubarea',
                        'levycode',
                        'districtname',
                        'currentzoning',
                        'topography',
                        'currentusedesignation',
                        'salewarning',
                        'wetland',
                        'stream',
                        'seismichazard',
                        'landslidehazard',
                        'address',
                        'airportnoise',
                        'contamination',
                        'dnrlease',
                         'coalminehazard',
                         'criticaldrainage',
                         'erosionhazard',
                         'landfillbuffer',
                         'hundredyrfloodplain',
                         'steepslopehazard',
                         'speciesofconcern',
                         'sensitiveareatract',
                         'daylightbasement',
                         'fraction',
                        'directionprefix', 'proptype','unbuildable', 'bldgnbr']
    df_clean.drop(columns=columns_to_drop, inplace = True)
    #The columns with Y or N need to be 1 or 0 to model
    df_clean['othernuisances'] = [i.strip() for i in df_clean['othernuisances']]
    df_clean.replace(('Y', 'N'), (1, 0), inplace=True)
    
    #To model the houses that take up more space of thier plot (smaller yard) we need a ratio feature
    #We assume an acturate metric of the house's footprint is the first floor plus any attached garage. This 
    #unfortunatley may not account for detached garages
    df_clean['footprint_ratio']=(df_clean['sqft1stfloor']+df_clean['sqftgarageattached'])/df_clean['sqftlot']
#     df_clean.drop(columns = 'sqft1stfloor', inplace = True)

    return df_clean

def find_highest_correlation(df, dep ='saleprice'):
    try:
        X = df.drop([dep], axis = 1)
    except:
        X = df
    Y = df[dep]
    
    num = 0
    for element in X.columns:
        a = pearsonr(list(X[element]),list(Y))
        if np.abs(a[0]) > num:
            num = a[0]
    for element in X.columns:
        a = pearsonr(list(X[element]),list(Y))
        if np.abs(a[0]) == num:
            return element

In [136]:
df = pullsqldata()
df_clean = clean_data_intial(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [137]:
# drops triplexes from the dataframe
triplex = df_clean.loc[df_clean['nbrlivingunits'] >= 3]
df_clean.drop(triplex.index, inplace= True, axis=0) 
# create a duplex column, value of 1 is duplex, 0 is
df_clean['duplex'] = df_clean['nbrlivingunits'] - 1       

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [138]:
df_clean['duplex'].value_counts()

0.0    28305
1.0      352
Name: duplex, dtype: int64

In [139]:
df_clean.shape

(28657, 86)

In [140]:
df['othernuisances'] = [x.strip() for x in df['othernuisances']]
df.replace(('Y', 'N'), (1, 0), inplace=True)
df_clean.shape

(28657, 86)

### Base Model

In [141]:
base_model = df_clean[['saleprice','sqfttotliving', 'sqftlot', 'footprint_ratio', 'duplex']]


In [142]:
Ybase = base_model['saleprice']
Xbase = base_model.drop(['saleprice'], axis=1)

In [143]:
Xbase_int = sm.add_constant(Xbase)
model_base = sm.OLS(Ybase, Xbase_int).fit()
model_base.summary()

0,1,2,3
Dep. Variable:,saleprice,R-squared:,0.304
Model:,OLS,Adj. R-squared:,0.304
Method:,Least Squares,F-statistic:,3125.0
Date:,"Wed, 04 Dec 2019",Prob (F-statistic):,0.0
Time:,15:31:31,Log-Likelihood:,-402620.0
No. Observations:,28657,AIC:,805300.0
Df Residuals:,28652,BIC:,805300.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.895e+05,5524.371,34.309,0.000,1.79e+05,2e+05
sqfttotliving,218.2217,2.037,107.114,0.000,214.228,222.215
sqftlot,0.1568,0.033,4.711,0.000,0.092,0.222
footprint_ratio,2.947e+05,1.5e+04,19.602,0.000,2.65e+05,3.24e+05
duplex,6.649e+04,1.64e+04,4.052,0.000,3.43e+04,9.87e+04

0,1,2,3
Omnibus:,3272.926,Durbin-Watson:,0.894
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8767.244
Skew:,0.644,Prob(JB):,0.0
Kurtosis:,5.385,Cond. No.,545000.0


### determining the correlation between 'independent' functions

In [144]:
correlation_cut_off = .5

df_X = df_clean.drop(['saleprice'], axis = 1)



corr = df_X.corr() > correlation_cut_off


corr_list = []
for col in corr.columns:
    corr_v = corr[col][corr[col] == True].index.tolist()
    if len(corr_v) > 1:
        corr_list.append((col, corr_v))
    
corr_list
# for col in corr.columns:
#     if corr[col].sum() > 1:

#         corr_list.append(col)

[('nbrlivingunits', ['nbrlivingunits', 'duplex']),
 ('bldggrade', ['bldggrade', 'sqfttotliving', 'yrbuilt']),
 ('sqft1stfloor', ['sqft1stfloor', 'sqfttotliving']),
 ('sqfttotliving',
  ['bldggrade', 'sqft1stfloor', 'sqfttotliving', 'bedrooms', 'bathfullcount']),
 ('bedrooms', ['sqfttotliving', 'bedrooms']),
 ('bathfullcount', ['sqfttotliving', 'bathfullcount']),
 ('fpmultistory', ['fpmultistory', 'fpadditional']),
 ('fpadditional', ['fpmultistory', 'fpadditional']),
 ('yrbuilt', ['bldggrade', 'yrbuilt']),
 ('olympics', ['olympics', 'pugetsound']),
 ('territorial', ['territorial', 'pugetsound']),
 ('pugetsound', ['olympics', 'territorial', 'pugetsound']),
 ('smalllakerivercreek', ['smalllakerivercreek', 'wfntlocation']),
 ('wfntlocation',
  ['smalllakerivercreek', 'wfntlocation', 'wfntfootage', 'wfntbank']),
 ('wfntfootage', ['wfntlocation', 'wfntfootage', 'wfntbank']),
 ('wfntbank',
  ['wfntlocation',
   'wfntfootage',
   'wfntbank',
   'wfntrestrictedaccess',
   'tidelandshoreland']),

In [145]:
df_zip = df_clean[df_clean['zipcode'].str.contains ('98')]
num = '98075-8010'
df_zip['zipcode'] = df_clean['zipcode'].map(lambda x: x[0:5])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [146]:
df_zip.zipcode.value_counts()

98042    801
98023    714
98038    683
98058    641
98115    590
        ... 
98068      5
98000      2
98009      1
98422      1
98134      1
Name: zipcode, Length: 82, dtype: int64

In [150]:
df_zip_test = df_zip[['saleprice','zipcode']]


df_zip['zipcode'] = pd.Categorical(df_zip['zipcode'])
df_zip_test = pd.get_dummies(df_zip['zipcode'], prefix = 'zip')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [151]:
df_test = df_zip[['saleprice']]
df_test.join(df_zip_test, how = 'left')

Xzip = df_zip_test.iloc[:,1:]
Xzip

Unnamed: 0,zip_98001,zip_98002,zip_98003,zip_98004,zip_98005,zip_98006,zip_98007,zip_98008,zip_98009,zip_98010,...,zip_98168,zip_98177,zip_98178,zip_98188,zip_98198,zip_98199,zip_98224,zip_98288,zip_98354,zip_98422
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43732,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43735,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43736,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43737,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [152]:
X_int = sm.add_constant(Xzip)
model = sm.OLS(df_zip[['saleprice']], X_int).fit()
model.summary()

  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return pt

0,1,2,3
Dep. Variable:,saleprice,R-squared:,0.493
Model:,OLS,Adj. R-squared:,0.492
Method:,Least Squares,F-statistic:,281.9
Date:,"Wed, 04 Dec 2019",Prob (F-statistic):,0.0
Time:,15:38:06,Log-Likelihood:,-326300.0
No. Observations:,23535,AIC:,652800.0
Df Residuals:,23453,BIC:,653400.0
Df Model:,81,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.245e+06,1.8e+05,6.917,0.000,8.92e+05,1.6e+06
zip_98001,-8.63e+05,1.8e+05,-4.784,0.000,-1.22e+06,-5.09e+05
zip_98002,-9.359e+05,1.8e+05,-5.185,0.000,-1.29e+06,-5.82e+05
zip_98003,-8.542e+05,1.8e+05,-4.733,0.000,-1.21e+06,-5e+05
zip_98004,9.01e+04,1.81e+05,0.497,0.619,-2.65e+05,4.46e+05
zip_98005,-1.911e+04,1.81e+05,-0.105,0.916,-3.75e+05,3.36e+05
zip_98006,-1.596e+05,1.8e+05,-0.884,0.376,-5.13e+05,1.94e+05
zip_98007,-3.623e+05,1.81e+05,-1.997,0.046,-7.18e+05,-6646.957
zip_98008,-3.654e+05,1.81e+05,-2.023,0.043,-7.19e+05,-1.14e+04

0,1,2,3
Omnibus:,4467.801,Durbin-Watson:,1.341
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17853.56
Skew:,0.901,Prob(JB):,0.0
Kurtosis:,6.868,Cond. No.,991.0


In [153]:
av_model1 = Xzip.join(base_model, how = 'left')

In [154]:
x_1 = av_model1.drop(['saleprice'], axis = 1)

X_int = sm.add_constant(x_1)
model = sm.OLS(av_model1[['saleprice']], X_int).fit()
model.summary()

0,1,2,3
Dep. Variable:,saleprice,R-squared:,0.674
Model:,OLS,Adj. R-squared:,0.672
Method:,Least Squares,F-statistic:,569.2
Date:,"Wed, 04 Dec 2019",Prob (F-statistic):,0.0
Time:,15:38:09,Log-Likelihood:,-321130.0
No. Observations:,23535,AIC:,642400.0
Df Residuals:,23449,BIC:,643100.0
Df Model:,85,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.701e+05,1.45e+05,2.549,0.011,8.55e+04,6.55e+05
zip_98001,-3.472e+05,1.45e+05,-2.388,0.017,-6.32e+05,-6.23e+04
zip_98002,-3.304e+05,1.45e+05,-2.271,0.023,-6.16e+05,-4.53e+04
zip_98003,-3.225e+05,1.45e+05,-2.218,0.027,-6.07e+05,-3.75e+04
zip_98004,4.964e+05,1.46e+05,3.397,0.001,2.1e+05,7.83e+05
zip_98005,3.436e+05,1.46e+05,2.352,0.019,5.72e+04,6.3e+05
zip_98006,2.261e+05,1.45e+05,1.555,0.120,-5.88e+04,5.11e+05
zip_98007,1.299e+05,1.46e+05,0.889,0.374,-1.57e+05,4.17e+05
zip_98008,1.187e+05,1.46e+05,0.815,0.415,-1.67e+05,4.04e+05

0,1,2,3
Omnibus:,3954.503,Durbin-Watson:,1.488
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82207.864
Skew:,-0.129,Prob(JB):,0.0
Kurtosis:,12.152,Cond. No.,58500000.0


### checking to see if footprint ratio is better predictor than lotsize and footprint

In [156]:
for col in df_clean.columns:
    print(col)

saleprice
propertytype
principaluse
saleinstrument
afforestland
afcurrentuseland
afnonprofituse
afhistoricproperty
salereason
propertyclass
nbrlivingunits
zipcode
stories
bldggrade
sqft1stfloor
sqfttotliving
finbasementgrade
sqftgaragebasement
sqftgarageattached
sqftopenporch
sqftenclosedporch
sqftdeck
heatsystem
heatsource
bedrooms
bathhalfcount
bath3qtrcount
bathfullcount
fpsinglestory
fpmultistory
fpfreestanding
fpadditional
yrbuilt
yrrenovated
pcntcomplete
obsolescence
pcntnetcondition
condition
addnlcost
hbuasifvacant
hbuasimproved
presentuse
sqftlot
watersystem
sewersystem
access
streetsurface
restrictiveszshape
inadequateparking
pcntunusable
mtrainier
olympics
cascades
territorial
seattleskyline
pugetsound
lakewashington
lakesammamish
smalllakerivercreek
otherview
wfntlocation
wfntfootage
wfntbank
wfntpoorquality
wfntrestrictedaccess
wfntaccessrights
wfntproximityinfluence
tidelandshoreland
lotdepthfactor
trafficnoise
powerlines
othernuisances
nbrbldgsites
adjacentgolffairway
ad

In [157]:
# r^2 is 9
new_base_model = df_clean[['saleprice','sqfttotliving', 'sqftlot', 'sqft1stfloor', 'duplex']]

av_model2 = Xzip.join(new_base_model, how = 'left')

In [171]:
x_2 = av_model2.drop(['saleprice'], axis = 1)

X_int = sm.add_constant(x_3)
avmodel2 = sm.OLS(av_model3[['saleprice']], X_int).fit()
avmodel2.summary()

0,1,2,3
Dep. Variable:,saleprice,R-squared:,0.674
Model:,OLS,Adj. R-squared:,0.672
Method:,Least Squares,F-statistic:,569.2
Date:,"Wed, 04 Dec 2019",Prob (F-statistic):,0.0
Time:,15:51:25,Log-Likelihood:,-321130.0
No. Observations:,23535,AIC:,642400.0
Df Residuals:,23449,BIC:,643100.0
Df Model:,85,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.701e+05,1.45e+05,2.549,0.011,8.55e+04,6.55e+05
zip_98001,-3.472e+05,1.45e+05,-2.388,0.017,-6.32e+05,-6.23e+04
zip_98002,-3.304e+05,1.45e+05,-2.271,0.023,-6.16e+05,-4.53e+04
zip_98003,-3.225e+05,1.45e+05,-2.218,0.027,-6.07e+05,-3.75e+04
zip_98004,4.964e+05,1.46e+05,3.397,0.001,2.1e+05,7.83e+05
zip_98005,3.436e+05,1.46e+05,2.352,0.019,5.72e+04,6.3e+05
zip_98006,2.261e+05,1.45e+05,1.555,0.120,-5.88e+04,5.11e+05
zip_98007,1.299e+05,1.46e+05,0.889,0.374,-1.57e+05,4.17e+05
zip_98008,1.187e+05,1.46e+05,0.815,0.415,-1.67e+05,4.04e+05

0,1,2,3
Omnibus:,3954.503,Durbin-Watson:,1.488
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82207.864
Skew:,-0.129,Prob(JB):,0.0
Kurtosis:,12.152,Cond. No.,58500000.0


no noticable effect when switching the footprint lot ratio with the footprint and the lot individually. Will probably drop all in future test

### determine list of best correlating data

In [172]:
df_clean['bldggrade'] = df_clean['bldggrade'].astype(int) 
df_b = df_clean[['saleprice']]

df['bldggrade'] = pd.Categorical(df['bldggrade'])
df_bldg = pd.get_dummies(df['bldggrade'], prefix = 'bldg')
df_bldg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,bldg_1.0,bldg_2.0,bldg_3.0,bldg_4.0,bldg_5.0,bldg_6.0,bldg_7.0,bldg_8.0,bldg_9.0,bldg_10.0,bldg_11.0,bldg_12.0,bldg_13.0,bldg_20.0
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43736,0,0,0,0,1,0,0,0,0,0,0,0,0,0
43737,0,0,0,0,0,0,0,0,1,0,0,0,0,0
43738,0,0,0,0,0,0,0,0,1,0,0,0,0,0
43739,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [174]:
av_model3 = av_model1.copy()
av_model3 = av_model3.join(df_bldg, how = 'left')


In [175]:
x_3 = av_model3.drop(['saleprice'], axis = 1)

X_int = sm.add_constant(x_3)
avmodel3 = sm.OLS(av_model3[['saleprice']], X_int).fit()
avmodel3.summary()

  return self.params / self.bse
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


0,1,2,3
Dep. Variable:,saleprice,R-squared:,0.704
Model:,OLS,Adj. R-squared:,0.703
Method:,Least Squares,F-statistic:,587.7
Date:,"Wed, 04 Dec 2019",Prob (F-statistic):,0.0
Time:,15:53:49,Log-Likelihood:,-319970.0
No. Observations:,23535,AIC:,640100.0
Df Residuals:,23439,BIC:,640900.0
Df Model:,95,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,4.523e+05,1.28e+05,3.541,0.000,2.02e+05,7.03e+05
zip_98001,-2.575e+05,1.38e+05,-1.861,0.063,-5.29e+05,1.37e+04
zip_98002,-2.432e+05,1.38e+05,-1.756,0.079,-5.15e+05,2.82e+04
zip_98003,-2.508e+05,1.38e+05,-1.811,0.070,-5.22e+05,2.06e+04
zip_98004,5.495e+05,1.39e+05,3.950,0.000,2.77e+05,8.22e+05
zip_98005,3.638e+05,1.39e+05,2.615,0.009,9.11e+04,6.36e+05
zip_98006,2.381e+05,1.38e+05,1.720,0.085,-3.32e+04,5.09e+05
zip_98007,1.783e+05,1.39e+05,1.281,0.200,-9.45e+04,4.51e+05
zip_98008,1.865e+05,1.39e+05,1.346,0.178,-8.51e+04,4.58e+05

0,1,2,3
Omnibus:,4334.027,Durbin-Watson:,1.511
Prob(Omnibus):,0.0,Jarque-Bera (JB):,96579.692
Skew:,-0.262,Prob(JB):,0.0
Kurtosis:,12.91,Cond. No.,1.06e+16
