# Making affluent neighborhoods financially accessible to low-income earners.

In [127]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import fiona
import geopandas as gpd
from shapely.geometry import Point
import pyproj
import haversine as hs
import warnings
warnings.filterwarnings('ignore')

# wgs84 = pyproj.Proj(init="epsg:4326")
# InputGrid = pyproj.Proj(projparams = 'NAD83')

### Load housing data

In [128]:
df = pd.read_csv('../data/kc_house_data.csv')

In [129]:
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,7 Average,2170,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,6 Low Average,770,0.0,1933,,98028,47.7379,-122.233,2720,8062
3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,NO,NONE,...,7 Average,1050,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000
4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,NO,NONE,...,8 Good,1680,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503


In [130]:
df['coord'] = list(zip(df.long,df.lat))
df['geometry'] = df.apply(lambda x: Point((x.long, x.lat)), axis = 1)

In [131]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21597 entries, 0 to 21596
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             21597 non-null  int64  
 1   date           21597 non-null  object 
 2   price          21597 non-null  float64
 3   bedrooms       21597 non-null  int64  
 4   bathrooms      21597 non-null  float64
 5   sqft_living    21597 non-null  int64  
 6   sqft_lot       21597 non-null  int64  
 7   floors         21597 non-null  float64
 8   waterfront     19221 non-null  object 
 9   view           21534 non-null  object 
 10  condition      21597 non-null  object 
 11  grade          21597 non-null  object 
 12  sqft_above     21597 non-null  int64  
 13  sqft_basement  21597 non-null  object 
 14  yr_built       21597 non-null  int64  
 15  yr_renovated   17755 non-null  float64
 16  zipcode        21597 non-null  int64  
 17  lat            21597 non-null  float64
 18  long  

#### Transform housing data to a geodataframe

In [132]:
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650,"(-122.257, 47.5112)",POINT (-122.257 47.5112)
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639,"(-122.319, 47.721)",POINT (-122.319 47.721)
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,0.0,1933,,98028,47.7379,-122.233,2720,8062,"(-122.233, 47.7379)",POINT (-122.233 47.7379)
3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,NO,NONE,...,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000,"(-122.393, 47.5208)",POINT (-122.393 47.5208)
4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,NO,NONE,...,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503,"(-122.045, 47.6168)",POINT (-122.045 47.6168)


In [133]:
housing_gdf = gpd.GeoDataFrame(df)
housing_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 21597 entries, 0 to 21596
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   id             21597 non-null  int64   
 1   date           21597 non-null  object  
 2   price          21597 non-null  float64 
 3   bedrooms       21597 non-null  int64   
 4   bathrooms      21597 non-null  float64 
 5   sqft_living    21597 non-null  int64   
 6   sqft_lot       21597 non-null  int64   
 7   floors         21597 non-null  float64 
 8   waterfront     19221 non-null  object  
 9   view           21534 non-null  object  
 10  condition      21597 non-null  object  
 11  grade          21597 non-null  object  
 12  sqft_above     21597 non-null  int64   
 13  sqft_basement  21597 non-null  object  
 14  yr_built       21597 non-null  int64   
 15  yr_renovated   17755 non-null  float64 
 16  zipcode        21597 non-null  int64   
 17  lat            21597 no

### Load shapefile data for cities in King county

In [62]:
kc_cities_gdf = gpd.read_file('../data/Cities_and_Unincorporated_King_County___city_kc_area/Cities_and_Unincorporated_King_County___city_kc_area.shp')

In [63]:
kc_cities_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    100 non-null    int64   
 1   JURIS       100 non-null    object  
 2   CITYNAME    100 non-null    object  
 3   SHAPE_Leng  100 non-null    float64 
 4   SHAPE_Area  100 non-null    float64 
 5   geometry    100 non-null    geometry
dtypes: float64(2), geometry(1), int64(1), object(2)
memory usage: 4.8+ KB


In [64]:
kc_cities_gdf.head()

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry
0,1,AL,Algona,38144.667179,36411050.0,"POLYGON ((-122.24022 47.29353, -122.24307 47.2..."
1,2,AU,Auburn,209231.046799,785689200.0,"POLYGON ((-122.22820 47.35395, -122.22546 47.3..."
2,3,BA,Beaux Arts,6720.785802,2274707.0,"POLYGON ((-122.20276 47.58679, -122.20276 47.5..."
3,4,BD,Black Diamond,6629.569588,2412697.0,"POLYGON ((-121.96191 47.28836, -121.96191 47.2..."
4,5,BD,Black Diamond,85786.107116,198366100.0,"POLYGON ((-122.04854 47.34368, -122.04412 47.3..."


"Unincorporated" cities in King County is labeled "King County" in the dataset. King County's official definition of Unincorporated cities is as follows:

Rural unincorporated King County covers central and eastern King County and Vashon Island – areas outside the Urban Growth Boundary. These areas are desig- nated Rural, Agricultural or Forest Resource by the King County Comprehensive Plan.

We will rename unincorporated cities of king county to "Uincorporated City"

Data source: https://gis-kingcounty.opendata.arcgis.com/datasets/kingcounty::cities-and-unincorporated-king-county-city-kc-area/explore?location=47.430582%2C-121.809200%2C10.02

In [65]:
kc_cities_gdf['CITYNAME'].value_counts()

King County         38
Enumclaw             7
Kent                 6
Carnation            3
Snoqualmie           3
Redmond              3
Black Diamond        2
Bothell              2
Des Moines           2
Duvall               2
Renton               2
Skykomish            2
Newcastle            1
Normandy Park        1
Pacific              1
Algona               1
Seattle              1
Shoreline            1
Maple Valley         1
Sammamish            1
SeaTac               1
Tukwila              1
Woodinville          1
North Bend           1
Lake Forest Park     1
Milton               1
Mercer Island        1
Medina               1
Auburn               1
Kenmore              1
Kirkland             1
Issaquah             1
Hunts Point          1
Federal Way          1
Covington            1
Clyde Hill           1
Burien               1
Bellevue             1
Beaux Arts           1
Yarrow Point         1
Name: CITYNAME, dtype: int64

In [66]:
kc_cities_gdf['CITYNAME'] = kc_cities_gdf['CITYNAME'].str.replace('King County', 'Unincorporated City')
kc_cities_gdf['CITYNAME'].value_counts()

Unincorporated City    38
Enumclaw                7
Kent                    6
Carnation               3
Snoqualmie              3
Redmond                 3
Black Diamond           2
Bothell                 2
Des Moines              2
Duvall                  2
Renton                  2
Skykomish               2
Newcastle               1
Normandy Park           1
Pacific                 1
Algona                  1
Seattle                 1
Shoreline               1
Maple Valley            1
Sammamish               1
SeaTac                  1
Tukwila                 1
Woodinville             1
North Bend              1
Lake Forest Park        1
Milton                  1
Mercer Island           1
Medina                  1
Auburn                  1
Kenmore                 1
Kirkland                1
Issaquah                1
Hunts Point             1
Federal Way             1
Covington               1
Clyde Hill              1
Burien                  1
Bellevue                1
Beaux Arts  

In [77]:
kc_cities_gdf

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry
0,1,AL,Algona,38144.667179,3.641105e+07,"POLYGON ((-122.24022 47.29353, -122.24307 47.2..."
1,2,AU,Auburn,209231.046799,7.856892e+08,"POLYGON ((-122.22820 47.35395, -122.22546 47.3..."
2,3,BA,Beaux Arts,6720.785802,2.274707e+06,"POLYGON ((-122.20276 47.58679, -122.20276 47.5..."
3,4,BD,Black Diamond,6629.569588,2.412697e+06,"POLYGON ((-121.96191 47.28836, -121.96191 47.2..."
4,5,BD,Black Diamond,85786.107116,1.983661e+08,"POLYGON ((-122.04854 47.34368, -122.04412 47.3..."
...,...,...,...,...,...,...
95,96,SN,Snoqualmie,114948.303878,2.010589e+08,"POLYGON ((-121.82842 47.54880, -121.82824 47.5..."
96,97,ST,SeaTac,101640.458450,2.866419e+08,"POLYGON ((-122.30332 47.48876, -122.30305 47.4..."
97,98,TU,Tukwila,125946.874691,2.683084e+08,"POLYGON ((-122.29128 47.51595, -122.29134 47.5..."
98,99,WO,Woodinville,89644.620707,1.577311e+08,"POLYGON ((-122.11284 47.76882, -122.11284 47.7..."


In [121]:
# for row in kc_cities_gdf.itertuples():
#     print(type(row[6]))
# #     print(row[3])

In [114]:
# def within_kc_city(coord, poly, city):
#     if coord.within(poly): #row[6]
#         cityname = city #row[3]
#     else:
#         cityname = city
#     return cityname





In [101]:
housing_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,1955,0.0,98178,47.5112,-122.257,1340,5650,"(-122.257, 47.5112)",POINT (-122.25700 47.51120),
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,1951,1991.0,98125,47.721,-122.319,1690,7639,"(-122.319, 47.721)",POINT (-122.31900 47.72100),
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,1933,,98028,47.7379,-122.233,2720,8062,"(-122.233, 47.7379)",POINT (-122.23300 47.73790),
3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,NO,NONE,...,1965,0.0,98136,47.5208,-122.393,1360,5000,"(-122.393, 47.5208)",POINT (-122.39300 47.52080),
4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,NO,NONE,...,1987,0.0,98074,47.6168,-122.045,1800,7503,"(-122.045, 47.6168)",POINT (-122.04500 47.61680),


In [118]:

# for housing_row in housing_gdf.itertuples():
#     for city_row in kc_cities_gdf.itertuples():
#         housing_gdf['city'][housing_row[0]] = within_kc_city(housing_row[-2],city_row[6],city_row[3])

In [155]:
housing_city_gdf = None
temp_gdf = None

temp_gdf = housing_gdf[housing_gdf.geometry.within(kc_cities_gdf['geometry'][0])]
temp_gdf['city'] = kc_cities_gdf['CITYNAME'][0]
housing_city_gdf = temp_gdf

for i in range(1,len(kc_cities_gdf)):
    temp_df=None
    temp_gdf = housing_gdf[housing_gdf.geometry.within(kc_cities_gdf['geometry'][i])]
    temp_gdf['city'] = kc_cities_gdf['CITYNAME'][i]
    housing_city_gdf = pd.concat([housing_city_gdf,temp_gdf])
    
housing_city_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city
995,8856004730,9/17/2014,199950.0,2,2.75,1590,20917,1.5,NO,NONE,...,1920,,98001,47.2786,-122.25,1310,6000,"(-122.25, 47.2786)",POINT (-122.25000 47.27860),Algona
1689,3356406510,5/30/2014,196440.0,3,2.0,1560,7352,1.0,NO,NONE,...,1992,0.0,98001,47.2804,-122.251,1120,7950,"(-122.251, 47.2804)",POINT (-122.25100 47.28040),Algona
1798,3356404330,11/19/2014,206000.0,4,2.0,1720,7560,1.0,NO,NONE,...,1959,0.0,98001,47.2845,-122.25,1750,7988,"(-122.25, 47.2845)",POINT (-122.25000 47.28450),Algona
2357,128500260,5/8/2014,262000.0,4,2.5,2020,6236,2.0,NO,NONE,...,2002,,98001,47.2796,-122.247,1940,5076,"(-122.247, 47.2796)",POINT (-122.24700 47.27960),Algona
2404,8856004400,9/2/2014,235000.0,4,1.0,1610,24000,1.5,NO,NONE,...,1947,0.0,98001,47.2751,-122.252,1270,9600,"(-122.252, 47.2751)",POINT (-122.25200 47.27510),Algona


### [Seattle Neighborhood Data](https://data-seattlecitygis.opendata.arcgis.com/datasets/neighborhood-map-atlas-districts/explore?location=47.628714%2C-122.338313%2C11.43])

In [147]:
neighborhood_gdf = gpd.read_file('../data/Neighborhood_Map_Atlas_Districts/Neighborhood_Map_Atlas_Districts.shp')

In [148]:
neighborhood_gdf.head()

Unnamed: 0,OBJECTID,L_HOOD,S_HOOD_ALT,Shape__Are,Shape__Len,geometry
0,28,Ballard,"Loyal Heights, Adams, Whittier Heights, West W...",104603500.0,59248.568772,"POLYGON ((-122.40266 47.69602, -122.40236 47.6..."
1,29,Beacon Hill,"North Beacon Hill, Mid-Beacon Hill, South Beac...",147989400.0,78783.502732,"POLYGON ((-122.31281 47.59581, -122.31216 47.5..."
2,30,Capitol Hill,"Portage Bay, Montlake, Madison Park, Broadway,...",106856200.0,75147.170565,"MULTIPOLYGON (((-122.29599 47.64184, -122.2960..."
3,31,Cascade,"Westlake, Eastlake, South Lake Union",27179200.0,48680.288698,"POLYGON ((-122.32124 47.65231, -122.32113 47.6..."
4,32,Central Area,"Madrona, Harrison/Denny-Blaine, Minor, Leschi,...",95272580.0,52688.475894,"POLYGON ((-122.27911 47.62786, -122.27915 47.6..."


In [150]:
neighborhood_gdf['L_HOOD'].unique()

array(['Ballard', 'Beacon Hill', 'Capitol Hill', 'Cascade',
       'Central Area', 'Delridge', 'Downtown', 'Interbay', 'Lake City',
       'Magnolia', 'Greater Duwamish', 'Northeast', 'Northgate',
       'Queen Anne', 'Rainier Valley', 'Seward Park', 'West Seattle',
       'Northwest', 'North Central', 'University District'], dtype=object)

In [156]:
housing_city_neigh_gdf = None
temp_df=None

temp_gdf = housing_city_gdf[housing_city_gdf.geometry.within(neighborhood_gdf['geometry'][0])]
temp_gdf['neighborhood'] = neighborhood_gdf['L_HOOD'][0]
housing_city_neigh_gdf = temp_gdf

for i in range(1,len(neighborhood_gdf)):
    temp_df=None
    temp_gdf = housing_city_gdf[housing_city_gdf.geometry.within(kc_cities_gdf['geometry'][i])]
    temp_gdf['neighborhood'] = neighborhood_gdf['L_HOOD'][i]
    housing_city_neigh_gdf = pd.concat([housing_city_neigh_gdf,temp_gdf])
    
housing_city_neigh_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city,neighborhood
14,1175000570,3/12/2015,530000.0,5,2.0,1810,4850,1.5,NO,NONE,...,0.0,98107,47.67,-122.394,1360,4850,"(-122.394, 47.67)",POINT (-122.39400 47.67000),Seattle,Ballard
32,461000390,6/24/2014,687500.0,4,1.75,2330,5000,1.5,NO,NONE,...,0.0,98117,47.6823,-122.368,1460,5000,"(-122.368, 47.6823)",POINT (-122.36800 47.68230),Seattle,Ballard
33,7589200193,11/10/2014,535000.0,3,1.0,1090,3000,1.5,NO,NONE,...,0.0,98117,47.6889,-122.375,1570,5080,"(-122.375, 47.6889)",POINT (-122.37500 47.68890),Seattle,Ballard
37,2768000400,12/30/2014,640000.0,4,2.0,2360,6000,2.0,NO,NONE,...,0.0,98107,47.6702,-122.362,1730,4700,"(-122.362, 47.6702)",POINT (-122.36200 47.67020),Seattle,Ballard
52,7518505990,12/31/2014,600000.0,3,1.75,1410,4080,1.0,NO,NONE,...,,98117,47.6808,-122.384,1410,4080,"(-122.384, 47.6808)",POINT (-122.38400 47.68080),Seattle,Ballard


In [157]:
housing_neigh = housing_city_neigh_gdf[['id','neighborhood']]

In [158]:
housing_neigh

Unnamed: 0,id,neighborhood
14,1175000570,Ballard
32,461000390,Ballard
33,7589200193,Ballard
37,2768000400,Ballard
52,7518505990,Ballard
...,...,...
21281,7787920080,Northwest
21347,3782760080,Northwest
21432,255550100,Northwest
21445,7787920230,Northwest


In [160]:
housing_city_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city
995,8856004730,9/17/2014,199950.0,2,2.75,1590,20917,1.5,NO,NONE,...,1920,,98001,47.2786,-122.25,1310,6000,"(-122.25, 47.2786)",POINT (-122.25000 47.27860),Algona
1689,3356406510,5/30/2014,196440.0,3,2.0,1560,7352,1.0,NO,NONE,...,1992,0.0,98001,47.2804,-122.251,1120,7950,"(-122.251, 47.2804)",POINT (-122.25100 47.28040),Algona
1798,3356404330,11/19/2014,206000.0,4,2.0,1720,7560,1.0,NO,NONE,...,1959,0.0,98001,47.2845,-122.25,1750,7988,"(-122.25, 47.2845)",POINT (-122.25000 47.28450),Algona
2357,128500260,5/8/2014,262000.0,4,2.5,2020,6236,2.0,NO,NONE,...,2002,,98001,47.2796,-122.247,1940,5076,"(-122.247, 47.2796)",POINT (-122.24700 47.27960),Algona
2404,8856004400,9/2/2014,235000.0,4,1.0,1610,24000,1.5,NO,NONE,...,1947,0.0,98001,47.2751,-122.252,1270,9600,"(-122.252, 47.2751)",POINT (-122.25200 47.27510),Algona


In [165]:
housing_neigh_gdf = housing_city_gdf.join(housing_neigh,on='id', how='left',rsuffix = 'n_')
housing_neigh_gdf = housing_neigh_gdf.drop('idn_',axis=1)

In [169]:
housing_neigh_gdf['in_seattle'] = housing_neigh_gdf['city'] == 'Seattle'

In [173]:
housing_gdf_complete = housing_neigh_gdf

In [174]:
housing_gdf_complete.to_csv('housing_gdf_complete.csv.gz', index=True) 