# Making affluent neighborhoods financially accessible to low-income earners.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import fiona
import geopandas as gpd
from shapely.geometry import Point
import pyproj
import haversine as hs
import warnings
warnings.filterwarnings('ignore')

# wgs84 = pyproj.Proj(init="epsg:4326")
# InputGrid = pyproj.Proj(projparams = 'NAD83')

### Load housing data

In [2]:
df = pd.read_csv('../data/kc_house_data.csv')

In [3]:
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,7 Average,2170,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,6 Low Average,770,0.0,1933,,98028,47.7379,-122.233,2720,8062
3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,NO,NONE,...,7 Average,1050,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000
4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,NO,NONE,...,8 Good,1680,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503


In [4]:
df['coord'] = list(zip(df.long,df.lat))
df['geometry'] = df.apply(lambda x: Point((x.long, x.lat)), axis = 1)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21597 entries, 0 to 21596
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             21597 non-null  int64  
 1   date           21597 non-null  object 
 2   price          21597 non-null  float64
 3   bedrooms       21597 non-null  int64  
 4   bathrooms      21597 non-null  float64
 5   sqft_living    21597 non-null  int64  
 6   sqft_lot       21597 non-null  int64  
 7   floors         21597 non-null  float64
 8   waterfront     19221 non-null  object 
 9   view           21534 non-null  object 
 10  condition      21597 non-null  object 
 11  grade          21597 non-null  object 
 12  sqft_above     21597 non-null  int64  
 13  sqft_basement  21597 non-null  object 
 14  yr_built       21597 non-null  int64  
 15  yr_renovated   17755 non-null  float64
 16  zipcode        21597 non-null  int64  
 17  lat            21597 non-null  float64
 18  long  

#### Transform housing data to a geodataframe

In [6]:
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650,"(-122.257, 47.5112)",POINT (-122.257 47.5112)
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639,"(-122.319, 47.721)",POINT (-122.319 47.721)
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,0.0,1933,,98028,47.7379,-122.233,2720,8062,"(-122.233, 47.7379)",POINT (-122.233 47.7379)
3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,NO,NONE,...,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000,"(-122.393, 47.5208)",POINT (-122.393 47.5208)
4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,NO,NONE,...,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503,"(-122.045, 47.6168)",POINT (-122.045 47.6168)


In [7]:
housing_gdf = gpd.GeoDataFrame(df)
housing_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 21597 entries, 0 to 21596
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   id             21597 non-null  int64   
 1   date           21597 non-null  object  
 2   price          21597 non-null  float64 
 3   bedrooms       21597 non-null  int64   
 4   bathrooms      21597 non-null  float64 
 5   sqft_living    21597 non-null  int64   
 6   sqft_lot       21597 non-null  int64   
 7   floors         21597 non-null  float64 
 8   waterfront     19221 non-null  object  
 9   view           21534 non-null  object  
 10  condition      21597 non-null  object  
 11  grade          21597 non-null  object  
 12  sqft_above     21597 non-null  int64   
 13  sqft_basement  21597 non-null  object  
 14  yr_built       21597 non-null  int64   
 15  yr_renovated   17755 non-null  float64 
 16  zipcode        21597 non-null  int64   
 17  lat            21597 no

### Load shapefile data for cities in King county

In [37]:
kc_cities_gdf = gpd.read_file('../data/Cities_and_Unincorporated_King_County___city_kc_area/Cities_and_Unincorporated_King_County___city_kc_area.shp')
kc_cities_gdf.to_crs(epsg = 32148, inplace = True)

In [38]:
kc_cities_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    100 non-null    int64   
 1   JURIS       100 non-null    object  
 2   CITYNAME    100 non-null    object  
 3   SHAPE_Leng  100 non-null    float64 
 4   SHAPE_Area  100 non-null    float64 
 5   geometry    100 non-null    geometry
dtypes: float64(2), geometry(1), int64(1), object(2)
memory usage: 4.8+ KB


In [39]:
kc_cities_gdf.head()

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry
0,1,AL,Algona,38144.667179,36411050.0,"POLYGON ((393586.141 33607.604, 393361.732 331..."
1,2,AU,Auburn,209231.046799,785689200.0,"POLYGON ((394617.054 40308.323, 394823.902 403..."
2,3,BA,Beaux Arts,6720.785802,2274707.0,"POLYGON ((396999.450 66157.055, 396999.687 661..."
3,4,BD,Black Diamond,6629.569588,2412697.0,"POLYGON ((414627.020 32686.155, 414626.838 326..."
4,5,BD,Black Diamond,85786.107116,198366100.0,"POLYGON ((408171.066 38935.667, 408505.289 389..."


"Unincorporated" cities in King County is labeled "King County" in the dataset. King County's official definition of Unincorporated cities is as follows:

Rural unincorporated King County covers central and eastern King County and Vashon Island – areas outside the Urban Growth Boundary. These areas are desig- nated Rural, Agricultural or Forest Resource by the King County Comprehensive Plan.

We will rename unincorporated cities of king county to "Uincorporated City"

Data source: https://gis-kingcounty.opendata.arcgis.com/datasets/kingcounty::cities-and-unincorporated-king-county-city-kc-area/explore?location=47.430582%2C-121.809200%2C10.02

In [40]:
kc_cities_gdf['CITYNAME'].value_counts()

King County         38
Enumclaw             7
Kent                 6
Carnation            3
Snoqualmie           3
Redmond              3
Black Diamond        2
Bothell              2
Des Moines           2
Duvall               2
Renton               2
Skykomish            2
Newcastle            1
Normandy Park        1
Pacific              1
Algona               1
Seattle              1
Shoreline            1
Maple Valley         1
Sammamish            1
SeaTac               1
Tukwila              1
Woodinville          1
North Bend           1
Lake Forest Park     1
Milton               1
Mercer Island        1
Medina               1
Auburn               1
Kenmore              1
Kirkland             1
Issaquah             1
Hunts Point          1
Federal Way          1
Covington            1
Clyde Hill           1
Burien               1
Bellevue             1
Beaux Arts           1
Yarrow Point         1
Name: CITYNAME, dtype: int64

In [41]:
kc_cities_gdf['CITYNAME'] = kc_cities_gdf['CITYNAME'].str.replace('King County', 'Unincorporated City')
kc_cities_gdf['CITYNAME'].value_counts()

Unincorporated City    38
Enumclaw                7
Kent                    6
Carnation               3
Snoqualmie              3
Redmond                 3
Black Diamond           2
Bothell                 2
Des Moines              2
Duvall                  2
Renton                  2
Skykomish               2
Newcastle               1
Normandy Park           1
Pacific                 1
Algona                  1
Seattle                 1
Shoreline               1
Maple Valley            1
Sammamish               1
SeaTac                  1
Tukwila                 1
Woodinville             1
North Bend              1
Lake Forest Park        1
Milton                  1
Mercer Island           1
Medina                  1
Auburn                  1
Kenmore                 1
Kirkland                1
Issaquah                1
Hunts Point             1
Federal Way             1
Covington               1
Clyde Hill              1
Burien                  1
Bellevue                1
Beaux Arts  

In [42]:
kc_cities_gdf

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry
0,1,AL,Algona,38144.667179,3.641105e+07,"POLYGON ((393586.141 33607.604, 393361.732 331..."
1,2,AU,Auburn,209231.046799,7.856892e+08,"POLYGON ((394617.054 40308.323, 394823.902 403..."
2,3,BA,Beaux Arts,6720.785802,2.274707e+06,"POLYGON ((396999.450 66157.055, 396999.687 661..."
3,4,BD,Black Diamond,6629.569588,2.412697e+06,"POLYGON ((414627.020 32686.155, 414626.838 326..."
4,5,BD,Black Diamond,85786.107116,1.983661e+08,"POLYGON ((408171.066 38935.667, 408505.289 389..."
...,...,...,...,...,...,...
95,96,SN,Snoqualmie,114948.303878,2.010589e+08,"POLYGON ((425098.899 61501.002, 425112.006 614..."
96,97,ST,SeaTac,101640.458450,2.866419e+08,"POLYGON ((389228.352 55399.807, 389248.975 553..."
97,98,TU,Tukwila,125946.874691,2.683084e+08,"POLYGON ((390193.459 58404.853, 390176.499 577..."
98,99,WO,Woodinville,89644.620707,1.577311e+08,"POLYGON ((404098.674 86275.309, 404098.232 862..."


In [43]:
kc_cities_gdf['centroid'] = kc_cities_gdf.centroid

In [44]:
kc_cities_gdf.head()

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry,centroid
0,1,AL,Algona,38144.667179,36411050.0,"POLYGON ((393586.141 33607.604, 393361.732 331...",POINT (392791.391 32339.325)
1,2,AU,Auburn,209231.046799,785689200.0,"POLYGON ((394617.054 40308.323, 394823.902 403...",POINT (395834.289 35069.659)
2,3,BA,Beaux Arts,6720.785802,2274707.0,"POLYGON ((396999.450 66157.055, 396999.687 661...",POINT (397138.139 65993.350)
3,4,BD,Black Diamond,6629.569588,2412697.0,"POLYGON ((414627.020 32686.155, 414626.838 326...",POINT (414505.834 32499.483)
4,5,BD,Black Diamond,85786.107116,198366100.0,"POLYGON ((408171.066 38935.667, 408505.289 389...",POINT (410359.775 35532.648)


In [45]:
#Seattle centroid

seattle_centroid = kc_cities_gdf[kc_cities_gdf['CITYNAME']=='Seattle']
seattle_centroid = seattle_centroid.iloc[0,-1]
type(seattle_centroid)

shapely.geometry.point.Point

In [47]:
kc_cities_gdf['distance_seattle'] = kc_cities_gdf['centroid'].distance(seattle_centroid)
kc_cities_gdf.head()

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry,centroid,distance_seattle
0,1,AL,Algona,38144.667179,36411050.0,"POLYGON ((393586.141 33607.604, 393361.732 331...",POINT (392791.391 32339.325),38210.789361
1,2,AU,Auburn,209231.046799,785689200.0,"POLYGON ((394617.054 40308.323, 394823.902 403...",POINT (395834.289 35069.659),36245.361339
2,3,BA,Beaux Arts,6720.785802,2274707.0,"POLYGON ((396999.450 66157.055, 396999.687 661...",POINT (397138.139 65993.350),11935.852118
3,4,BD,Black Diamond,6629.569588,2412697.0,"POLYGON ((414627.020 32686.155, 414626.838 326...",POINT (414505.834 32499.483),47120.537344
4,5,BD,Black Diamond,85786.107116,198366100.0,"POLYGON ((408171.066 38935.667, 408505.289 389...",POINT (410359.775 35532.648),42216.811494


In [49]:
kc_cities_gdf[kc_cities_gdf['CITYNAME'] == 'Seattle']

Unnamed: 0,OBJECTID,JURIS,CITYNAME,SHAPE_Leng,SHAPE_Area,geometry,centroid,distance_seattle
88,89,SE,Seattle,341855.700552,3997737000.0,"POLYGON ((393111.096 60469.629, 393186.531 599...",POINT (385865.708 69917.236),0.0


In [48]:
# Check what the distance units are in
print(kc_cities_gdf.crs.axis_info[0].unit_name)

#Check what 
print(kc_cities_gdf.crs)

metre
epsg:32148


### 

### Add city column to housing data frame

In [50]:
housing_city_gdf = None
temp_gdf = None

temp_gdf = housing_gdf[housing_gdf.geometry.within(kc_cities_gdf['geometry'][0])]
temp_gdf['city'] = kc_cities_gdf['CITYNAME'][0]
housing_city_gdf = temp_gdf

for i in range(1,len(kc_cities_gdf)):
    temp_df=None
    temp_gdf = housing_gdf[housing_gdf.geometry.within(kc_cities_gdf['geometry'][i])]
    temp_gdf['city'] = kc_cities_gdf['CITYNAME'][i]
    housing_city_gdf = pd.concat([housing_city_gdf,temp_gdf])
    
housing_city_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city


### [Seattle Neighborhood Data](https://data-seattlecitygis.opendata.arcgis.com/datasets/neighborhood-map-atlas-districts/explore?location=47.628714%2C-122.338313%2C11.43])

In [51]:
neighborhood_gdf = gpd.read_file('../data/Neighborhood_Map_Atlas_Districts/Neighborhood_Map_Atlas_Districts.shp')

In [52]:
neighborhood_gdf.head()

Unnamed: 0,OBJECTID,L_HOOD,S_HOOD_ALT,Shape__Are,Shape__Len,geometry
0,28,Ballard,"Loyal Heights, Adams, Whittier Heights, West W...",104603500.0,59248.568772,"POLYGON ((-122.40266 47.69602, -122.40236 47.6..."
1,29,Beacon Hill,"North Beacon Hill, Mid-Beacon Hill, South Beac...",147989400.0,78783.502732,"POLYGON ((-122.31281 47.59581, -122.31216 47.5..."
2,30,Capitol Hill,"Portage Bay, Montlake, Madison Park, Broadway,...",106856200.0,75147.170565,"MULTIPOLYGON (((-122.29599 47.64184, -122.2960..."
3,31,Cascade,"Westlake, Eastlake, South Lake Union",27179200.0,48680.288698,"POLYGON ((-122.32124 47.65231, -122.32113 47.6..."
4,32,Central Area,"Madrona, Harrison/Denny-Blaine, Minor, Leschi,...",95272580.0,52688.475894,"POLYGON ((-122.27911 47.62786, -122.27915 47.6..."


In [53]:
neighborhood_gdf['L_HOOD'].unique()

array(['Ballard', 'Beacon Hill', 'Capitol Hill', 'Cascade',
       'Central Area', 'Delridge', 'Downtown', 'Interbay', 'Lake City',
       'Magnolia', 'Greater Duwamish', 'Northeast', 'Northgate',
       'Queen Anne', 'Rainier Valley', 'Seward Park', 'West Seattle',
       'Northwest', 'North Central', 'University District'], dtype=object)

In [54]:
housing_city_neigh_gdf = None
temp_df=None

temp_gdf = housing_city_gdf[housing_city_gdf.geometry.within(neighborhood_gdf['geometry'][0])]
temp_gdf['neighborhood'] = neighborhood_gdf['L_HOOD'][0]
housing_city_neigh_gdf = temp_gdf

for i in range(1,len(neighborhood_gdf)):
    temp_df=None
    temp_gdf = housing_city_gdf[housing_city_gdf.geometry.within(kc_cities_gdf['geometry'][i])]
    temp_gdf['neighborhood'] = neighborhood_gdf['L_HOOD'][i]
    housing_city_neigh_gdf = pd.concat([housing_city_neigh_gdf,temp_gdf])
    
housing_city_neigh_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city,neighborhood


In [55]:
housing_neigh = housing_city_neigh_gdf[['id','neighborhood']]

In [56]:
housing_neigh

Unnamed: 0,id,neighborhood


In [57]:
housing_city_gdf.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,coord,geometry,city


In [58]:
housing_neigh_gdf = housing_city_gdf.join(housing_neigh,on='id', how='left',rsuffix = 'n_')
housing_neigh_gdf = housing_neigh_gdf.drop('idn_',axis=1)

In [59]:
housing_neigh_gdf['in_seattle'] = housing_neigh_gdf['city'] == 'Seattle'

In [60]:
housing_gdf_complete = housing_neigh_gdf

In [61]:
housing_gdf_complete.to_csv('housing_gdf_complete.csv.gz', index=True) 

In [62]:
neigh

NameError: name 'neigh' is not defined

#### Non-working Function Dumps

In [None]:
# for row in kc_cities_gdf.itertuples():
#     print(type(row[6]))
# #     print(row[3])

In [None]:
# def within_kc_city(coord, poly, city):
#     if coord.within(poly): #row[6]
#         cityname = city #row[3]
#     else:
#         cityname = city
#     return cityname





In [None]:

# for housing_row in housing_gdf.itertuples():
#     for city_row in kc_cities_gdf.itertuples():
#         housing_gdf['city'][housing_row[0]] = within_kc_city(housing_row[-2],city_row[6],city_row[3])