In [3]:
import pandas as pd
import json
from pandas.io.json import json_normalize
from shapely.geometry import shape, Point, MultiPoint
from shapely.ops import nearest_points

In [4]:
# Load the raw listings and flatten the nested JSON structures into columns
with open('./results-7-31-2019.json') as f:
    listings_raw = json_normalize(json.load(f))

# Remove non-Chicago entries. This runs after the file is closed because the
# handle is no longer needed. `na=False` treats a missing address as "not
# Chicago" rather than putting NaN into the boolean mask (pandas refuses to
# index with a mask containing NaN), and `regex=False` matches the text literally.
data = listings_raw[listings_raw['address'].str.contains('Chicago, IL', na=False, regex=False)]

In [5]:
# Parse the CTA rail station GeoJSON into a plain dict
with open('./CTA_RailStations.geojson') as stations_file:
    stations_geo = json.load(stations_file)

In [6]:
# Parse the Chicago neighborhood boundary GeoJSON into a plain dict
with open('./chicago_neighborhoods.geojson') as neighborhoods_file:
    neighborhoods_geo = json.load(neighborhoods_file)

In [7]:
# Pair each neighborhood's shapely geometry with its GeoJSON properties so the
# containment checks can work on ready-made geometry objects
neighborhood_polygons = [
    dict(geometry=shape(hood['geometry']), properties=hood['properties'])
    for hood in neighborhoods_geo['features']
]

In [8]:
def get_neighborhood(row):
    """Return the name of the neighborhood that contains a listing, or None.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'hdpData.homeInfo.longitude' and
        'hdpData.homeInfo.latitude'.

    Returns
    -------
    The 'pri_neigh' property of the first entry in `neighborhood_polygons`
    whose geometry contains the listing's point, or None when no polygon
    contains it.
    """
    # Build the point once instead of once per polygon (x = longitude, y = latitude)
    location = Point(row['hdpData.homeInfo.longitude'], row['hdpData.homeInfo.latitude'])
    # Stop at the first containing polygon rather than testing all of them
    return next(
        (polygon['properties']['pri_neigh']
         for polygon in neighborhood_polygons
         if polygon['geometry'].contains(location)),
        None,
    )

# Add neighborhoods to entries (axis=1 calls get_neighborhood once per row).
# A stray ")" after `get_neighborhood` previously made this cell a syntax error.
data['neighborhood'] = data.apply(get_neighborhood, axis=1)

In [9]:
# Remove anything not in a neighborhood (probably incorrectly labeled as being in Chicago)
has_neighborhood = data['neighborhood'].notna()
data = data[has_neighborhood]
# Show the address next to the neighborhood it was assigned to
data.loc[:, ['address', 'neighborhood']]

Unnamed: 0,address,neighborhood
192,"12841 S Union Ave, Chicago, IL",West Pullman
193,"12816 S Sangamon St, Chicago, IL",West Pullman
194,"901 W 129th Pl, Chicago, IL",West Pullman
199,"915 W Vermont Ave, Chicago, IL",West Pullman
202,"12907 S Normal Ave, Chicago, IL",West Pullman
203,"442 W 129th Pl, Chicago, IL",West Pullman
207,"12766 S Union Ave, Chicago, IL",West Pullman
209,"1000 W 129th Pl, Chicago, IL",West Pullman
215,"552 W 129th Pl, Chicago, IL",West Pullman
216,"12819 S Sangamon St, Chicago, IL",West Pullman


In [105]:
def _station_index(features):
    """Map each station's (x, y) coordinate pair to its point, line(s) and name."""
    index = {}
    for feature in features:
        x = feature['geometry']['coordinates'][0]
        y = feature['geometry']['coordinates'][1]
        index[(x, y)] = {
            'coords': Point(x, y),
            'line': feature['properties']['LINES'],
            'station': feature['properties']['Name'],
        }
    return index

# Keyed by raw coordinates so a station record can be looked up from a point's x/y
stations = _station_index(stations_geo['features'])
station_points = [record['coords'] for record in stations.values()]

In [106]:
# Get nearest CTA stations
# Build the MultiPoint of all stations once; it does not depend on the row
stations_multipoint = MultiPoint(station_points)


def get_station(row):
    """Return the `stations` record for the station nearest to a listing.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'hdpData.homeInfo.longitude' and
        'hdpData.homeInfo.latitude'.

    Returns
    -------
    dict
        The entry of `stations` (keys 'coords', 'line', 'station') whose
        coordinate key equals the nearest point found in `stations_multipoint`.

    Raises
    ------
    KeyError
        If the nearest point's (x, y) is not a key of `stations`.
    """
    listing_point = Point(row['hdpData.homeInfo.longitude'], row['hdpData.homeInfo.latitude'])
    # nearest_points returns one point per input geometry, in input order, so
    # the second element is the point on the stations side.
    # NOTE(review): distance is measured in planar lon/lat degrees, not metres.
    _, nearest = nearest_points(listing_point, stations_multipoint)
    # The lookup relies on the returned coordinates matching a station key exactly
    return stations[(nearest.x, nearest.y)]
# Expand each returned station dict into columns, then attach them to the listings
nearest_station_info = data.apply(get_station, axis=1, result_type='expand')
data[['coords', 'line', 'station']] = nearest_station_info
data

Unnamed: 0,zpid,id,imgSrc,detailUrl,statusType,statusText,price,festimate,pricePerSqft,address,...,hdpData.homeInfo.rentalRefreshTime,hdpData.homeInfo.group_type,hdpData.homeInfo.grouping_id,hdpData.homeInfo.grouping_name,hdpData.homeInfo.priceSuffix,hdpData.homeInfo.title,neighborhood,coords,line,station
192,4164693,4164693,https://photos.zillowstatic.com/p_e/ISa9dxgv5a...,https://www.zillow.com/homedetails/12841-S-Uni...,FOR_SALE,House for sale,"$58,000",,,"12841 S Union Ave, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
193,4163908,4163908,https://photos.zillowstatic.com/p_e/IS66c39lgk...,https://www.zillow.com/homedetails/12816-S-San...,FOR_SALE,Foreclosure,"$49,900",,,"12816 S Sangamon St, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
194,4164104,4164104,https://photos.zillowstatic.com/p_e/ISq5l1g32p...,https://www.zillow.com/homedetails/901-W-129th...,FOR_SALE,House for sale,"$799,900",,,"901 W 129th Pl, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
199,4163957,4163957,https://photos.zillowstatic.com/p_e/ISrd7gghw4...,https://www.zillow.com/homedetails/915-W-Vermo...,FOR_SALE,House for sale,"$94,900",,,"915 W Vermont Ave, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
202,4164866,4164866,https://photos.zillowstatic.com/p_e/IS2rpwruw8...,https://www.zillow.com/homedetails/12907-S-Nor...,FOR_SALE,House for sale,"$59,900",,,"12907 S Normal Ave, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
203,4164853,4164853,https://photos.zillowstatic.com/p_e/ISe83ksptl...,https://www.zillow.com/homedetails/442-W-129th...,FOR_SALE,House for sale,"$100,000",,,"442 W 129th Pl, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
207,4164437,4164437,https://photos.zillowstatic.com/p_e/ISif7jtlqw...,https://www.zillow.com/homedetails/12766-S-Uni...,FOR_SALE,House for sale,"$84,900",,,"12766 S Union Ave, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
209,4163879,4163879,https://photos.zillowstatic.com/p_e/ISewdc03oj...,https://www.zillow.com/homedetails/1000-W-129t...,FOR_SALE,Auction,"$104,701",,,"1000 W 129th Pl, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
215,4164776,4164776,https://photos.zillowstatic.com/p_e/ISm6jllt2j...,https://www.zillow.com/homedetails/552-W-129th...,FOR_SALE,Auction,"$92,461",,,"552 W 129th Pl, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan
216,4163962,4163962,https://photos.zillowstatic.com/p_e/ISnmzcu0i3...,https://www.zillow.com/homedetails/12819-S-San...,FOR_SALE,Foreclosure,"$124,900",,,"12819 S Sangamon St, Chicago, IL",...,,,,,,,West Pullman,POINT (-87.62441474538349 41.72237598330533),Red Line,95/Dan Ryan


In [107]:
# Write the cleaned listings to CSV in the working directory
cleaned_csv_path = 'results-7-31-2019-cleaned.csv'
data.to_csv(cleaned_csv_path)