In [1]:
import pickle
import pandas as pd
import geopandas
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Below is our project

In [2]:
# read scraped CHA data
with open("data/CHA_rental_data.obj", "rb") as f:       
    d = pickle.load(f)
cha = pd.DataFrame.from_dict(data = d, orient = "index")

In [3]:
# clean CHA data
cols = ['Address','Monthly Rent','Property Type','Bath','Bed',
        'Voucher Necessary','Availability','Contact','URL','Lat','Long']
cha = cha[cols]
cha.Long = -1 * cha.Long

# correct error
cha.loc["4545145", "Long"] = -87.66593 
cha.loc["4545145", "Lat"] = 41.772175

In [4]:
cha.head()

Unnamed: 0,Address,Monthly Rent,Property Type,Bath,Bed,Voucher Necessary,Availability,Contact,URL,Lat,Long
1288108,"1718 W 66th St 1, Chicago, IL 60636",800,Apt,1.0,2.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.774,-87.6673
4012748,"6130 S Eberhart Ave 1, Chicago, IL 60637",1200,Apt,2.0,3.0,Yes,Available Now,(773) 961-4367,http://chicagoha.gosection8.com/Section-8-hous...,41.783,-87.6136
4017021,"4827 S Seeley Ave , Chicago, IL 60609",600,Apt,1.0,1.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.8058,-87.6756
4036551,"828 E 88th Pl 2, Chicago, IL 60619",875,Apt,1.0,3.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.73419,-87.603472
4036578,"4620 S Evans Ave 1, Chicago, IL 60653",1150,Apt,1.0,3.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.810646,-87.608165


In [5]:
cha.dtypes

Address               object
Monthly Rent           int64
Property Type         object
Bath                 float64
Bed                  float64
Voucher Necessary     object
Availability          object
Contact               object
URL                   object
Lat                  float64
Long                 float64
dtype: object

In [6]:
# convert to GeoDataFrame
cha['Coordinates'] = list(zip(cha.Long, cha.Lat))

In [7]:
cha['Coordinates'] = cha['Coordinates'].apply(Point)

In [8]:
gcha = geopandas.GeoDataFrame(cha, geometry='Coordinates')

In [9]:
gcha[203:204]

Unnamed: 0,Address,Monthly Rent,Property Type,Bath,Bed,Voucher Necessary,Availability,Contact,URL,Lat,Long,Coordinates
4302747,"218 E 79th St 2, Chicago, IL 60619",1300,Apt,1.5,4.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.751146,-87.618888,POINT (-87.6188885 41.751146)


In [11]:
# read block group geojson
blocks_full = geopandas.read_file("data/block-groups.geojson")

In [12]:
blocks = blocks_full[['geometry', 'GEOID']]

In [13]:
blocks.head()

Unnamed: 0,geometry,GEOID
0,"(POLYGON ((-87.655192 41.983802, -87.655914999...",170310306041
1,(POLYGON ((-87.65111443516709 41.9840326124377...,170310306042
2,"(POLYGON ((-87.656031 41.987344, -87.655303 41...",170310306043
3,"(POLYGON ((-87.66006899999999 41.983622, -87.6...",170310307011
4,"(POLYGON ((-87.65454699999999 41.980646, -87.6...",170310307021


In [14]:
cha_with_geoid = geopandas.sjoin(gcha, blocks, how="left", op='intersects')

  warn('CRS of frames being joined does not match!')


In [16]:
cha_with_geoid.head()

Unnamed: 0,Address,Monthly Rent,Property Type,Bath,Bed,Voucher Necessary,Availability,Contact,URL,Lat,Long,Coordinates,index_right,GEOID
1288108,"1718 W 66th St 1, Chicago, IL 60636",800,Apt,1.0,2.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.774,-87.6673,POINT (-87.6673 41.774),3028,170316712002
4012748,"6130 S Eberhart Ave 1, Chicago, IL 60637",1200,Apt,2.0,3.0,Yes,Available Now,(773) 961-4367,http://chicagoha.gosection8.com/Section-8-hous...,41.783,-87.6136,POINT (-87.61360000000001 41.783),3375,170314206001
4017021,"4827 S Seeley Ave , Chicago, IL 60609",600,Apt,1.0,1.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.8058,-87.6756,POINT (-87.6756 41.8058),557,170316115001
4036551,"828 E 88th Pl 2, Chicago, IL 60619",875,Apt,1.0,3.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.73419,-87.603472,POINT (-87.603472 41.73419),2756,170314408001
4036578,"4620 S Evans Ave 1, Chicago, IL 60653",1150,Apt,1.0,3.0,Yes,Check Availability,,http://chicagoha.gosection8.com/Section-8-hous...,41.810646,-87.608165,POINT (-87.608165 41.810646),1178,170318436002


In [18]:
index_to_geoid = cha_with_geoid["GEOID"]

In [20]:
index_to_geoid.head()

1288108    170316712002
4012748    170314206001
4017021    170316115001
4036551    170314408001
4036578    170318436002
Name: GEOID, dtype: object