# Problem 1: Data Acquisition and Analysis

In [None]:
import os

import geopandas
import pandas as pd
from shapely.geometry import MultiPolygon
from shapely.geometry import Point
from shapely.geometry import Polygon
from sodapy import Socrata

## Download reported crime data from the Chicago open data portal for 2017 and 2018.

In [150]:
# The Socrata app token is a credential, so it is read from the
# SOCRATA_APP_TOKEN environment variable rather than committed with the
# notebook. When the variable is unset, None is passed and the requests are
# made anonymously (sodapy warns that these are throttled more strictly).
client = Socrata("data.cityofchicago.org", os.environ.get("SOCRATA_APP_TOKEN"))
# Fetch the 2017 and 2018 rows of dataset "6zsd-86xi" in a single request;
# the limit is set above the number of rows expected for those two years.
results = client.get("6zsd-86xi", limit=600000, where='year == 2017 OR year == 2018')
crime_df = geopandas.GeoDataFrame(results)

# Problem 2: Data Augmentation and APIs

In [None]:
# Without an explicit limit the SODA API returns only its default page of
# 1,000 rows, which would silently truncate the census-block layer and leave
# most crimes without a matching block in the later spatial join.
# NOTE(review): 100000 is assumed to exceed the dataset's row count - confirm
# against the portal and raise it if the dataset is larger.
census_blocks = client.get("bt9m-d2mf", limit=100000)
blocks_df = geopandas.GeoDataFrame(census_blocks)

In [152]:
def convert_point(row):
    '''
    Build a shapely Point from the 'longitude' and 'latitude' entries of a
    dataframe row.

    Returns None when either coordinate is null; otherwise both values are
    cast to float and returned as Point(longitude, latitude).
    '''
    # `and` short-circuits, so 'latitude' is only read once 'longitude' is known
    # to be present.
    has_both = pd.notnull(row['longitude']) and pd.notnull(row['latitude'])
    if not has_both:
        return None
    lon = float(row['longitude'])
    lat = float(row['latitude'])
    return Point(lon, lat)

In [153]:
def convert_polygon(row):
    '''
    Given a row from a dataframe, converts the GeoJSON-style dict in the
    'the_geom' column into a shapely MultiPolygon.

    Each polygon's first ring is used as its exterior and any further rings
    as interior holes, so holes are preserved instead of being dropped.
    A geometry whose 'type' is 'Polygon' is wrapped so that the result is
    still a MultiPolygon (with a single member).

    Returns None when 'the_geom' is missing (not a dict), mirroring how
    convert_point treats missing coordinates.
    '''
    geom = row['the_geom']
    if not isinstance(geom, dict):
        return None
    coordinates_list = geom.get('coordinates') or []
    # A GeoJSON Polygon is a list of rings, one nesting level shallower than
    # a MultiPolygon (a list of polygons); wrap it so both share one code path.
    if geom.get('type') == 'Polygon':
        coordinates_list = [coordinates_list]
    polygon_list = []
    for rings in coordinates_list:
        if not rings:
            # Skip empty polygon entries rather than failing on rings[0].
            continue
        # rings[0] is the exterior shell, rings[1:] are the holes.
        polygon_list.append(Polygon(rings[0], rings[1:]))
    return MultiPolygon(polygon_list)

In [155]:
# Build the geometry columns: a polygon layer for the census blocks and one
# point per crime record (None where a coordinate is missing).
blocks_df['geometry'] = blocks_df.apply(convert_polygon, axis=1)
crime_df['geometry'] = crime_df.apply(convert_point, axis=1)
# Only crimes with a geometry can take part in the spatial join; how='left'
# keeps every such crime row, including those that match no block.
crime_block_df = geopandas.sjoin(
    crime_df[crime_df.geometry.notna()],
    blocks_df,
    how='left',
)

In [162]:
# Preview the first five census-block rows, including the derived geometry.
blocks_df.head(n=5)

Unnamed: 0,blockce10,countyfp10,geoid10,name10,statefp10,the_geom,tract_bloc,tractce10,geometry
0,2010,31,170316903002010,Block 2010,17,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",6903002010,690300,(POLYGON ((-87.62906799941059 41.7690860000715...
1,3007,31,170316809003007,Block 3007,17,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",6809003007,680900,(POLYGON ((-87.6341179992764 41.77446599958471...
2,3013,31,170316809003013,Block 3013,17,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",6809003013,680900,(POLYGON ((-87.63485400018324 41.7726330002261...
3,4019,31,170312909004019,Block 4019,17,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",2909004019,290900,(POLYGON ((-87.73841099998789 41.8591310003707...
4,4016,31,170312925004016,Block 4016,17,"{'type': 'MultiPolygon', 'coordinates': [[[[-8...",2925004016,292500,(POLYGON ((-87.73217300001724 41.8547640004652...


In [173]:
# Number of reported crime records per year.
crime_df.groupby('year').size()

year
2017    268098
2018    266272
dtype: int64

There were 268,098 reported crimes in 2017 and 266,272 reported crimes in 2018.