# HW 1 - Carlos Alvarado

### Problem 1 - CODE

In [3]:
import os
import pandas as pd
import requests

def get_and_load_data(data_info):
    '''
    Loads "data_name.csv" from local disk, or downloads it if it's not present
    
    Input: dictionary with information about required data
    Returns: pandas data_frame for "data_name"
    '''
    
    filepath = './data/{}.csv'.format(data_info['source'])
    
    if os.path.exists(filepath):
        data = pd.read_csv(filepath)
    else:
        data = helper_download_data(filepath, data_name)
    
    for old_var, new_var in data_info['rename']:
        data.rename(index=str, columns={old_var: new_var}, inplace=True)
        
    return data
    
def helper_download_data(filepath, data_name):
    offset = 0
    limit = 50000
    data = pd.DataFrame({})
    
    while True:
        
        url = 'https://data.cityofchicago.org/resource/{}.json?$limit={}&$offset={}'.format(
            data_name, limit, offset)
        print('getting data from', url)
    
        response = requests.get(url)
        response_code = response.status_code
        
        if response_code != 200: 
            print('Failed to download data')
            break
            
        json_data = response.content

        if len(json_data) > 4:
            data = pd.concat([data, pd.read_json(json_data)])
        
            offset = offset + limit
        else:
            print(json_data)
            break
            
    #save data to csv for future use
    data.to_csv(filepath)
    
    return data    

In [43]:
DATA1 = {'name': 'Graffiti Removal',
         'source': 'hec5-y4x5',
         'rename': [('where_is_the_graffiti_located_', 'Sub Type')]    
        }

DATA2 = {'name': 'Vacant and Abandoned Buildings Reported',
         'source': '7nii-7srd',
         'rename': [('is_building_open_or_boarded_', 'Sub Type'), 
                    ('date_service_request_was_received', 'creation_date')]
        }

DATA3 = {'name': 'Pot Holes Reported',
         'source': '7as2-ds3y',
         'rename': [('zip', 'zip_code'), 
                    ('type_of_service_request', 'Sub Type')]
        }
    
DATA4 = {'name': 'Sanitation Code Complaints',
         'source': 'me59-5fac',
         'rename': [('what_is_the_nature_of_this_code_violation_', 'Sub Type')]
        }

SOURCES = [DATA1, DATA2, DATA3, DATA4]

complaints = pd.DataFrame({})

for db_data in SOURCES:
    data = get_and_load_data(db_data)
    print(data.columns)
    data['Complaint Type'] = db_data['name']
    complaints = pd.concat([complaints, data], ignore_index=True)

Index(['Unnamed: 0', 'community_area', 'completion_date', 'creation_date',
       'latitude', 'location', 'longitude', 'police_district',
       'service_request_number', 'ssa', 'status', 'street_address',
       'type_of_service_request', 'ward',
       'what_type_of_surface_is_the_graffiti_on_', 'Sub Type', 'x_coordinate',
       'y_coordinate', 'zip_code'],
      dtype='object')
Index(['Unnamed: 0', 'address_street_direction', 'address_street_name',
       'address_street_number', 'address_street_suffix',
       'any_people_using_property_homeless_childen_gangs_', 'community_area',
       'creation_date', 'if_the_building_is_open_where_is_the_entry_point_',
       'Sub Type', 'is_the_building_currently_vacant_or_occupied_',
       'is_the_building_dangerous_or_hazardous_',
       'is_the_building_vacant_due_to_fire_', 'latitude', 'location',
       'location_of_building_on_the_lot_if_garage_change_type_code_to_bgd_',
       'longitude', 'police_district', 'service_request_number',
 

  if self.run_code(code, result):


In [44]:
#create sample database to play with
complaints.sample(1000).to_csv('./data/sample.csv')

In [2]:
import pandas as pd
import geopandas as gpd
area_names_file = "./data/boundaries/Boundaries - Community Areas (current).geojson"
area_names = gpd.read_file(area_names_file)
area_names['area_numbe'] = pd.to_numeric(area_names.area_numbe, errors='coerce')

complaints = pd.read_csv('./data/sample.csv')

complaints = complaints.merge(area_names, left_on='community_area', right_on='area_numbe', how='left')

In [3]:
complaints.community.value_counts()

SOUTH LAWNDALE            75
WEST TOWN                 46
BELMONT CRAGIN            43
BRIGHTON PARK             42
LOWER WEST SIDE           41
LOGAN SQUARE              31
PORTAGE PARK              26
GAGE PARK                 26
AVONDALE                  25
LAKE VIEW                 25
WEST RIDGE                24
IRVING PARK               24
NEW CITY                  24
ARCHER HEIGHTS            23
NORTH CENTER              23
WEST LAWN                 22
AUSTIN                    20
LINCOLN PARK              20
GARFIELD RIDGE            19
MCKINLEY PARK             18
ALBANY PARK               18
ROGERS PARK               17
NEAR NORTH SIDE           16
WEST ELSDON               15
NEAR WEST SIDE            14
SOUTH SHORE               14
CHICAGO LAWN              13
LINCOLN SQUARE            13
NORTH PARK                13
HUMBOLDT PARK             12
                          ..
ARMOUR SQUARE              7
CALUMET HEIGHTS            7
JEFFERSON PARK             7
UPTOWN        

In [3]:
#number of complaints by type
ctcounts = complaints['Complaint Type'].value_counts()
ctcounts

Graffiti Removal                           854622
Pot Holes Reported                         457442
Sanitation Code Complaints                 118761
Vacant and Abandoned Buildings Reported     58716
Name: Complaint Type, dtype: int64

In [76]:
#complaints by type and neighboorhood
gb = complaints.groupby(['community', 'Complaint Type']).size().to_frame()
print(gb)

                                                             0
community          Complaint Type                             
ALBANY PARK        Graffiti Removal                         14
                   Pot Holes Reported                        2
ARCHER HEIGHTS     Graffiti Removal                         15
                   Sanitation Code Complaints                2
ARMOUR SQUARE      Graffiti Removal                          5
                   Pot Holes Reported                        1
                   Sanitation Code Complaints                1
ASHBURN            Graffiti Removal                         10
                   Pot Holes Reported                        7
                   Sanitation Code Complaints                1
                   Vacant and Abandoned Buildings Reported   1
AUBURN GRESHAM     Graffiti Removal                          1
                   Pot Holes Reported                        7
                   Sanitation Code Complaints          

In [4]:
gb = complaints.groupby(['Complaint Type', 'Sub Type']).size().to_frame()
print(gb)

                                                                                         0
Complaint Type                          Sub Type                                          
Graffiti Removal                        Alley                                        35316
                                        Bench                                         2722
                                        Door                                         22303
                                        Dumpster                                     14342
                                        Express Way Job                               1610
                                        Fence                                        16245
                                        Front                                       310521
                                        Garage                                      100033
                                        Garbage Cart                                  6293

In [None]:
gb_community_area = complaints.groupby(['Complaint Type', 'community_area']).size().to_frame()
select = gb_community_area['0'] 
print(gb_community_area)

### Problem 2

In [24]:
# Adding Block ID to 311 Requests data 
import geopandas as gpd
import pandas as pd
from geopandas import GeoDataFrame
from geopandas.tools import sjoin
from shapely.geometry import Point

#this file contains polygons at the FIPS_12 level (block group)
blocks = gpd.read_file('./data/cb_2015_17_bg_500k/cb_2015_17_bg_500k.shp')

complaints = pd.read_csv('./data/sample.csv')

clean_data = complaints[complaints['latitude'].notnull() & complaints['longitude'].notnull()]

geometry = [Point(xy) for xy in zip(clean_data.longitude, clean_data.latitude)]
clean_data = clean_data.drop(['latitude', 'longitude'], axis=1)

crs = {'init': 'epsg:4269'}
geo_complaints = GeoDataFrame(clean_data, crs=crs, geometry=geometry)

data_with_blockid = sjoin(geo_complaints, blocks, how="left")

In [29]:
blocks

Unnamed: 0,AFFGEOID,ALAND,AWATER,BLKGRPCE,COUNTYFP,GEOID,LSAD,NAME,STATEFP,TRACTCE,geometry
0,1500000US171859574001,8255601,1146247,1,185,171859574001,BG,1,17,957400,"POLYGON ((-87.761735 38.453187, -87.7591149999..."
1,1500000US170310103003,104823,0,3,031,170310103003,BG,3,17,010300,"POLYGON ((-87.667823 42.014969, -87.6623570669..."
2,1500000US170315204002,199288,0,2,031,170315204002,BG,2,17,520400,"POLYGON ((-87.53165799999999 41.702716, -87.52..."
3,1500000US170310301032,46757,101282,2,031,170310301032,BG,2,17,030103,"POLYGON ((-87.65767699999999 41.995441, -87.65..."
4,1500000US170310307061,347373,186155,1,031,170310307061,BG,1,17,030706,POLYGON ((-87.65576399999999 41.97821099999999...
5,1500000US170310315023,43192,0,3,031,170310315023,BG,3,17,031502,"POLYGON ((-87.654787 41.96543, -87.65374199999..."
6,1500000US171659558004,677251,0,4,165,171659558004,BG,4,17,955800,"POLYGON ((-88.558978 37.731532, -88.5589339999..."
7,1500000US170898501032,2370783,73356,2,089,170898501032,BG,2,17,850103,"POLYGON ((-88.315753 42.114224, -88.310389 42...."
8,1500000US170313016001,164660,0,1,031,170313016001,BG,1,17,301600,"POLYGON ((-87.714787 41.844385, -87.7099099999..."
9,1500000US171990212003,582364,0,3,199,171990212003,BG,3,17,021200,"POLYGON ((-88.948965 37.729057, -88.941936 37...."


In [25]:
# Getting census data
import requests
import pandas as pd

def get_data_census(varname, label):

    census_api_url = 'http://api.census.gov/data/' + \
                    '2015/acs5?get=NAME,' + varname + \
                    '&for=block+group:*&in=state:17&in=county:031&in=tract:*'

    response = requests.get(census_api_url)

    json_data = response.content

    data = pd.read_json(json_data)

    names = list(data.iloc[0])

    for i in range(0,6):
        data.rename(index=str, columns={i: names[i]}, inplace=True)
    
    data.rename(index=str, columns={varname: label}, inplace=True)
    
    #HERE, I REPLICATE THE FIPS_12 CODE
    data['GEOID'] = data['state'].map(str) + data['county'].map(str) + \
                            data['tract'].map(str) + data['block group'].map(str)

    data.drop(data.index[[0]], inplace=True)
    
    print('downloaded', varname)
    
    return data

download_data = [('B01003_001E', 'Total Population'),
                ('B02001_002E', 'White Population'),
                ('B19013_001E', 'Median Household Income'),
                ('B06009_002E', 'Less than highschool')]

for i, (varname, label) in enumerate(download_data):
    downloaded = get_data_census(varname, label)
    if i == 0:
        census_data = downloaded
    else:
        census_data = census_data.merge(downloaded, on='GEOID', how='outer')

data_with_blockid2 = data_with_blockid.merge(census_data, on='GEOID')    

downloaded B01003_001E
downloaded B02001_002E
downloaded B19013_001E
downloaded B06009_002E


In [26]:
data_with_blockid2.iloc[10]

Unnamed: 0                                                                                                       847629
Complaint Type                                                                                         Graffiti Removal
Sub Type                                                                                                          Front
Unnamed: 0.1                                                                                                      47629
address_street_direction                                                                                            NaN
address_street_name                                                                                                 NaN
address_street_number                                                                                               NaN
address_street_suffix                                                                                               NaN
any_people_using_property_homeless_child

### Problem 3

In [27]:
import requests
from geopy.geocoders import Nominatim
import xml.etree.ElementTree as ET

address = '"7500 S. Wolcott Ave.'
geolocator = Nominatim()
location = geolocator.geocode(address)
print(location.latitude, location.longitude)

census_block_url = 'http://data.fcc.gov/api/block/2010/find?latitude={lat}&longitude={lon}&showall=true'.format(
        lat=location.latitude, lon=location.longitude)

response = requests.get(census_block_url)

xml_data = response.content
etree = ET.fromstring(xml_data)
#print(etree)

for neighbor in etree.iter():
    if 'Block' in neighbor.tag:
        block_id = neighbor.attrib['FIPS'][:12]

data_with_blockid2[data_with_blockid2.GEOID == block_id]["Complaint Type"].value_counts()


#data = pd.read_json(json_data)
#    41.757488, -87.671259
#170317104005000

41.7573666 -87.6711973381924


Series([], Name: Complaint Type, dtype: int64)

In [35]:
data_with_blockid2[data_with_blockid2['Complaint Type'] == 'Graffiti Removal'].community.value_counts()


AttributeError: 'GeoDataFrame' object has no attribute 'community'

In [None]:
len(blocks2.blockce10.value_counts())

In [None]:
blocks2.columns

In [None]:
blocks2.countyfp10.value_counts()

In [None]:
#to make graphs
%matplotlib inline
import matplotlib
blocks.plot();

base = blocks2.plot(color='white')

geo_sel.plot(ax=base, marker='o', color='red', markersize=5);