### Imports

In [170]:
import pandas as pd
import numpy as np
from shapely.geometry import Point
from geolocating_functions import get_community_name, chicago_lat_long_builder
from school_cleaning_functions import school_survey_cleaner, school_growth_cleaner

### Building geolocation to community map

In [2]:
comm_areas = pd.read_csv('CommAreas.csv')

In [3]:
comm_map = pd.DataFrame()

In [5]:
comm_map['perim_lat_long_points'] = comm_areas['the_geom'].apply(chicago_lat_long_builder)
comm_map['comm_name'] = comm_areas['COMMUNITY'].apply(lambda x: x.lower())
comm_map['comm_num'] = comm_areas['AREA_NUM_1'].astype(int)

In [6]:
comm_map

Unnamed: 0,perim_lat_long_points,comm_name,comm_num
0,"[(41.84469250265398, -87.60914087617894), (41....",douglas,35
1,"[(41.81692934626684, -87.59215283879394), (41....",oakland,36
2,"[(41.80189303368919, -87.62879823733725), (41....",fuller park,37
3,"[(41.81681377057218, -87.6067081256125), (41.8...",grand boulevard,38
4,"[(41.81692934626684, -87.59215283879394), (41....",kenwood,39
...,...,...,...
72,"[(41.70714491233857, -87.69645961375822), (41....",mount greenwood,74
73,"[(41.685082119670845, -87.64215204651398), (41...",morgan park,75
74,"[(41.986396111591276, -87.83658087874365), (41...",ohare,76
75,"[(41.99816614970252, -87.65455590025104), (41....",edgewater,77


In [7]:
comm_map.to_pickle('comm_map.pkl')

### Preparing the datasets with mergable column

##### Column Cleaner

In [77]:
def column_cleaner(dataframe):
    
    for col in dataframe.columns:
    
        dataframe.rename(columns={col: col.lower().strip()}, inplace=True)

#### Libraries

In [166]:
libraries = pd.read_csv('Libraries_-_Locations__Hours_and_Contact_Information.csv')

In [167]:
libraries['LOCATION'] = libraries['LOCATION'].apply(chicago_lat_long_builder)

In [168]:
libraries['community_name'] = libraries['LOCATION'].apply(get_community_name)

In [171]:
column_cleaner(libraries)
libraries.drop_duplicates(['location'], inplace=True)
libraries['points'] = libraries['location'].apply(lambda x: Point(x))
libraries.reset_index(inplace=True, drop=True)
libraries[['name', 'location', 'community_name', 'points']].to_pickle('libraries.pkl')

#### Clinics

In [179]:
clinics = pd.read_csv('Public_Health_Services-_Chicago_Primary_Care_Community_Health_Centers.csv')

In [180]:
clinics['community_name'] = clinics['Community Area (#)'].apply(lambda x: ' '.join([x for x in x.lower().split() if x.isalpha()]))

In [181]:
clinics['location'] = clinics['Address'].apply(lambda x: x.split('\n')[1])

In [182]:
clinics['location'] = clinics['location'].apply(chicago_lat_long_builder)

In [183]:
clinics['points'] = clinics['location'].apply(lambda x: Point(x))

In [184]:
clinics.rename(columns={'FQHC, Look-alike, or Neither; Special Notes': 'clinic_type'}, inplace=True)

In [185]:
column_cleaner(clinics)
clinics.drop_duplicates(['location'], inplace=True)
clinics.reset_index(inplace=True, drop=True)
clinics[['facility', 'clinic_type', 'community_name', 'location', 'points']].to_pickle('clinics.pkl')

#### Grocery Stores

In [191]:
groc_stores = pd.read_csv('Grocery_Stores_-_2013.csv')

In [192]:
column_cleaner(groc_stores)

In [193]:
groc_stores.rename(columns={'store name': 'store_name',\
                            'community area name': 'community_name', \
                            'community area': 'community_number', \
                            'square feet': 'sq_ft', \
                            'buffer size': 'buffer_size'}, inplace=True, errors='ignore')
groc_stores['community_name'] = groc_stores['community_name'].apply(lambda x: x.lower())
groc_stores['location'] = groc_stores['location'].apply(chicago_lat_long_builder)
groc_stores['buffer_size'] = groc_stores['buffer_size'].apply(lambda x: 1 if x == 'A' else 0)
groc_stores.drop_duplicates(['location'], inplace=True)
groc_stores['points'] = groc_stores['location'].apply(lambda x: Point(x))
groc_stores.reset_index(inplace=True, drop=True)

In [194]:
groc_stores[['store_name', 'sq_ft', 'buffer_size', 'community_name', 'community_number', 'location', 'points']].to_pickle('grocery_stores.pkl')

#### Pharmacies

In [195]:
pharma = pd.read_csv('Pharmacy_Status.csv')

In [196]:
column_cleaner(pharma)

In [197]:
pharma.rename(columns={'pharmacy name': 'pharmacy_name', \
                       'new georeferenced column': 'location'}, inplace=True, errors='ingore')

In [198]:
pharma['status'] = pharma['status'].apply(lambda x: x.lower())

In [199]:
pharma_open = pharma[pharma['status'] != 'permanently closed'].reset_index(drop=True)

In [200]:
pharma_open = pharma_open[['pharmacy_name','address', 'location']].dropna().reset_index(drop=True)

In [201]:
pharma_open['location'] = pharma_open['location'].apply(chicago_lat_long_builder)

In [202]:
pharma_open['community_name'] = pharma_open['location'].apply(get_community_name)

In [203]:
pharma_open.drop_duplicates(['location'], inplace=True)
pharma_open['points'] = pharma_open['location'].apply(lambda x: Point(x))
pharma_open.reset_index(inplace=True, drop=True)

In [204]:
pharma_open.to_pickle('pharmacies.pkl')

#### Schools

In [205]:
school_locations = pd.read_csv('Chicago_Public_Schools_-_School_Locations_SY1920.csv')

In [206]:
school_progress_report = pd.read_csv('Chicago_Public_Schools_-_School_Progress_Reports_SY1819.csv')

In [207]:
column_cleaner(school_locations), column_cleaner(school_progress_report)

(None, None)

In [208]:
school_loc_merger = school_locations[['the_geom', 'school_id']]

In [209]:
school_prog_merger = school_progress_report[['school_id', 'long_name', 'school_type', 'student_growth_rating', \
                        'school_survey_involved_families', 'school_survey_supportive_environment', \
                        'school_survey_ambitious_instruction', 'school_survey_effective_leaders', \
                        'school_survey_collaborative_teachers', 'school_survey_safety', \
                        'mobility_rate_pct', 'chronic_truancy_pct']]

In [210]:
schools = pd.merge(school_loc_merger, school_prog_merger, on='school_id')

In [211]:
schools.rename(columns={'the_geom': 'location'}, inplace=True, errors='ignore')

In [212]:
schools['location'] = schools['location'].apply(chicago_lat_long_builder)

In [213]:
schools['points'] = schools['location'].apply(lambda x: Point(x))

In [214]:
schools['community_name'] = schools['location'].apply(get_community_name)

In [215]:
for col in schools.columns:
    if 'school_survey' in col:
        schools[col] = schools[col].apply(school_survey_cleaner)
    if 'student_growth' in col:
        schools[col] = schools[col].apply(school_growth_cleaner)

In [216]:
prestigious = ['Charter', 'Regional gifted center', 'Magnet', 'Selective enrollment']

In [217]:
schools['prestigious_school'] = schools['school_type'].apply(lambda x: 1 if x in prestigious else 0)

In [218]:
schools.to_pickle('schools.pkl')

#### Food Inspections

In [219]:
food_inspections_base = pd.read_csv('Food_Inspections_-_7_1_2018_-_Present.csv')

In [220]:
column_cleaner(food_inspections_base)

In [221]:
food_inspections_full = food_inspections_base[['risk', 'inspection date', 'results', 'location']]

In [222]:
food_inspections_full['year'] = pd.to_datetime(food_inspections_full['inspection date']).apply(lambda x: x.year)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_inspections_full['year'] = pd.to_datetime(food_inspections_full['inspection date']).apply(lambda x: x.year)


In [223]:
food_inspections_2019 = food_inspections_full[food_inspections_full['year'] == 2019].reset_index(drop=True)

In [224]:
food_inspections_2019.drop(['inspection date', 'year'], axis=1, inplace=True, errors='ignore')

In [225]:
food_inspections = food_inspections_2019.dropna().reset_index(drop=True)

In [226]:
food_inspections['risk'] = food_inspections['risk'].apply(lambda x: 1 if x == 'Risk 1 (High)' else 0)

In [227]:
food_inspections['results'] = food_inspections['results'].apply(lambda x: 1 if x == 'Fail' else 0)

In [228]:
food_inspections.rename(columns={'risk': 'high_risk', 'results': 'failed_inspection'}, inplace=True, errors='ignore')

In [229]:
food_inspections['location'] = food_inspections['location'].apply(chicago_lat_long_builder)

In [230]:
food_inspections.drop_duplicates(['location'], inplace=True)

In [231]:
food_inspections['points'] = food_inspections['location'].apply(lambda x: Point(x))

In [232]:
food_inspections['community_name'] = food_inspections['location'].apply(get_community_name)

In [233]:
food_inspections.to_pickle('food_inspections.pkl')