In [292]:
import pandas as pd
import numpy as np
import os
import geopandas as gp
from datetime import datetime

In [271]:
# -- analysis parameters
Year = 2014              # inspection year to evaluate; also part of the input/output file names
SpatialKey = 'ZIPCODE'   # column the park scores are aggregated by

# -- data locations (relative paths)
InputPath = '../../shapeData/Dataframes/'   # where '<Year>_Inspections.csv' is read from
OutputPath = '../../shapeData/Tables/'      # where the amenity workbook lives and the result CSV is written

In [272]:
# -- read the park quality (inspections) file for the configured year
# The previous guard tested `ParkQualityYear`, a name that is not assigned
# anywhere in the visible notebook, behind a bare `except:`. The guard therefore
# never skipped the read. Reading unconditionally has the same effect, no longer
# hides unrelated errors, and guarantees `yearDF` always matches `Year`.
inspectionsFile = InputPath + '%d_Inspections.csv' % Year
# print(...) with a single argument behaves the same on Python 2 and Python 3
print("reading %d_Inspections.csv..." % Year)
yearDF = pd.read_csv(inspectionsFile, sep='\t')

reading 2014_Inspections.csv...


In [273]:
# -- read the Amenity file (skipped when `amenities` is already loaded in this kernel)
# Fixed: the guard used to test the misspelled name `ameneties`, so the workbook
# was re-read on every run. It also used a bare `except:`; only the
# "name not defined yet" case should trigger the load.
try:
    amenities
except NameError:
    # print(...) with a single argument behaves the same on Python 2 and Python 3
    print("reading PIP_Inventory20150618update_FINAL_ouafa.xlsx..")
    amenities = pd.read_excel(OutputPath + "PIP_Inventory20150618update_FINAL_ouafa.xlsx")

reading PIP_Inventory20150618update_FINAL_ouafa.xlsx..


In [274]:
# -- Create a count of all available amenities in each park
# Only 'AmenetiesCounted' is needed, so only that column is summed. Summing the
# whole inventory first wastes work on every other column and, on recent pandas,
# raises for columns that cannot be summed.
byParkID = amenities.groupby("Prop ID", as_index = False)
# as_index=False keeps 'Prop ID' as a regular column next to the summed count
AmenitySums = byParkID['AmenetiesCounted'].sum()

In [275]:
# -- Get rid of the duplicated index column
# Guarded so the cell can be re-run safely: a bare `del` raises KeyError once the
# column has already been removed, or if the CSV carries no such column.
if 'Unnamed: 0' in yearDF.columns:
    del yearDF['Unnamed: 0']

In [276]:
# -- inspection feature columns that are averaged into each inspection's score
categories = [
    u'Athletic Fields',
    u'Benches',
    u'Fences',
    u'Glass',
    u'Graffiti',
    u'Horticultural Areas',
    u'Ice',
    u'Lawns',
    u'Litter',
    u'Paved Surfaces',
    u'Play Equipment',
    u'Safety Surface',
    u'Sidewalks',
    u'Trails',
    u'Trees',
    u'Water Bodies',
    u'Weeds',
]

In [277]:
# -- calculate inspection scores [ratio]:
#    one minus the row-wise mean of the category columns
categoryMeans = yearDF[categories].mean(axis=1)
yearDF['Ratings Ratio'] = 1 - categoryMeans

In [278]:
# -- seed final park stats DF: one row per park ('Prop ID') carrying the first
#    non-null value of each identifying column
parkKeyColumns = ['Prop ID', 'PID_base', 'Category', SpatialKey]
byParkID = yearDF[parkKeyColumns].groupby(['Prop ID'], as_index = False)
parkStats = byParkID.first()

# -- average Park Score and Acres
byPark = yearDF.groupby(['Prop ID'], as_index = False)
# Columns are selected with a list. The tuple form byPark['a', 'b'] is deprecated
# and raises on pandas >= 2.0; the list form works on every version.
parkAvgScore = byPark[['Ratings Ratio', 'Adjusted Acres']].mean()
parkAvgScore = parkAvgScore.rename(columns={'Ratings Ratio': 'Avg Ratio'})
parkStats = pd.merge(parkStats, parkAvgScore, on='Prop ID', how='left')

# -- calculate weighted Score for park (average ratio scaled by average acreage)
parkStats['Weighted Score'] = parkStats['Avg Ratio'] * parkStats['Adjusted Acres']

In [279]:
# -- attach the amenity totals to the park stats; the default inner join keeps
#    only parks whose 'Prop ID' appears in both tables
parksWithAmenities = parkStats.merge(AmenitySums, on='Prop ID')

In [280]:
# -- seed district_category stats DF to calculate the mean amenities in each zipcode
# Fixed: this line referenced `parksWithAmeneties`, a misspelling of the
# `parksWithAmenities` frame, and raised NameError on a fresh kernel.
byCategory = parksWithAmenities.groupby([SpatialKey] + ['Category'], as_index = False)

# -- calculate average amenities per (spatial key, category) pair: <m>
catAmenity = byCategory['AmenetiesCounted'].mean()
catAmenity = catAmenity.rename(columns={'AmenetiesCounted': '<m>'})

# -- special cases where the mean was aggregated wrong
# The same per-category mean is recomputed on the parks of two individual zipcodes.
# NOTE(review): why the overall aggregation is considered wrong for these two
# zipcodes is not evident from this notebook -- confirm with the author.
parkZip10475 = parksWithAmenities.loc[parksWithAmenities['ZIPCODE']==10475 ]
byCategory10475 = parkZip10475.groupby([SpatialKey] + ['Category'], as_index = False)
catAmenity10475 = byCategory10475['AmenetiesCounted'].mean()

parkZip11418 = parksWithAmenities.loc[parksWithAmenities['ZIPCODE']==11418 ]
byCategory11418 = parkZip11418.groupby([SpatialKey] + ['Category'], as_index = False)
catAmenity11418 = byCategory11418['AmenetiesCounted'].mean()

# both recomputed tables stacked into one frame for inspection
specialcases = pd.concat([catAmenity10475,catAmenity11418])

In [281]:
# -- Join the amenity average table to the original park info table to calculate the equation
amenityEquationDataframe = pd.merge(parksWithAmenities, catAmenity, on=[SpatialKey,'Category'], how='left')

# -- Add the special cases: hard-coded <m> overrides as (zipcode, category, mean)
specialCaseMeans = [
    (10475, 'Large Park', 1.888889),
    (10475, 'Small Park', 6.666667),
    (11418, 'Large Park', 0.800000),
    (11418, 'Small Park', 4.333333),
]
for zipCode, parkCategory, meanAmenities in specialCaseMeans:
    isSpecialCase = (amenityEquationDataframe['ZIPCODE'] == zipCode) & (amenityEquationDataframe['Category'] == parkCategory)
    amenityEquationDataframe.loc[isSpecialCase, '<m>'] = meanAmenities

# -- Apply Equation: W = Adjusted Acres * (1 + m/<m>), then W*Q = W * Avg Ratio
amenityRatio = amenityEquationDataframe['AmenetiesCounted'] / amenityEquationDataframe['<m>']
# Fixed: `.replace(np.inf, 0)` used to be called without keeping its result, so
# infinite ratios (non-zero count over a zero mean) were never cleared. The clean
# series is now assigned back, which also avoids the unreliable
# `fillna(inplace=True)` on a column selection. NaN (0/0 or missing <m>) -> 0.
amenityEquationDataframe['m/<m>'] = amenityRatio.replace(np.inf, 0).fillna(0)
amenityEquationDataframe['1 + m/<m>'] = 1 + amenityEquationDataframe['m/<m>']
amenityEquationDataframe['W'] = amenityEquationDataframe['Adjusted Acres'] * amenityEquationDataframe['1 + m/<m>']
amenityEquationDataframe['W*Q'] = amenityEquationDataframe['W'] * amenityEquationDataframe['Avg Ratio']

In [283]:
# -- seed district_category stats DF: one row per (spatial key, category) pair
groupKeys = [SpatialKey, 'Category']
byCategory2 = amenityEquationDataframe.groupby(groupKeys, as_index = False)
categoryStats = byCategory2.first()[groupKeys]

# -- sum WQ and W per category
# Columns are selected with a list. The tuple form byCategory2['a', 'b', ...] is
# deprecated and raises on pandas >= 2.0.
catSums = byCategory2[['Adjusted Acres', 'Weighted Score', 'W*Q', 'W']].sum()
catSums = catSums.rename(columns={'Weighted Score': 'Score'})
categoryStats = pd.merge(categoryStats, catSums, on=groupKeys, how='left')

# -- count parks per category
# size() is taken from a default (as_index=True) groupby and named on reset.
# On pandas >= 1.1, size() on an as_index=False groupby already returns a
# DataFrame with a 'size' column, so the old reset_index().rename({0: 'Counts'})
# never produced the 'Counts' column that the following cells rely on.
catCounts = amenityEquationDataframe.groupby(groupKeys).size().reset_index(name='Counts')
categoryStats = pd.merge(categoryStats, catCounts, on=groupKeys, how='left')

In [284]:
# -- total park count per spatial key, attached beside each category's own count
bySpatialKey = categoryStats.groupby(SpatialKey, as_index = False)
spatialKeyTotals = bySpatialKey.sum()
SpatialKeySums = spatialKeyTotals[[SpatialKey, 'Counts']]

# both frames carry 'Counts'; the suffixes turn them into
# 'Counts_Cat' (per category) and 'Counts_<SpatialKey>' (per spatial key)
countSuffixes = ['_Cat', '_%s' % SpatialKey]
categoryStats = categoryStats.merge(SpatialKeySums, on=SpatialKey, how='left', suffixes=countSuffixes)

In [285]:
# -- calculate weighted scores per category within each SpatialKey (zipcode)
# share of the spatial key's parks that belong to this category
countShare = categoryStats['Counts_Cat'] / categoryStats['Counts_%s' % SpatialKey]

# acreage-weighted score, then scaled by the category's share of parks
categoryStats['Weighted Score'] = categoryStats['Score'] / categoryStats['Adjusted Acres']
categoryStats['Area Normalized Score'] = categoryStats['Weighted Score'] * countShare

# amenity-and-acreage-weighted score (sum of W*Q over sum of W), scaled the same way
categoryStats['Equation Score'] = categoryStats['W*Q'] / categoryStats['W']
categoryStats['Ameneties & Area Normalized Score'] = categoryStats['Equation Score'] * countShare

In [286]:
# -- add the per-category scores up into one row per spatial key (zipcode)
scoreColumns = ['Ameneties & Area Normalized Score', 'Area Normalized Score']
bySpatialKey = categoryStats.groupby(SpatialKey)
spatialTotals = bySpatialKey.sum().reset_index()
SpatialStats = spatialTotals[[SpatialKey] + scoreColumns]

In [289]:
# -- floor scores at 0 just in case U/S overtake ratios
for scoreColumn in ('Ameneties & Area Normalized Score', 'Area Normalized Score'):
    isNegative = SpatialStats[scoreColumn] < 0
    SpatialStats.loc[isNegative, scoreColumn] = 0

In [293]:
# -- Write to csv; the file name carries the spatial key, the year and today's date (MM-DD-YYYY)
now = datetime.now()
dateStamp = now.strftime('%m-%d-%Y')
outputFile = OutputPath + 'AmenityAreaParkQuality%s_%s_%s.csv' % (SpatialKey, Year, dateStamp)
SpatialStats.to_csv(outputFile, sep=',')