In [1]:
import pandas as pd
import numpy as np
import os
import geopandas as gp

In [2]:
# -- read the park quality file on a year basis
# Load the inspections table only when `yearDemoDF` is not already defined in
# the kernel, so re-running this cell does not re-read the file. The guard
# checks the same name that is assigned below, and only a missing name
# (NameError) triggers the load; any other error is allowed to surface.
# NOTE: when the frame is reused, it keeps whatever in-place modifications
# later cells have already applied to it.
try:
    yearDemoDF
except NameError:
    print("reading 2014_Inspections.csv...")
    in_path = os.path.join('../../shapeData','Dataframes')
    in_name = os.path.join(in_path,'2014_Inspections.csv')
    # the file is parsed as tab-separated despite its .csv extension
    yearDemoDF = pd.read_csv(in_name, sep='\t')

reading 2014_Inspections.csv...


In [4]:
# -- Get rid of the duplicated index column
# Drop by label with errors='ignore' so this cell can be re-run safely:
# a second execution finds no 'Unnamed: 0' column and leaves the frame as is
# instead of raising KeyError.
yearDemoDF = yearDemoDF.drop('Unnamed: 0', axis=1, errors='ignore')

In [5]:
# -- categories
# Column names averaged into the per-inspection 'Ratings Ratio'.
categories = [
    u'Athletic Fields',
    u'Benches',
    u'Fences',
    u'Glass',
    u'Graffiti',
    u'Horticultural Areas',
    u'Ice',
    u'Lawns',
    u'Litter',
    u'Paved Surfaces',
    u'Play Equipment',
    u'Safety Surface',
    u'Sidewalks',
    u'Trails',
    u'Trees',
    u'Water Bodies',
    u'Weeds',
]

In [6]:
# -- calculate inspection scores [ratio]
# Row-wise mean across the category columns gives one score per inspection.
categoryRatings = yearDemoDF[categories]
yearDemoDF['Ratings Ratio'] = categoryRatings.mean(axis=1)

In [7]:
# -- seed final park stats DF
# One row per 'Prop ID', carrying the first available value of the
# identifying columns for that property.
byParkID = yearDemoDF[['Prop ID', 'PID_base', 'Category', 'ZIPCODE']].groupby(['Prop ID'], as_index = False)
parkStats = byParkID.first()

# -- average Park Score and Acres
# Means are taken per 'PID_base'. The columns are selected with a list:
# selecting several groupby columns with a bare tuple
# (byParkBase['a', 'b']) is rejected by current pandas.
byParkBase = yearDemoDF.groupby(['PID_base'], as_index = False)
parkAvgScore = byParkBase[['Ratings Ratio', 'ACRES_x']].mean()
# rename returns a new frame; avoiding inplace keeps the cell re-runnable
parkAvgScore = parkAvgScore.rename(columns={'Ratings Ratio':'Avg Ratio'})
parkStats = pd.merge(parkStats, parkAvgScore, on='PID_base', how='left')

# -- calculate weighted Score for park
# average ratio scaled by the (averaged) acreage of the park
parkStats['Weighted Score'] = parkStats['Avg Ratio'] * parkStats['ACRES_x']

In [8]:
# -- seed district_category stats DF
# One row per (ZIPCODE, Category) pair found in parkStats.
byCategory = parkStats.groupby(['ZIPCODE', 'Category'], as_index = False)
categoryStats = byCategory.first()[['ZIPCODE', 'Category']]

# -- sum Acres and Scores per category
# Columns are selected with a list; tuple selection on a groupby
# (byCategory['a', 'b']) is rejected by current pandas.
catSums = byCategory[['ACRES_x', 'Weighted Score']].sum()
catSums = catSums.rename(columns={'Weighted Score':'Score'})
categoryStats = pd.merge(categoryStats, catSums, on=['ZIPCODE', 'Category'], how='left')

# -- count parks per category
# size() is taken on an index-keyed groupby so it yields a Series on every
# pandas version; reset_index(name=...) then names the count column
# explicitly instead of relying on the unnamed column being labelled 0.
catCounts = (
    parkStats
    .groupby(['ZIPCODE', 'Category'])
    .size()
    .reset_index(name='Counts')
)
categoryStats = pd.merge(categoryStats, catCounts, on=['ZIPCODE', 'Category'], how='left')

In [9]:
# -- group by zipcode and category
# Total park count per ZIPCODE. Only 'Counts' is aggregated: summing every
# column would also try to sum the string 'Category' column, whose handling
# differs between pandas versions, and the extra columns were discarded anyway.
byZIPCODE = categoryStats.groupby('ZIPCODE', as_index = False)
ZIPCODESums = byZIPCODE[['Counts']].sum()[['ZIPCODE', 'Counts']]

# Both frames carry a 'Counts' column; the suffixes turn them into
# 'Counts_Cat' (per category) and 'Counts_ZIPCODE' (per zipcode total).
categoryStats = pd.merge(categoryStats, ZIPCODESums, on='ZIPCODE', how = 'left', suffixes = ('_Cat', '_ZIPCODE'))

In [10]:
# -- calculate weighted score for zipcode
# Acre-weighted score of each (ZIPCODE, Category) row: summed score over summed acres.
scorePerAcre = categoryStats['Score'] / categoryStats['ACRES_x']
categoryStats['Weighted Score'] = scorePerAcre

# Scale by the share of the zipcode's parks that fall in this category.
categoryShare = categoryStats['Counts_Cat'] / categoryStats['Counts_ZIPCODE']
categoryStats['Normalized Score'] = categoryStats['Weighted Score'] * categoryShare

In [11]:
# -- sum Scores per Zipcode
# Only 'Normalized Score' is aggregated. Summing every column first would
# also process the string 'Category' column (behaviour that varies across
# pandas versions) just to throw the result away.
byZIPCODE = categoryStats.groupby('ZIPCODE')
ZIPCODEStats = byZIPCODE[['Normalized Score']] \
                    .sum() \
                    .reset_index() \
                    [['ZIPCODE', 'Normalized Score']]

In [15]:
# -- Write to csv
# Persist the per-zipcode scores as a comma-separated table (the frame's
# index is written too, as in pandas' default).
out_name = '../../shapeData/Tables/ParkQualityZipcode2014.csv'
ZIPCODEStats.to_csv(out_name, sep=',')