In [None]:
import pandas as pd
import numpy as np
import os
import geopandas as gp
from datetime import datetime

In [None]:
# -- inspection year to evaluate (selects the '<Year>_Inspections.csv' input)
Year = 2014

# -- column used as the spatial aggregation key
SpatialKey = "GEOID"

# -- directory holding the per-year inspection dataframes
InspectionInputPath = "../../shapeData/Dataframes/"

# -- workbook listing parks broken down by census tract
CensusParkInputFile = "../../shapeData/Tables/AllSites_MasterPropID_CT.xlsx"

# -- directory the result table is written to
OutputPath = "../../shapeData/Tables/"

In [None]:
# -- read the park quality file on a year basis
# The frame is kept in the kernel so re-running this cell does not re-read the
# file, but it is only reused when it was loaded for the currently configured
# Year. Without that check, changing Year and re-running would silently keep
# scoring the previously loaded year's inspections.
if 'yearDF' not in globals() or globals().get('yearDFYear') != Year:
    print("reading %d_Inspections.csv..." % (Year))
    # tab-separated file; the first column is used as the index
    yearDF = pd.read_csv(InspectionInputPath + '%d_Inspections.csv' % (Year), index_col=0, sep='\t')
    # remember which year the cached frame belongs to
    yearDFYear = Year

In [None]:
# -- read the park listing breakdowns by census tract
# The workbook is loaded once per kernel: the bare name lookup raises NameError
# only when censusParkDF has not been defined yet. Catching NameError
# specifically keeps any other exception from being swallowed.
try:
    censusParkDF
except NameError:
    # the census breakdown is only needed when aggregating by GEOID
    if SpatialKey == 'GEOID':
        print("reading census park info file...")
        censusParkDF = pd.read_excel(CensusParkInputFile)

In [None]:
# -- inspection columns that are averaged into the per-inspection rating
categories = [
    u'Athletic Fields',
    u'Benches',
    u'Fences',
    u'Glass',
    u'Graffiti',
    u'Horticultural Areas',
    u'Ice',
    u'Lawns',
    u'Litter',
    u'Paved Surfaces',
    u'Play Equipment',
    u'Safety Surface',
    u'Sidewalks',
    u'Trails',
    u'Trees',
    u'Water Bodies',
    u'Weeds',
]

In [None]:
# -- calculate inspection scores [ratio]
# Row-wise mean over the category columns (pandas skips missing values by
# default), then inverted so the stored ratio is 1 minus that mean.
# NOTE(review): presumably each category column flags a failed rating -- confirm.
categoryMean = yearDF[categories].mean(axis=1)
yearDF['Ratings Ratio'] = 1 - categoryMean

In [None]:
# -- build attribute list to capture from inspection reports.
# --   A non-GEOID spatial key is read straight from the inspection frame.
# --   GEOID is always taken from the census park table merged in below, so it
# --   is never pulled from yearDF: a GEOID column on both sides of that merge
# --   would come back as GEOID_x / GEOID_y and leave no plain GEOID column for
# --   the later groupbys.
yearDFAttr = ['Prop ID', 'PID_base', 'Category']
if SpatialKey != 'GEOID' and SpatialKey in yearDF.columns:
    yearDFAttr.append(SpatialKey)

# -- seed final park stats DF: one row per Prop ID holding the first non-null
# --   value of each captured attribute
byParkID = yearDF[yearDFAttr].groupby(['Prop ID'], as_index = False)
parkStats = byParkID.first()

# -- average Park Score and Acres
# NOTE(review): 'Adjusted Acres' is a grouping key here, so a Prop ID with more
# than one distinct acreage yields several rows (and several rows after the
# merge), and inspections with a missing acreage are left out of the average.
# Confirm acreage is constant and non-null per Prop ID.
byParkBase = yearDF.groupby(['Prop ID', 'Adjusted Acres'], as_index = False)
parkAvgScore = byParkBase['Ratings Ratio'].mean().rename(columns={'Ratings Ratio': 'Avg Ratio'})
parkStats = pd.merge(parkStats, parkAvgScore, on='Prop ID', how='left')

# -- Utilize new park table which has parks broken up by census tracts if used!
if SpatialKey == 'GEOID':
    # the census table is the left side, so every (park, tract) row in it is
    # kept, including parks with no inspection data (their stats stay NaN)
    censusCols = censusParkDF[['Prop ID', 'GEOID', 'Fractional Acres']]
    parkStats = pd.merge(censusCols, parkStats, on = 'Prop ID', how = 'left')

    # -- Replace acreage with census breakup acreage
    parkStats['Adjusted Acres'] = parkStats['Fractional Acres']
    parkStats = parkStats.drop('Fractional Acres', axis=1)

# -- calculate weighted Score for park (average ratio scaled by acreage)
parkStats['Weighted Score'] = parkStats['Avg Ratio'] * parkStats['Adjusted Acres']

In [None]:
# -- seed district_category stats DF: one row per (spatial key, category) pair
categoryKeys = [SpatialKey, 'Category']
byCategory = parkStats.groupby(categoryKeys, as_index = False)
categoryStats = byCategory.first()[categoryKeys]

# -- sum Acres and Scores per category
# Columns are selected with a list: selecting several columns from a GroupBy
# with a bare tuple (byCategory['a', 'b']) is rejected by newer pandas.
catSums = byCategory[['Adjusted Acres', 'Weighted Score']].sum()
catSums = catSums.rename(columns={'Weighted Score': 'Score'})
categoryStats = pd.merge(categoryStats, catSums, on=categoryKeys, how='left')

# -- count parkStats rows per category
# size() is taken from an index-keyed groupby so it is always a Series;
# reset_index(name=...) then names the count column explicitly instead of
# relying on the result column being labelled 0.
catCounts = parkStats.groupby(categoryKeys).size().reset_index(name='Counts')
categoryStats = pd.merge(categoryStats, catCounts, on=categoryKeys, how='left')

In [None]:
# -- total 'Counts' per spatial key (summed across that key's categories)
# Only 'Counts' is aggregated; summing the whole frame would also push the
# string 'Category' column through sum().
bySpatialKey = categoryStats.groupby(SpatialKey, as_index = False)
SpatialKeySums = bySpatialKey[['Counts']].sum()

# -- attach the per-key total beside the per-category count:
# --   'Counts_Cat' is the category's own count, 'Counts_<SpatialKey>' the total
# NOTE: this overwrites categoryStats, so the cell cannot be re-run on its own
# (there is no plain 'Counts' column left afterwards).
categoryStats = pd.merge(categoryStats, SpatialKeySums, on=SpatialKey, how = 'left', suffixes = ('_Cat', '_%s' % SpatialKey))

In [None]:
# -- acreage-weighted score per (spatial key, category): summed score over summed acres
categoryStats['Weighted Score'] = categoryStats['Score'] / categoryStats['Adjusted Acres']

# -- scale each category by its share of the spatial key's total count
categoryShare = categoryStats['Counts_Cat'] / categoryStats['Counts_%s' % SpatialKey]
categoryStats['Normalized Score'] = categoryStats['Weighted Score'] * categoryShare

In [None]:
# -- sum Scores per spatial key
# Only 'Normalized Score' is aggregated; summing every column and slicing
# afterwards would also run sum() over the string 'Category' column.
bySpatialKey = categoryStats.groupby(SpatialKey)
SpatialStats = bySpatialKey[['Normalized Score']].sum().reset_index()

In [None]:
# -- floor scores at 0 just in case U/S overtake ratios
# Negative totals are raised to 0; non-negative and missing values are left as they are.
SpatialStats['Normalized Score'] = SpatialStats['Normalized Score'].clip(lower=0)

In [None]:
# -- Write to csv
# File name pattern: ParkQuality_<SpatialKey>_<Year>_<MM-DD-YYYY of this run>.csv
now = datetime.now()
outputFileName = 'ParkQuality_%s_%s_%s.csv' % (SpatialKey, Year, now.strftime('%m-%d-%Y'))
SpatialStats.to_csv(OutputPath + outputFileName, sep=',')