In [1]:
import pandas as pd
import numpy as np
import time
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

## School Progress Report (SPR) Data

In [2]:
# Load the SPR metric scores (sheet_name=1 selects the workbook's second sheet),
# rename the ID column to 'School ID' and store it as text.
spr = (
    pd.read_excel('data/SPR_SY1617_Data/SPR_SY1617_School_Metric_Scores_20180206.xlsx', sheet_name=1)
    .rename(columns={'SRC School ID': 'School ID'})
    .astype({'School ID': str})
)

# Keep only the descriptive school attributes, one row per distinct combination.
school_info = spr.loc[
    :,
    ['School', 'School ID', 'Rpt Type Long', 'Governance', 'Grades Served', 'Enrollment'],
].drop_duplicates()

school_info.head(3)

Unnamed: 0,School,School ID,Rpt Type Long,Governance,Grades Served,Enrollment
0,John Bartram High School,101,High School,District,'9-12',601
1,West Philadelphia High School,102,High School,District,'9-12',483
2,High School of the Future,103,High School,District,'9-12',476


## Serious Incidents (aka Safety)

In [3]:
# Load one serious-incident extract per school year.
si_1314 = pd.read_csv('data/Serious_Incidents/Serious_Incidents_2013-14.TXT')

# The 2014-15 workbook uses title-case headers; rename them to the upper-case
# names so its columns line up with the other years when concatenated.
si_1415 = pd.read_excel('data/Serious_Incidents/Serious_Incidents_2014-15.xlsx', sheet_name=0)
si_1415 = si_1415.rename(columns={'ULCS Code': 'ULCS_NO',
                                  'School Year': 'SCHOOL_YEAR',
                                  'Incident Type': 'INCIDENT_TYPE',
                                  'Incident Count': 'INCIDENT_COUNT',
                                  'School ID': 'SCHOOL_ID'})

si_1516 = pd.read_excel('data/Serious_Incidents/Serious_Incidents_2015-16.xls', sheet_name=0)
si_1617 = pd.read_excel('data/Serious_Incidents/Serious_Incidents_2016-17.xlsx', sheet_name=0)

# Stack all years into one frame with a fresh 0..n-1 index and discard the
# ULCS code column.
si = (
    pd.concat([si_1314, si_1415, si_1516, si_1617])
    .reset_index(drop=True)
    .drop(columns='ULCS_NO')
)

# Incident labels differ between yearly extracts; map every variant spelling
# (after upper-casing) onto a single canonical label. Matching is on the whole
# cell value, not on substrings.
INCIDENT_TYPE_ALIASES = {
    'SUICIDE - ATTEMPTS & THREATS': 'SUICIDE - ATTEMPTS AND THREATS',
    'DRUGS & ALCOHOL OFFENSE': 'DRUG AND ALCOHOL OFFENSES',
    'DRUG & ALCOHOL OFFENSES': 'DRUG AND ALCOHOL OFFENSES',
    'THREATS': 'THREAT',
    'THREATS - VERBAL AND WRITTEN': 'THREAT',
    'THEFTS': 'THEFT',
    'BOMB SCARES': 'BOMB SCARE',
    'FIRES AND FALSE ALARMS': 'FIRE - INCENDIARY',
    'WEAPONS OFFENSES': 'WEAPONS',
}

# Assign the result back to the column instead of calling
# `si['INCIDENT_TYPE'].replace(..., inplace=True)`: an in-place call on a
# column selection mutates a temporary object, which pandas warns about and
# which leaves `si` unchanged once Copy-on-Write is enabled.
si['INCIDENT_TYPE'] = si['INCIDENT_TYPE'].str.upper().replace(INCIDENT_TYPE_ALIASES)

si.head()

Unnamed: 0,INCIDENT_COUNT,INCIDENT_TYPE,SCHOOL_ID,SCHOOL_YEAR
0,1,HARASSMENT,526,2013-2014
1,5,ASSAULTS,534,2013-2014
2,8,THREAT,535,2013-2014
3,0,TAKE OFF,537,2013-2014
4,0,DRUG AND ALCOHOL OFFENSES,545,2013-2014


In [14]:
# school_info['School ID'] is stored as text, so cast the incident-side key to
# text as well; otherwise the join keys would not compare equal.
si['SCHOOL_ID'] = si['SCHOOL_ID'].astype(str)

# Attach school attributes to every incident row.
# NOTE(review): school_info can contain more than one row per School ID (the
# 'Treaty' spot-check at the end of the notebook shows school 516 with both a
# High School and a Middle School row). The left join therefore repeats each
# incident row once per matching school_info row, and the same INCIDENT_COUNT
# is divided by each sub-enrollment. Summing INCIDENT_COUNT over this frame
# double-counts such schools -- confirm whether that is intended.
df = pd.merge(si, school_info, how='left', left_on='SCHOOL_ID', right_on='School ID')

# Keep only incidents whose school was found in the SPR data (the original row
# index is deliberately preserved).
df = df[df['School'].notna()]

df = df.assign(
    # incidents per 100 enrolled students
    Incident_rate=df['INCIDENT_COUNT'] * 100 / df['Enrollment'],
    # `.str.title()` leaves missing labels as NaN, whereas calling `.title()`
    # through `apply` raises on a non-string value
    INCIDENT_TYPE=df['INCIDENT_TYPE'].str.title(),
).drop(columns='School ID')  # duplicate of SCHOOL_ID after the join

df.head()

Unnamed: 0,INCIDENT_COUNT,INCIDENT_TYPE,SCHOOL_ID,SCHOOL_YEAR,School,Rpt Type Long,Governance,Grades Served,Enrollment,Incident_rate
0,1,Harassment,526,2013-2014,Lewis Elkin School,Elementary School,District,'K-4',866.0,0.115473
1,5,Assaults,534,2013-2014,James R. Ludlow School,K8 School,District,'K-8',312.0,1.602564
2,8,Threat,535,2013-2014,William McKinley School,K8 School,District,'K-8',460.0,1.73913
3,0,Take Off,537,2013-2014,John Moffet School,Elementary School,District,'K-5',345.0,0.0
5,0,Burglary,548,2013-2014,General Philip Kearny School,K8 School,District,'K-8',401.0,0.0


## Export Data

In [15]:
# Write the merged incident table to a pipe-delimited file, without the index column
df.to_csv(path_or_buf="philly_school_safety.csv", sep='|', index=False)

In [17]:
# Spot-check: show every row whose school name contains 'Treaty'
df.loc[df['School'].str.contains('Treaty')]

Unnamed: 0,INCIDENT_COUNT,INCIDENT_TYPE,SCHOOL_ID,SCHOOL_YEAR,School,Rpt Type Long,Governance,Grades Served,Enrollment,Incident_rate
232,0,Take Off,516,2013-2014,Penn Treaty High School,High School,District,'9-12',346.0,0.000000
233,0,Take Off,516,2013-2014,Penn Treaty High School,Middle School,District,'6-8',216.0,0.000000
473,4,Morals Offenses,516,2013-2014,Penn Treaty High School,High School,District,'9-12',346.0,1.156069
474,4,Morals Offenses,516,2013-2014,Penn Treaty High School,Middle School,District,'6-8',216.0,1.851852
710,4,Robbery,516,2013-2014,Penn Treaty High School,High School,District,'9-12',346.0,1.156069
711,4,Robbery,516,2013-2014,Penn Treaty High School,Middle School,District,'6-8',216.0,1.851852
712,9,Weapons,516,2013-2014,Penn Treaty High School,High School,District,'9-12',346.0,2.601156
713,9,Weapons,516,2013-2014,Penn Treaty High School,Middle School,District,'6-8',216.0,4.166667
784,0,Burglary,516,2013-2014,Penn Treaty High School,High School,District,'9-12',346.0,0.000000
785,0,Burglary,516,2013-2014,Penn Treaty High School,Middle School,District,'6-8',216.0,0.000000
