Start with the standard imports we have used for every notebook in this class.

In [29]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

#These are one time load of SCHOOL DISTRICT data steps.
#START SECTION OF DO NOT RUN THESE AGAIN

Each of the datasheets downloaded from ELSI had download metadata on the top of them and total and key information on the bottom of them that were not data rows. This metadata, total, and key information was manually deleted before import. Some of the files had ="0" instead of 0 in the cells. This was found/replaced before import using the sed -i '' 's/="0"/0/g' *.csv command from the terminal.

In [41]:
#CITATION: This is the data from National Center for Education Statistics on Schools
#School districts for all 50 states and Washington, D.C.
#http://nces.ed.gov/ccd/elsi/
#Data Source: U.S. Department of Education National Center for Education Statistics Common Core of Data (CCD) "Local Education Agency (School District) Universe Survey" 2009-10 v.2a  2013-14 v.1a; "Public Elementary/Secondary School Universe Survey" 2009-10 v.2a; "Survey of Local Government Finances School Systems (F-33)" 2009-10 (FY 2010) v.1a.
#KEY:
#† indicates that the data are not applicable.
#– indicates that the data are missing.
#‡ indicates that the data do not meet NCES data quality standards.

districtinformation = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS Information Tab.csv", dtype=np.str)
districtcharacteristicsa = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS CharacteristicsA Tab.csv", dtype=np.str)
districtcharacteristicsb = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS CharacteristicsB Tab.csv", dtype=np.str)
districtenrollments = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS Enrollments Tab.csv", dtype=np.str)
districtenrollmentK3 = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS EnrollK3 Tab.csv", dtype=np.str)
districtenrollment48 = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS Enroll48 Tab.csv", dtype=np.str)
districtenrollment912 = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS Enroll912 Tab.csv", dtype=np.str)
districtteacherstaff = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS TeacherStaff Tab.csv", dtype=np.str)
districtgeneralfinance = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS GeneralFinance Tab.csv", dtype=np.str)
districtrevenue = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS Revenue Tab.csv", dtype=np.str)
districtexpenditures = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS Expenditures Tab.csv", dtype=np.str)

#Data Source: Local Education Agency (School District) Universe Survey Dropout and Completion Data: 2009-10 v.1a.

districtdropoutscompleters = pd.read_csv("tempdata/school districts/2009-2010 DISTRICTS DropoutsCompleters.txt", dtype=np.str, delim_whitespace=True)

Check the lengths of the datasets to see if we have a row for every school district.

In [27]:
print len(districtinformation)
print len(districtcharacteristicsa)
print len(districtcharacteristicsb)
print len(districtenrollments)
print len(districtenrollmentK3)
print len(districtenrollment48)
print len(districtenrollment912)
print len(districtteacherstaff)
print len(districtgeneralfinance)
print len(districtrevenue)
print len(districtexpenditures)
print len(districtdropoutscompleters)

19023
17916
17916
17916
17916
17916
17916
17916
17916
17916
17916
18439


Drop all of the duplicate columns.

In [42]:
districtcharacteristicsb = districtcharacteristicsb.drop(districtcharacteristicsb.columns[[0, 1]], 1)
districtenrollments = districtenrollments.drop(districtenrollments.columns[[0, 1]], 1)
districtenrollmentK3 = districtenrollmentK3.drop(districtenrollmentK3.columns[[0, 1]], 1)
districtenrollment48 = districtenrollment48.drop(districtenrollment48.columns[[0, 1]], 1)
districtenrollment912 = districtenrollment912.drop(districtenrollment912.columns[[0, 1]], 1)
districtteacherstaff = districtteacherstaff.drop(districtteacherstaff.columns[[0, 1]], 1)
districtgeneralfinance = districtgeneralfinance.drop(districtgeneralfinance.columns[[0, 1]], 1)
districtrevenue = districtrevenue.drop(districtrevenue.columns[[0, 1]], 1)
districtexpenditures = districtexpenditures.drop(districtexpenditures.columns[[0, 1]], 1)

Join all of the school district datasets. The datasets districtinformation and districtdropoutscompleters need special treatment, as they have more rows for the school districts than the other datasets. All of the other datasets can be joined by ID without issue.

In [85]:
#Join the datasets that can be joined without issue.
joineddistrict = districtcharacteristicsa.join([districtcharacteristicsb, districtenrollments, districtenrollmentK3, districtenrollment48, districtenrollment912, districtteacherstaff, districtgeneralfinance, districtrevenue, districtexpenditures])

#Clean up an extra hidden character in the Agency Name column
joineddistrict = joineddistrict.rename(columns={'﻿Agency Name': 'Agency Name'})
districtinformation = districtinformation.rename(columns={'﻿Agency Name': 'Agency Name'})

#Merge to the districtinformation dataset
joineddistrict = districtinformation.merge(joineddistrict, 'outer', ['Agency Name', u'State Name [District] Latest available year'], suffixes=('', '_DEL'))

#Need to get rid of Excel syntax ="" from some of the columns
joineddistrict['Agency ID - NCES Assigned [District] Latest available year'] = joineddistrict['Agency ID - NCES Assigned [District] Latest available year'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['County Number [District] 2009-10'] = joineddistrict['County Number [District] 2009-10'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['ANSI/FIPS State Code [District] Latest available year'] = joineddistrict['ANSI/FIPS State Code [District] Latest available year'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Location ZIP [District] 2013-14'] = joineddistrict['Location ZIP [District] 2013-14'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Location ZIP4 [District] 2013-14'] = joineddistrict['Location ZIP4 [District] 2013-14'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Mailing ZIP [District] 2013-14'] = joineddistrict['Mailing ZIP [District] 2013-14'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Mailing ZIP4 [District] 2013-14'] = joineddistrict['Mailing ZIP4 [District] 2013-14'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['School District Level Code (SCHLEV) [District Finance] 2009-10'] = joineddistrict['School District Level Code (SCHLEV) [District Finance] 2009-10'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Supervisory Union (ID) Number [District] 2009-10'] = joineddistrict['Supervisory Union (ID) Number [District] 2009-10'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Metro Micro Area Code [District] 2009-10'] = joineddistrict['Metro Micro Area Code [District] 2009-10'].map(lambda x: str(x).lstrip('="').rstrip('"'))
joineddistrict['Congressional Code [District] 2009-10'] = joineddistrict['Congressional Code [District] 2009-10'].map(lambda x: str(x).lstrip('="').rstrip('"'))
#joineddistrict[''] = joineddistrict[''].map(lambda x: x.lstrip('="').rstrip('"'))

#Rename the LEAID column so it can be merged with the joineddistrict dataset
districtdropoutscompleters = districtdropoutscompleters.rename(columns={'LEAID': 'Agency ID - NCES Assigned [District] Latest available year'})

#Merge to the joineddistrict dataset
joineddistrict = joineddistrict.merge(districtdropoutscompleters, 'outer', 'Agency ID - NCES Assigned [District] Latest available year', suffixes=('', '_DEL'))

#If by chance any rows have NaN, replace with the ELSI standard for missing data '–'
joineddistrict = joineddistrict.fillna('–')
joineddistrict = joineddistrict.replace('nan', '–')

#Where these conditions are true, these rows are not usable
joineddistrict = joineddistrict[joineddistrict['Agency Name']!='–']
joineddistrict = joineddistrict[joineddistrict['Agency Type [District] 2009-10']!='–']

joineddistrict.head()

Unnamed: 0,Agency Name,State Name [District] Latest available year,State Name [District] 2009-10,State Abbr [District] Latest available year,Agency Name [District] 2009-10,Agency ID - NCES Assigned [District] Latest available year,County Name [District] 2009-10,County Number [District] 2009-10,Race/Ethnicity Category [District] 2009-10,ANSI/FIPS State Code [District] Latest available year,Total Number Operational Schools [Public School] 2009-10,Total Number Operational Charter Schools [Public School] 2009-10,Total Number of Public Schools [Public School] 2009-10,Years District Reported Data [District] Latest available year,Years District Did Not Report Data [District] Latest available year,Location Address [District] 2013-14,Location City [District] 2013-14,Location State Abbr [District] 2013-14,Location ZIP [District] 2013-14,Location ZIP4 [District] 2013-14,Mailing Address [District] 2013-14,Mailing City [District] 2013-14,Mailing State Abbr [District] 2013-14,Mailing ZIP [District] 2013-14,Mailing ZIP4 [District] 2013-14,Phone Number [District] 2013-14,Agency Type [District] 2009-10,School District Level Code (SCHLEV) [District Finance] 2009-10,Urban-centric Locale [District] 2009-10,Boundary Change Indicator Flag [District] 2009-10,CBSA Name [District] 2009-10,CBSA ID [District] 2009-10,CSA Name [District] 2009-10,CSA ID [District] 2009-10,Latitude [District] 2009-10,Longitude [District] 2009-10,State Agency ID [District] 2009-10,Supervisory Union (ID) Number [District] 2009-10,Agency Charter Status [District] 2009-10,Metro Micro Area Code [District] 2009-10,Congressional Code [District] 2009-10,Census ID (CENSUSID) [District Finance] 2009-10,Lowest Grade Offered [District] 2009-10,Highest Grade Offered [District] 2009-10,Total Students (UG PK-12) [District] 2009-10,PK thru 12th Students [District] 2009-10,Ungraded Students [District] 2009-10,Total Students [Public School] 2009-10,Limited English Proficient (LEP) / English Language Learners (ELL) [District] 2009-10,Individualized Education Program Students [District] 2009-10,...,Total - School Admin.- Supp. Serv. Exp. (E09) [District Finance] 2009-10,Total - Student Transp.- Supp. Serv. Exp. (V45) [District Finance] 2009-10,Total - Students- Supp. Serv. Exp. (E17) [District Finance] 2009-10,Salary - Instruction Expenditures (Z33) [District Finance] 2009-10,Salary - Students- Supp. Serv. Exp. (V11) [District Finance] 2009-10,Salary - Instruct. Staff- Supp. Serv. Exp. (V13) [District Finance] 2009-10,Salary - General Admin.- Supp. Serv. Exp. (V15) [District Finance] 2009-10,Salary - School Admin.- Supp. Serv. Exp. (V17) [District Finance] 2009-10,Salary - Ops. & Mainten.- Supp. Serv. Exp. (V21) [District Finance] 2009-10,Salary - Student Transp.- Supp. Serv. Exp. (V23) [District Finance] 2009-10,Salary - Other Supp. Serv.- Supp. Serv. Exp. (V37) [District Finance] 2009-10,Salary - Food Services- Non-Instruction (V29) [District Finance] 2009-10,Employee Benefits - Instruction Expend. (V10) [District Finance] 2009-10,Empl. Benefits - Students- Supp. Serv. Exp. (V12) [District Finance] 2009-10,Empl. Benefits - Instruction- Supp. Serv. Exp. (V14) [District Finance] 2009-10,Empl. Benefits - Gen. Adm.- Supp. Serv. Exp. (V16) [District Finance] 2009-10,Empl. Benefits - Sch. Adm.- Supp. Serv. Exp. (V18) [District Finance] 2009-10,Empl. Benefits - Ops. & Maint.- Supp. Serv. Exp. (V22) [District Finance] 2009-10,Empl. Benefits - Student Trans.- Supp. Serv. Exp. (V24) [District Finance] 2009-10,Empl. Benefits - Other Supp Serv- Supp. Serv. Exp. (V38) [District Finance] 2009-10,Empl. Benefits - Food Services- Non-Instruction (V30) [District Finance] 2009-10,Empl. Benefits - Enterp. Oper.- Non-Instruction (V32) [District Finance] 2009-10,Current Spending - Private Schools (V91) [District Finance] 2009-10,Current Spending - Public Charter Schools (V92) [District Finance] 2009-10,Teacher Salaries - Regular Education Programs (Z35) [District Finance] 2009-10,Teacher Salaries - Special Education Programs (Z36) [District Finance] 2009-10,Teacher Salaries - Vocational Education Programs (Z37) [District Finance] 2009-10,Teacher Salaries - Other Education Programs (Z38) [District Finance] 2009-10,Textbooks for Instruction (V93) [District Finance] 2009-10,Community Services - Non El-Sec (V70) [District Finance] 2009-10,Adult Education - Non El-Sec (V75) [District Finance] 2009-10,Other Expenditures - Non El-Sec (V80) [District Finance] 2009-10,Construction - Capital Outlay (F12) [District Finance] 2009-10,Instructional Equipment - Capital Outlay (K09) [District Finance] 2009-10,Other Equipment - Capital Outlay (K10) [District Finance] 2009-10,Non-specified - Equipment Expenditures (K11) [District Finance] 2009-10,Land & Existing Structures - Capital Outlay (G15) [District Finance] 2009-10,Payments to Local Governments (M12) [District Finance] 2009-10,Payments to State Governments (L12) [District Finance] 2009-10,Interest on School System Indebtedness (I86) [District Finance] 2009-10,Payments to Other School Systems (Q11) [District Finance] 2009-10,SURVYEAR,FIPST,TOTD912,EBS912,DRP912,TOTDPL,AFGEB,AFGR,TOTOHC
1,21ST CENTURY CHARTER SCH OF GARY,Indiana,Indiana,IN,21ST CENTURY CHARTER SCH OF GARY,1800046,MARION COUNTY,18097,Reported 5 categories,18,1,1,1,2004-2013,1986-2003,556 WASHINGTON ST,GARY,IN,46402,†,333 N PENNSYLVANIA SUITE 1000,INDIANAPOLIS,IN,46202,†,3175361027,7-Charter school agency,03-Elementary/secondary school system,13-City: Small,1-No change since last report,Indianapolis IN,26900,Indianapolis-Anderson-Columbus IN,294,39.771949,-86.155184,9545,000,1-All associated schools are charter schools,1-Metropolitan Area,1807,†,Kindergarten,12th Grade,360,360,†,360,0,28,...,318000,9000,18000,1173000,0,0,0,315000,0,0,0,0,12000,0,0,0,0,0,0,358000,0,0,0,0,0,0,0,0,77000,13000,13000,0,37000,86000,72000,146000,374000,0,0,0,0,2009-10,18,-3,100,-3.0,13,43,30.2,-2
2,21ST CENTURY CYBER CS,Pennsylvania,Pennsylvania,PA,21ST CENTURY CYBER CS,4200091,CHESTER COUNTY,42029,Reported 5 categories,42,1,1,1,2001-2013,1986-2000,805 SPRINGDALE DR,EXTON,PA,19341,3043,805 SPRINGDALE DR.,EXTON,PA,19341,3043,4848755400,7-Charter school agency,03-Elementary/secondary school system,21-Suburb: Large,1-No change since last report,Philadelphia-Camden-Wilmington PA-NJ-DE-MD,37980,Philadelphia-Camden-Vineland PA-NJ-DE-MD,428,40.00503,-75.678564,124150002,000,1-All associated schools are charter schools,1-Metropolitan Area,4206,†,6th Grade,12th Grade,594,594,†,594,0,37,...,220000,0,337000,1652000,252000,224000,190000,174000,0,0,0,0,644000,74000,61000,47000,38000,0,0,0,0,0,18000,0,1512000,140000,0,0,236000,0,0,0,0,317000,12000,0,0,0,0,0,24000,2009-10,42,33,483,6.8,102,77,100.0,-2
3,21ST CENTURY PREPARATORY SCHOOL AGENCY,Wisconsin,Wisconsin,WI,21ST CENTURY PREPARATORY SCHOOL AGENCY,5500045,RACINE COUNTY,55101,Reported 5 categories,55,1,1,1,2002-2013,1986-2001,1220 MOUND AVE,RACINE,WI,53404,†,1220 MOUND AVE,RACINE,WI,53404,†,2625980026,7-Charter school agency,01-Elementary school system only,13-City: Small,1-No change since last report,Racine WI,39540,Milwaukee-Racine-Waukesha WI,376,42.729073,-87.797014,8110,000,1-All associated schools are charter schools,1-Metropolitan Area,5501,†,Prekindergarten,8th Grade,507,507,†,507,35,45,...,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,2009-10,55,-2,-2,-2.0,-2,-2,-2.0,-2
4,4-WINDS ACADEMY INCORPORATED DBA 4-WINDS ACADEMY,Arizona,Arizona,AZ,4-WINDS ACADEMY INCORPORATED DBA 4-WINDS ACADEMY,400380,APACHE COUNTY,4001,Reported 5 categories,4,0,0,1,2002-2013,1986-2001,725 EAST MAIN,SPRINGERVILLE,AZ,85938,†,P. O. BOX 1210,EAGAR,AZ,85925,†,9283331060,7-Charter school agency,†,33-Town: Remote,6-Agency temporarily closed,†,†,†,†,34.132862,-109.276602,79998,†,1-All associated schools are charter schools,0-New England (NECTA) or not reported,401,†,Kindergarten,8th Grade,†,†,†,†,†,†,...,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,2009-10,4,-2,-2,-2.0,-2,-2,-2.0,-2
5,A CENTER FOR CREATIVE EDUCATION,Arizona,Arizona,AZ,CENTER FOR CREATIVE EDUCATION INC,400328,YAVAPAI COUNTY,4025,Reported 5 categories,4,2,2,2,2001-2013,1986-2000,215 SOUTH MAIN ST,COTTONWOOD,AZ,86326,†,P.O. BOX 2678,COTTONWOOD,AZ,86326,†,9286343288,7-Charter school agency,01-Elementary school system only,32-Town: Distant,1-No change since last report,Prescott AZ,39140,†,†,34.735242,-112.009497,79457,000,1-All associated schools are charter schools,1-Metropolitan Area,401,†,Kindergarten,6th Grade,83,83,0,83,0,10,...,52000,0,29000,204000,0,0,0,38000,0,0,10000,0,18000,0,0,0,4000,0,0,1000,0,0,0,0,13000,0,0,0,0,0,19000,0,0,0,0,0,0,0,0,0,0,2009-10,4,-2,-2,-2.0,-2,-2,-2.0,-2


In [86]:
print len(joineddistrict)

18184


In [87]:
joineddistrict.to_csv("tempdata/districts.csv")

#END SECTION OF DO NOT RUN THESE AGAIN

In [3]:
districtsdf = pd.read_csv("tempdata/districts.csv")
districtsdf.head()

  data = self._reader.read(nrows)


Unnamed: 0.1,Unnamed: 0,Agency Name,State Name [District] Latest available year,State Name [District] 2009-10,State Abbr [District] Latest available year,Agency Name [District] 2009-10,Agency ID - NCES Assigned [District] Latest available year,County Name [District] 2009-10,County Number [District] 2009-10,Race/Ethnicity Category [District] 2009-10,ANSI/FIPS State Code [District] Latest available year,Total Number Operational Schools [Public School] 2009-10,Total Number Operational Charter Schools [Public School] 2009-10,Total Number of Public Schools [Public School] 2009-10,Years District Reported Data [District] Latest available year,Years District Did Not Report Data [District] Latest available year,Location Address [District] 2013-14,Location City [District] 2013-14,Location State Abbr [District] 2013-14,Location ZIP [District] 2013-14,Location ZIP4 [District] 2013-14,Mailing Address [District] 2013-14,Mailing City [District] 2013-14,Mailing State Abbr [District] 2013-14,Mailing ZIP [District] 2013-14,Mailing ZIP4 [District] 2013-14,Phone Number [District] 2013-14,Agency Type [District] 2009-10,School District Level Code (SCHLEV) [District Finance] 2009-10,Urban-centric Locale [District] 2009-10,Boundary Change Indicator Flag [District] 2009-10,CBSA Name [District] 2009-10,CBSA ID [District] 2009-10,CSA Name [District] 2009-10,CSA ID [District] 2009-10,Latitude [District] 2009-10,Longitude [District] 2009-10,State Agency ID [District] 2009-10,Supervisory Union (ID) Number [District] 2009-10,Agency Charter Status [District] 2009-10,Metro Micro Area Code [District] 2009-10,Congressional Code [District] 2009-10,Census ID (CENSUSID) [District Finance] 2009-10,Lowest Grade Offered [District] 2009-10,Highest Grade Offered [District] 2009-10,Total Students (UG PK-12) [District] 2009-10,PK thru 12th Students [District] 2009-10,Ungraded Students [District] 2009-10,Total Students [Public School] 2009-10,Limited English Proficient (LEP) / English Language Learners (ELL) [District] 2009-10,...,Total - School Admin.- Supp. Serv. Exp. (E09) [District Finance] 2009-10,Total - Student Transp.- Supp. Serv. Exp. (V45) [District Finance] 2009-10,Total - Students- Supp. Serv. Exp. (E17) [District Finance] 2009-10,Salary - Instruction Expenditures (Z33) [District Finance] 2009-10,Salary - Students- Supp. Serv. Exp. (V11) [District Finance] 2009-10,Salary - Instruct. Staff- Supp. Serv. Exp. (V13) [District Finance] 2009-10,Salary - General Admin.- Supp. Serv. Exp. (V15) [District Finance] 2009-10,Salary - School Admin.- Supp. Serv. Exp. (V17) [District Finance] 2009-10,Salary - Ops. & Mainten.- Supp. Serv. Exp. (V21) [District Finance] 2009-10,Salary - Student Transp.- Supp. Serv. Exp. (V23) [District Finance] 2009-10,Salary - Other Supp. Serv.- Supp. Serv. Exp. (V37) [District Finance] 2009-10,Salary - Food Services- Non-Instruction (V29) [District Finance] 2009-10,Employee Benefits - Instruction Expend. (V10) [District Finance] 2009-10,Empl. Benefits - Students- Supp. Serv. Exp. (V12) [District Finance] 2009-10,Empl. Benefits - Instruction- Supp. Serv. Exp. (V14) [District Finance] 2009-10,Empl. Benefits - Gen. Adm.- Supp. Serv. Exp. (V16) [District Finance] 2009-10,Empl. Benefits - Sch. Adm.- Supp. Serv. Exp. (V18) [District Finance] 2009-10,Empl. Benefits - Ops. & Maint.- Supp. Serv. Exp. (V22) [District Finance] 2009-10,Empl. Benefits - Student Trans.- Supp. Serv. Exp. (V24) [District Finance] 2009-10,Empl. Benefits - Other Supp Serv- Supp. Serv. Exp. (V38) [District Finance] 2009-10,Empl. Benefits - Food Services- Non-Instruction (V30) [District Finance] 2009-10,Empl. Benefits - Enterp. Oper.- Non-Instruction (V32) [District Finance] 2009-10,Current Spending - Private Schools (V91) [District Finance] 2009-10,Current Spending - Public Charter Schools (V92) [District Finance] 2009-10,Teacher Salaries - Regular Education Programs (Z35) [District Finance] 2009-10,Teacher Salaries - Special Education Programs (Z36) [District Finance] 2009-10,Teacher Salaries - Vocational Education Programs (Z37) [District Finance] 2009-10,Teacher Salaries - Other Education Programs (Z38) [District Finance] 2009-10,Textbooks for Instruction (V93) [District Finance] 2009-10,Community Services - Non El-Sec (V70) [District Finance] 2009-10,Adult Education - Non El-Sec (V75) [District Finance] 2009-10,Other Expenditures - Non El-Sec (V80) [District Finance] 2009-10,Construction - Capital Outlay (F12) [District Finance] 2009-10,Instructional Equipment - Capital Outlay (K09) [District Finance] 2009-10,Other Equipment - Capital Outlay (K10) [District Finance] 2009-10,Non-specified - Equipment Expenditures (K11) [District Finance] 2009-10,Land & Existing Structures - Capital Outlay (G15) [District Finance] 2009-10,Payments to Local Governments (M12) [District Finance] 2009-10,Payments to State Governments (L12) [District Finance] 2009-10,Interest on School System Indebtedness (I86) [District Finance] 2009-10,Payments to Other School Systems (Q11) [District Finance] 2009-10,SURVYEAR,FIPST,TOTD912,EBS912,DRP912,TOTDPL,AFGEB,AFGR,TOTOHC
0,1,21ST CENTURY CHARTER SCH OF GARY,Indiana,Indiana,IN,21ST CENTURY CHARTER SCH OF GARY,1800046,MARION COUNTY,18097,Reported 5 categories,18,1,1,1,2004-2013,1986-2003,556 WASHINGTON ST,GARY,IN,46402,†,333 N PENNSYLVANIA SUITE 1000,INDIANAPOLIS,IN,46202,†,3175361027,7-Charter school agency,03-Elementary/secondary school system,13-City: Small,1-No change since last report,Indianapolis IN,26900,Indianapolis-Anderson-Columbus IN,294,39.77195,-86.15518,9545,000,1-All associated schools are charter schools,1-Metropolitan Area,1807,†,Kindergarten,12th Grade,360,360,†,360,0,...,318000,9000,18000,1173000,0,0,0,315000,0,0,0,0,12000,0,0,0,0,0,0,358000,0,0,0,0,0,0,0,0,77000,13000,13000,0,37000,86000,72000,146000,374000,0,0,0,0,2009-10,18,-3,100,-3.0,13,43,30.2,-2
1,2,21ST CENTURY CYBER CS,Pennsylvania,Pennsylvania,PA,21ST CENTURY CYBER CS,4200091,CHESTER COUNTY,42029,Reported 5 categories,42,1,1,1,2001-2013,1986-2000,805 SPRINGDALE DR,EXTON,PA,19341,3043,805 SPRINGDALE DR.,EXTON,PA,19341,3043,4848755400,7-Charter school agency,03-Elementary/secondary school system,21-Suburb: Large,1-No change since last report,Philadelphia-Camden-Wilmington PA-NJ-DE-MD,37980,Philadelphia-Camden-Vineland PA-NJ-DE-MD,428,40.00503,-75.67856,124150002,000,1-All associated schools are charter schools,1-Metropolitan Area,4206,†,6th Grade,12th Grade,594,594,†,594,0,...,220000,0,337000,1652000,252000,224000,190000,174000,0,0,0,0,644000,74000,61000,47000,38000,0,0,0,0,0,18000,0,1512000,140000,0,0,236000,0,0,0,0,317000,12000,0,0,0,0,0,24000,2009-10,42,33,483,6.8,102,77,100.0,-2
2,3,21ST CENTURY PREPARATORY SCHOOL AGENCY,Wisconsin,Wisconsin,WI,21ST CENTURY PREPARATORY SCHOOL AGENCY,5500045,RACINE COUNTY,55101,Reported 5 categories,55,1,1,1,2002-2013,1986-2001,1220 MOUND AVE,RACINE,WI,53404,†,1220 MOUND AVE,RACINE,WI,53404,†,2625980026,7-Charter school agency,01-Elementary school system only,13-City: Small,1-No change since last report,Racine WI,39540,Milwaukee-Racine-Waukesha WI,376,42.72907,-87.79701,8110,000,1-All associated schools are charter schools,1-Metropolitan Area,5501,†,Prekindergarten,8th Grade,507,507,†,507,35,...,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,–,2009-10,55,-2,-2,-2.0,-2,-2,-2.0,-2
3,4,4-WINDS ACADEMY INCORPORATED DBA 4-WINDS ACADEMY,Arizona,Arizona,AZ,4-WINDS ACADEMY INCORPORATED DBA 4-WINDS ACADEMY,400380,APACHE COUNTY,4001,Reported 5 categories,4,0,0,1,2002-2013,1986-2001,725 EAST MAIN,SPRINGERVILLE,AZ,85938,†,P. O. BOX 1210,EAGAR,AZ,85925,†,9283331060,7-Charter school agency,†,33-Town: Remote,6-Agency temporarily closed,†,†,†,†,34.13286,-109.2766,79998,†,1-All associated schools are charter schools,0-New England (NECTA) or not reported,401,†,Kindergarten,8th Grade,†,†,†,†,†,...,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,†,2009-10,4,-2,-2,-2.0,-2,-2,-2.0,-2
4,5,A CENTER FOR CREATIVE EDUCATION,Arizona,Arizona,AZ,CENTER FOR CREATIVE EDUCATION INC,400328,YAVAPAI COUNTY,4025,Reported 5 categories,4,2,2,2,2001-2013,1986-2000,215 SOUTH MAIN ST,COTTONWOOD,AZ,86326,†,P.O. BOX 2678,COTTONWOOD,AZ,86326,†,9286343288,7-Charter school agency,01-Elementary school system only,32-Town: Distant,1-No change since last report,Prescott AZ,39140,†,†,34.73524,-112.0095,79457,000,1-All associated schools are charter schools,1-Metropolitan Area,401,†,Kindergarten,6th Grade,83,83,0,83,0,...,52000,0,29000,204000,0,0,0,38000,0,0,10000,0,18000,0,0,0,4000,0,0,1000,0,0,0,0,13000,0,0,0,0,0,19000,0,0,0,0,0,0,0,0,0,0,2009-10,4,-2,-2,-2.0,-2,-2,-2.0,-2


#Exported to excel to fix header names and remove duplicates and extra columns... reloaded as districts_cleaned.csv

In [6]:
districtsdf = pd.read_csv("tempdata/districts_cleaned.csv")
print districtsdf.shape
districtsdf.head()

(17537, 342)


Unnamed: 0,rownum,agency,state,state_abbr,agency_name,agency_id_nces,county,num_schools,num_charter_schools,num_pub_schools,city,zipcode,agency_type,district_level_code,location_type,cbsa_id,latitude,longitude,agency_id_state,charter_status,congressional_code,offered_g_lowest,offered_g_highest,students_ug,total_students,lep,ieps,lunch_free_eligible,lunch_reduced_eligible,lunch_total_free_and_reduced,students_pk_k,students_g1_g8,students_g9_g12,students_pk,students_k,students_g1,students_g2,students_g3,students_g4,students_g5,students_g6,students_g7,students_g8,students_g9,students_g10,students_g11,students_g12,students_ug.1,students_m,students_f,...,fall_membership,totalrev,tlocrev,tsrev,tfedrev,tcurelsc,tcurinst,tcurssvc,tcuroth,tcursalary,tcurbenefits,totalexp,tcapout,tnonelse,arra_rev,arra_exp,arra_capout,totalrev_pp,tlocrev_pp,tsrev_pp,tfedrev_pp,tcurinst_pp,tcurssv_pp,tcuroth_pp,tcursalary_pp,tcurbenefits_pp,totalexp_pp,tcapout_pp,tnonelse_pp,tcurelsc_pp,instexp_pp,tcurelsc_percent,tcurinst_percent,tcuroth_percent,tcurelsc_percent.1,tcurssvc_percent,tfedrev_percent,tlocrev_percent,tsrev_percent,bond_funds,other_funds,survyear,fipst,totd912,ebs912,drp912,totdpl,afgeb,afgr,totohc
0,1,21ST CENTURY CHARTER SCH OF GARY,Indiana,IN,21ST CENTURY CHARTER SCH OF GARY,1800046,MARION COUNTY,1,1,1,GARY,46402,7-Charter school agency,03-Elementary/secondary school system,13-City: Small,26900,39.771949,-86.155184,9545,1-All associated schools are charter schools,1807,Kindergarten,12th Grade,,360,0,28,304.0,18.0,322.0,30,230,100,,30.0,30.0,30.0,30.0,27.0,23.0,27.0,31.0,32.0,31.0,29.0,25.0,15.0,,186,172,...,360,4000000.0,128000.0,2853000.0,1019000.0,3290000.0,1435000.0,1698000.0,157000.0,1488000.0,370000.0,4031000.0,715000.0,26000.0,178000.0,426000.0,171000.0,11111.0,356.0,7925.0,2831.0,3986.0,4717.0,436.0,4133.0,1028.0,11197.0,1986.0,72.0,9139.0,3986.0,11.2,43.6,4.8,45.2,51.6,25.5,3.2,71.3,0.0,174000.0,2009-10,18,-3,100,-3.0,13,43,30.2,-2
1,2,21ST CENTURY CYBER CS,Pennsylvania,PA,21ST CENTURY CYBER CS,4200091,CHESTER COUNTY,1,1,1,EXTON,19341,7-Charter school agency,03-Elementary/secondary school system,21-Suburb: Large,37980,40.00503,-75.678564,124150002,1-All associated schools are charter schools,4206,6th Grade,12th Grade,,594,0,37,111.0,84.0,195.0,0,111,483,,,,,,,,19.0,32.0,60.0,114.0,122.0,138.0,109.0,,231,355,...,594,6271000.0,6109000.0,162000.0,0.0,4816000.0,3032000.0,1784000.0,0.0,2494000.0,865000.0,5187000.0,329000.0,0.0,0.0,2000.0,0.0,10557.0,10285.0,273.0,0.0,5104.0,3003.0,0.0,4199.0,1456.0,8732.0,554.0,0.0,8108.0,5104.0,18.0,63.0,0.0,51.8,37.0,0.0,97.4,2.6,0.0,2132000.0,2009-10,42,33,483,6.8,102,77,100.0,-2
2,3,21ST CENTURY PREPARATORY SCHOOL AGENCY,Wisconsin,WI,21ST CENTURY PREPARATORY SCHOOL AGENCY,5500045,RACINE COUNTY,1,1,1,RACINE,53404,7-Charter school agency,01-Elementary school system only,13-City: Small,39540,42.729073,-87.797014,8110,1-All associated schools are charter schools,5501,Prekindergarten,8th Grade,,507,35,45,255.0,35.0,290.0,111,396,0,59.0,52.0,53.0,56.0,51.0,47.0,57.0,45.0,46.0,41.0,,,,,,231,276,...,507,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2009-10,55,-2,-2,-2.0,-2,-2,-2.0,-2
3,5,A CENTER FOR CREATIVE EDUCATION,Arizona,AZ,CENTER FOR CREATIVE EDUCATION INC,400328,YAVAPAI COUNTY,2,2,2,COTTONWOOD,86326,7-Charter school agency,01-Elementary school system only,32-Town: Distant,39140,34.735242,-112.009497,79457,1-All associated schools are charter schools,401,Kindergarten,6th Grade,0.0,83,0,10,22.0,12.0,34.0,31,52,0,,31.0,13.0,5.0,7.0,9.0,8.0,10.0,,,,,,,0.0,49,34,...,83,595000.0,42000.0,418000.0,135000.0,522000.0,287000.0,235000.0,0.0,265000.0,24000.0,541000.0,0.0,19000.0,0.0,123000.0,0.0,7169.0,506.0,5036.0,1627.0,3458.0,2831.0,0.0,3193.0,289.0,6518.0,0.0,229.0,6289.0,3458.0,4.6,55.0,0.0,50.8,45.0,22.7,7.1,70.3,0.0,0.0,2009-10,4,-2,-2,-2.0,-2,-2,-2.0,-2
4,6,A CHILD'S VIEW SCHOOL INC.,Arizona,AZ,A CHILD'S VIEW SCHOOL INC.,400431,PIMA COUNTY,1,1,1,TUCSON,85746,7-Charter school agency,01-Elementary school system only,21-Suburb: Large,46060,32.149179,-111.027203,87344,1-All associated schools are charter schools,407,Kindergarten,5th Grade,,58,0,7,,,,10,48,0,,10.0,10.0,10.0,8.0,12.0,8.0,,,,,,,,,22,36,...,58,409000.0,2000.0,407000.0,0.0,379000.0,248000.0,131000.0,0.0,253000.0,23000.0,379000.0,0.0,0.0,0.0,0.0,0.0,7052.0,34.0,7017.0,0.0,4276.0,2259.0,0.0,4362.0,397.0,6534.0,0.0,0.0,6534.0,4276.0,6.1,65.4,0.0,66.8,34.6,0.0,0.5,99.5,0.0,0.0,2009-10,4,-2,-2,-2.0,-2,-2,-2.0,-2


In [13]:
print districtsdf.totalexp_pp.isnull().sum()
districtsdf.totalexp_pp.count()

1931


15606

In [93]:
districtsdf_gr=districtsdf[districtsdf['afgr']>0]
print districtsdf_gr.shape
districtsdf_gr.head()

(10903, 342)


Unnamed: 0,rownum,agency,state,state_abbr,agency_name,agency_id_nces,county,num_schools,num_charter_schools,num_pub_schools,city,zipcode,agency_type,district_level_code,location_type,cbsa_id,latitude,longitude,agency_id_state,charter_status,congressional_code,offered_g_lowest,offered_g_highest,students_ug,total_students,lep,ieps,lunch_free_eligible,lunch_reduced_eligible,lunch_total_free_and_reduced,students_pk_k,students_g1_g8,students_g9_g12,students_pk,students_k,students_g1,students_g2,students_g3,students_g4,students_g5,students_g6,students_g7,students_g8,students_g9,students_g10,students_g11,students_g12,students_ug.1,students_m,students_f,...,fall_membership,totalrev,tlocrev,tsrev,tfedrev,tcurelsc,tcurinst,tcurssvc,tcuroth,tcursalary,tcurbenefits,totalexp,tcapout,tnonelse,arra_rev,arra_exp,arra_capout,totalrev_pp,tlocrev_pp,tsrev_pp,tfedrev_pp,tcurinst_pp,tcurssv_pp,tcuroth_pp,tcursalary_pp,tcurbenefits_pp,totalexp_pp,tcapout_pp,tnonelse_pp,tcurelsc_pp,instexp_pp,tcurelsc_percent,tcurinst_percent,tcuroth_percent,tcurelsc_percent.1,tcurssvc_percent,tfedrev_percent,tlocrev_percent,tsrev_percent,bond_funds,other_funds,survyear,fipst,totd912,ebs912,drp912,totdpl,afgeb,afgr,totohc
0,1,21ST CENTURY CHARTER SCH OF GARY,Indiana,IN,21ST CENTURY CHARTER SCH OF GARY,1800046,MARION COUNTY,1,1,1,GARY,46402,7-Charter school agency,03-Elementary/secondary school system,13-City: Small,26900.0,39.771949,-86.155184,9545,1-All associated schools are charter schools,1807,Kindergarten,12th Grade,,360,0,28,304,18,322,30,230,100,,30.0,30.0,30.0,30.0,27.0,23.0,27,31,32,31,29,25,15,,186,172,...,360,4000000,128000,2853000,1019000,3290000,1435000,1698000,157000,1488000,370000,4031000,715000,26000,178000,426000,171000,11111,356,7925,2831,3986,4717,436,4133,1028,11197,1986,72,9139,3986,11.2,43.6,4.8,45.2,51.6,25.5,3.2,71.3,0,174000,2009-10,18,-3,100,-3.0,13,43,30.2,-2
1,2,21ST CENTURY CYBER CS,Pennsylvania,PA,21ST CENTURY CYBER CS,4200091,CHESTER COUNTY,1,1,1,EXTON,19341,7-Charter school agency,03-Elementary/secondary school system,21-Suburb: Large,37980.0,40.00503,-75.678564,124150002,1-All associated schools are charter schools,4206,6th Grade,12th Grade,,594,0,37,111,84,195,0,111,483,,,,,,,,19,32,60,114,122,138,109,,231,355,...,594,6271000,6109000,162000,0,4816000,3032000,1784000,0,2494000,865000,5187000,329000,0,0,2000,0,10557,10285,273,0,5104,3003,0,4199,1456,8732,554,0,8108,5104,18.0,63.0,0.0,51.8,37.0,0.0,97.4,2.6,0,2132000,2009-10,42,33,483,6.8,102,77,100.0,-2
8,10,A+ ACADEMY,Texas,TX,A+ ACADEMY,4800203,DALLAS COUNTY,1,1,1,DALLAS,75217,7-Charter school agency,03-Elementary/secondary school system,11-City: Large,19100.0,32.767535,-96.660866,57829,1-All associated schools are charter schools,4830,Prekindergarten,12th Grade,,1033,285,63,834,106,940,196,609,228,120.0,76.0,68.0,79.0,75.0,63.0,74.0,72,86,92,79,45,61,43,,478,555,...,1033,11379000,70000,9466000,1843000,9123000,5051000,3616000,456000,5979000,493000,9156000,0,1000,94000,501000,0,11015,68,9164,1784,4890,3500,441,5788,477,8864,0,1,8832,4890,5.4,55.4,5.0,65.5,39.6,16.2,0.6,83.2,0,2884000,2009-10,48,-3,228,-3.0,39,70,55.7,-2
10,13,A-C CENTRAL CUSD 262,Illinois,IL,A-C CENTRAL CUSD 262,1700105,CASS COUNTY,3,0,3,ASHLAND,62612,1-Local school district,03-Elementary/secondary school system,42-Rural: Distant,,39.892187,-90.016057,46-009-2620-26,3-All associated schools are noncharter,1718,Kindergarten,12th Grade,,432,0,70,107,65,172,29,273,130,1.0,28.0,38.0,34.0,37.0,32.0,27.0,39,36,30,28,41,31,30,,196,236,...,436,4956000,1907000,2219000,830000,4345000,2108000,2020000,217000,2503000,921000,5755000,961000,0,27000,391000,22000,11367,4374,5089,1904,4835,4633,498,5741,2112,13200,2204,0,9966,4835,21.2,48.5,5.0,57.6,46.5,16.7,38.5,44.8,0,2734000,2009-10,17,-3,130,-3.0,29,41,70.7,0
11,14,A-H-S-T COMM SCHOOL DISTRICT,Iowa,IA,A-H-S-T COMM SCHOOL DISTRICT,1904080,POTTAWATTAMIE COUNTY,2,0,2,AVOCA,51521,1-Local school district,03-Elementary/secondary school system,43-Rural: Remote,36540.0,41.471017,-95.341001,780441 000,3-All associated schools are noncharter,1905,Prekindergarten,12th Grade,,595,0,93,146,59,205,87,340,168,51.0,36.0,35.0,39.0,39.0,50.0,53.0,46,40,38,40,43,38,47,,287,308,...,696,7460000,3921000,2798000,741000,5139000,3441000,1464000,234000,3273000,1001000,8064000,1927000,0,7000,409000,0,10718,5634,4020,1065,4944,2103,336,4703,1438,11586,2769,0,7384,4944,19.5,67.0,4.6,63.7,28.5,9.9,52.6,37.5,0,2330000,2009-10,19,-3,168,-3.0,45,47,95.7,0


In [None]:
from scipy.stats.stats import pearsonr
corr_df=[]
corr_fields=[]
for column in districtsdf_gr:
    if(districtsdf_gr[column].dtype == np.float64 or districtsdf_gr[column].dtype == np.int64):
        corr_fields.append(column)
        
print corr_fields[320:]

for i in corr_fields[320:]:   
    for column in districtsdf_gr:
        if(districtsdf_gr[column].dtype == np.float64 or districtsdf_gr[column].dtype == np.int64):
            corr_districtsdf_gr=districtsdf_gr[np.isfinite(districtsdf_gr[column])]
            corr_df.append((column,i,pearsonr(corr_districtsdf_gr[column],corr_districtsdf_gr[i])[0],len(corr_districtsdf_gr)))

In [160]:
corr_data=pd.DataFrame(corr_df)
corr_data.columns=('corrfield1','corrfield2','pearsoncorr','rowcount')
corr_data_filter=(corr_data[corr_data.pearsoncorr >= .80])
corr_data_filter.sort(['pearsoncorr'],ascending=False)

Unnamed: 0,corrfield1,corrfield2,pearsoncorr,rowcount
2623,totohc,totohc,1.000000,10903
978,ebs912,ebs912,1.000000,10903
320,fipst,fipst,1.000000,10903
2294,afgr,afgr,1.000000,10903
649,totd912,totd912,1.000000,10903
1965,afgeb,afgeb,1.000000,10903
1636,totdpl,totdpl,1.000000,10903
1307,drp912,drp912,1.000000,10903
1,agency_id_nces,fipst,0.999947,10903
9,congressional_code,fipst,0.999937,10903
