# 0. Set Up

In [3]:
# import libraries
import pandas as pd 
import pickle as pk
import numpy as np

In [5]:
# Read each CCD 2016-17 source file into a 2-D data structure (DataFrame).
# The CSV files are read as latin1; low_memory=False makes pandas read each
# file in one pass instead of inferring column dtypes chunk by chunk.
# Directory
f0 = pd.read_csv("ccd_201617/ccd_sch_029_1617_w_1a_11212017.csv", encoding="latin1", low_memory=False)
# Lunch Program
f1 = pd.read_csv("ccd_201617/ccd_sch_033_1617_l_2a_11212017.csv", encoding="latin1", low_memory=False)
# Membership
f2 = pd.read_csv("ccd_201617/ccd_sch_052_1617_l_2a_11212017.csv", encoding="latin1", low_memory=False)
# Staff
f3 = pd.read_csv("ccd_201617/ccd_sch_059_1617_l_2a_11212017.csv", encoding="latin1", low_memory=False)
# School Characteristics
f4 = pd.read_csv("ccd_201617/ccd_sch_129_1617_w_1a_11212017.csv", encoding="latin1", low_memory=False)
# School Geography
# No `encoding` argument here: an .xlsx workbook is not a plain-text file, and
# current pandas raises TypeError when read_excel is given `encoding`.
fgeo = pd.read_excel("ccd_201617/EDGE_GEOCODE_PUBLICSCH_1617/EDGE_GEOCODE_PUBLICSCH_1617.xlsx")

In [6]:
# Show every column whenever a dataframe is displayed
pd.set_option('display.max_columns', None)
# Report the (rows, columns) shape of each loaded table, in load order
for table in (f0, f1, f2, f3, f4, fgeo):
    display(table.shape)

(102181, 65)

(495095, 17)

(12279594, 18)

(100062, 15)

(100062, 20)

(102173, 24)

# 1. Select All Charters

In [6]:
# Keep only the directory rows flagged as charter schools
# (expect at least 7,000 rows, i.e. schools).
is_charter = f0['CHARTER_TEXT'] == 'Yes'
charterSchools = f0.loc[is_charter]
charterSchools

Unnamed: 0,SCHOOL_YEAR,FIPST,STATENAME,ST,SCH_NAME,LEA_NAME,STATE_AGENCY_NO,UNION,ST_LEAID,LEAID,...,G_10_OFFERED,G_11_OFFERED,G_12_OFFERED,G_13_OFFERED,G_UG_OFFERED,G_AE_OFFERED,GSLO,GSHI,LEVEL,IGOFFERED
152,2016-2017,1,ALABAMA,AL,Acceleration Day and Evening Acad,Acceleration Day and Evening Acad,1,,AL-800,100197,...,Yes,Yes,Yes,No,No,No,09,12,High,As reported
1559,2016-2017,2,ALASKA,AK,Ayaprun Elitnaurvik,Lower Kuskokwim School District,1,,AK-31,200001,...,No,No,No,No,No,No,PK,06,Elementary,As reported
1658,2016-2017,2,ALASKA,AK,Ketchikan Charter School,Ketchikan Gateway Borough School District,1,,AK-25,200150,...,No,No,No,No,No,No,KG,08,Elementary,As reported
1661,2016-2017,2,ALASKA,AK,Tongass School of Arts and Sciences Charter Sc...,Ketchikan Gateway Borough School District,1,,AK-25,200150,...,No,No,No,No,No,No,PK,06,Elementary,As reported
1727,2016-2017,2,ALASKA,AK,Aquarian Charter School,Anchorage School District,1,,AK-05,200180,...,No,No,No,No,No,No,KG,06,Elementary,As reported
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100049,2016-2017,56,WYOMING,WY,Snowy Range Academy,Albany County School District #1,1,,WY-0101000,5600730,...,No,No,No,No,No,No,KG,09,Other,As reported
100052,2016-2017,56,WYOMING,WY,Laramie Montessori Charter School,Albany County School District #1,1,,WY-0101000,5600730,...,No,No,No,No,No,No,KG,06,Elementary,As reported
100055,2016-2017,56,WYOMING,WY,Arapahoe Charter High School,Fremont County School District #38,1,,WY-0738000,5600960,...,Yes,Yes,Yes,No,No,No,09,12,High,As reported
100141,2016-2017,56,WYOMING,WY,PODER Academy,Laramie County School District #1,1,,WY-1101000,5601980,...,No,No,No,No,No,No,KG,05,Elementary,As reported


In [27]:
# Sanity check: there should be around 7,000 charter schools (rows)
display(charterSchools.shape)

(7356, 65)

In [7]:
# NCESSCH identifies a school; the charter schools' ids are used as the key
# for pulling charter rows out of the other tables and for merging.
charter_id = np.array(charterSchools.loc[:, "NCESSCH"])
charter_id.shape[0]

7356

# 2. Select Free and Reduced Lunch from f1 table

In [8]:
# List the distinct Lunch Program labels. From the output we want the
# 'Free lunch qualified' and 'Reduced-price lunch qualified' labels.
pd.unique(f1["LUNCH_PROGRAM"])

array(['Free lunch qualified', 'Reduced-price lunch qualified', 'Missing',
       'Not Applicable', 'No Category Codes', nan], dtype=object)

In [9]:
# Keep only the rows for the two lunch-program labels of interest
lunch_labels = ['Free lunch qualified', 'Reduced-price lunch qualified']
has_lunch_label = f1['LUNCH_PROGRAM'].isin(lunch_labels)
f1_filtered = f1[has_lunch_label]

In [10]:
# Per school, add the free and reduced-price counts together.
# The list-form aggregation yields a two-level ('STUDENT_COUNT', 'sum') column.
f1_wide = f1_filtered.groupby("NCESSCH")[["STUDENT_COUNT"]].agg(['sum'])

In [11]:
# Give the single aggregated column a flat, readable name
f1_wide.columns = pd.Index(['Free and Reduced Lunch'])

In [12]:
# Keep only the lunch-program totals that belong to charter schools
is_charter_row = f1_wide.index.isin(charter_id)
charter_lunch = f1_wide[is_charter_row]
charter_lunch

Unnamed: 0_level_0,Free and Reduced Lunch
NCESSCH,Unnamed: 1_level_1
1.001970e+10,0.0
2.000010e+10,141.0
2.001500e+10,88.0
2.001500e+10,77.0
2.001800e+10,0.0
...,...
4.000796e+11,598.0
4.000796e+11,145.0
4.000796e+11,116.0
4.000796e+11,552.0


# 3. Explore Membership Table - f2

In [13]:
# List the distinct Total Indicator labels. From the documentation:
# 'Education Unit Total' gives the total students enrolled in the school;
# 'Category Set A - By Race/Ethnicity; Sex; Grade' gives the grade/race/gender counts.
pd.unique(f2["TOTAL_INDICATOR"])

array(['Category Set A - By Race/Ethnicity; Sex; Grade',
       'Derived - Education Unit Total minus Adult Education Count',
       'Derived - Subtotal by Race/Ethnicity and Sex minus Adult Education Count',
       'Education Unit Total', 'Subtotal 4 - By Grade'], dtype=object)

In [14]:
# Drop the membership rows that have no student count
has_count = f2['STUDENT_COUNT'].notna()
f2notNan = f2[has_count]

In [15]:
# Spot-check a single school: summing its 'Category Set A' detail rows should
# reproduce the 'Education Unit Total' reported for the same school.
totals_id = ["Category Set A - By Race/Ethnicity; Sex; Grade"]
f2one = f2.loc[f2["NCESSCH"] == 780003000034]
# Build the mask from f2one itself. The mask used to come from the full f2
# table, which only worked through index alignment and scanned every row of f2.
f2onefilter = f2one.loc[f2one['TOTAL_INDICATOR'].isin(totals_id)]
f2onesum = f2onefilter.agg({"STUDENT_COUNT": ['sum']})
f2onesum

Unnamed: 0,STUDENT_COUNT
sum,537.0


In [16]:
# Reported total for the same school, to compare with the computed sum.
# The mask is taken from f2one (not the full f2 table) so it matches the frame it filters.
f2one.loc[f2one['TOTAL_INDICATOR'] == "Education Unit Total"]

Unnamed: 0,SCHOOL_YEAR,FIPST,STATENAME,ST,SCH_NAME,STATE_AGENCY_NO,UNION,ST_LEAID,LEAID,ST_SCHID,NCESSCH,SCHID,GRADE,RACE_ETHNICITY,SEX,STUDENT_COUNT,TOTAL_INDICATOR,DMS_FLAG
12279589,2016-2017,78,U.S. VIRGIN ISLANDS,VI,BERTHA BOSCHULTE JUNIOR HIGH,1,,VI-001,7800030,VI-001-9,780003000034,7800034,No Category Codes,No Category Codes,No Category Codes,537.0,Education Unit Total,Reported


In [26]:
# Recompute every school's enrollment from its 'Category Set A' detail rows.
# These two statements were commented out, which left `agg_total` undefined
# when the notebook is run top to bottom on a fresh kernel.
all_CatA = f2notNan.loc[f2notNan['TOTAL_INDICATOR'].isin(totals_id)]
agg_total = all_CatA.groupby('NCESSCH').agg({"STUDENT_COUNT": ['sum']})
agg_total.columns = ["calculated_total"]
# Reported totals, one row per school
total_only = f2notNan.loc[f2notNan['TOTAL_INDICATOR'] == 'Education Unit Total', ['NCESSCH', 'STUDENT_COUNT']]
# Outer merge keeps schools that appear on only one side; the missing side is NaN
check_total = agg_total.merge(total_only, how="outer", left_index=True, right_on='NCESSCH')

In [27]:
# Computed vs reported totals, charter schools only
check_total[check_total['NCESSCH'].isin(charter_id)]

Unnamed: 0,calculated_total,NCESSCH,STUDENT_COUNT
345613,163.0,20000100329,163.0
364595,186.0,20015000306,186.0
364970,174.0,20015000523,174.0
374081,378.0,20018000172,378.0
374302,622.0,20018000178,622.0
...,...,...,...
12037192,38.0,560198000574,38.0
434430,,40003700712,218.0
459922,,40025502075,699.0
482554,,40042901994,444.0


In [40]:
# Check that each reported Education Unit Total matches the calculated total.
# Every disagreeing pair is printed and its row position is kept in not_match.
# A NaN on either side never compares equal, so it is reported as a mismatch.
calc = np.array(check_total["calculated_total"])
total = np.array(check_total["STUDENT_COUNT"])

not_match = []
for position, pair in enumerate(zip(calc, total)):
    if pair[0] != pair[1]:
        not_match.append(position)
        print(pair)

(95.0, 94.0)
(396.0, 395.0)
(400.0, 398.0)
(357.0, 356.0)
(472.0, 471.0)
(1807.0, 1806.0)
(320.0, 319.0)
(35.0, 34.0)
(470.0, 468.0)
(1306.0, 1303.0)
(579.0, 578.0)
(822.0, 821.0)
(953.0, 951.0)
(1277.0, 1276.0)
(1053.0, 1051.0)
(691.0, 690.0)
(792.0, 790.0)
(777.0, 776.0)
(250.0, 249.0)
(583.0, 582.0)
(1432.0, 1429.0)
(733.0, 732.0)
(780.0, 779.0)
(1169.0, 1168.0)
(1505.0, 1503.0)
(520.0, 519.0)
(1795.0, 1792.0)
(2504.0, 2502.0)
(1024.0, 1023.0)
(472.0, 471.0)
(412.0, 411.0)
(443.0, 442.0)
(411.0, 410.0)
(1334.0, 1333.0)
(83.0, 82.0)
(187.0, 186.0)
(34.0, 33.0)
(7.0, 6.0)
(116.0, 115.0)
(305.0, 301.0)
(168.0, 167.0)
(155.0, 154.0)
(466.0, 465.0)
(594.0, 584.0)
(600.0, 599.0)
(201.0, 200.0)
(392.0, 377.0)
(222.0, 221.0)
(438.0, 437.0)
(785.0, 783.0)
(640.0, 639.0)
(795.0, 793.0)
(1192.0, 1191.0)
(1444.0, 1442.0)
(1365.0, 1363.0)
(597.0, 593.0)
(697.0, 379.0)
(484.0, 478.0)
(105.0, 103.0)
(159.0, 155.0)
(187.0, 186.0)
(369.0, 366.0)
(360.0, 358.0)
(98.0, 96.0)
(234.0, 233.0)
(737.0, 733

## 4. Extract the total student enrollment of each school

In [17]:
# Keep the rows that carry each school's total number of students
is_unit_total = f2["TOTAL_INDICATOR"] == "Education Unit Total"
total_member = f2[is_unit_total]
total_member

Unnamed: 0,SCHOOL_YEAR,FIPST,STATENAME,ST,SCH_NAME,STATE_AGENCY_NO,UNION,ST_LEAID,LEAID,ST_SCHID,NCESSCH,SCHID,GRADE,RACE_ETHNICITY,SEX,STUDENT_COUNT,TOTAL_INDICATOR,DMS_FLAG
107,2016-2017,1,ALABAMA,AL,Sequoyah Sch - Chalkville Campus,1,,AL-210,100002,AL-210-0020,10000200277,100277,No Category Codes,No Category Codes,No Category Codes,,Education Unit Total,Not reported
222,2016-2017,1,ALABAMA,AL,Camps,1,,AL-210,100002,AL-210-0050,10000201667,101667,No Category Codes,No Category Codes,No Category Codes,,Education Unit Total,Not reported
337,2016-2017,1,ALABAMA,AL,Det Ctr,1,,AL-210,100002,AL-210-0060,10000201670,101670,No Category Codes,No Category Codes,No Category Codes,,Education Unit Total,Not reported
452,2016-2017,1,ALABAMA,AL,Wallace Sch - Mt Meigs Campus,1,,AL-210,100002,AL-210-0030,10000201705,101705,No Category Codes,No Category Codes,No Category Codes,,Education Unit Total,Not reported
567,2016-2017,1,ALABAMA,AL,McNeel Sch - Vacca Campus,1,,AL-210,100002,AL-210-0040,10000201706,101706,No Category Codes,No Category Codes,No Category Codes,,Education Unit Total,Not reported
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12279205,2016-2017,78,U.S. VIRGIN ISLANDS,VI,GLADYS A. ABRAHAM ELEMENTARY SCHOOL,1,,VI-001,7800030,VI-001-21,780003000025,7800025,No Category Codes,No Category Codes,No Category Codes,345.0,Education Unit Total,Reported
12279336,2016-2017,78,U.S. VIRGIN ISLANDS,VI,ULLA F MULLER ELEMENTARY SCHOOL,1,,VI-001,7800030,VI-001-17,780003000026,7800026,No Category Codes,No Category Codes,No Category Codes,449.0,Education Unit Total,Reported
12279467,2016-2017,78,U.S. VIRGIN ISLANDS,VI,YVONNE BOWSKY ELEMENTARY SCHOOL,1,,VI-001,7800030,VI-001-23,780003000027,7800027,No Category Codes,No Category Codes,No Category Codes,397.0,Education Unit Total,Reported
12279523,2016-2017,78,U.S. VIRGIN ISLANDS,VI,CANCRYN JUNIOR HIGH SCHOOL,1,,VI-001,7800030,VI-001-25,780003000033,7800033,No Category Codes,No Category Codes,No Category Codes,526.0,Education Unit Total,Reported


In [18]:
# Keep only the membership rows that belong to charter schools
in_charter_ids = f2["NCESSCH"].isin(charter_id)
charter_membership = f2[in_charter_ids]

In [26]:
# Number of distinct charter schools present in the membership table
charter_membership["NCESSCH"].nunique(dropna=False)

7011

In [19]:
# Reported total enrollment for each charter school (id and count only)
is_unit_total = charter_membership["TOTAL_INDICATOR"] == "Education Unit Total"
total_ch_mem = charter_membership.loc[is_unit_total, ['NCESSCH', 'STUDENT_COUNT']]
total_ch_mem

Unnamed: 0,NCESSCH,STUDENT_COUNT
28992,10019702432,
345613,20000100329,163.0
364595,20015000306,186.0
364970,20015000523,174.0
374081,20018000172,378.0
...,...,...
12026260,560073000216,198.0
12026704,560073000542,76.0
12027132,560096000367,22.0
12036956,560198000547,169.0


## 5. Calculate student total by race

In [20]:
# For every school, total the students of each race/ethnicity using the
# 'Category Set A' detail rows.
in_category_a = charter_membership['TOTAL_INDICATOR'].isin(totals_id)
charter_memfilter = charter_membership[in_category_a]
charter_memgroup = (
    charter_memfilter
    .groupby(["NCESSCH", 'RACE_ETHNICITY'])[["STUDENT_COUNT"]]
    .agg(['sum'])
)
charter_memgroup

Unnamed: 0_level_0,Unnamed: 1_level_0,STUDENT_COUNT
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
NCESSCH,RACE_ETHNICITY,Unnamed: 2_level_2
10019702432,American Indian or Alaska Native,0.0
10019702432,Asian,0.0
10019702432,Black or African American,0.0
10019702432,Hispanic/Latino,0.0
10019702432,Native Hawaiian or Other Pacific Islander,0.0
...,...,...
560198000574,Hispanic/Latino,10.0
560198000574,Native Hawaiian or Other Pacific Islander,0.0
560198000574,Not Specified,0.0
560198000574,Two or more races,1.0


In [21]:
# Pivot the innermost index level (RACE_ETHNICITY) into columns so that each
# school becomes a single row (wide format).
charter_memwide = charter_memgroup.unstack(level=-1)

In [46]:
# Inspect the index of the wide table: one NCESSCH entry per school
charter_memwide.index

Int64Index([ 10019702432,  20000100329,  20015000306,  20015000523,
             20018000172,  20018000178,  20018000459,  20018000460,
             20018000530,  20018000555,
            ...
            551629002953, 551632002752, 551650002523, 551668002901,
            551707002781, 560073000216, 560073000542, 560096000367,
            560198000547, 560198000574],
           dtype='int64', name='NCESSCH', length=7011)

In [22]:
# Outer-join the race counts with the lunch counts, then with the reported
# totals, so that no charter school is dropped by either join.
chMemLun = pd.merge(charter_memwide, charter_lunch, how="outer", left_index=True, right_index=True)
chMemLunTot = pd.merge(chMemLun, total_ch_mem, how="outer", left_index=True, right_on="NCESSCH")



In [23]:
# Index the merged table by NCESSCH so later merges can join on the index
ch_merged = chMemLunTot.set_index(keys='NCESSCH')
ch_merged

Unnamed: 0_level_0,"(STUDENT_COUNT, sum, American Indian or Alaska Native)","(STUDENT_COUNT, sum, Asian)","(STUDENT_COUNT, sum, Black or African American)","(STUDENT_COUNT, sum, Hispanic/Latino)","(STUDENT_COUNT, sum, Native Hawaiian or Other Pacific Islander)","(STUDENT_COUNT, sum, Not Specified)","(STUDENT_COUNT, sum, Two or more races)","(STUDENT_COUNT, sum, White)",Free and Reduced Lunch,STUDENT_COUNT
NCESSCH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10019702432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
20000100329,160.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,141.0,163.0
20015000306,77.0,33.0,1.0,13.0,5.0,0.0,6.0,51.0,88.0,186.0
20015000523,58.0,14.0,2.0,5.0,2.0,0.0,13.0,80.0,77.0,174.0
20018000172,10.0,10.0,4.0,23.0,1.0,0.0,55.0,275.0,0.0,378.0
...,...,...,...,...,...,...,...,...,...,...
560073000216,4.0,25.0,3.0,9.0,0.0,0.0,0.0,157.0,,198.0
560073000542,2.0,0.0,0.0,12.0,0.0,0.0,6.0,56.0,,76.0
560096000367,22.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,22.0
560198000547,0.0,1.0,4.0,55.0,0.0,0.0,4.0,105.0,,169.0


In [24]:
# Convert each count column into a percentage of the school's total enrollment.
# The last column of ch_merged is the reported total (STUDENT_COUNT); every
# column before it is a count to be converted.
# This is one vectorised division instead of a Python loop over every cell,
# and it keeps the columns numeric rather than writing strings into them.
counts = ch_merged.iloc[:, :-1]
totals = ch_merged.iloc[:, -1]
percentages = counts.div(totals, axis=0).mul(100)
# A count of exactly 0 is reported as 0 even when the total is missing or 0.
# A missing count stays NaN, and a non-zero count over a missing total is NaN.
percentages = percentages.where(counts.ne(0), 0)
proportions = pd.concat([percentages, totals], axis=1)

proportions.columns = ['American Indian/Alaska Native (% total)', 'Asian (% total)', 'Black/African American (% total)', 'Hispanic/Latino (% total)', 'Hawaiian/Pacific Islander (% total)', 'Not Specified (% total)', 'Two+ races (% total)', 'White (% total)', 'Free or Reduced Lunch (% total)', 'Total Student Count']
proportions

Unnamed: 0_level_0,American Indian/Alaska Native (% total),Asian (% total),Black/African American (% total),Hispanic/Latino (% total),Hawaiian/Pacific Islander (% total),Not Specified (% total),Two+ races (% total),White (% total),Free or Reduced Lunch (% total),Total Student Count
NCESSCH,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10019702432,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,
20000100329,98.159509,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,1.840491,86.503067,163.0
20015000306,41.397849,17.741935,0.537634,6.989247,2.688172,0.0,3.225806,27.419355,47.311828,186.0
20015000523,33.333333,8.045977,1.149425,2.873563,1.149425,0.0,7.471264,45.977011,44.252874,174.0
20018000172,2.645503,2.645503,1.058201,6.084656,0.264550,0.0,14.550265,72.751323,0.000000,378.0
...,...,...,...,...,...,...,...,...,...,...
560073000216,2.020202,12.626263,1.515152,4.545455,0.000000,0.0,0.000000,79.292929,,198.0
560073000542,2.631579,0.000000,0.000000,15.789474,0.000000,0.0,7.894737,73.684211,,76.0
560096000367,100.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,,22.0
560198000547,0.000000,0.591716,2.366864,32.544379,0.000000,0.0,2.366864,62.130178,,169.0


In [68]:
# Checkpoint: write the proportions table (with its header row) to the work folder
proportions.to_csv(path_or_buf='mergedMemLun.csv')

# 6. Select charter schools only for f3, f4, and fgeo

In [25]:
# Staff table (f3): keep charter schools only
charter_f3 = f3[f3["NCESSCH"].isin(charter_id)]

In [26]:
# School characteristics table (f4): keep charter schools only
charter_f4 = f4[f4["NCESSCH"].isin(charter_id)]

In [27]:
# School geography table (fgeo): keep charter schools only
charter_geo = fgeo[fgeo["NCESSCH"].isin(charter_id)]

In [28]:
# Number of charter schools found in the geography table
charter_geo.shape[0]


7351

# 7. Merge all Tables

In [29]:
# Merge all tables, step 1: directory rows joined with the proportions table.
# proportions is indexed by NCESSCH, so the directory's NCESSCH column is
# matched against that index.
mergef02016_17 = pd.merge(charterSchools, proportions, how="outer", left_on="NCESSCH", right_index=True)

In [30]:
# Step 2: add the staff table (f3), joined on the shared NCESSCH column
mergef32016_17 = mergef02016_17.merge(charter_f3, how="outer", on="NCESSCH")

In [31]:
# Step 3: add the school characteristics table (f4), joined on NCESSCH
mergef42016_17 = mergef32016_17.merge(charter_f4, how="outer", on="NCESSCH")

In [32]:
# Step 4: add the geography table (fgeo), joined on NCESSCH
mergeAll2016_17 = mergef42016_17.merge(charter_geo, how="outer", on="NCESSCH")

In [33]:
# Checkpoint: write the fully merged table (with its header row) to the working folder
mergeAll2016_17.to_csv(path_or_buf='AllCharters1617.csv')

# 8. Perform Check on full merged table

In [85]:
# Check 1: look up one known school and inspect its merged row by eye.
# Passes Check 1.
# TODO: add more checks.
is_inspire = mergeAll2016_17["SCH_NAME_x"] == "Inspire Charter School"
mergeAll2016_17[is_inspire]

Unnamed: 0,SCHOOL_YEAR_x,FIPST_x,STATENAME_x,ST_x,SCH_NAME_x,LEA_NAME,STATE_AGENCY_NO_x,UNION_x,ST_LEAID_x,LEAID_x,ST_SCHID_x,NCESSCH,SCHID_x,MSTREET1,MSTREET2,MSTREET3,MCITY,MSTATE,MZIP,MZIP4,LSTREET1,LSTREET2,LSTREET3,LCITY,LSTATE,LZIP,LZIP4,PHONE,WEBSITE,SY_STATUS,SY_STATUS_TEXT,UPDATED_STATUS,UPDATED_STATUS_TEXT,EFFECTIVE_DATE,SCH_TYPE_TEXT,SCH_TYPE,RECON_STATUS,OUT_OF_STATE_FLAG,CHARTER_TEXT,CHARTAUTH1,CHARTAUTHN1,CHARTAUTH2,CHARTAUTHN2,NOGRADES,G_PK_OFFERED,G_KG_OFFERED,G_1_OFFERED,G_2_OFFERED,G_3_OFFERED,G_4_OFFERED,G_5_OFFERED,G_6_OFFERED,G_7_OFFERED,G_8_OFFERED,G_9_OFFERED,G_10_OFFERED,G_11_OFFERED,G_12_OFFERED,G_13_OFFERED,G_UG_OFFERED,G_AE_OFFERED,GSLO,GSHI,LEVEL,IGOFFERED,American Indian/Alaska Native (% total),Asian (% total),Black/African American (% total),Hispanic/Latino (% total),Hawaiian/Pacific Islander (% total),Not Specified (% total),Two+ races (% total),White (% total),Free or Reduced Lunch (% total),Total Student Count,SCHOOL_YEAR_y,FIPST_y,STATENAME_y,ST_y,SCH_NAME_y,STATE_AGENCY_NO_y,UNION_y,ST_LEAID_y,LEAID_y,ST_SCHID_y,SCHID_y,TEACHERS,TOTAL_INDICATOR,DMS_FLAG,SCHOOL_YEAR,FIPST,STATENAME,ST,SCH_NAME,STATE_AGENCY_NO,UNION,ST_LEAID,LEAID,ST_SCHID,SCHID,SHARED_TIME,TITLEI_STATUS,TITLEI_STATUS_TEXT,MAGNET_TEXT,NSLP_STATUS,NSLP_STATUS_TEXT,VIRTUAL,VIRTUAL_TEXT,NAME,OPSTFIPS,STREET,CITY,STATE,ZIP,STFIP,CNTY,NMCNTY,LOCALE,LAT,LON,CBSA,NMCBSA,CBSATYPE,CSA,NMCSA,NECTA,NMNECTA,CD,SLDL,SLDU,SURVYEAR
671,2016-2017,6,CALIFORNIA,CA,Inspire Charter School,Acton-Agua Dulce Unified,1,,CA-1975309,600001,CA-1975309-0129742,60000113764,613764,33323 Santiago Rd.,,,Acton,CA,93510,1416.0,33323 Santiago Rd.,,,Acton,CA,93510,1416.0,(661)269-2214,http://www.inspireschools.org,1,Open,1,Open,01/27/2017,Regular School,1,No,No,Yes,600001,Acton-Agua Dulce Unified,,,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,No,No,KG,12,Other,As reported,0.303337,5.510617,6.825076,31.698686,0.303337,0.0,11.577351,43.781598,31.850354,1978.0,2016-2017,6.0,CALIFORNIA,CA,Inspire Charter School,1.0,,CA-1975309,600001.0,CA-1975309-0129742,613764.0,144.37,Education Unit Total,Reported,2016-2017,6.0,CALIFORNIA,CA,Inspire Charter School,1.0,,CA-1975309,600001.0,CA-1975309-0129742,613764.0,Missing,MISSING,Missing,No,NSLPNO,No,FULLVIRTUAL,Full Virtual,Inspire Charter School,6.0,33323 Santiago Rd.,Acton,CA,93510.0,6,6037,Los Angeles County,41,34.490786,-118.162386,31080,"Los Angeles-Long Beach-Anaheim, CA",1.0,348,"Los Angeles-Long Beach, CA",N,N,625,6036,6021,2016.0


# 9. Find new Charters

In [35]:
# Find new charters: load the previously recorded charter list and take an
# independent copy of the 2016-17 merged table to work on.
recorded_path = "Checking charter URLs 2016 - full list.csv"
charter_recorded = pd.read_csv(recorded_path)
charter1617 = mergeAll2016_17.copy(deep=True)

In [36]:
# NCESSCH is the key used to spot new schools: a school counts as new when its
# NCESSCH number is absent from the recorded list.
ncessch_recorded = charter_recorded.loc[:, "NCESSCH"]
ncessch_1617 = charter1617.loc[:, "NCESSCH"]

In [37]:
# Function to filter the new NCESSCH values
# Input: two arrays; series - array to check if values in check_to array, 
#        check_to - array with values to check with for series
# Output: array of indices to series (input) array for new NCESSCH values

def filarray(series, check_to):
    """Return the positions in `series` whose values do not occur in `check_to`.

    Parameters
    ----------
    series : sequence (list, numpy array or pandas Series)
        Values to test, e.g. the NCESSCH ids of the current year.
    check_to : sequence (list, numpy array or pandas Series)
        Reference values, e.g. the NCESSCH ids already recorded.

    Returns
    -------
    list of int
        Zero-based positions (suitable for `.iloc`) of the entries of `series`
        that are absent from `check_to`, in their original order.
    """
    # A set gives constant-time lookups instead of rescanning check_to for every
    # element. Iterating also compares values rather than index labels, so a
    # pandas Series with any index behaves the same as a plain array.
    try:
        known = set(check_to)
    except TypeError:
        # Unhashable reference values: keep the plain containment check
        known = check_to
    return [position for position, value in enumerate(series) if value not in known]

In [38]:
# Convert both key Series into plain numpy arrays
ncessch_1617, ncessch_recorded = np.array(ncessch_1617), np.array(ncessch_recorded)

In [39]:
# Positions of the 2016-17 NCESSCH values that are missing from the recorded spreadsheet
new_indexes = filarray(series=ncessch_1617, check_to=ncessch_recorded)

In [40]:
# Show the rows for the newly found schools, with every column visible
pd.set_option('display.max_columns', None)
new_charters = charter1617.iloc[new_indexes]
new_charters

Unnamed: 0,SCHOOL_YEAR_x,FIPST_x,STATENAME_x,ST_x,SCH_NAME_x,LEA_NAME,STATE_AGENCY_NO_x,UNION_x,ST_LEAID_x,LEAID_x,ST_SCHID_x,NCESSCH,SCHID_x,MSTREET1,MSTREET2,MSTREET3,MCITY,MSTATE,MZIP,MZIP4,LSTREET1,LSTREET2,LSTREET3,LCITY,LSTATE,LZIP,LZIP4,PHONE,WEBSITE,SY_STATUS,SY_STATUS_TEXT,UPDATED_STATUS,UPDATED_STATUS_TEXT,EFFECTIVE_DATE,SCH_TYPE_TEXT,SCH_TYPE,RECON_STATUS,OUT_OF_STATE_FLAG,CHARTER_TEXT,CHARTAUTH1,CHARTAUTHN1,CHARTAUTH2,CHARTAUTHN2,NOGRADES,G_PK_OFFERED,G_KG_OFFERED,G_1_OFFERED,G_2_OFFERED,G_3_OFFERED,G_4_OFFERED,G_5_OFFERED,G_6_OFFERED,G_7_OFFERED,G_8_OFFERED,G_9_OFFERED,G_10_OFFERED,G_11_OFFERED,G_12_OFFERED,G_13_OFFERED,G_UG_OFFERED,G_AE_OFFERED,GSLO,GSHI,LEVEL,IGOFFERED,American Indian/Alaska Native (% total),Asian (% total),Black/African American (% total),Hispanic/Latino (% total),Hawaiian/Pacific Islander (% total),Not Specified (% total),Two+ races (% total),White (% total),Free or Reduced Lunch (% total),Total Student Count,SCHOOL_YEAR_y,FIPST_y,STATENAME_y,ST_y,SCH_NAME_y,STATE_AGENCY_NO_y,UNION_y,ST_LEAID_y,LEAID_y,ST_SCHID_y,SCHID_y,TEACHERS,TOTAL_INDICATOR,DMS_FLAG,SCHOOL_YEAR,FIPST,STATENAME,ST,SCH_NAME,STATE_AGENCY_NO,UNION,ST_LEAID,LEAID,ST_SCHID,SCHID,SHARED_TIME,TITLEI_STATUS,TITLEI_STATUS_TEXT,MAGNET_TEXT,NSLP_STATUS,NSLP_STATUS_TEXT,VIRTUAL,VIRTUAL_TEXT,NAME,OPSTFIPS,STREET,CITY,STATE,ZIP,STFIP,CNTY,NMCNTY,LOCALE,LAT,LON,CBSA,NMCBSA,CBSATYPE,CSA,NMCSA,NECTA,NMNECTA,CD,SLDL,SLDU,SURVYEAR
102,2016-2017,4,ARIZONA,AZ,Ombudsman - Charter East,Ombudsman Educational Services Ltd. a subsidia...,1,,AZ-4323,400103,AZ-4323-5493,40010301583,401583,13832 N. 32nd St. Ste 218 Bld,,,Phoenix,AZ,85032,,3943 E. Thomas,,,Phoenix,AZ,85018,,(602)840-2997,http://www.ombudsman.com,5,Changed Boundary,5,Changed Boundary,08/15/2017,Regular School,1,No,No,Yes,10500,Arizona State Board for Charter Schools,,,No,No,No,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,No,No,No,09,12,High,As reported,9.876543,1.234568,7.407407,67.901235,0.0,0.0,1.234568,12.345679,0.0,81.0,2016-2017,4.0,ARIZONA,AZ,Ombudsman - Charter East,1.0,,AZ-4323,400103.0,AZ-4323-5493,401583.0,,Education Unit Total,Not reported,2016-2017,4.0,ARIZONA,AZ,Ombudsman - Charter East,1.0,,AZ-4323,400103.0,AZ-4323-5493,401583.0,No,NOTTITLE1ELIG,Not a Title I school,Missing,NSLPNO,No,NOTVIRTUAL,Not Virtual,Ombudsman - Charter East,4.0,3943 E. Thomas,Phoenix,AZ,85018.0,04,04013,Maricopa County,11,33.479939,-111.996269,38060,"Phoenix-Mesa-Scottsdale, AZ",1.0,N,N,N,N,0407,04024,04024,2016.0
107,2016-2017,4,ARIZONA,AZ,Ombudsman - Charter East II,Ombudsman Educational Services Ltd. a subsidia...,1,,AZ-4323,400103,AZ-4323-89440,40010302855,402855,1585 N. Milwaukee Ave.,,,Libertyville,IL,60048,,4041 East Thomas Road,,,Phoenix,AZ,85018,,(602)840-2997,,5,Changed Boundary,5,Changed Boundary,08/15/2017,Regular School,1,No,Yes,Yes,10500,Arizona State Board for Charter Schools,,,No,No,No,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,No,No,No,09,12,High,As reported,8.383234,0.000000,4.191617,79.640719,0.0,0.0,0.000000,7.784431,0.0,167.0,2016-2017,4.0,ARIZONA,AZ,Ombudsman - Charter East II,1.0,,AZ-4323,400103.0,AZ-4323-89440,402855.0,,Education Unit Total,Not reported,2016-2017,4.0,ARIZONA,AZ,Ombudsman - Charter East II,1.0,,AZ-4323,400103.0,AZ-4323-89440,402855.0,No,NOTTITLE1ELIG,Not a Title I school,Missing,NSLPNO,No,NOTVIRTUAL,Not Virtual,Ombudsman - Charter East II,4.0,4041 East Thomas Road,Phoenix,AZ,85018.0,04,04013,Maricopa County,11,33.479926,-111.993746,38060,"Phoenix-Mesa-Scottsdale, AZ",1.0,N,N,N,N,0407,04024,04024,2016.0
108,2016-2017,4,ARIZONA,AZ,Ombudsman - Charter Valencia,Ombudsman Educational Services Ltd. a subsidia...,1,,AZ-4323,400103,AZ-4323-89827,40010302950,402950,1686 W. Valencia Road,,,Tucson,AZ,85746,,1686 W. Valencia Road,,,Tucson,AZ,85746,,(520)573-5858,,5,Changed Boundary,5,Changed Boundary,08/15/2017,Regular School,1,No,No,Yes,10500,Arizona State Board for Charter Schools,,,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,No,No,06,12,Other,As reported,11.270983,0.479616,2.637890,81.055156,0.0,0.0,0.000000,4.556355,0.0,417.0,2016-2017,4.0,ARIZONA,AZ,Ombudsman - Charter Valencia,1.0,,AZ-4323,400103.0,AZ-4323-89827,402950.0,,Education Unit Total,Not reported,2016-2017,4.0,ARIZONA,AZ,Ombudsman - Charter Valencia,1.0,,AZ-4323,400103.0,AZ-4323-89827,402950.0,No,NOTTITLE1ELIG,Not a Title I school,Missing,NSLPNO,No,NOTVIRTUAL,Not Virtual,Ombudsman - Charter Valencia,4.0,1686 W. Valencia Road,Tucson,AZ,85746.0,04,04019,Pima County,11,32.136471,-111.002207,46060,"Tucson, AZ",1.0,536,"Tucson-Nogales, AZ",N,N,0403,04004,04004,2016.0
2245,2016-2017,10,DELAWARE,DE,Mapleton Charter School at Whitehall,Mapleton Charter School at Whitehall,1,,DE-9613,1000060,DE-9613-4085,100006000375,1000375,401 Federal Street- Suite 2,,,Dover,DE,19901,,401 Federal Street- Suite 2,,,Dover,DE,19901,,(302)735-4000,,2,Closed,2,Closed,07/01/2017,Regular School,1,No,No,Yes,DOE01,Delaware Department of Education,,,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,M,M,Not reported,As reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Mapleton Charter School at Whitehall,10.0,401 Federal Street- Suite 2,Dover,DE,19901.0,10,10001,Kent County,13,39.158003,-75.522055,20100,"Dover, DE",1.0,428,"Philadelphia-Reading-Camden, PA-NJ-DE-MD",N,N,1000,10032,10017,2016.0
2314,2016-2017,11,DISTRICT OF COLUMBIA,DC,William E Doar PCS NW Soldiers Home,City Arts & Prep PCS,1,,DC-153,1100053,DC-153-1033,110005300428,1100428,3700 North Capitol St NW,,,Washington,DC,20011,,3700 North Capitol St NW,,,Washington,DC,20011,,(202)882-1930,http://www.wedjschool.us/,2,Closed,2,Closed,01/27/2017,Regular School,1,No,No,Yes,002,DC Public Charter School Board,,,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,M,M,Not reported,As reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,William E Doar PCS NW Soldiers Home,11.0,3700 North Capitol St NW,Washington,DC,20011.0,11,11001,District of Columbia,11,38.923611,-76.995785,47900,"Washington-Arlington-Alexandria, DC-VA-MD-WV",1.0,548,"Washington-Baltimore-Arlington, DC-MD-VA-WV-PA",N,N,1198,N,11005,2016.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7084,2016-2017,49,UTAH,UT,CAPSTONE CLASSICAL ACADEMY,CAPSTONE CLASSICAL ACADEMY,1,,UT-9L,4900199,UT-9L-9L300,490019901527,4901527,2307 N. 2850 W.,,,PLAIN CITY,UT,84404,,2307 N. 2850 W.,,,PLAIN CITY,UT,84404,,(801)415-9973,,7,Future,7,Future,09/13/2017,Regular School,1,No,No,Yes,99,USBE,,,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,M,M,Not reported,As reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAPSTONE CLASSICAL ACADEMY,49.0,2307 N. 2850 W.,PLAIN CITY,UT,84404.0,49,49057,Weber County,21,41.299250,-112.049140,36260,"Ogden-Clearfield, UT",1.0,482,"Salt Lake City-Provo-Orem, UT",N,N,4901,49029,49020,2016.0
7127,2016-2017,55,WISCONSIN,WI,Pathways High School,Pathways High School Agency,1,,WI-8139,5500079,WI-8139-8139,550007903064,5503064,336 W Walnut St,,,Milwaukee,WI,53232,3847.0,336 W Walnut St,,,Milwaukee,WI,53232,3847.0,(414)943-2891,http://mhecimovich@pathwayshigh.org,7,Future,7,Future,07/17/2017,Regular School,1,No,No,Yes,,,,,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,M,M,Not reported,As reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pathways High School,55.0,336 W Walnut St,Milwaukee,WI,53232.0,55,55079,Milwaukee County,11,43.052995,-87.915249,33340,"Milwaukee-Waukesha-West Allis, WI",1.0,376,"Milwaukee-Racine-Waukesha, WI",N,N,5504,55016,55006,2016.0
7132,2016-2017,55,WISCONSIN,WI,Tomorrow River Virtual Charter School (TRVCS),Tomorrow River School District,1,,WI-0126,5500330,WI-0126-0140,550033003063,5503063,10186 County Road MM,,,Amherst Junction,WI,54407,9053.0,10186 County Road MM,,,Amherst Junction,WI,54407,9053.0,(715)346-2730,http://trccs.org,7,Future,7,Future,07/17/2017,Regular School,1,No,No,Yes,0126,Tomorrow River School District,,,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,M,M,Not reported,As reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Tomorrow River Virtual Charter School (TRVCS),55.0,10186 County Road MM,Amherst Junction,WI,54407.0,55,55097,Portage County,42,44.542612,-89.275643,44620,"Stevens Point, WI",2.0,554,"Wausau-Stevens Point-Wisconsin Rapids, WI",N,N,5503,55071,55024,2016.0
7230,2016-2017,55,WISCONSIN,WI,Insight School of Wisconsin High School,McFarland School District,1,,WI-3381,5508910,WI-3381-0470,550891003067,5503067,4709 Dale-Curtin Dr,,,McFarland,WI,53558,,4709 Dale-Curtin Dr,,,McFarland,WI,53558,,(608)838-9482,http://www.wiva.k12.com,7,Future,7,Future,07/17/2017,Alternative School,4,No,No,Yes,3381,McFarland School District,,,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,Not reported,M,M,Not reported,As reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Insight School of Wisconsin High School,55.0,4709 Dale-Curtin Dr,McFarland,WI,53558.0,55,55025,Dane County,21,43.014807,-89.300307,31540,"Madison, WI",1.0,357,"Madison-Janesville-Beloit, WI",N,N,5502,55047,55016,2016.0


In [41]:
# How many new charter schools were found
new_charters['NCESSCH'].shape[0]

63

In [42]:
# Write the new charter schools (with a header row) to the working folder
new_charters.to_csv(path_or_buf='new_charters1617.csv')

# 10. Drop Repetitive Columns and clean full merged table

In [43]:
# Drop the repeated identifying columns brought in by the later merges: the
# '_y'-suffixed copies and the unsuffixed copies. The '_x' copies are kept.
repeated_names = ['SCHOOL_YEAR', 'FIPST', 'STATENAME', 'ST', 'SCH_NAME', 'STATE_AGENCY_NO',
                  'UNION', 'ST_LEAID', 'LEAID', 'ST_SCHID', 'SCHID']
columns_to_drop = [name + '_y' for name in repeated_names] + repeated_names
charter_drop = charter1617.drop(columns=columns_to_drop)

In [44]:
# Strip the '_x' merge suffix from the identifying columns that were kept
suffixed_names = ['SCHOOL_YEAR', 'FIPST', 'STATENAME', 'ST', 'SCH_NAME', 'STATE_AGENCY_NO',
                  'UNION', 'ST_LEAID', 'LEAID', 'ST_SCHID', 'SCHID']
x_suffix_map = {name + '_x': name for name in suffixed_names}
cleaned_charter = charter_drop.rename(columns=x_suffix_map)

In [45]:
# Checkpoint: write the cleaned table (with its header row) to disk
cleaned_charter.to_csv(path_or_buf="cleaned_AllCharters1617.csv")