In [1]:
#Import all libraries for data cleaning and merging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from thefuzz import process
from fuzzywuzzy import fuzz

#Pandas settings
pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',500)

In [2]:
#Read in all multistat datafiles
#I have already moved the site tabs to the front & simplified column headings in Excel 
ms19 = pd.read_excel('data/2019-ms.xlsx')
ms21 = pd.read_excel('data/2021-ms.xlsx')
ms22 = pd.read_excel('data/2022-ms.xlsx')

#Read in all LEAP data
leap19 = pd.read_excel('data/2019-leap.xlsx')
leap21 = pd.read_excel('data/2021-leap.xlsx')
leap22 = pd.read_excel('data/2022-leap.xlsx')

In [3]:
#Dealing with multistats first
#Preview size of all files
print(ms19.shape)
print(ms21.shape)
print(ms22.shape)

(1414, 40)
(1399, 41)
(1394, 41)


In [4]:
ms19.head()

Unnamed: 0,School System,School System Name,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,ED%,Nonprofit,Charter Type,RollUpType,Parish Code
0,1,Acadia Parish,1001,1001,Armstrong Middle School,341,0.422287,0.577713,0,1,146,12,0,166,16,175,0.991202,0.008798,0,0,0,0,0,0,0,0,0,112,125,104,0,0,0,0,0,0.791789,,,,1.0
1,1,Acadia Parish,1002,1002,Branch Elementary School,277,0.494585,0.505415,0,0,20,9,0,240,8,37,0.98556,0.01444,0,1,19,35,28,31,31,35,25,24,27,21,0,0,0,0,0,0.545126,,,,1.0
2,1,Acadia Parish,1003,1003,Central Rayne Kindergarten School,215,0.506977,0.493023,0,0,85,1,0,111,18,104,1.0,0.0,0,12,68,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0.786047,,,,1.0
3,1,Acadia Parish,1004,1004,Church Point Elementary School,587,0.459966,0.540034,4,0,237,47,0,243,56,344,0.991482,0.008518,0,3,55,88,79,91,91,90,90,0,0,0,0,0,0,0,0,0.850085,,,,1.0
4,1,Acadia Parish,1005,1005,Church Point High School,514,0.48249,0.51751,2,0,151,19,0,317,25,197,0.996109,0.003891,0,0,0,0,0,0,0,0,0,0,0,0,125,27,137,127,98,0.614786,,,,1.0


In [5]:
#Grabbing all variations of Orleans Parish before I filter
ms19['School System Name'].value_counts()

East Baton Rouge Parish                          86
Jefferson Parish                                 85
All Orleans                                      79
Caddo Parish                                     64
Calcasieu Parish                                 60
St. Tammany Parish                               56
Rapides Parish                                   47
Livingston Parish                                44
Lafayette Parish                                 44
Type 2 Charters                                  43
Ouachita Parish                                  37
Tangipahoa Parish                                35
Bossier Parish                                   34
St. Landry Parish                                34
Terrebonne Parish                                33
Acadia Parish                                    32
Lafourche Parish                                 31
Ascension Parish                                 28
Iberia Parish                                    26
St. Mary Par

In [6]:
ms19.loc[ms19['School System Name'] == 'RSD-Direct Run']

Unnamed: 0,School System,School System Name,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,ED%,Nonprofit,Charter Type,RollUpType,Parish Code
1337,RLA,RSD-Direct Run,396211,396211,Linwood Charter School,980,0.540816,0.459184,0,0,949,19,0,7,5,973,0.987755,0.012245,0,0,20,122,120,109,120,115,72,104,99,99,0,0,0,0,0,0.965306,,,RLA,9.0


In [7]:
#Ignoring Linwood as it is in Shreveport and ignoring NOCCA as it is a Type 2 Charter
#Let's pull Orleans parish schools only
#Starting with SY19 
nolams19 = ms19.loc[(ms19['School System Name'] == 'All Orleans')]

In [8]:
nolams19.shape

(79, 40)

In [9]:
#Repeating process for SY21 ms file
ms21.head()

Unnamed: 0,School System,School System Name,SIS Submit Site Code,Federal Reporting Site Code,Site Name,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code
0,School System,School System Name,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,Extension Academy,ED%,Nonprofit,Charter Type,RollUpType,Parish Code
1,001,Acadia Parish,001001,001001,Armstrong Middle School,337,0.51632,0.48368,1,0,157,9,0,145,25,192,0.991098,0.00890208,0,0,0,0,0,0,0,0,0,131,96,110,0,0,0,0,0,0,0.878338,,,,01
2,001,Acadia Parish,001002,001002,Branch Elementary School,348,0.494253,0.505747,0,1,28,16,0,290,13,58,0.994253,0.00574713,0,1,19,33,48,39,39,39,30,40,37,23,0,0,0,0,0,0,0.600575,,,,01
3,001,Acadia Parish,001003,001003,Central Rayne Kindergarten School,196,0.484694,0.515306,2,1,78,7,0,101,7,95,0.989796,0.0102041,0,6,56,134,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.826531,,,,01
4,001,Acadia Parish,001004,001004,Church Point Elementary School,577,0.471404,0.528596,3,0,248,48,0,240,38,337,0.982669,0.017331,0,4,58,90,96,85,91,79,74,0,0,0,0,0,0,0,0,0,0.908146,,,,01


In [10]:
ms21['School System Name'].value_counts()

East Baton Rouge Parish                          82
Jefferson Parish                                 81
Orleans Parish                                   77
Calcasieu Parish                                 61
Caddo Parish                                     59
St. Tammany Parish                               57
Rapides Parish                                   51
Lafayette Parish                                 46
Livingston Parish                                44
Type 2 Charters                                  39
Ouachita Parish                                  37
Bossier Parish                                   35
Tangipahoa Parish                                34
St. Landry Parish                                33
Terrebonne Parish                                32
Acadia Parish                                    32
Lafourche Parish                                 32
Ascension Parish                                 31
Iberia Parish                                    24
St. Mary Par

In [11]:
ms21.loc[ms21['School System Name'] == 'Recovery School District - Louisiana']

Unnamed: 0,School System,School System Name,SIS Submit Site Code,Federal Reporting Site Code,Site Name,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code
1397,RLA,Recovery School District - Louisiana,396211,396211,Linwood Charter School,965,0.518135,0.481865,0,0,914,39,0,3,9,962,0.974093,0.0259067,0,0,30,95,108,103,110,99,103,122,103,92,0,0,0,0,0,0,0.972021,,,RLA,9


In [12]:
nolams21 = ms21.loc[ms21['School System Name'] == 'Orleans Parish']

In [13]:
#One more time for SY 22 multistat file
ms22.head()

Unnamed: 0,School System,School System Name,SIS Submit Site Code,Federal Reporting Site Code,Site Name,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code
0,School System,School System Name,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,Extension Academy,ED%,Nonprofit,Charter Type,RollUpType,Parish Code
1,001,Acadia Parish,001001,001001,Armstrong Middle School,338,0.455621,0.544379,0,1,160,10,0,143,24,195,0.997041,0.00295858,0,0,0,0,0,0,0,0,0,129,117,92,0,0,0,0,0,0,0.828402,,,,01
2,001,Acadia Parish,001002,001002,Branch Elementary School,353,0.470255,0.529745,0,1,39,14,0,288,11,65,0.994334,0.00566572,0,3,17,36,36,48,38,39,39,32,36,29,0,0,0,0,0,0,0.563739,,,,01
3,001,Acadia Parish,001003,001003,Central Rayne Kindergarten School,204,0.514706,0.485294,3,1,82,8,0,95,15,109,0.990196,0.00980392,0,5,71,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.808824,,,,01
4,001,Acadia Parish,001004,001004,Church Point Elementary School,601,0.450915,0.549085,3,1,255,51,0,250,41,351,0.973378,0.0266223,0,6,54,104,91,91,84,88,83,0,0,0,0,0,0,0,0,0,0.851913,,,,01


In [14]:
ms22['School System Name'].value_counts()

East Baton Rouge Parish                          84
Jefferson Parish                                 82
Orleans Parish                                   77
Calcasieu Parish                                 60
Caddo Parish                                     59
St. Tammany Parish                               56
Rapides Parish                                   48
Lafayette Parish                                 46
Livingston Parish                                44
Type 2 Charters                                  40
Ouachita Parish                                  37
Bossier Parish                                   35
Tangipahoa Parish                                34
St. Landry Parish                                34
Lafourche Parish                                 32
Ascension Parish                                 32
Acadia Parish                                    32
Terrebonne Parish                                31
Iberia Parish                                    24
St. Mary Par

In [15]:
nolams22 = ms22.loc[ms22['School System Name'] == 'Orleans Parish']

In [16]:
#PLAN - 
#Merge LEAP and multistat files for each year (2019, 2021, 2022) -> 3 separate files
#Add years to each column heading
#Merge datafiles together, using an outer join

print(nolams19.shape)
print(nolams21.shape)
print(nolams22.shape)

(79, 40)
(77, 41)
(77, 41)


In [17]:
nolams19.head(79)

Unnamed: 0,School System,School System Name,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,ED%,Nonprofit,Charter Type,RollUpType,Parish Code
746,R36,All Orleans,036011,036011,Mary Bethune Elementary Literature/Technology,688,0.521802,0.478198,0,2,669,15,0,2,0,686,0.985465,0.014535,0,2,38,78,78,80,81,79,82,54,55,61,0,0,0,0,0,0.818314,,,R36,36.0
747,R36,All Orleans,036035,036035,Warren Easton Senior High School,991,0.514632,0.485368,0,0,960,25,0,1,5,990,0.984864,0.015136,0,0,0,0,0,0,0,0,0,0,0,0,229,15,247,245,255,0.784057,"Warren Easton Senior High School Foundation, Inc.",Type 3,R36,36.0
748,R36,All Orleans,036060,036060,Edward Hynes Charter School,717,0.525802,0.474198,0,32,252,30,0,361,42,356,0.967922,0.032078,0,17,0,83,84,79,79,72,80,77,76,70,0,0,0,0,0,0.35007,Hynes Charter School Corporation,Type 3,R36,36.0
749,R36,All Orleans,036088,036088,McDonogh #35 College Preparatory School,445,0.442697,0.557303,0,1,442,2,0,0,0,445,0.995506,0.004494,0,0,0,0,0,0,0,0,0,0,0,0,10,0,146,107,182,0.88764,,,R36,36.0
750,R36,All Orleans,036132,036132,Youth Study Center,106,0.056604,0.943396,0,0,104,2,0,0,0,106,0.990566,0.009434,0,0,0,0,0,0,0,0,0,0,0,4,35,0,32,14,21,0.896226,,,R36,36.0
751,R36,All Orleans,036161,036161,Benjamin Franklin Elem. Math and Science,797,0.494354,0.505646,2,7,742,13,2,24,7,773,0.984944,0.015056,0,26,16,74,78,82,82,83,88,93,87,88,0,0,0,0,0,0.751568,,,R36,36.0
752,R36,All Orleans,036189,036189,Homer A. Plessy Community School,397,0.483627,0.516373,2,7,177,48,0,118,45,279,0.924433,0.075567,0,0,39,50,42,46,44,54,53,40,29,0,0,0,0,0,0,0.780856,Citizens' Committee for Education,Type 1,R36,36.0
753,R36,All Orleans,036192,036192,Foundation Preparatory,177,0.440678,0.559322,0,5,125,41,2,4,0,173,0.728814,0.271186,0,0,0,32,53,42,23,27,0,0,0,0,0,0,0,0,0,0.99435,Foundation Prep,Type 1,R36,36.0
754,R36,All Orleans,036193,036193,Cypress Academy,184,0.429348,0.570652,0,1,93,26,0,56,8,128,0.951087,0.048913,0,0,0,41,34,36,45,28,0,0,0,0,0,0,0,0,0,0.684783,Cypress Academy,Type 1,R36,36.0
755,R36,All Orleans,036197,036197,Elan Academy Charter School,143,0.468531,0.531469,0,1,116,6,0,11,9,132,0.965035,0.034965,0,0,18,33,48,22,22,0,0,0,0,0,0,0,0,0,0,0.783217,"Elan Academy, Inc.",Type 1,R36,36.0


In [18]:
#Next, dealing with LEAP files, starting with SY19
leap19.head()

Unnamed: 0,School System Code,School System Name,Site Code,Site Name,2018 % Mastery+ Grades 3-8,2019 % Mastery+ Grades 3-8,2018-2019 % Mastery+ Change Grades 3-8,2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History
0,1,Acadia Parish,1001,Armstrong Middle School,18,27,9,18,27,9
1,1,Acadia Parish,1002,Branch Elementary School,54,54,0,54,54,0
2,1,Acadia Parish,1004,Church Point Elementary School,32,28,-4,32,28,-4
3,1,Acadia Parish,1005,Church Point High School,NR,NR,NR,31,26,-5
4,1,Acadia Parish,1006,Church Point Middle School,22,27,5,22,27,5


In [19]:
leap19['School System Name'].value_counts()

Jefferson Parish                                      84
East Baton Rouge Parish                               82
Orleans Parish                                        76
Caddo Parish                                          62
Calcasieu Parish                                      53
St. Tammany Parish                                    50
Lafayette Parish                                      42
Livingston Parish                                     42
Rapides Parish                                        42
Ouachita Parish                                       33
St. Landry Parish                                     33
Tangipahoa Parish                                     32
Terrebonne Parish                                     31
Bossier Parish                                        31
New Orleans Archdiocese                               30
Lafourche Parish                                      26
Ascension Parish                                      26
Acadia Parish                  

In [20]:
#Lycée Français and other schools at the bottom of the list seem to be Type 2 charters
#Pulling only schools labeled as "Orleans Parish" 
nolaleap19 = leap19.loc[leap19['School System Name'] == 'Orleans Parish']

In [21]:
print(nolams19.shape)
print(nolaleap19.shape)
#Seems there are three schools that either closed or did not report LEAP scores in 2019

(79, 40)
(76, 10)


In [22]:
#Joining SY19 dataframes 
#Using an outer join to preserve those 3 schools that did not report LEAP data
nolasy19 = nolams19.merge(nolaleap19, left_on=['SiteCd'], right_on=['Site Code'], how='outer', indicator=True)

In [23]:
nolasy19.shape
#Join seems to have worked as it preserves the three schools

(79, 51)

In [24]:
nolasy19.loc[nolasy19['_merge'] == 'left_only']
#Orleans Central Office, Joseph S. Clark, and Audobon Charter didn't report LEAP data for various reasons
#e.g., Central Office seems to be PreK only, but
#we'll still keep them in the final dataframe

Unnamed: 0,School System,School System Name_x,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,ED%,Nonprofit,Charter Type,RollUpType,Parish Code,School System Code,School System Name_y,Site Code,Site Name,2018 % Mastery+ Grades 3-8,2019 % Mastery+ Grades 3-8,2018-2019 % Mastery+ Change Grades 3-8,2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History,_merge
11,R36,All Orleans,036700,036700,Orleans Central Office,269,0.394052,0.605948,0,8,187,12,0,53,9,216,1.0,0.0,0,269,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.620818,,,R36,36.0,,,,,,,,,,,left_only
52,R36,All Orleans,399003,W93001,Joseph S. Clark Preparatory High School,43,0.44186,0.55814,0,0,41,2,0,0,0,43,0.976744,0.023256,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,42,0.837209,"FirstLine Schools, Inc.",Type 3B,R36,36.0,,,,,,,,,,,left_only
76,R36,All Orleans,WBT001,WBT001,Audubon Charter School - Gentilly,151,0.589404,0.410596,0,5,110,6,0,23,7,128,0.993377,0.006623,0,12,22,36,39,42,0,0,0,0,0,0,0,0,0,0,0,0.662252,"French and Montessori Education, Inc.",Type 1,R36,36.0,,,,,,,,,,,left_only


In [25]:
nolasy19['SiteCd'].isna().sum()

0

In [26]:
nolasy19['SiteName'].isna().sum()

0

In [27]:
nolasy19.loc[nolasy19['_merge'] == 'right_only']
#Luckily, it seems all schools who reported LEAP scores were also present in the multistat file
#We can be confident in dropping the "Site Code" and "Site Name" columns from the dataframe 

Unnamed: 0,School System,School System Name_x,SiteCd,FedSiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,ED%,Nonprofit,Charter Type,RollUpType,Parish Code,School System Code,School System Name_y,Site Code,Site Name,2018 % Mastery+ Grades 3-8,2019 % Mastery+ Grades 3-8,2018-2019 % Mastery+ Change Grades 3-8,2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History,_merge


In [28]:
#Cleaning up SY19 columns
#Dropping redundant columns - FedSiteCd, School System, School System Name_x and School System Name_y 
#(they are all Orleans)
#Alsop dropping _merge, Parish Code, and School System Code, Site Code, and Site Name as per the check above 
nolasy19 = nolasy19.drop(columns=['School System','School System Name_x', 'FedSiteCd','School System Name_y',
                                '_merge','Parish Code','School System Code','Site Code','Site Name'], axis=1)


In [29]:
nolasy19.shape

(79, 42)

In [30]:
nolasy19.head(1)

Unnamed: 0,SiteCd,SiteName,Total Students,%Female,%Male,AmInd,Asian,Black,Hispanic,HawPI,White,Multiple,Minority,%Fully-EP,%LEP,Infants SpEd,PreSchool SpEd,PreK,Kindergarten,Grade1,Grade2,Grade3,Grade4,Grade5,Grade6,Grade7,Grade8,Grade9,GradeT9,Grade10,Grade11,Grade12,ED%,Nonprofit,Charter Type,RollUpType,2018 % Mastery+ Grades 3-8,2019 % Mastery+ Grades 3-8,2018-2019 % Mastery+ Change Grades 3-8,2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History
0,36011,Mary Bethune Elementary Literature/Technology,688,0.521802,0.478198,0,2,669,15,0,2,0,686,0.985465,0.014535,0,2,38,78,78,80,81,79,82,54,55,61,0,0,0,0,0,0.818314,,,R36,31,33,2,31,34,3


In [31]:
#Add 'SY19' to all column names 
nolasy19 = nolasy19.add_prefix('sy19_')

In [32]:
nolasy19.head()

Unnamed: 0,sy19_SiteCd,sy19_SiteName,sy19_Total Students,sy19_%Female,sy19_%Male,sy19_AmInd,sy19_Asian,sy19_Black,sy19_Hispanic,sy19_HawPI,sy19_White,sy19_Multiple,sy19_Minority,sy19_%Fully-EP,sy19_%LEP,sy19_Infants SpEd,sy19_PreSchool SpEd,sy19_PreK,sy19_Kindergarten,sy19_Grade1,sy19_Grade2,sy19_Grade3,sy19_Grade4,sy19_Grade5,sy19_Grade6,sy19_Grade7,sy19_Grade8,sy19_Grade9,sy19_GradeT9,sy19_Grade10,sy19_Grade11,sy19_Grade12,sy19_ED%,sy19_Nonprofit,sy19_Charter Type,sy19_RollUpType,sy19_2018 % Mastery+ Grades 3-8,sy19_2019 % Mastery+ Grades 3-8,sy19_2018-2019 % Mastery+ Change Grades 3-8,sy19_2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy19_2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy19_2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History
0,36011,Mary Bethune Elementary Literature/Technology,688,0.521802,0.478198,0,2,669,15,0,2,0,686,0.985465,0.014535,0,2,38,78,78,80,81,79,82,54,55,61,0,0,0,0,0,0.818314,,,R36,31,33,2,31,34,3
1,36035,Warren Easton Senior High School,991,0.514632,0.485368,0,0,960,25,0,1,5,990,0.984864,0.015136,0,0,0,0,0,0,0,0,0,0,0,0,229,15,247,245,255,0.784057,"Warren Easton Senior High School Foundation, Inc.",Type 3,R36,NR,NR,NR,28,21,-7
2,36060,Edward Hynes Charter School,717,0.525802,0.474198,0,32,252,30,0,361,42,356,0.967922,0.032078,0,17,0,83,84,79,79,72,80,77,76,70,0,0,0,0,0,0.35007,Hynes Charter School Corporation,Type 3,R36,61,63,2,60,63,3
3,36088,McDonogh #35 College Preparatory School,445,0.442697,0.557303,0,1,442,2,0,0,0,445,0.995506,0.004494,0,0,0,0,0,0,0,0,0,0,0,0,10,0,146,107,182,0.88764,,,R36,NR,NR,NR,9,8,-1
4,36132,Youth Study Center,106,0.056604,0.943396,0,0,104,2,0,0,0,106,0.990566,0.009434,0,0,0,0,0,0,0,0,0,0,0,4,35,0,32,14,21,0.896226,,,R36,2,≤ 1,>-5,2,≤ 1,>-5


In [33]:
#Repeat LEAP filtering, merge and column cleaning process for the other three years 
leap21.head()

Unnamed: 0,School System Code,School System Name,Site Code,Site Name,2019 % Mastery+ Grades 3-8,2021 % Mastery+ Grades 3-8,2019-2021 % Mastery+ Change Grades 3-8,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2019-2021 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology
0,1,Acadia Parish,1001,Armstrong Middle School,26,23,-3,26,23,-3
1,1,Acadia Parish,1002,Branch Elementary School,53,44,-9,53,44,-9
2,1,Acadia Parish,1004,Church Point Elementary School,28,31,3,28,31,3
3,1,Acadia Parish,1005,Church Point High School,NR,NR,NR,24,23,-1
4,1,Acadia Parish,1006,Church Point Middle School,24,22,-2,24,22,-2


In [34]:
leap21['School System Name'].value_counts()

Jefferson Parish                                      80
East Baton Rouge Parish                               77
Orleans Parish                                        74
Caddo Parish                                          57
Calcasieu Parish                                      52
St. Tammany Parish                                    51
Lafayette Parish                                      44
Rapides Parish                                        42
Livingston Parish                                     42
Ouachita Parish                                       33
Bossier Parish                                        32
St. Landry Parish                                     32
Terrebonne Parish                                     31
Tangipahoa Parish                                     31
Ascension Parish                                      29
Lafourche Parish                                      26
Acadia Parish                                         25
New Orleans Archdiocese        

In [35]:
#Again, since Lycée Français and other schools at the bottom of the list seem to be Type 2 charters,
#pulling only schools labeled as "Orleans Parish" 
nolaleap21 = leap21.loc[leap21['School System Name'] == 'Orleans Parish']

In [36]:
print(nolams21.shape)
print(nolaleap21.shape)
#Again, there seem to be 3 schools that either closed or did not report LEAP scores

(77, 41)
(74, 10)


In [37]:
nolams21.head(1)

Unnamed: 0,School System,School System Name,SIS Submit Site Code,Federal Reporting Site Code,Site Name,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code
1314,R36,Orleans Parish,36011,36011,Mary Bethune Elementary Literature/Technology,709,0.51622,0.48378,0,0,692,17,0,0,0,709,0.988717,0.0112835,0,6,32,76,75,78,78,78,76,78,79,53,0,0,0,0,0,0,0.854725,"Significant Educators, Inc.",Type 3,R36,36


In [38]:
nolaleap21.head(1)

Unnamed: 0,School System Code,School System Name,Site Code,Site Name,2019 % Mastery+ Grades 3-8,2021 % Mastery+ Grades 3-8,2019-2021 % Mastery+ Change Grades 3-8,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2019-2021 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology
670,R36,Orleans Parish,36011,Mary Bethune Elementary Literature/Technology,30,17,-13,30,17,-13


In [39]:
#Joining SY21 dataframes 
#Using an outer join to preserve those 3 schools that did not report LEAP data
nolasy21 = nolams21.merge(nolaleap21, left_on=['SIS Submit Site Code'], 
                          right_on=['Site Code'], how='outer', indicator=True)

In [40]:
nolasy21.shape
#Join seems to have worked as 3 extra rows are present

(77, 52)

In [41]:
nolasy21.loc[nolasy21['_merge'] == 'left_only']

Unnamed: 0,School System,School System Name_x,SIS Submit Site Code,Federal Reporting Site Code,Site Name_x,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code,School System Code,School System Name_y,Site Code,Site Name_y,2019 % Mastery+ Grades 3-8,2021 % Mastery+ Grades 3-8,2019-2021 % Mastery+ Change Grades 3-8,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2019-2021 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,_merge
8,R36,Orleans Parish,036700,036700,Orleans Central Office,330,0.390909,0.609091,0,7,195,25,0,94,9,236,0.99697,0.0030303,0,330,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.630303,,,R36,36,,,,,,,,,,,left_only
46,R36,Orleans Parish,3C2002,WZD001,Edward Hynes Charter School - UNO,207,0.47343,0.52657,0,7,140,2,0,46,12,161,0.990338,0.00966184,0,0,0,104,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0.618357,Hynes Charter School Corporation,Type 1,R36,36,,,,,,,,,,,left_only
73,R36,Orleans Parish,WC2001,WC2001,Opportunities Academy,65,0.276923,0.723077,0,2,59,0,0,3,1,62,1.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65,0,0.923077,Collegiate Academies,Type 1,R36,36,,,,,,,,,,,left_only


In [42]:
#Orleans Central Office, Hynes Charter @ UNO, and Opportunities Academy did not report LEAP data 
nolasy21.loc[nolasy21['_merge'] == 'right_only']

Unnamed: 0,School System,School System Name_x,SIS Submit Site Code,Federal Reporting Site Code,Site Name_x,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code,School System Code,School System Name_y,Site Code,Site Name_y,2019 % Mastery+ Grades 3-8,2021 % Mastery+ Grades 3-8,2019-2021 % Mastery+ Change Grades 3-8,2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2019-2021 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,_merge


In [43]:
#Again, no schools present in the LEAP dataset that are not already present in the multistat
print(nolasy21['SIS Submit Site Code'].isna().sum()) #How many nulls are from the multistat file? 
print(nolasy21['Site Code'].isna().sum()) #How many nulls are from the LEAP file? 

0
3


In [44]:
#Since there are no nulls present in the multistat file, we can be confident in dropping the "Site Code" column
#Cleaning up SY21 columns
#Dropping redundant columns - FedSiteCd, School System, School System Name_x and School System Name_y 
#(they are all Orleans)
#Alsop dropping _merge, Parish Code, and School System Code, Site Code, and Site Name as per the check above 
nolasy21 = nolasy21.drop(columns=['School System','School System Name_x', 'Federal Reporting Site Code',
                                  'School System Name_y',
                                '_merge','Parish Code','School System Code','Site Code','Site Name_y'], axis=1)

In [45]:
nolasy21.shape

(77, 43)

In [46]:
#Add 'SY21' to all column names 
nolasy21 = nolasy21.add_prefix('sy21_')

In [47]:
#Repeat process one last time for SY22
leap22.head()

Unnamed: 0,School System Code,School System Name,Site Code,Site Name,2021 % Mastery+ Grades 3-8,2022 % Mastery+ Grades 3-8,2021-2022 % Mastery+ Change Grades 3-8,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2022 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021-2022 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology
0,1,Acadia Parish,1001,Armstrong Middle School,23,16,-7,23,16,-7
1,1,Acadia Parish,1002,Branch Elementary School,44,44,0,44,44,0
2,1,Acadia Parish,1004,Church Point Elementary School,31,28,-3,31,28,-3
3,1,Acadia Parish,1005,Church Point High School,NR,NR,NR,23,20,-3
4,1,Acadia Parish,1006,Church Point Middle School,22,26,4,22,26,4


In [48]:
leap22['School System Name'].value_counts()

Jefferson Parish                                      81
East Baton Rouge Parish                               80
Orleans Parish                                        74
Caddo Parish                                          57
Calcasieu Parish                                      52
St. Tammany Parish                                    50
Lafayette Parish                                      44
Livingston Parish                                     42
Rapides Parish                                        41
Ouachita Parish                                       33
Bossier Parish                                        32
St. Landry Parish                                     31
Tangipahoa Parish                                     31
Ascension Parish                                      30
Terrebonne Parish                                     29
Lafourche Parish                                      26
New Orleans Archdiocese                               25
Acadia Parish                  

In [49]:
#Again, not including Type 2 charters in final dataframe
nolaleap22 = leap22.loc[leap22['School System Name'] == 'Orleans Parish']

In [50]:
print(nolams22.shape)
print(nolaleap22.shape)
#Again, there seem to be 3 schools that either closed or did not report LEAP scores

(77, 41)
(74, 10)


In [51]:
nolams22.head(1)

Unnamed: 0,School System,School System Name,SIS Submit Site Code,Federal Reporting Site Code,Site Name,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code
740,R36,Orleans Parish,36011,36011,Mary Bethune Elementary Literature/Technology,699,0.513591,0.486409,0,0,678,20,1,0,0,699,0.981402,0.018598,0,8,32,70,74,72,66,70,77,74,78,78,0,0,0,0,0,0,0.927039,"Significant Educators, Inc.",Type 3,R36,36


In [52]:
nolaleap22.head(1)

Unnamed: 0,School System Code,School System Name,Site Code,Site Name,2021 % Mastery+ Grades 3-8,2022 % Mastery+ Grades 3-8,2021-2022 % Mastery+ Change Grades 3-8,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2022 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021-2022 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology
674,R36,Orleans Parish,36011,Mary Bethune Elementary Literature/Technology,17,20,3,17,21,4


In [53]:
#Joining SY22 dataframes 
#Using an outer join to preserve those 3 schools that did not report LEAP data
nolasy22 = nolams22.merge(nolaleap22, left_on=['SIS Submit Site Code'], 
                          right_on=['Site Code'], how='outer', indicator=True)

In [54]:
nolasy22.shape

(77, 52)

In [55]:
nolasy22.loc[nolasy22['_merge'] == 'left_only']
#Again, Central Office, Hynes @ UNO, and Opportunities Academy did not report LEAP data 

Unnamed: 0,School System,School System Name_x,SIS Submit Site Code,Federal Reporting Site Code,Site Name_x,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code,School System Code,School System Name_y,Site Code,Site Name_y,2021 % Mastery+ Grades 3-8,2022 % Mastery+ Grades 3-8,2021-2022 % Mastery+ Change Grades 3-8,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2022 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021-2022 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,_merge
5,R36,Orleans Parish,036700,036700,Orleans Central Office,365,0.350685,0.649315,0,5,235,20,0,96,9,269,1.0,0.0,0,365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.619178,,,R36,36,,,,,,,,,,,left_only
42,R36,Orleans Parish,3C2002,WZD001,Edward Hynes Charter School - UNO,290,0.434483,0.565517,1,9,190,3,0,66,21,224,0.982759,0.0172414,0,0,0,99,100,91,0,0,0,0,0,0,0,0,0,0,0,0,0.593103,Hynes Charter School Corporation,Type 1,R36,36,,,,,,,,,,,left_only
73,R36,Orleans Parish,WC2001,WC2001,Opportunities Academy,70,0.314286,0.685714,0,2,64,1,0,2,1,68,1.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,0,0.957143,Collegiate Academies,Type 1,R36,36,,,,,,,,,,,left_only


In [56]:
nolasy22.loc[nolasy22['_merge'] == 'right_only']

Unnamed: 0,School System,School System Name_x,SIS Submit Site Code,Federal Reporting Site Code,Site Name_x,Total Enrollment,% Female,% Male,American Indian,Asian,Black,Hispanic,Hawaiian/Pacific Islander,White,Multiple Races (Non-Hispanic),Minority,% Fully English Proficient,% Limited English Proficient,Infants (Sp Ed),Pre-School (Sp Ed),Pre-K (Reg Ed),Kindergarten,Grade 1,Grade 2,Grade 3,Grade 4,Grade 5,Grade 6,Grade 7,Grade 8,Grade 9,Grade T9,Grade 10,Grade 11,Grade 12,Extension Academy,% Economically Disadvantaged,Nonprofit Organization,Charter Type,School System Roll Up Type,Parish Code,School System Code,School System Name_y,Site Code,Site Name_y,2021 % Mastery+ Grades 3-8,2022 % Mastery+ Grades 3-8,2021-2022 % Mastery+ Change Grades 3-8,2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2022 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,2021-2022 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,_merge


In [57]:
#Again, no schools present in the LEAP dataset that are not already present in the multistat
print(nolasy22['SIS Submit Site Code'].isna().sum()) #How many nulls are from the multistat file? 
print(nolasy22['Site Code'].isna().sum()) #How many nulls are from the LEAP file? 

0
3


In [58]:
#Since there are no nulls present in the multistat file, we can be confident in dropping the "Site Code" column
#Cleaning up SY22 columns
#Dropping redundant columns - FedSiteCd, School System, School System Name_x and School System Name_y 
#(they are all Orleans)
#Alsop dropping _merge, Parish Code, and School System Code, Site Code, and Site Name as per the check above 
nolasy22 = nolasy22.drop(columns=['School System','School System Name_x', 'Federal Reporting Site Code',
                                  'School System Name_y',
                                '_merge','Parish Code','School System Code','Site Code','Site Name_y'], axis=1)

In [59]:
nolasy22.shape

(77, 43)

In [60]:
#Lastly, add 'SY22' to all column names 
nolasy22 = nolasy22.add_prefix('sy22_')

In [61]:
nolasy19.head(1)

Unnamed: 0,sy19_SiteCd,sy19_SiteName,sy19_Total Students,sy19_%Female,sy19_%Male,sy19_AmInd,sy19_Asian,sy19_Black,sy19_Hispanic,sy19_HawPI,sy19_White,sy19_Multiple,sy19_Minority,sy19_%Fully-EP,sy19_%LEP,sy19_Infants SpEd,sy19_PreSchool SpEd,sy19_PreK,sy19_Kindergarten,sy19_Grade1,sy19_Grade2,sy19_Grade3,sy19_Grade4,sy19_Grade5,sy19_Grade6,sy19_Grade7,sy19_Grade8,sy19_Grade9,sy19_GradeT9,sy19_Grade10,sy19_Grade11,sy19_Grade12,sy19_ED%,sy19_Nonprofit,sy19_Charter Type,sy19_RollUpType,sy19_2018 % Mastery+ Grades 3-8,sy19_2019 % Mastery+ Grades 3-8,sy19_2018-2019 % Mastery+ Change Grades 3-8,sy19_2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy19_2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy19_2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History
0,36011,Mary Bethune Elementary Literature/Technology,688,0.521802,0.478198,0,2,669,15,0,2,0,686,0.985465,0.014535,0,2,38,78,78,80,81,79,82,54,55,61,0,0,0,0,0,0.818314,,,R36,31,33,2,31,34,3


In [62]:
nolasy21.head(1)

Unnamed: 0,sy21_SIS Submit Site Code,sy21_Site Name_x,sy21_Total Enrollment,sy21_% Female,sy21_% Male,sy21_American Indian,sy21_Asian,sy21_Black,sy21_Hispanic,sy21_Hawaiian/Pacific Islander,sy21_White,sy21_Multiple Races (Non-Hispanic),sy21_Minority,sy21_% Fully English Proficient,sy21_% Limited English Proficient,sy21_Infants (Sp Ed),sy21_Pre-School (Sp Ed),sy21_Pre-K (Reg Ed),sy21_Kindergarten,sy21_Grade 1,sy21_Grade 2,sy21_Grade 3,sy21_Grade 4,sy21_Grade 5,sy21_Grade 6,sy21_Grade 7,sy21_Grade 8,sy21_Grade 9,sy21_Grade T9,sy21_Grade 10,sy21_Grade 11,sy21_Grade 12,sy21_Extension Academy,sy21_% Economically Disadvantaged,sy21_Nonprofit Organization,sy21_Charter Type,sy21_School System Roll Up Type,sy21_2019 % Mastery+ Grades 3-8,sy21_2021 % Mastery+ Grades 3-8,sy21_2019-2021 % Mastery+ Change Grades 3-8,sy21_2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,sy21_2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,sy21_2019-2021 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology
0,36011,Mary Bethune Elementary Literature/Technology,709,0.51622,0.48378,0,0,692,17,0,0,0,709,0.988717,0.0112835,0,6,32,76,75,78,78,78,76,78,79,53,0,0,0,0,0,0,0.854725,"Significant Educators, Inc.",Type 3,R36,30,17,-13,30,17,-13


In [63]:
print(nolasy19.shape)
print(nolasy21.shape)

(79, 42)
(77, 43)


In [64]:
print(nolasy19.duplicated(subset=['sy19_SiteCd']).sum())
print(nolasy19.duplicated(subset=['sy19_SiteName']).sum())

0
0


In [65]:
print(nolasy21.duplicated(subset=['sy21_SIS Submit Site Code']).sum())
print(nolasy21.duplicated(subset=['sy21_Site Name_x']).sum())

0
0


In [66]:
#Joining all three dataframes longitudinally to create final table 
#Join SY19 and SY21 data
sy19to21 = nolasy19.merge(nolasy21, left_on=['sy19_SiteCd'], 
                          right_on=['sy21_SIS Submit Site Code'], how='outer', indicator=True)

In [67]:
sy19to21.shape

(93, 86)

In [68]:
potential_sy21_closures = sy19to21.loc[sy19to21['_merge'] == 'left_only']
#School codes that appear only in the FY19 datafiles

In [69]:
potential_sy21_closures.shape
#16 schools appear only in the FY19 datafiles but not in the FY21 datafiles

(16, 86)

In [70]:
potential_sy21_new_schools = sy19to21.loc[sy19to21['_merge'] == 'right_only']
#School codes that appear only in the FY21 datafiles 

In [71]:
potential_sy21_new_schools.shape
#14 schools appear in the FY21 datafile but not in the FY19 datafile

(14, 86)

In [72]:
potential_sy21_closures 

Unnamed: 0,sy19_SiteCd,sy19_SiteName,sy19_Total Students,sy19_%Female,sy19_%Male,sy19_AmInd,sy19_Asian,sy19_Black,sy19_Hispanic,sy19_HawPI,sy19_White,sy19_Multiple,sy19_Minority,sy19_%Fully-EP,sy19_%LEP,sy19_Infants SpEd,sy19_PreSchool SpEd,sy19_PreK,sy19_Kindergarten,sy19_Grade1,sy19_Grade2,sy19_Grade3,sy19_Grade4,sy19_Grade5,sy19_Grade6,sy19_Grade7,sy19_Grade8,sy19_Grade9,sy19_GradeT9,sy19_Grade10,sy19_Grade11,sy19_Grade12,sy19_ED%,sy19_Nonprofit,sy19_Charter Type,sy19_RollUpType,sy19_2018 % Mastery+ Grades 3-8,sy19_2019 % Mastery+ Grades 3-8,sy19_2018-2019 % Mastery+ Change Grades 3-8,sy19_2018 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy19_2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy19_2018-2019 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History,sy21_SIS Submit Site Code,sy21_Site Name_x,sy21_Total Enrollment,sy21_% Female,sy21_% Male,sy21_American Indian,sy21_Asian,sy21_Black,sy21_Hispanic,sy21_Hawaiian/Pacific Islander,sy21_White,sy21_Multiple Races (Non-Hispanic),sy21_Minority,sy21_% Fully English Proficient,sy21_% Limited English Proficient,sy21_Infants (Sp Ed),sy21_Pre-School (Sp Ed),sy21_Pre-K (Reg Ed),sy21_Kindergarten,sy21_Grade 1,sy21_Grade 2,sy21_Grade 3,sy21_Grade 4,sy21_Grade 5,sy21_Grade 6,sy21_Grade 7,sy21_Grade 8,sy21_Grade 9,sy21_Grade T9,sy21_Grade 10,sy21_Grade 11,sy21_Grade 12,sy21_Extension Academy,sy21_% Economically Disadvantaged,sy21_Nonprofit Organization,sy21_Charter Type,sy21_School System Roll Up Type,sy21_2019 % Mastery+ Grades 3-8,sy21_2021 % Mastery+ Grades 3-8,sy21_2019-2021 % Mastery+ Change Grades 3-8,sy21_2019 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,sy21_2021 % Mastery+ Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,sy21_2019-2021 % Mastery+ Change Grades 3-8 & English I-II & Algebra I & Geometry & US History & Biology,_merge
1,036035,Warren Easton Senior High School,991.0,0.514632,0.485368,0.0,0.0,960.0,25.0,0.0,1.0,5.0,990.0,0.984864,0.015136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,229.0,15.0,247.0,245.0,255.0,0.784057,"Warren Easton Senior High School Foundation, Inc.",Type 3,R36,NR,NR,NR,28.0,21.0,-7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
2,036060,Edward Hynes Charter School,717.0,0.525802,0.474198,0.0,32.0,252.0,30.0,0.0,361.0,42.0,356.0,0.967922,0.032078,0.0,17.0,0.0,83.0,84.0,79.0,79.0,72.0,80.0,77.0,76.0,70.0,0.0,0.0,0.0,0.0,0.0,0.35007,Hynes Charter School Corporation,Type 3,R36,61,63,2,60.0,63.0,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
7,036192,Foundation Preparatory,177.0,0.440678,0.559322,0.0,5.0,125.0,41.0,2.0,4.0,0.0,173.0,0.728814,0.271186,0.0,0.0,0.0,32.0,53.0,42.0,23.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99435,Foundation Prep,Type 1,R36,26,28,2,26.0,28.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
8,036193,Cypress Academy,184.0,0.429348,0.570652,0.0,1.0,93.0,26.0,0.0,56.0,8.0,128.0,0.951087,0.048913,0.0,0.0,0.0,41.0,34.0,36.0,45.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.684783,Cypress Academy,Type 1,R36,37,35,-2,37.0,35.0,-2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
12,300001,Pierre A. Capdau Charter School at Avery Alexa...,754.0,0.466844,0.533156,1.0,1.0,705.0,37.0,3.0,7.0,0.0,747.0,0.952255,0.047745,0.0,0.0,39.0,49.0,65.0,71.0,73.0,75.0,98.0,96.0,97.0,91.0,0.0,0.0,0.0,0.0,0.0,0.994695,New Beginnings Schools Foundation,type 3B,R36,14,16,2,14.0,16.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
13,300002,Nelson Elementary School,277.0,0.440433,0.559567,7.0,1.0,265.0,4.0,0.0,0.0,0.0,277.0,0.981949,0.018051,0.0,0.0,19.0,17.0,26.0,23.0,21.0,32.0,36.0,27.0,40.0,36.0,0.0,0.0,0.0,0.0,0.0,1.0,New Beginnings Schools Foundation,Type 3B,R36,10,11,1,10.0,11.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
14,300003,John F. Kennedy High School,683.0,0.493411,0.506589,0.0,1.0,661.0,18.0,0.0,2.0,1.0,681.0,0.976574,0.023426,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,203.0,0.0,152.0,160.0,168.0,0.995608,New Beginnings Schools Foundation,type 3B,R36,NR,NR,NR,8.0,9.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
20,367001,Edgar P. Harney Spirit of Excellence Academy,228.0,0.5,0.5,0.0,0.0,207.0,19.0,0.0,2.0,0.0,226.0,0.916667,0.083333,0.0,0.0,0.0,15.0,18.0,16.0,30.0,23.0,37.0,31.0,33.0,25.0,0.0,0.0,0.0,0.0,0.0,1.0,"Spirit of Excellence Academy, Inc.",type 3B,R36,15,7,-8,15.0,7.0,-8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
24,369005,ReNEW Accelerated High School,230.0,0.478261,0.521739,1.0,0.0,206.0,21.0,0.0,2.0,0.0,228.0,0.908696,0.091304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,0.0,45.0,52.0,97.0,0.934783,ReNEW-Reinventing Education,Type 3B,R36,NR,NR,NR,2.0,4.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
36,393001,Lafayette Academy,972.0,0.490741,0.509259,0.0,1.0,906.0,50.0,1.0,4.0,10.0,968.0,0.961934,0.038066,0.0,2.0,17.0,70.0,79.0,100.0,94.0,108.0,118.0,131.0,132.0,121.0,0.0,0.0,0.0,0.0,0.0,0.932099,Choice Foundation,type 3B,R36,24,11,-13,24.0,11.0,-13.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only


In [73]:
potential_sy21_closures = potential_sy21_closures[['sy19_SiteCd','sy19_SiteName']]

In [74]:
potential_sy21_closures

Unnamed: 0,sy19_SiteCd,sy19_SiteName
1,036035,Warren Easton Senior High School
2,036060,Edward Hynes Charter School
7,036192,Foundation Preparatory
8,036193,Cypress Academy
12,300001,Pierre A. Capdau Charter School at Avery Alexa...
13,300002,Nelson Elementary School
14,300003,John F. Kennedy High School
20,367001,Edgar P. Harney Spirit of Excellence Academy
24,369005,ReNEW Accelerated High School
36,393001,Lafayette Academy


In [75]:
potential_sy21_new_schools = potential_sy21_new_schools[['sy21_SIS Submit Site Code','sy21_Site Name_x']]

In [76]:
potential_sy21_new_schools

Unnamed: 0,sy21_SIS Submit Site Code,sy21_Site Name_x
79,036021,Mary D. Coghill Elementary School
80,360003,New Orleans Accelerated High School
81,398009,John F. Kennedy High School
82,3C2001,Edward Hynes Charter School - Lakeview
83,3C2002,Edward Hynes Charter School - UNO
84,3C3001,Foundation Preparatory Academy
85,3C3002,Lafayette Academy Charter School
86,3C3003,Esperanza Charter School
87,WBW001,Living School
88,WBZ001,McDonogh 35 Senior High School


In [77]:
potential_openers_list = potential_sy21_new_schools['sy21_Site Name_x'].tolist()

In [78]:
#Use fuzzy matching since school names seem to differ slightly 

def fuzzy(col, potential_openers_list, score):
    fuzzy_words, score = process.extractOne(col, potential_openers_list, scorer=score)
    if score<75:
        return 'no_high_matches'
    else:
        return fuzzy_words,score
    
potential_sy21_closures['fuzzy_match'] = potential_sy21_closures['sy19_SiteName'].apply(fuzzy, potential_openers_list=potential_openers_list, score=fuzz.ratio)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  potential_sy21_closures['fuzzy_match'] = potential_sy21_closures['sy19_SiteName'].apply(fuzzy, potential_openers_list=potential_openers_list, score=fuzz.ratio)


In [79]:
potential_sy21_closures

Unnamed: 0,sy19_SiteCd,sy19_SiteName,fuzzy_match
1,036035,Warren Easton Senior High School,"(Warren Easton Charter High School, 86)"
2,036060,Edward Hynes Charter School,"(Edward Hynes Charter School - UNO, 90)"
7,036192,Foundation Preparatory,"(Foundation Preparatory Academy, 85)"
8,036193,Cypress Academy,no_high_matches
12,300001,Pierre A. Capdau Charter School at Avery Alexa...,"(Pierre A. Capdau Charter School, 77)"
13,300002,Nelson Elementary School,no_high_matches
14,300003,John F. Kennedy High School,"(John F. Kennedy High School, 100)"
20,367001,Edgar P. Harney Spirit of Excellence Academy,no_high_matches
24,369005,ReNEW Accelerated High School,"(New Orleans Accelerated High School, 84)"
36,393001,Lafayette Academy,no_high_matches


In [80]:
sy19_closures = potential_sy21_closures.loc[potential_sy21_closures['fuzzy_match'] == 'no_high_matches']
sy19_closure_list = dict(zip(sy19_closures.sy19_SiteCd, sy19_closures.sy19_SiteName))

In [81]:
sy19_closure_list

{'036193': 'Cypress Academy',
 '300002': 'Nelson Elementary School',
 '367001': 'Edgar P. Harney Spirit of Excellence Academy',
 '393001': 'Lafayette Academy',
 '395003': 'William J. Fischer Accelerated Academy',
 '395004': 'McDonogh #32 Literacy Charter School',
 '399003': 'Joseph S. Clark Preparatory High School',
 'W32001': 'Joseph A. Craig Charter School'}

In [82]:
len(sy19_closure_list)

8

Looks like 8 schools no longer enroll students in 2021. 

But fuzzy matching isn't perfect - the function didn't pick up on ReNEW Accelerated closing and turning into New Orleans Accelerated High, which brings the total schools that did not serve students in 2021 to **9**. It also should not include Lafayette Academy, which simply switched buildings. 

In [112]:
sy19_closure_list.update({'369005':'ReNEW Accelerated High School'})
del sy19_closure_list['393001']

In [113]:
sy19_closure_list

{'036193': 'Cypress Academy',
 '300002': 'Nelson Elementary School',
 '367001': 'Edgar P. Harney Spirit of Excellence Academy',
 '395003': 'William J. Fischer Accelerated Academy',
 '395004': 'McDonogh #32 Literacy Charter School',
 '399003': 'Joseph S. Clark Preparatory High School',
 'W32001': 'Joseph A. Craig Charter School',
 '369005': 'ReNEW Accelerated High School'}

In [85]:
code_changers = potential_sy21_closures.loc[potential_sy21_closures['fuzzy_match'] != 'no_high_matches']
code_change_list = dict(zip(code_changers.sy19_SiteCd, code_changers.sy19_SiteName))

In [86]:
code_change_list

{'036035': 'Warren Easton Senior High School',
 '036060': 'Edward Hynes Charter School',
 '036192': 'Foundation Preparatory',
 '300001': 'Pierre A. Capdau Charter School at Avery Alexander',
 '300003': 'John F. Kennedy High School',
 '369005': 'ReNEW Accelerated High School',
 '393002': 'Esperanza Charter School',
 '3A5001': 'Mary D. Coghill Charter School'}

I know ReNEW Accelerated High School in actuality closed, so that brings the list of code changers down to 7. I also know Lafayette Academy should be on this list, which the function also missed. 

In [116]:
code_change_list.update({'393001':'Lafayette Academy'})
del code_change_list['369005']

In [117]:
code_change_list

{'036035': 'Warren Easton Senior High School',
 '036060': 'Edward Hynes Charter School',
 '036192': 'Foundation Preparatory',
 '300001': 'Pierre A. Capdau Charter School at Avery Alexander',
 '300003': 'John F. Kennedy High School',
 '393002': 'Esperanza Charter School',
 '3A5001': 'Mary D. Coghill Charter School',
 '393001': 'Lafayette Academy'}

In [99]:
len(code_change_list)

7

In [100]:
potential_sy21_new_schools

Unnamed: 0,sy21_SIS Submit Site Code,sy21_Site Name_x
79,036021,Mary D. Coghill Elementary School
80,360003,New Orleans Accelerated High School
81,398009,John F. Kennedy High School
82,3C2001,Edward Hynes Charter School - Lakeview
83,3C2002,Edward Hynes Charter School - UNO
84,3C3001,Foundation Preparatory Academy
85,3C3002,Lafayette Academy Charter School
86,3C3003,Esperanza Charter School
87,WBW001,Living School
88,WBZ001,McDonogh 35 Senior High School


In [103]:
potential_openers_list

['Mary D. Coghill Elementary School',
 'New Orleans Accelerated High School',
 'John F. Kennedy High School',
 'Edward Hynes Charter School - Lakeview',
 'Edward Hynes Charter School - UNO',
 'Foundation Preparatory Academy',
 'Lafayette Academy Charter School',
 'Esperanza Charter School',
 'Living School',
 'McDonogh 35 Senior High School',
 'Opportunities Academy',
 'IDEA Oscar Dunn',
 'Warren Easton Charter High School',
 'Pierre A. Capdau Charter School']

In [104]:
potential_sy21_new_schools

Unnamed: 0,sy21_SIS Submit Site Code,sy21_Site Name_x
79,036021,Mary D. Coghill Elementary School
80,360003,New Orleans Accelerated High School
81,398009,John F. Kennedy High School
82,3C2001,Edward Hynes Charter School - Lakeview
83,3C2002,Edward Hynes Charter School - UNO
84,3C3001,Foundation Preparatory Academy
85,3C3002,Lafayette Academy Charter School
86,3C3003,Esperanza Charter School
87,WBW001,Living School
88,WBZ001,McDonogh 35 Senior High School


In [106]:
changers_list = list(code_change_list.values())

In [107]:
changers_list

['Warren Easton Senior High School',
 'Edward Hynes Charter School',
 'Foundation Preparatory',
 'Pierre A. Capdau Charter School at Avery Alexander',
 'John F. Kennedy High School',
 'Esperanza Charter School',
 'Mary D. Coghill Charter School']

In [109]:
#Repeat the process to find the new schools from 2021 which were not open in 2019 

def fuzzy2(col, changers_list, score):
    fuzzy_words, score = process.extractOne(col, changers_list, scorer=score)
    if score<75:
        return 'no_high_matches'
    else:
        return fuzzy_words,score
    
potential_sy21_new_schools['fuzzy_match'] = potential_sy21_new_schools['sy21_Site Name_x'].apply(fuzzy2, changers_list=changers_list, score=fuzz.ratio)



In [110]:
#If there is NOT a match, it means that school started enrolling students in 2021 
#Except for Lafayette Academy, which the funciton is missing
potential_sy21_new_schools

Unnamed: 0,sy21_SIS Submit Site Code,sy21_Site Name_x,fuzzy_match
79,036021,Mary D. Coghill Elementary School,"(Mary D. Coghill Charter School, 79)"
80,360003,New Orleans Accelerated High School,no_high_matches
81,398009,John F. Kennedy High School,"(John F. Kennedy High School, 100)"
82,3C2001,Edward Hynes Charter School - Lakeview,"(Edward Hynes Charter School, 83)"
83,3C2002,Edward Hynes Charter School - UNO,"(Edward Hynes Charter School, 90)"
84,3C3001,Foundation Preparatory Academy,"(Foundation Preparatory, 85)"
85,3C3002,Lafayette Academy Charter School,no_high_matches
86,3C3003,Esperanza Charter School,"(Esperanza Charter School, 100)"
87,WBW001,Living School,no_high_matches
88,WBZ001,McDonogh 35 Senior High School,no_high_matches


In [120]:
sy21_new_schools = potential_sy21_new_schools.loc[potential_sy21_new_schools['fuzzy_match'] == 'no_high_matches']

In [125]:
sy21_new_schools_list = sy21_new_schools.set_index('sy21_SIS Submit Site Code').to_dict()['sy21_Site Name_x']

In [126]:
sy21_new_schools_list

{'360003': 'New Orleans Accelerated High School',
 '3C3002': 'Lafayette Academy Charter School',
 'WBW001': 'Living School',
 'WBZ001': 'McDonogh 35 Senior High School',
 'WC2001': 'Opportunities Academy',
 'WC3001': 'IDEA Oscar Dunn'}

Question 5: I didn't individually Google all the schools whose school name was also their "School System Name" in the multistat files. I checked Lycee Francais and confirmed it to be a Type 2 charter, so I assumed all the others were Type 2 charters, but did not check each one. I would advise anyone using this code to review the "School System Name" lists and make sure that the table does not exclude schools they would like to include - especially if they are looking at schools outside of Orleans Parish. I am also unsure how fluid this Type 2 category is (do schools move between Type 1 and Type 2 from year to year?), so colleagues should exercise caution and double-check the status of the schools they are interested in to ensure that they aren't leaving out relevant data. 
