In [1]:
# Load the demographic data from the NYC Open Data CSV endpoint.
import pandas as pd

# The `$limit` query parameter is part of the request URL
# (presumably there to lift the endpoint's row cap -- TODO confirm).
url = "https://data.cityofnewyork.us/resource/vmmu-wj3w.csv?$limit=1000000"
df = pd.read_csv(filepath_or_buffer=url)


In [2]:
# add district and boro info
# Letter at index 2 of a DBN -> borough name.
boros = {
    "K": "Brooklyn",
    "X": "Bronx",
    "M": "Manhattan",
    "Q": "Queens",
    "R": "Staten Island",
}


def district(dbn):
    """Return the district number held in the first two characters of ``dbn``.

    Raises ValueError if those characters are not an integer literal.
    """
    district_code = dbn[:2]
    return int(district_code)


def boro(dbn):
    """Return the borough name for the letter at index 2 of ``dbn``.

    Raises KeyError if that letter is not a key of ``boros``.
    """
    boro_letter = dbn[2]
    return boros[boro_letter]

# Derive the district number and borough name from each school's DBN.
df["district"] = df["dbn"].apply(district)
df["boro"] = df["dbn"].apply(boro)

# Keep just the most recent year. The explicit .copy() makes the result an
# independent frame, so adding or overwriting columns on `df` in later cells
# does not trigger pandas' SettingWithCopyWarning.
df = df[df["year"] == "2020-21"].copy()



In [3]:
def str_pct(row):
    """Convert a row's ``poverty_1`` value to a fraction.

    Parameters
    ----------
    row : mapping (e.g. one row of a DataFrame as a pandas Series)
        Must provide ``row["poverty_1"]``.

    Returns
    -------
    float
        ``"81.9%"`` -> ``0.819``; a string containing ``"Above"`` -> ``0.96``;
        a string containing ``"Below"`` -> ``0.04``. A value that is not a
        string is returned as ``float(value)``, so applying the conversion to
        an already converted column is harmless.

    Raises
    ------
    ValueError
        If the string is neither a number nor an "Above"/"Below" label.
    """
    raw = row["poverty_1"]
    if not isinstance(raw, str):
        # Already numeric (e.g. the cell was run a second time); nothing to parse.
        return float(raw)

    text = raw.strip().rstrip("%")
    try:
        return float(text) / 100
    except ValueError:
        # "Above ..." / "Below ..." labels carry no exact figure, so fixed
        # stand-in fractions are used. They are plain constants: multiplying by
        # total_enrollment and dividing it back out only added a
        # division-by-zero risk for schools with no enrollment.
        if "Above" in text:
            return 0.96
        if "Below" in text:
            return 0.04
        raise ValueError(f"unrecognised poverty_1 value: {raw!r}") from None

# Overwrite poverty_1 with the converted fractions; axis="columns" hands
# str_pct one full row at a time.
df["poverty_1"] = df.apply(str_pct, axis="columns")

# Show the converted column as this cell's output.
df["poverty_1"]

4       0.819
9       0.712
14      0.709
19      0.960
24      0.769
        ...  
9148    0.960
9153    0.832
9158    0.878
9163    0.819
9168    0.864
Name: poverty_1, Length: 1878, dtype: float64

In [4]:
# the columns we want to look at
cols = (
    # school identifiers and derived location columns
    ["dbn", "district", "boro", "school_name"]
    # school size
    + ["total_enrollment"]
    # sex
    + ["female_1", "male_1"]
    # race / ethnicity
    + [
        "asian_1",
        "black_1",
        "hispanic_1",
        "multi_racial_1",
        "native_american_1",
        "white_1",
    ]
    # remaining indicator columns
    + [
        "students_with_disabilities_1",
        "english_language_learners_1",
        "poverty_1",
    ]
)

# Display only the selected columns (all rows, in the order listed in `cols`).
df.loc[:, cols]

Unnamed: 0,dbn,district,boro,school_name,total_enrollment,female_1,male_1,asian_1,black_1,hispanic_1,multi_racial_1,native_american_1,white_1,students_with_disabilities_1,english_language_learners_1,poverty_1
4,01M015,1,Manhattan,P.S. 015 Roberto Clemente,193,0.523000,0.477000,0.135000,0.275000,0.528000,0.005000,0.000000,0.057000,0.223000,0.109,0.819
9,01M019,1,Manhattan,P.S. 019 Asher Levy,212,0.467000,0.533000,0.061000,0.193000,0.613000,0.033000,0.005000,0.080000,0.392000,0.042,0.712
14,01M020,1,Manhattan,P.S. 020 Anna Silver,412,0.488000,0.512000,0.248000,0.133000,0.522000,0.015000,0.007000,0.073000,0.218000,0.119,0.709
19,01M034,1,Manhattan,P.S. 034 Franklin D. Roosevelt,273,0.462000,0.538000,0.026000,0.381000,0.557000,0.004000,0.004000,0.029000,0.392000,0.062,0.960
24,01M063,1,Manhattan,The STAR Academy - P.S.63,208,0.476000,0.524000,0.029000,0.192000,0.635000,0.034000,0.005000,0.091000,0.279000,0.014,0.769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9148,84X705,84,Bronx,Family Life Academy Charter School,416,0.548077,0.451923,0.007212,0.211538,0.776442,0.000000,0.002404,0.000000,0.100962,0.197,0.960
9153,84X706,84,Bronx,Harriet Tubman Charter School,647,0.514683,0.485317,0.000000,0.616692,0.358578,0.006182,0.012365,0.006182,0.097372,0.110,0.832
9158,84X717,84,Bronx,Icahn Charter School,328,0.512195,0.487805,0.012195,0.500000,0.478659,0.000000,0.009146,0.000000,0.070122,0.076,0.878
9163,84X718,84,Bronx,Bronx Charter School for Better Learning,570,0.522807,0.477193,0.008772,0.845614,0.100000,0.015789,0.014035,0.014035,0.108772,0.012,0.819


In [5]:
# Split the schools by district number: districts below 75 go to `public`,
# district 84 goes to `charter` (presumably the charter district -- the
# district-84 rows displayed earlier are all charter schools; confirm).
in_public_district = df["district"].lt(75)
in_charter_district = df["district"].eq(84)

public = df[in_public_district]
charter = df[in_charter_district]


In [6]:
# Schools whose poverty_1 fraction exceeds 0.2.
above_poverty_cutoff = df["poverty_1"].gt(0.2)
p_schools = df[above_poverty_cutoff]

In [7]:
# Total students enrolled in each group of schools.
charter_enrollment = charter["total_enrollment"].sum()
public_enrollment = public["total_enrollment"].sum()

print("charter", charter_enrollment)
print("public", public_enrollment)

charter 138648
public 877775


In [17]:
# Numeric columns to average (replaces the earlier, wider `cols` list).
cols = (
    # school size
    ["total_enrollment"]
    # sex
    + ["female_1", "male_1"]
    # race / ethnicity
    + [
        "asian_1",
        "black_1",
        "hispanic_1",
        "multi_racial_1",
        "native_american_1",
        "white_1",
    ]
    # remaining indicator columns
    + [
        "students_with_disabilities_1",
        "english_language_learners_1",
        "poverty_1",
    ]
)

print("Charter School Aggregates")
# `charter` is rebound here from the per-school rows to a one-column summary
# (index = entries of `cols`, column "mean"); each value is the unweighted mean
# across schools. Skip the step when that has already happened, so re-running
# this cell does not fail with a KeyError on the summarized frame.
if list(charter.columns) != ["mean"]:
    charter = charter[cols].agg(["mean"]).transpose()

Charter School Aggregates


In [18]:
print("Public School Aggregates")
# `public` is rebound here from the per-school rows to a one-column summary
# (index = entries of `cols`, column "mean"); each value is the unweighted mean
# across schools. Skip the step when that has already happened, so re-running
# this cell does not fail with a KeyError on the summarized frame.
if list(public.columns) != ["mean"]:
    public = public[cols].agg(["mean"]).transpose()

Public School Aggregates


In [21]:
# Put the two "mean" columns side by side, one column per group of schools.
means_by_group = {
    "public": public["mean"],
    "charter": charter["mean"],
}
combined = pd.DataFrame(means_by_group)
combined


Unnamed: 0,public,charter
total_enrollment,569.2444876783399,519.2808988764045
female_1,0.4878677042801556,0.5007736275763102
male_1,0.5121335927367056,0.4992263725911186
asian_1,0.1325888456549935,0.0315420778284253
black_1,0.2704267185473411,0.469826465312238
hispanic_1,0.4388800259403372,0.4238911089220743
multi_racial_1,0.0135103761348897,0.0115861604398932
native_american_1,0.0120395590142671,0.0110433270725341
white_1,0.1284494163424124,0.0481807029857178
students_with_disabilities_1,0.2135460440985732,0.1942850116256917
