In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from nycschools import schools, geo, segregation as seg
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Get long data for all schools, all years: one row per school (dbn),
# academic year (ay) and demographic group (g), with the count in n.
demos = schools.load_school_demographics()
groups = [
    'asian_n',
    'black_n',
    'hispanic_n',
    'multi_racial_n',
    'native_american_n',
    'white_n',
    'missing_race_ethnicity_data_n',
]
index = ['dbn', 'ay', 'district']
# melt picks out the id and value columns itself, so demos does not need to be
# sliced down to `index + groups` first.
data = pd.melt(demos, id_vars=index, value_vars=groups, var_name='g', value_name='n')
data


Unnamed: 0,dbn,ay,district,g,n
0,01M015,2005,1,asian_n,10
1,01M015,2006,1,asian_n,18
2,01M015,2007,1,asian_n,16
3,01M015,2008,1,asian_n,16
4,01M015,2009,1,asian_n,16
...,...,...,...,...,...
198403,84X730,2018,84,missing_race_ethnicity_data_n,0
198404,84X730,2019,84,missing_race_ethnicity_data_n,0
198405,84X730,2020,84,missing_race_ethnicity_data_n,1
198406,84X730,2021,84,missing_race_ethnicity_data_n,2


In [3]:
def calc_M(district):
    """Calculate a district's mutual information index (M) for each academic year.

    Reads the notebook-level long-format ``data`` frame (columns ``dbn``, ``ay``,
    ``district``, ``g``, ``n``) and calls ``seg.mutual_information`` once per
    academic year found for the district.

    Parameters
    ----------
    district
        Value compared against ``data.district`` to select the district's rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``ay``, ``M`` and ``district``, one row per academic year,
        sorted by ``ay``.
    """
    # The school id is handed to seg.mutual_information as column `u`
    # (presumably its "unit" column -- TODO confirm against nycschools).
    district_rows = data[data.district == district].rename(columns={'dbn': 'u'})

    # Only school/group cells with at least one student are passed on.
    district_rows = district_rows[district_rows.n > 0]

    # mutual_information yields an (M, H) pair; only M is reported here.
    per_year = []
    for year in district_rows.ay.unique():
        m_index, _ = seg.mutual_information(district_rows[district_rows.ay == year])
        per_year.append((year, m_index))

    # assign() returns a new frame, so no column is ever set on a slice.
    return (
        pd.DataFrame(per_year, columns=['ay', 'M'])
        .assign(district=district)
        .sort_values(by='ay')
    )


# Per-district, per-year M values stacked into a single frame.
district_frames = [calc_M(district) for district in data.district.unique()]
m_data = pd.concat(district_frames)

# M computed over every school at once for academic year 2022,
# using the same `u` column name and n > 0 filter as calc_M.
all_schools = data.rename(columns={'dbn': 'u'})
all_schools = all_schools[(all_schools.n > 0) & (all_schools.ay == 2022)]
M, H = seg.mutual_information(all_schools)

# Show the all-schools M next to the mean of the district-level M values for 2022.
M, m_data.loc[m_data.ay == 2022, 'M'].mean()

(0.4077531081698128, 0.17255912753825645)

In [4]:
# District numbers that appear on Brooklyn rows, limited to those below 33.
# NOTE(review): the < 33 cutoff presumably drops non-geographic districts
# (district 84 appears in the data) -- confirm.
bk = demos[demos.boro == 'Brooklyn']["district"].unique()
bk = sorted([x for x in bk if x <33])

# sort_values returns a new frame; sorting a filtered slice in place is avoided.
graph_data = m_data[m_data.district.isin(bk)].sort_values(by=["district", "ay"])


# One line per district showing M over the academic years.
fig = px.line(graph_data, x='ay', y='M', color='district', markers=True,
              labels={
                  "ay": "Academic Year",
                  "M": "M Index"
              },
              title="Theil's M Index, Brooklyn Schools")

fig.update_traces(mode="markers+lines", hoverinfo="name+y+x")

fig.update_layout(
    hovermode="closest",
    plot_bgcolor="white",
    legend_title_text='District',
    xaxis=dict(title='Academic Year'),
    yaxis=dict(title='M Index'),
    # Legend is anchored inside the plot area, at the top-left corner.
    legend=dict(
        title_font_size=16,
        font_size=12,
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

fig.show()

In [5]:
# Per-school count and share of Black + Hispanic students.
# These two columns are added to `demos` itself, as before, so any later cell
# that reads them keeps working.
demos["black_hispanic_n"] = demos["black_n"] + demos["hispanic_n"]
demos["black_hispanic_pct"] = demos["black_hispanic_n"] / demos["total_enrollment"]

# Unweighted mean of the school-level shares for each Brooklyn district,
# restricted to the two comparison years.
in_scope = demos.district.isin(bk) & demos.ay.isin([2005, 2022])
bar_data = (
    demos.loc[in_scope, ["district", "ay", "black_hispanic_pct"]]
    .groupby(["district", "ay"])
    .mean()
    .reset_index()
)

# Express the share as a percentage, then attach each district/year's M value.
bar_data['black_hispanic_pct'] *= 100
bar_data = bar_data.merge(m_data, on=["district", "ay"], how="inner")

# String-typed district and year are treated as categories by plotly
# (discrete x positions and one bar colour per year).
bar_data = bar_data.astype({"district": str, "ay": str})
bar_data['M_label'] = bar_data['M'].apply(lambda x: f'M ({x:.2f})')

fig = px.bar(bar_data, x='district', y='black_hispanic_pct', color='ay', barmode='group',
             labels={'district': 'District',
                     'black_hispanic_pct': '% Black/Hispanic', 'ay': 'Academic Year'},
             title='Percentage of Black and Hispanic Students in Brooklyn School Districts (2005, 2022)',
             text='M_label',
             hover_data=["M", "ay"])

# customdata follows the hover_data order: [0] is M, [1] is ay.
fig.update_traces(hovertemplate='District: %{x}<br>AY: %{customdata[1]}<br>Percentage: %{y:.2f}%<br>M Index: %{customdata[0]:.2f}')

# barmode='group' is already set by px.bar above, so it is not repeated here.
fig.update_layout(xaxis_title="District",
                  yaxis_title="Percent Black/Hispanic")

fig.show()

In [9]:
# Load the "neighborhood-names.geojson" resource through nycschools' data loader
# and display it. The rendered output shows one row per neighborhood with its
# name, borough and a POINT geometry.
# NOTE(review): this import sits outside the notebook's top import cell, and
# `x` is a generic name that is easy to overwrite in a later cell.
from nycschools.dataloader import load
x = load("neighborhood-names.geojson")
x

Unnamed: 0,stacked,name,annoline1,annoline3,objectid,annoangle,annoline2,borough,geometry
0,1,Wakefield,Wakefield,,1,0.0,,Bronx,POINT (-73.84720 40.89471)
1,2,Co-op City,Co-op,,2,0.0,City,Bronx,POINT (-73.82994 40.87429)
2,1,Eastchester,Eastchester,,3,0.0,,Bronx,POINT (-73.82781 40.88756)
3,1,Fieldston,Fieldston,,4,0.0,,Bronx,POINT (-73.90564 40.89544)
4,1,Riverdale,Riverdale,,5,0.0,,Bronx,POINT (-73.91259 40.89083)
...,...,...,...,...,...,...,...,...,...
294,2,Lighthouse Hill,Lighthouse,,295,0.0,Hill,Staten Island,POINT (-74.13793 40.57651)
295,2,Richmond Valley,Richmond,,296,0.0,Valley,Staten Island,POINT (-74.22957 40.51954)
296,1,Malba,Malba,,297,0.0,,Queens,POINT (-73.82668 40.79060)
297,2,Highland Park,Highland,,298,0.0,Park,Brooklyn,POINT (-73.89028 40.68249)
