# Analysis of CRDC referral and arrest data

In [2]:
import numpy as np
import pandas as pd
from src.transform_crdc_data import INDEX_COLS


In [3]:
# Read the four input CSVs in one pass; the targets on the left line up
# positionally with the paths below. low_memory=False makes pandas parse each
# file in a single chunk instead of inferring dtypes piecewise.
enr, ref, arr, cha = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (
        "input/enrollment.csv",
        "input/referrals.csv",
        "input/arrests.csv",
        "input/characteristics.csv",
    )
)


## United States 

### By disability status

In [64]:
(
    # Total enrollment per value of `variable`, kept as a one-column frame.
    enr.groupby("variable")[["enrollment"]]
    .sum()
    # Flip to a single wide row so each category becomes a column, derive the
    # "without_disabilities" count as overall minus (idea + section_504), then
    # flip back to one row per category.
    .T.assign(
        without_disabilities=lambda wide: wide["overall"]
        - (wide["idea"] + wide["section_504"])
    )
    .T
    # "overall" was only needed for the subtraction above.
    .drop(index="overall")
    # Attach referral and arrest totals, aligned on the `variable` index.
    .join(ref.groupby("variable")[["referrals"]].sum())
    .join(arr.groupby("variable")[["arrests"]].sum())
    # Rates are counts divided by enrollment for the same category.
    .assign(
        referral_rate=lambda totals: totals["referrals"] / totals["enrollment"],
        arrest_rate=lambda totals: totals["arrests"] / totals["enrollment"],
    )
)


Unnamed: 0_level_0,enrollment,referrals,arrests,referral_rate,arrest_rate
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
idea,6728064.0,59951.0,13996.0,0.008911,0.00208
section_504,1380146.0,8167.0,2021.0,0.005917,0.001464
without_disabilities,42814191.0,161352.0,37784.0,0.003769,0.000883


### By race

In [40]:
(
    # Enrollment comes from the "overall" rows only: that category already
    # counts students with disabilities, so adding the other rows would
    # double count them.
    enr.loc[enr["variable"] == "overall"]
    .groupby("race")[["enrollment"]]
    .sum()
    # Attach referral and arrest totals, aligned on the `race` index.
    # NOTE(review): in the displayed output the by-race referral and arrest
    # totals fall short of the by-disability totals by exactly the
    # section_504 counts, so these numerators may exclude section_504 rows
    # while the enrollment denominator includes them -- confirm against the
    # input files.
    .join(ref.groupby("race")[["referrals"]].sum())
    .join(arr.groupby("race")[["arrests"]].sum())
    # Rates are counts divided by enrollment for the same race.
    .assign(
        referral_rate=lambda totals: totals["referrals"] / totals["enrollment"],
        arrest_rate=lambda totals: totals["arrests"] / totals["enrollment"],
    )
)


Unnamed: 0_level_0,enrollment,referrals,arrests,referral_rate,arrest_rate
race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
american indian/alaskan native,502471.0,3703.0,842.0,0.00737,0.001676
asian,2626109.0,3516.0,507.0,0.001339,0.000193
black,7696714.0,63533.0,16528.0,0.008255,0.002147
hispanic,13862483.0,56888.0,13778.0,0.004104,0.000994
native hawaiian/pacific islander,193424.0,876.0,213.0,0.004529,0.001101
two or more races,1944875.0,9045.0,1955.0,0.004651,0.001005
white,24096325.0,83742.0,17957.0,0.003475,0.000745


### By sex and race

In [79]:
(
    # Same approach as the by-race table, grouped on (race, sex): enrollment
    # is taken from the "overall" rows only.
    enr.loc[enr["variable"] == "overall"]
    .groupby(["race", "sex"])[["enrollment"]]
    .sum()
    # Attach referral and arrest totals, aligned on the (race, sex) index.
    .join(ref.groupby(["race", "sex"])[["referrals"]].sum())
    .join(arr.groupby(["race", "sex"])[["arrests"]].sum())
    # Rates are counts divided by enrollment for the same race/sex pair.
    .assign(
        referral_rate=lambda totals: totals["referrals"] / totals["enrollment"],
        arrest_rate=lambda totals: totals["arrests"] / totals["enrollment"],
    )
)


Unnamed: 0_level_0,Unnamed: 1_level_0,enrollment,referrals,arrests,referral_rate,arrest_rate
race,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
american indian/alaskan native,female,245129.0,1279.0,291.0,0.005218,0.001187
american indian/alaskan native,male,257342.0,2424.0,551.0,0.009419,0.002141
asian,female,1281702.0,925.0,139.0,0.000722,0.000108
asian,male,1344407.0,2591.0,368.0,0.001927,0.000274
black,female,3763447.0,21998.0,5772.0,0.005845,0.001534
black,male,3933267.0,41535.0,10756.0,0.01056,0.002735
hispanic,female,6763088.0,17714.0,4149.0,0.002619,0.000613
hispanic,male,7099395.0,39174.0,9629.0,0.005518,0.001356
native hawaiian/pacific islander,female,93838.0,266.0,59.0,0.002835,0.000629
native hawaiian/pacific islander,male,99586.0,610.0,154.0,0.006125,0.001546
