In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor

In [18]:
numbers = pd.read_csv('/Users/flatironschool/Absenteeism_Project/data/processed/combo_cleaned.csv')


In [19]:
numbers.tail()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,STNAM,LEANM,NCESSCH,SCHNAM,ALL_COHORT_1516,ALL_RATE_1516,LEA_STATE,LEA_STATE_NAME,...,sports_part,sports_rate,total_suspension_days,suspensed_day_rate,harassed,harassed_rate,activities_funds_rate,non_cert_rate,counselor_rate,absent_teacher_rate
21862,21862,21862,WYOMING,Washakie County School District #2,560582000393,Ten Sleep K-12,10,GE50,WY,WYOMING,...,31.0,0.281818,0.0,0.0,0.0,0.0,4407.512636,0.0,0.009091,0.0
21863,21863,21863,WYOMING,Teton County School District #1,560583000335,Jackson Hole High School,127,GE95,WY,WYOMING,...,355.0,0.554688,26.0,0.040625,0.0,0.0,4153.518984,0.0,0.004687,0.118939
21864,21864,21864,WYOMING,Teton County School District #1,560583000512,Summit High School,18,GE80,WY,WYOMING,...,,,10.0,0.185185,0.0,0.0,5469.872593,0.0,0.0,0.091491
21865,21865,21865,WYOMING,Weston County School District #7,560609000401,Upton High School,21,GE80,WY,WYOMING,...,85.0,0.85,0.0,0.0,0.0,0.0,6875.37,0.0,0.0,0.162162
21866,21866,21866,WYOMING,Washakie County School District #1,560624000343,Worland High School,105,75-79,WY,WYOMING,...,,,10.0,0.025,0.0,0.0,3015.755325,0.0,0.004275,0.331544


## Clean up graduation rates and add grad rate bins

In [20]:
#need to keep original reported grade and need column to modify and clean data
numbers['grad_slice'] = numbers['ALL_RATE_1516']

In [23]:
#remove "GE" and "LE" from ranges
numbers['grad_slice'].replace(['GE99'], '+100', inplace=True) # need to replace with 
numbers['grad_slice'].replace(['GE95'], '95', inplace=True)
numbers['grad_slice'].replace(['GE90'], '90', inplace=True)
numbers['grad_slice'].replace(['LE10'], '10', inplace=True)
numbers['grad_slice'].replace(['LE1'], '1', inplace=True)
numbers['grad_slice'].replace(['LE5'], '05', inplace=True)


In [22]:
#smallest range needs to be dealt with, has one digit before '-'
numbers['grad_slice'].replace(['6-9'], '6', inplace=True)

In [24]:
#take first two digits of rates to get rid of ranges
numbers['grad_slice'] = numbers['grad_slice'].str[:2]

In [26]:
#fix 100
numbers['grad_slice'].replace(['+1'], '100', inplace=True)

In [27]:
numbers.reset_index(inplace=True)

In [35]:
#get rid of very small schools
grad_num = numbers[numbers['ALL_COHORT_1516'] >= 31]

In [38]:
#create the binned categories
grad_num['grad_rate_bin'] = pd.cut(grad_num['grad_slice'].astype(int), [0, 59, 79, 89, 99, 100],
      labels = ['0-59%', '60-79%', '80-89%', '90-99%', '100%'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [40]:
grad_num.tail()

Unnamed: 0.2,index,Unnamed: 0,Unnamed: 0.1,STNAM,LEANM,NCESSCH,SCHNAM,ALL_COHORT_1516,ALL_RATE_1516,LEA_STATE,...,total_suspension_days,suspensed_day_rate,harassed,harassed_rate,activities_funds_rate,non_cert_rate,counselor_rate,absent_teacher_rate,grad_slice,grad_rate_bin
21854,21854,21854,21854,WYOMING,Sheridan County School District #1,560569000311,Big Horn High School,37,GE90,WY,...,2.0,0.013245,4.0,0.02649,1738.913907,0.0,0.006623,0.846154,90,90-99%
21858,21858,21858,21858,WYOMING,Sheridan County School District #2,560569500360,Sheridan High School,236,89,WY,...,46.0,0.046796,0.0,0.0,1694.559176,0.0,0.005086,0.198779,89,80-89%
21861,21861,21861,21861,WYOMING,Sweetwater County School District #2,560576200324,Green River High School,176,85-89,WY,...,110.0,0.14157,1.0,0.001287,1840.87749,0.0,0.003861,0.2,85,80-89%
21863,21863,21863,21863,WYOMING,Teton County School District #1,560583000335,Jackson Hole High School,127,GE95,WY,...,26.0,0.040625,0.0,0.0,4153.518984,0.0,0.004687,0.118939,95,90-99%
21866,21866,21866,21866,WYOMING,Washakie County School District #1,560624000343,Worland High School,105,75-79,WY,...,10.0,0.025,0.0,0.0,3015.755325,0.0,0.004275,0.331544,75,60-79%


## Create Level Up Bins 

In [41]:
#level up bins
#create the binned categories
grad_num['level_up_bins'] = pd.cut(grad_num['grad_slice'].astype(int), 
    [0, 59, 79, 89, 99, 100], labels = ['60-79% Level Up Rate', '80-89% Level Up Rate', '90-99% Level Up Rate', '100% Level Up Rate', '100% Top Rate'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [None]:
numbers.tail()

In [42]:
grad_num['level_up_chronic_absent_a'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.25, axis=1)
grad_num['level_up_chronic_absent_b'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.75, axis=1)
grad_num['level_up_chronic_absent_a'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.25, axis=1)
grad_num['level_up_chronic_absent_b'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.75, axis=1)
grad_num['level_up_chronic_absent_a'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.25, axis=1)
grad_num['level_up_chronic_absent_b'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.75, axis=1)
grad_num['level_up_chronic_absent_a'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.25, axis=1)
grad_num['level_up_chronic_absent_b'] = grad_num[grad_num['grad_rate_bin']=='60-79% Level Up Rate'].chronic_absent_rate.quantile(.75, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See

In [44]:
grad_num.head()

Unnamed: 0.2,index,Unnamed: 0,Unnamed: 0.1,STNAM,LEANM,NCESSCH,SCHNAM,ALL_COHORT_1516,ALL_RATE_1516,LEA_STATE,...,harassed_rate,activities_funds_rate,non_cert_rate,counselor_rate,absent_teacher_rate,grad_slice,grad_rate_bin,level_up_bins,level_up_chronic_absent_a,level_up_chronic_absent_b
0,0,0,0,ALABAMA,Albertville City,10000500871,Albertville High Sch,296,92,AL,...,0.0,2811.937359,0.0,0.003475,0.378788,92,90-99%,100% Level Up Rate,,
1,1,1,1,ALABAMA,Marshall County,10000600872,Asbury Sch,67,GE95,AL,...,0.003711,4825.189777,0.0,0.002783,0.1,95,90-99%,100% Level Up Rate,,
2,2,2,2,ALABAMA,Marshall County,10000600878,Douglas High Sch,153,85-89,AL,...,0.008418,5317.932795,0.0,0.001684,0.105263,85,80-89%,90-99% Level Up Rate,,
3,3,3,3,ALABAMA,Marshall County,10000600883,Kate D Smith DAR High Sch,120,80-84,AL,...,0.0,5909.375686,0.0,0.002179,0.068966,80,80-89%,90-99% Level Up Rate,,
4,4,4,4,ALABAMA,Marshall County,10000601585,Brindlee Mt High Sch,94,85-89,AL,...,0.0,3962.305785,0.0,0.002755,0.10101,85,80-89%,90-99% Level Up Rate,,


In [None]:
for each_school in rate_bin:
    if numbers['level_up_bins'] == '90%+ Level Up Rate'
        numbers['level_up_chronic_absent_a'] = numbers['ALL_RATE_1516'].quantile(.25)
        nubmers['level_up_chronic_absent_b'] = numbers['ALL_RATE_1516'].quantile(.75)
    if numbers['level_up_bins'] == '80-89%+ Level Up Rate']
        numbers['level_up_chronic_absent_a'] = numbers['ALL_RATE_1516'].quantile(.25)
        nubmers['level_up_chronic_absent_b'] = numbers['ALL_RATE_1516'].quantile(.75)
    if numbers['level_up_bins'] == '60-79%+ Level Up Rate']
        numbers['level_up_chronic_absent_a'] = numbers['ALL_RATE_1516'].quantile(.25)
        nubmers['level_up_chronic_absent_b'] = numbers['ALL_RATE_1516'].quantile(.75)
        

In [None]:
grad_90 = simple[simple['four_rate_bins'] == '90%+']
grad_80 = simple[simple['four_rate_bins'] == '80-89%']
grad_70 = simple[simple['four_rate_bins'] == '60-79%']
grad_50 = simple[simple['four_rate_bins'] == '0-59%']

In [None]:
grad_90.head()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['chronic_absent_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['chronic_absent_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['chronic_absent_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['ap_ib_de_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['ap_ib_de_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['ap_ib_de_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['sat_act_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['sat_act_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['sat_act_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['pass_algebra_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['pass_algebra_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['pass_algebra_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['geometry_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['geometry_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['geometry_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['algebra2_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['algebra2_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['algebra2_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['calc_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['calc_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['calc_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['sports_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['sports_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['sports_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['suspensed_day_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['suspensed_day_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['suspensed_day_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['harassed_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['harassed_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['harassed_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['non_cert_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['non_cert_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['non_cert_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['counselor_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['counselor_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['counselor_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
plt.figure(figsize=(15,8))
plt.hist(grad_90['absent_teacher_rate'], color = 'blue', label = '90%+ Graduation Rate')
plt.hist(grad_80['absent_teacher_rate'], color = 'red', label = '80-89% Graduation Rate')
plt.hist(grad_70['absent_teacher_rate'], color = 'purple', label = '60-79% Graduation Rate')
plt.legend()

In [None]:
sns.catplot(x="four_rate_bins", y="chronic_absent_rate", kind="box", data=simple);

In [None]:
sns.boxplot(x=grad_50['chronic_absent_rate'],palette="Set3")
sns.boxplot(x=grad_70['chronic_absent_rate'],palette="Set3")
sns.boxplot(x=grad_80['chronic_absent_rate'],palette="Set3")
sns.boxplot(x=grad_90['chronic_absent_rate'],palette="Set3")

In [None]:
simple.chronic_absent_rate.quantile([0.25,0.5,0.75])


In [None]:
np.nanpercentile(grad_90.chronic_absent_rate, (25, 50, 75), axis=0)

In [None]:
#great metric showing steady change with graduation rates
print('90%+ Graduate Chronic Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_90.chronic_absent_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Chronic Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_80.chronic_absent_rate.quantile([0.25,0.5,0.75]))
print('60%+ Graduate Chronic Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_70.chronic_absent_rate.quantile([0.25,0.5,0.75]))
print('50%+ Graduate Chronic Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_50.chronic_absent_rate.quantile([0.25,0.5,0.75]))

In [None]:
#great metric as it shows steady change with graduation rate
print('90%+ Graduate Sports Participation Rate (25th, 50th, 75th percentile):\n',grad_90.sports_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Sports Participation Rate (25th, 50th, 75th percentile):\n',grad_80.sports_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Sports Participation Rate (25th, 50th, 75th percentile):\n',grad_70.sports_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Sports Participation Rate (25th, 50th, 75th percentile):\n',grad_50.sports_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="sports_rate", kind="box", data=simple);

In [None]:
#Useful metric
print('90%+ Graduate AP/IB/DE Rate (25th, 50th, 75th percentile):\n',grad_90.ap_ib_de_rate.quantile([.25, .50, .75]))
print('80%+ Graduate AP/IB/DE Rate (25th, 50th, 75th percentile):\n',grad_80.ap_ib_de_rate.quantile([.25, .50, .75]))
print('60%+ Graduate AP/IB/DE Rate (25th, 50th, 75th percentile):\n',grad_70.ap_ib_de_rate.quantile([.25, .50, .75]))
print('50%+ Graduate AP/IB/DE Rate (25th, 50th, 75th percentile):\n',grad_50.ap_ib_de_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="ap_ib_de_rate", kind="box", data=simple);

In [None]:
#useful metric for most categories
print('90%+ Graduate Teacher Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_90.absent_teacher_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Teacher Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_80.absent_teacher_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Teacher Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_70.absent_teacher_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Teacher Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_50.absent_teacher_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="absent_teacher_rate", kind="box", data=simple);

In [None]:
#No real difference for three categories.
print('90%+ Graduate Counselor Rate (25th, 50th, 75th percentile):\n',grad_90.counselor_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Counselor Rate (25th, 50th, 75th percentile):\n',grad_80.counselor_rate.quantile([.25, .50, .75]))
print('60%+ Graduate counselor Rate (25th, 50th, 75th percentile):\n',grad_70.counselor_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Counselor Rate (25th, 50th, 75th percentile):\n',grad_50.counselor_rate.quantile([.25, .50, .75]))

In [None]:
#Useful metric for 60, 80 and 90 categories
print('90%+ Graduate Non-Certified Teacher Rate (25th, 50th, 75th percentile):\n',grad_90.non_cert_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Non-Certified Teacher Rate (25th, 50th, 75th percentile):\n',grad_80.non_cert_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Non-Certified Teacher Rate (25th, 50th, 75th percentile):\n',grad_70.non_cert_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Non-Certified Teacher Rate (25th, 50th, 75th percentile):\n',grad_50.non_cert_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="non_cert_rate", kind="box", data=simple);

In [None]:
#similar rates for all categories
print('90%+ Graduate Geometry Rate (25th, 50th, 75th percentile):\n',grad_90.geometry_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Geometry Rate (25th, 50th, 75th percentile):\n',grad_80.geometry_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Geometry Rate (25th, 50th, 75th percentile):\n',grad_70.geometry_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Geometry Rate (25th, 50th, 75th percentile):\n',grad_50.geometry_rate.quantile([.25, .50, .75]))

In [None]:
#nothing
print('90%+ Graduate Harassment & Bullying Rate (25th, 50th, 75th percentile):\n',grad_90.harassed_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Harassment & Bullying Rate (25th, 50th, 75th percentile):\n',grad_80.harassed_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Harassment & Bullying Rate (25th, 50th, 75th percentile):\n',grad_70.harassed_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Harassment & Bullying Rate (25th, 50th, 75th percentile):\n',grad_50.harassed_rate.quantile([.25, .50, .75]))

In [None]:
#Could be a good metric. 
print('90%+ Graduate Suspension Day Rate (25th, 50th, 75th percentile):\n',grad_90.suspensed_day_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Suspension Day Rate (25th, 50th, 75th percentile):\n',grad_80.suspensed_day_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Suspension Day Rate (25th, 50th, 75th percentile):\n',grad_70.suspensed_day_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Suspension Day Rate (25th, 50th, 75th percentile):\n',grad_50.suspensed_day_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="suspensed_day_rate", kind="box", data=simple);


In [None]:
#another good metric
print('90%+ Graduate Calculus Rate (25th, 50th, 75th percentile):\n',grad_90.calc_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Calculus Rate (25th, 50th, 75th percentile):\n',grad_80.calc_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Calculus Rate (25th, 50th, 75th percentile):\n',grad_70.calc_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Calculus Rate (25th, 50th, 75th percentile):\n',grad_50.calc_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="calc_rate", kind="box", data=simple);

In [None]:
#no much here
print('90%+ Graduate Algebra II Rate (25th, 50th, 75th percentile):\n',grad_90.algebra2_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Algebra II Rate (25th, 50th, 75th percentile):\n',grad_80.algebra2_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Algebra II Rate (25th, 50th, 75th percentile):\n',grad_70.algebra2_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Algebra II Rate (25th, 50th, 75th percentile):\n',grad_50.algebra2_rate.quantile([.25, .50, .75]))

In [None]:
#not much here
print('90%+ Graduate Pass Algebra I Rate (25th, 50th, 75th percentile):\n',grad_90.pass_algebra_rate.quantile([.25, .50, .75]))
print('80%+ Graduate Pass Algebra I Rate (25th, 50th, 75th percentile):\n',grad_80.pass_algebra_rate.quantile([.25, .50, .75]))
print('60%+ Graduate Pass Algebra I Rate (25th, 50th, 75th percentile):\n',grad_70.pass_algebra_rate.quantile([.25, .50, .75]))
print('50%+ Graduate Pass Algebra I Rate (25th, 50th, 75th percentile):\n',grad_50.pass_algebra_rate.quantile([.25, .50, .75]))

In [None]:
#useful metric
print('90%+ Graduate SAT/ACT Rate (25th, 50th, 75th percentile):\n',grad_90.sat_act_rate.quantile([.25, .50, .75]))
print('80%+ Graduate SAT/ACT Rate (25th, 50th, 75th percentile):\n',grad_80.sat_act_rate.quantile([.25, .50, .75]))
print('60%+ Graduate SAT/ACT Rate (25th, 50th, 75th percentile):\n',grad_70.sat_act_rate.quantile([.25, .50, .75]))
print('50%+ Graduate SAT/ACT Rate (25th, 50th, 75th percentile):\n',grad_50.sat_act_rate.quantile([.25, .50, .75]))

In [None]:
sns.catplot(x="four_rate_bins", y="sat_act_rate", kind="box", data=simple);

In [None]:
#level up bins
#create the binned categories
simple['level_up_bins'] = pd.cut(simple['grad_slice'].astype(int), 
    [0, 59, 79, 89, 100], labels = ['60-79% Level Up Rate', '80-89% Level Up Rate', '90%+ Level Up Rate', '100% Level Up Rate'])

In [None]:
simple.head()

In [None]:
len(simple['grad_slice'] == 100)

In [None]:
print('100% Graduate Sports Participation Rate (25th, 50th, 75th percentile):\n',grad_100.sports_rate.quantile([.25, .50, .75]))

In [None]:
print('100% Graduate Chronic Absenteeism Rate (25th, 50th, 75th percentile):\n',grad_100.chronic_absent_rate.quantile([.25, .50, .75]))

In [None]:
#code from greg to make loop to go through variables to choose most important 
#one for each school
var_sweep = {}
for col in cols:
  delta = np.stdev(data[col])
  tmp = data.copy()
  tmp[col] = tmp[col] + delta
  var_sweep[col] = model.predict(tmp.values)
