In [1]:
import requests
import pandas as pd
import NAEP_helpers

In [2]:
url = 'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=mathematics&grade=4&variable=TOTAL&jurisdiction=NT,AL,AK,AZ,AR,CA,CO,CT,DE,DC,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,ME,MD,MA,MI,MN,MS,MO,MT,NE,NV,NH,NJ,NM,NY,NC,ND,OH,OK,OR,PA,RI,SC,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY&stattype=MN:MN,SD:SD,ALD:BA,ALD:PR,ALD:AD&Year=2015,2017,2019'

In [None]:
#Return a dataframe using the NAEP API. This will take about 90 seconds if you use the above url.
#df_helper = NAEP_helpers.NAEP_df(url, write_csv=False, csv_name='NAEP_all_2015-2019.csv')

In [6]:
#Load the NAEP results from a CSV file that was generated earlier from the NAEP data,
#instead of calling the NAEP API again. Reading the local file is much quicker.
#NOTE(review): the preview below shows 'Unnamed: 0.1' and 'Unnamed: 0' columns, which look like
#row indexes written into the CSV on earlier saves - confirm nothing downstream needs them.
df_NAEP = pd.read_csv(filepath_or_buffer='data/NAEP_all_2015-2019.csv')
df_NAEP.head(n=5)

Unnamed: 0.1,Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,0,2015,R3,2015,1,Grade 4,ALD:AD,MAT,4,MRPCM,AK,Alaska,TOTAL,All students,1,All students,6.010586,1,0
1,1,2015,R3,2015,1,Grade 4,ALD:BA,MAT,4,MRPCM,AK,Alaska,TOTAL,All students,1,All students,42.594169,1,0
2,2,2015,R3,2015,1,Grade 4,MN:MN,MAT,4,MRPCM,AK,Alaska,TOTAL,All students,1,All students,236.328417,1,0
3,3,2015,R3,2015,1,Grade 4,ALD:PR,MAT,4,MRPCM,AK,Alaska,TOTAL,All students,1,All students,29.102937,1,0
4,4,2015,R3,2015,1,Grade 4,SD:SD,MAT,4,MRPCM,AK,Alaska,TOTAL,All students,1,All students,31.032398,1,0


In [7]:
#The issue with the above dataframe is that it is in long form: each statistic (stattype) for a given
#year and jurisdiction sits on its own row, so the data isn't organized nicely by date or statistic,
#and some of the column labels aren't clear. The following helper function will fix this.
#In the preview below the result is indexed by (year, jurisdiction) and has one column per statistic:
#mean_score, std_dev, percent_basic, percent_proficient and percent_advanced.
df_NAEP_nice = NAEP_helpers.make_df_nice(df_NAEP)
df_NAEP_nice.head()

Unnamed: 0_level_0,stattype,mean_score,std_dev,percent_basic,percent_proficient,percent_advanced
year,jurisdiction,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015,AK,236.328417,31.032398,42.594169,29.102937,6.010586
2015,AL,230.977423,27.435285,48.51795,23.825388,2.254539
2015,AR,235.21547,27.520415,46.649121,28.472141,3.482822
2015,AZ,237.611755,30.20949,40.927175,31.843851,5.99887
2015,CA,231.549345,31.112294,43.19834,24.504857,4.65812


In [15]:
#The above dataframe is multi-indexed, which is pandas' way of representing higher-dimensional data.
#We can pick out a specific year by selecting on the year index level first
#(note the years included here are 2015, 2017, and 2019).
#In the output below, the selected frame is indexed by jurisdiction only:
df_NAEP_2015 = df_NAEP_nice.loc[2015]
df_NAEP_2015

stattype,mean_score,std_dev,percent_basic,percent_proficient,percent_advanced
jurisdiction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,236.328417,31.032398,42.594169,29.102937,6.010586
AL,230.977423,27.435285,48.51795,23.825388,2.254539
AR,235.21547,27.520415,46.649121,28.472141,3.482822
AZ,237.611755,30.20949,40.927175,31.843851,5.99887
CA,231.549345,31.112294,43.19834,24.504857,4.65812
CO,241.563576,30.794455,39.349124,34.224897,8.434812
CT,240.159469,30.202679,39.63612,33.736501,7.155954
DC,231.307525,34.304966,38.499983,23.638857,7.045604
DE,238.678019,27.943738,44.962958,31.719255,5.079614
FL,242.664321,27.562625,42.956734,34.921208,7.103973


In [20]:
#df_NAEP_2015 is keyed by jurisdiction alone, so the usual label-based lookups work:
#a row label selects one jurisdiction, a column label selects one statistic for every jurisdiction.
print(
    '2015 data from Virginia:\n',
    df_NAEP_2015.loc['VA'],
)
print('\n')
print(
    '2015 percent_basic from all states:\n',
    df_NAEP_2015.loc[:, 'percent_basic'],
)

2015 data from Virginia:
 stattype
mean_score            246.612622
std_dev                28.326821
percent_basic          39.969423
percent_proficient     36.962754
percent_advanced       10.386579
Name: VA, dtype: float64


2015 percent_basic from all states:
 jurisdiction
AK    42.594169
AL    48.517950
AR    46.649121
AZ    40.927175
CA    43.198340
CO    39.349124
CT    39.636120
DC    38.499983
DE    44.962958
FL    42.956734
GA    43.090523
HI    40.474543
IA    39.815226
ID    42.023292
IL    40.423855
IN    39.520797
KS    41.554863
KY    43.594546
LA    47.882892
MA    35.663281
MD    38.782547
ME    43.704819
MI    43.543420
MN    33.784223
MO    43.502680
MS    48.114841
MT    42.796060
NC    40.855376
ND    42.763857
NE    40.138156
NH    39.578462
NJ    39.399212
NM    46.573478
NT    41.611764
NV    44.068246
NY    43.961078
OH    40.610570
OK    47.589267
OR    41.939772
PA    37.685427
RI    42.639799
SC    43.029824
SD    43.075019
TN    42.325043
TX    41.897020
UT 

In [21]:
#If we instead want the state (jurisdiction) index first, then year, we can pass year_state=False
#to make_df_nice(). In the preview below each jurisdiction's 2015, 2017 and 2019 rows sit together:
df_NAEP_nice_stateFirst = NAEP_helpers.make_df_nice(df_NAEP, year_state=False)
df_NAEP_nice_stateFirst.head()

Unnamed: 0_level_0,stattype,mean_score,std_dev,percent_basic,percent_proficient,percent_advanced
jurisdiction,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,2015,236.328417,31.032398,42.594169,29.102937,6.010586
AK,2017,230.456278,34.794513,39.275507,26.43152,5.224436
AK,2019,232.307479,34.000678,39.690383,27.855508,5.31105
AL,2015,230.977423,27.435285,48.51795,23.825388,2.254539
AL,2017,232.170688,30.118126,42.140235,27.637375,3.559761
