## Import Dependencies

In [4]:
# Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
import json
import requests

## Educationdata.urban.org

In [11]:
topic = 'colleges'
source = 'ccd'
endpoint = 'directory'
year = '2021'

url = f'https://educationdata.urban.org/api/v1/college-university/ipeds/enrollment-headcount/{year}/1/'

print(url)
response = requests.get(url)
print(response)
data = response.json()
data

https://educationdata.urban.org/api/v1/college-university/ipeds/enrollment-headcount/2021/1/
<Response [200]>


{'count': 174060,
 'next': 'https://educationdata.urban.org/api/v1/college-university/ipeds/enrollment-headcount/2021/1/?page=2',
 'previous': None,
 'results': [{'unitid': 100654,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 1,
   'sex': 1,
   'headcount': 59},
  {'unitid': 100654,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 1,
   'sex': 99,
   'headcount': 102},
  {'unitid': 100654,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 1,
   'sex': 2,
   'headcount': 43},
  {'unitid': 100663,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 1,
   'sex': 1,
   'headcount': 3440},
  {'unitid': 100663,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 1,
   'sex': 99,
   'headcount': 8575},
  {'unitid': 100663,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 1,
   'sex': 2,
   'headcount': 5135},
  {'unitid': 100690,
   'fips': 1,
   'year': 2021,
   'level_of_study': 1,
   'race': 

In [36]:
baseurl = 'https://educationdata.urban.org/api/v1/college-university/ipeds/enrollment-headcount/summaries?'
params = {
    'var': 'headcount',
    'stat': 'sum',
    'by': 'unitid',
    # 1—Undergraduate 2—Graduate 3—First professional 4—Postbaccalaureate 99—Total
    'level_of_study': '1',
    'year': ''
}

years = ['2017', '2018', '2019', '2020', '2021']
undergrad_headcount_data = {}

for year in years:
    params['year'] = year
    print(f'Getting {year} data.')
    response = requests.get(baseurl, params=params)
    data = response.json()
    undergrad_headcount_data[f'{year}'] = pd.DataFrame(data['results'])['headcount'].sum()

undergrad_headcount_data

Getting 2017 data.
Getting 2018 data.
Getting 2019 data.
Getting 2020 data.
Getting 2021 data.


{'2017': 22962495,
 '2018': 22723047,
 '2019': 22723047,
 '2020': 22525259,
 '2021': 21687926}

In [20]:
undergrad_counts = []
dflist = []
for year in years:
    df = pd.DataFrame(undergrad_headcount_data[f'{year}']['results'])
    dflist.append(df)
    undergrad_counts.append(df['headcount'].sum())

Unnamed: 0,Year,Undergrad Headcount
0,2017,22962495
1,2018,22723047
2,2019,22723047
3,2020,22525259
4,2021,21687926


In [49]:
params['level_of_study'] = 99

total_headcount_data = {}

for year in years:
    params['year'] = year
    print(f'Getting {year} data.')
    response = requests.get(baseurl, params=params)
    data = response.json()
    total_headcount_data[f'{year}'] = pd.DataFrame(data['results'])['headcount'].sum()

total_headcount_data

Getting 2017 data.
Getting 2018 data.
Getting 2019 data.
Getting 2020 data.
Getting 2021 data.


{'2017': 26887067,
 '2018': 26685592,
 '2019': 26685592,
 '2020': 26497087,
 '2021': 25762172}

In [51]:
edu_df = pd.DataFrame({
    'Undergraduate': undergrad_headcount_data,
    'Total Postsecondary': total_headcount_data
})
edu_df[]

Unnamed: 0,Undergraduate,Total Postsecondary
2017,22962495,26887067
2018,22723047,26685592
2019,22723047,26685592
2020,22525259,26497087
2021,21687926,25762172


In [54]:
baseurl = 'https://educationdata.urban.org/api/v1/schools/ccd/enrollment/summaries?'

params = {
    'var': 'enrollment',
    'stat': 'sum',
    'by': 'grade',
    'year': ''
}

years = ['2017', '2018', '2019', '2020']
enrollment_by_grade = {}

for year in years:
    params['year'] = year
    print(f'Getting {year} data.')
    response = requests.get(baseurl, params=params)
    data = response.json()
    enrollment = {}
    for x in data['results']:
        grade = x['grade']
        enrollment[f'{grade}'] = x['enrollment']
    enrollment_by_grade[f'{year}'] = enrollment

enrollment_by_grade

Getting 2017 data.
Getting 2018 data.
Getting 2019 data.
Getting 2020 data.


{'2017': {'-1': 1255505,
  '0': 3710571,
  '1': 3697231,
  '2': 3711904,
  '3': 3816334,
  '4': 3887802,
  '5': 3905590,
  '6': 3854926,
  '7': 3805355,
  '8': 3798345,
  '9': 4018689,
  '10': 3855133,
  '11': 3695341,
  '12': 3640445,
  '13': 1778,
  '14': 7109,
  '15': 135046,
  '99': 50797126},
 '2018': {'-1': 1318877,
  '0': 3702779,
  '1': 3666344,
  '2': 3678424,
  '3': 3732953,
  '4': 3802452,
  '5': 3901341,
  '6': 3916316,
  '7': 3872358,
  '8': 3811121,
  '9': 4025850,
  '10': 3866284,
  '11': 3667744,
  '12': 3654021,
  '13': 1787,
  '14': 6463,
  '15': 125961,
  '99': 50751092},
 '2019': {'-1': 1360941,
  '0': 3737937,
  '1': 3669432,
  '2': 3661781,
  '3': 3709415,
  '4': 3728758,
  '5': 3824609,
  '6': 3919004,
  '7': 3940340,
  '8': 3884860,
  '9': 4065478,
  '10': 3885743,
  '11': 3685420,
  '12': 3624904,
  '13': 1803,
  '14': 4467,
  '15': 127450,
  '99': 50833994},
 '2020': {'-1': 1080497,
  '0': 3391418,
  '1': 3538666,
  '2': 3545463,
  '3': 3568901,
  '4': 3624353

In [63]:
k_12_enrollmentdata = pd.DataFrame(enrollment_by_grade)
k_12_enrollmentdata = k_12_enrollmentdata.transpose()
k_12_enrollmentdata = k_12_enrollmentdata[['-1','1','2','3','4','5','6','7','8','9','10','11','12']]
k_12_enrollmentdata

Unnamed: 0,-1,1,2,3,4,5,6,7,8,9,10,11,12
2017,1255505,3697231,3711904,3816334,3887802,3905590,3854926,3805355,3798345,4018689,3855133,3695341,3640445
2018,1318877,3666344,3678424,3732953,3802452,3901341,3916316,3872358,3811121,4025850,3866284,3667744,3654021
2019,1360941,3669432,3661781,3709415,3728758,3824609,3919004,3940340,3884860,4065478,3885743,3685420,3624904
2020,1080497,3538666,3545463,3568901,3624353,3663730,3765069,3878814,3907716,4031746,3912087,3714309,3665497
