In [6]:
import os 
import requests

class CollegeScorecardClient:
    """Small HTTP client for the College Scorecard API hosted on api.data.gov.

    Args:
        api_key (str, optional): Key sent with every request. When falsy, the
            placeholder ``"your_api_key_here"`` is used instead.
        timeout (float, optional): Seconds to wait on the server before
            ``requests`` gives up. Defaults to 30.
    """

    def __init__(self, api_key=None, timeout=30):
        self.base_url = "https://api.data.gov/ed/collegescorecard/v1/"
        self.api_key = api_key or "your_api_key_here"
        self.timeout = timeout

    def get_data(self, endpoint, params=None):
        """
        Get data from the College Scorecard API

        Args:
            endpoint (str): API endpoint to query, relative to ``base_url``
            params (dict): Query parameters; the dict is not modified

        Returns:
            dict: JSON response from the API

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # Work on a copy so the caller's dict is left untouched and never
        # ends up carrying the API key.
        query = dict(params or {})
        query['api_key'] = self.api_key

        # base_url already ends with "/", so drop any leading slash on the
        # endpoint to avoid a double slash in the URL.
        url = self.base_url + endpoint.lstrip('/')

        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, params=query, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_institutions(self, fields=None, filters=None, page=0, per_page=100):
        """
        Get institution-level data

        Args:
            fields (list | str): Fields to return; a list is comma-joined,
                a string is sent as-is
            filters (dict): Filters to apply, merged into the query parameters
            page (int): Page number
            per_page (int): Results per page

        Returns:
            dict: Institution data
        """
        params = {
            'page': page,
            'per_page': per_page
        }

        if fields:
            # Joining a bare string would split it into single characters.
            params['fields'] = fields if isinstance(fields, str) else ','.join(fields)

        if filters:
            params.update(filters)

        return self.get_data('schools', params)

# Shared client for the cells below; the key is read from the
# COLLEGE_SCORECARD_API_KEY environment variable (None when it is unset).
client = CollegeScorecardClient(
    api_key=os.environ.get("COLLEGE_SCORECARD_API_KEY")
)

In [34]:
def fetch_college_data(year, control=None, state=None, per_page=100, page=0):
    """Fetch one page of institution records for a given data year.

    Uses the module-level ``client`` to query the ``schools`` endpoint.

    Args:
        year (str | int): Data year used as the prefix of the year-scoped
            field names (e.g. ``"2022"``).
        control (int, optional): Ownership filter
            (1=Public, 2=Private nonprofit, 3=Private for-profit).
        state (str, optional): Value for the ``school.state`` filter.
        per_page (int): Results per page.
        page (int): Page number to request. Defaults to 0, the first page.

    Returns:
        list: The ``results`` entry of the API response.

    Raises:
        KeyError: If the response has no ``results`` key.
    """
    cost_fields = [
        f'{year}.cost.tuition.in_state',
        f'{year}.cost.tuition.out_of_state',
        f'{year}.cost.attendance.academic_year',
        f'{year}.cost.avg_net_price.public',
        f'{year}.cost.avg_net_price.private',
    ]
    student_fields = [
        f'{year}.student.size',
        f'{year}.student.demographics.race_ethnicity.white',
        f'{year}.student.demographics.race_ethnicity.black',
        f'{year}.student.demographics.race_ethnicity.hispanic',
        f'{year}.student.demographics.race_ethnicity.asian',
        f'{year}.student.demographics.race_ethnicity.aian',
        f'{year}.student.demographics.race_ethnicity.nhpi',
        f'{year}.student.demographics.race_ethnicity.two_or_more',
        f'{year}.student.demographics.race_ethnicity.non_resident_alien',
        f'{year}.student.demographics.race_ethnicity.unknown',
        f'{year}.student.demographics.first_generation',
    ]
    school_fields = [
        'school.name',
        'school.state',
        # NOTE(review): the table displayed in this notebook has no
        # school.control column -- confirm the API recognises this field.
        'school.control',
        'school.region_id',
        'school.ownership',
    ]
    fields = cost_fields + student_fields + school_fields

    filters = {}
    if control:
        filters['school.ownership'] = control  # 1=Public, 2=Private nonprofit, 3=Private for-profit
    if state:
        filters['school.state'] = state

    # Pass the page through so callers can fetch beyond the first page.
    data = client.get_institutions(
        fields=fields, filters=filters, page=page, per_page=per_page
    )
    return data['results']

In [35]:
import pandas as pd

# Pull the 2022 records with no ownership or state filter (the function's
# defaults) and load the returned records into a DataFrame.
data = fetch_college_data(year="2022")
df = pd.DataFrame(data)

In [38]:
# Show the fetched institution table.
df

Unnamed: 0,2022.cost.tuition.in_state,2022.cost.tuition.out_of_state,2022.cost.attendance.academic_year,2022.cost.avg_net_price.public,2022.cost.avg_net_price.private,2022.student.size,2022.student.demographics.race_ethnicity.white,2022.student.demographics.race_ethnicity.black,2022.student.demographics.race_ethnicity.hispanic,2022.student.demographics.race_ethnicity.asian,2022.student.demographics.race_ethnicity.aian,2022.student.demographics.race_ethnicity.nhpi,2022.student.demographics.race_ethnicity.two_or_more,2022.student.demographics.race_ethnicity.non_resident_alien,2022.student.demographics.race_ethnicity.unknown,2022.student.demographics.first_generation,school.name,school.state,school.region_id,school.ownership
0,10024.0,18634.0,23167.0,14982.0,,5196.0,0.0198,0.8955,0.0110,0.0019,0.0025,0.0015,0.0127,0.0115,0.0435,,Alabama A & M University,AL,5,1
1,8832.0,21216.0,26257.0,16755.0,,12776.0,0.5130,0.2528,0.0711,0.0819,0.0016,0.0005,0.0491,0.0237,0.0064,,University of Alabama at Birmingham,AL,5,1
2,,,,,,228.0,0.2851,0.6623,0.0307,0.0000,0.0044,0.0044,0.0000,0.0000,0.0132,,Amridge University,AL,5,2
3,11878.0,24770.0,25777.0,18240.0,,6985.0,0.7102,0.0873,0.0666,0.0389,0.0087,0.0016,0.0465,0.0149,0.0252,,University of Alabama in Huntsville,AL,5,1
4,11068.0,19396.0,21900.0,13527.0,,3296.0,0.0155,0.9251,0.0121,0.0015,0.0021,0.0009,0.0118,0.0221,0.0088,,Alabama State University,AL,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,,,,,24860.0,990.0,0.3182,0.1323,0.3525,0.0131,0.1646,0.0111,0.0020,0.0000,0.0061,,Refrigeration School Inc,AZ,6,3
96,2070.0,7854.0,11938.0,7371.0,,3999.0,0.4191,0.1193,0.3268,0.0323,0.0180,0.0048,0.0475,0.0033,0.0290,,Rio Salado College,AZ,6,1
97,,,,,26594.0,27.0,0.8889,0.0000,0.0370,0.0370,0.0370,0.0000,0.0000,0.0000,0.0000,,Roberto-Venn School of Luthiery,AZ,6,3
98,2070.0,7854.0,18529.0,13198.0,,4245.0,0.5062,0.0455,0.2895,0.0339,0.0443,0.0014,0.0509,0.0085,0.0198,,Scottsdale Community College,AZ,6,1


In [39]:
year = "2022"
tuition_cols = [
    f"{year}.cost.tuition.in_state",
    f"{year}.cost.tuition.out_of_state",
    f"{year}.cost.attendance.academic_year",
    f"{year}.cost.avg_net_price.public",
    f"{year}.cost.avg_net_price.private",
]
# Display label for each entry of tuition_cols, in the same order.
cost_labels = [
    "In-State Tuition",
    "Out-of-State Tuition",
    "Total Cost",
    "Net Price (Public)",
    "Net Price (Private)",
]

# Prepare data for bar chart
cost_data = pd.DataFrame(
    {
        "Institution": df["school.name"],
        "State": df["school.state"],
        "Type": df["school.ownership"].map(
            {1: "Public", 2: "Private Nonprofit", 3: "Private For-Profit"}
        ),
        # A cost column absent from df becomes NaN (missing) rather than 0,
        # so it cannot be mistaken for a real price of zero. Unparseable
        # values are coerced to NaN as well.
        **{
            label: pd.to_numeric(df.get(column, float("nan")), errors="coerce")
            for label, column in zip(cost_labels, tuition_cols)
        },
    }
)

cost_data.head()

Unnamed: 0,Institution,State,Type,In-State Tuition,Out-of-State Tuition,Total Cost,Net Price (Public),Net Price (Private)
0,Alabama A & M University,AL,Public,10024.0,18634.0,23167.0,14982.0,
1,University of Alabama at Birmingham,AL,Public,8832.0,21216.0,26257.0,16755.0,
2,Amridge University,AL,Private Nonprofit,,,,,
3,University of Alabama in Huntsville,AL,Public,11878.0,24770.0,25777.0,18240.0,
4,Alabama State University,AL,Public,11068.0,19396.0,21900.0,13527.0,


In [33]:
# List the column labels of the fetched frame.
# NOTE(review): the output recorded below contains no avg_net_price columns,
# unlike the table displayed earlier in this notebook -- presumably it was
# captured from an earlier run; re-run this cell to refresh it.
df.columns

Index(['2022.cost.tuition.in_state', '2022.cost.tuition.out_of_state',
       '2022.cost.attendance.academic_year', '2022.student.size',
       '2022.student.demographics.race_ethnicity.white',
       '2022.student.demographics.race_ethnicity.black',
       '2022.student.demographics.race_ethnicity.hispanic',
       '2022.student.demographics.race_ethnicity.asian',
       '2022.student.demographics.race_ethnicity.aian',
       '2022.student.demographics.race_ethnicity.nhpi',
       '2022.student.demographics.race_ethnicity.two_or_more',
       '2022.student.demographics.race_ethnicity.non_resident_alien',
       '2022.student.demographics.race_ethnicity.unknown',
       '2022.student.demographics.first_generation', 'school.name',
       'school.state', 'school.region_id', 'school.ownership'],
      dtype='object')