In [15]:
# Dependencies and Setup
import hvplot.pandas
import pandas as pd
import matplotlib.pyplot as plt
import requests
from scipy.stats import linregress

# Import API key
from api_keys import geoapify_key

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

In [16]:
test_df = pd.read_csv("data/MERGED2021_22_PP.csv",low_memory=False)

test_df.head()

Unnamed: 0,UNITID,OPEID,OPEID6,INSTNM,CITY,STABBR,ZIP,ACCREDAGENCY,INSTURL,NPCURL,...,BBRR4_PP_MALE_DISCHARGE,BBRR4_PP_NOMALE_N,BBRR4_PP_NOMALE_DFLT,BBRR4_PP_NOMALE_DLNQ,BBRR4_PP_NOMALE_FBR,BBRR4_PP_NOMALE_DFR,BBRR4_PP_NOMALE_NOPROG,BBRR4_PP_NOMALE_MAKEPROG,BBRR4_PP_NOMALE_PAIDINFULL,BBRR4_PP_NOMALE_DISCHARGE
0,100654,100200.0,1002.0,Alabama A & M University,Normal,AL,35762,Southern Association of Colleges and Schools C...,www.aamu.edu/,www.aamu.edu/admissions-aid/tuition-fees/net-p...,...,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed
1,100663,105200.0,1052.0,University of Alabama at Birmingham,Birmingham,AL,35294-0110,Southern Association of Colleges and Schools C...,https://www.uab.edu/,https://tcc.ruffalonl.com/University of Alabam...,...,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed
2,100690,2503400.0,25034.0,Amridge University,Montgomery,AL,36117-3553,Southern Association of Colleges and Schools C...,https://www.amridgeuniversity.edu/,https://www2.amridgeuniversity.edu:9091/,...,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed
3,100706,105500.0,1055.0,University of Alabama in Huntsville,Huntsville,AL,35899,Southern Association of Colleges and Schools C...,www.uah.edu/,finaid.uah.edu/,...,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed
4,100724,100500.0,1005.0,Alabama State University,Montgomery,AL,36104-0271,Southern Association of Colleges and Schools C...,www.alasu.edu/,www.alasu.edu/cost-aid/tuition-costs/net-price...,...,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed,PrivacySuppressed


In [17]:
test_df.columns

Index(['UNITID', 'OPEID', 'OPEID6', 'INSTNM', 'CITY', 'STABBR', 'ZIP',
       'ACCREDAGENCY', 'INSTURL', 'NPCURL',
       ...
       'BBRR4_PP_MALE_DISCHARGE', 'BBRR4_PP_NOMALE_N', 'BBRR4_PP_NOMALE_DFLT',
       'BBRR4_PP_NOMALE_DLNQ', 'BBRR4_PP_NOMALE_FBR', 'BBRR4_PP_NOMALE_DFR',
       'BBRR4_PP_NOMALE_NOPROG', 'BBRR4_PP_NOMALE_MAKEPROG',
       'BBRR4_PP_NOMALE_PAIDINFULL', 'BBRR4_PP_NOMALE_DISCHARGE'],
      dtype='object', length=3214)

In [18]:
#pd.set_option('display.max_rows', None)

# Calculate the sum of null values for each column
null_sum = test_df.isnull().sum()  

# Get the total number of rows in the DataFrame
column_count = len(test_df)  

 # Calculate the percentage of null values for each column
null_percentage = (null_sum / column_count) 

# Filter the results to include only columns with null percentage less than 30%
filtered_results = null_percentage[null_percentage < 0.3]
print(filtered_results.to_frame())

                                   0
UNITID                      0.000000
OPEID                       0.003057
OPEID6                      0.003057
INSTNM                      0.000000
CITY                        0.000000
...                              ...
BBRR4_PP_NOMALE_DFR         0.000000
BBRR4_PP_NOMALE_NOPROG      0.000000
BBRR4_PP_NOMALE_MAKEPROG    0.000000
BBRR4_PP_NOMALE_PAIDINFULL  0.000000
BBRR4_PP_NOMALE_DISCHARGE   0.000000

[732 rows x 1 columns]


In [19]:
test_select_df = test_df[[
    "UNITID",
    "INSTNM",
    "CITY",
    "STABBR",
    "ZIP",
    "MAIN",
    "HIGHDEG",
    "LATITUDE",
    "LONGITUDE",
    "HBCU",
    "PBI",
    "ANNHI",
    "TRIBAL",
    "AANAPII",
    "HSI",
    "NANTI",
    "MENONLY",
    "WOMENONLY",
    "PCIP01",
    "PCIP03",
    "PCIP04",
    "PCIP05",
    "PCIP09",
    "PCIP10",
    "PCIP11",
    "PCIP12",
    "PCIP13",
    "PCIP14",
    "PCIP15",
    "PCIP16",
    "PCIP19",
    "PCIP22",
    "PCIP23",
    "PCIP24",
    "PCIP25",
    "PCIP26",
    "PCIP27",
    "PCIP29",
    "PCIP30",
    "PCIP31",
    "PCIP38",
    "PCIP39",
    "PCIP40",
    "PCIP41",
    "PCIP42",
    "PCIP43",
    "PCIP44",
    "PCIP45",
    "PCIP46",
    "PCIP47",
    "PCIP48",
    "PCIP49",
    "PCIP50",
    "PCIP51",
    "PCIP52",
    "PCIP54",
    "UGDS",
    "UGDS_WHITE",
    "UGDS_BLACK",
    "UGDS_HISP",
    "UGDS_ASIAN",
    "UGDS_AIAN",
    "UGDS_NHPI",
    "UGDS_2MOR",
    "UGDS_NRA",
    "UGDS_UNKN",
    "COSTT4_A",
    "COSTT4_P",
    "TUITFTE",
    "INEXPFTE",
    "PCTPELL",
    "PCTFLOAN",
    "UG25ABV",
    "UGDS_MEN",
    "UGDS_WOMEN",
    "SCHTYPE", 
    "STUFACR",
    "IRPS_2MOR",
    "IRPS_AIAN",
    "IRPS_ASIAN",
    "IRPS_BLACK",
    "IRPS_HISP",
    "IRPS_NHPI",
    "IRPS_NRA",
    "IRPS_UNKN",
    "IRPS_WHITE",
    "IRPS_WOMEN",
    "IRPS_MEN"
]]

test_select_df

Unnamed: 0,UNITID,INSTNM,CITY,STABBR,ZIP,MAIN,HIGHDEG,LATITUDE,LONGITUDE,HBCU,...,IRPS_AIAN,IRPS_ASIAN,IRPS_BLACK,IRPS_HISP,IRPS_NHPI,IRPS_NRA,IRPS_UNKN,IRPS_WHITE,IRPS_WOMEN,IRPS_MEN
0,100654,Alabama A & M University,Normal,AL,35762,1,4,34.783368,-86.568502,1.0,...,0.0047,0.0995,0.6540,0.0000,0.0000,0.0758,0.0095,0.1469,0.5024,0.4976
1,100663,University of Alabama at Birmingham,Birmingham,AL,35294-0110,1,4,33.505697,-86.799345,0.0,...,0.0015,0.1575,0.0619,0.0319,0.0012,0.0569,0.0004,0.6808,0.4433,0.5567
2,100690,Amridge University,Montgomery,AL,36117-3553,1,4,32.362609,-86.174010,0.0,...,0.0000,0.0000,0.0294,0.0000,0.0000,0.0000,0.0000,0.9706,0.1471,0.8529
3,100706,University of Alabama in Huntsville,Huntsville,AL,35899,1,4,34.724557,-86.640449,0.0,...,0.0085,0.1823,0.0513,0.0256,0.0000,0.0484,0.0000,0.6809,0.4644,0.5356
4,100724,Alabama State University,Montgomery,AL,36104-0271,1,4,32.364317,-86.295677,1.0,...,0.0000,0.1584,0.6063,0.0136,0.0000,0.0000,0.0090,0.2127,0.4796,0.5204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6538,49576720,Pennsylvania State University-Penn State York,York,PA,17403-3326,0,0,,,,...,,,,,,,,,,
6539,49576721,Pennsylvania State University-Penn State Great...,Malvern,PA,19355-1488,0,0,,,,...,,,,,,,,,,
6540,49576722,Pennsylvania State University-Penn State Harri...,Middletown,PA,17057-4846,0,0,,,,...,,,,,,,,,,
6541,49576723,Pennsylvania State University-Penn State Brand...,Media,PA,19063-5522,0,0,,,,...,,,,,,,,,,


In [20]:
column_dictionary = {
    "UNITID":"id",
    "INSTNM":"name",
    "CITY":"city",
    "STABBR":"state",
    "ZIP":"zipcode",
    # 1: main campus, 0: not main campus
    "MAIN" : "main_campus",
    # "Highest degree awarded 0 Non-degree-granting, 1 Certificate degree, 2 Associate degree, 3 Bachelor's degree, 4 Graduate degree"
    "HIGHDEG":"highest_degree",
    "LATITUDE":"lat",
    "LONGITUDE":"lng",
    # 1: yes, 0: no
    "HBCU":"historicalblack_college",
    "PBI":"black_college",
    "ANNHI":"native_college",
    "TRIBAL":"tribal_college",
    "AANAPII":"asian_college",
    "HSI":"hispanic_college",
    "NANTI":"native_college",
    "MENONLY":"men_college",
    "WOMENONLY":"women_college",
    "PCIP01":"percentage_of_agriculture_major",
    "PCIP03":"percentage_of_conservation_major",
    "PCIP04":"percentage_of_architecture_major",
    "PCIP05":"percentage_of_ethniic_major",
    "PCIP09":"percentage_of_communication_major",
    "PCIP10":"percentage_of_techincalsupport_major",
    "PCIP11":"percentage_of_computerscience_major",
    "PCIP12":"percentage_of_culinary_major",
    "PCIP13":"percentage_of_education_major",
    "PCIP14":"percentage_of_engineer_major",
    "PCIP15":"percentage_of_engineertech_major",
    "PCIP16":"percentage_of_language_major",
    "PCIP19":"percentage_of_family_major",
    "PCIP22":"percentage_of_legal_major",
    "PCIP23":"percentage_of_english_major",
    "PCIP24":"percentage_of_humanities_major",
    "PCIP25":"percentage_of_science_major",
    "PCIP26":"percentage_of_biology_major",
    "PCIP27":"percentage_of_math_major",
    "PCIP29":"percentage_of_appliedscience_major",
    "PCIP30":"percentage_of_interdisciplinary_major",
    "PCIP31":"percentage_of_fitness_major",
    "PCIP38":"percentage_of_philosophy_major",
    "PCIP39":"percentage_of_theology_major",
    "PCIP40":"percentage_of_physic_major",
    "PCIP41":"percentage_of_sciencetech_major",
    "PCIP42":"percentage_of_psychology_major",
    "PCIP43":"percentage_of_security_major",
    "PCIP44":"percentage_of_administration_major",
    "PCIP45":"percentage_of_sociology_major",
    "PCIP46":"percentage_of_trade_major",
    "PCIP47":"percentage_of_mechanic_major",
    "PCIP48":"percentage_of_production_major",
    "PCIP49":"percentage_of_transportation_major",
    "PCIP50":"percentage_of_visualart_major",
    "PCIP51":"percentage_of_health_major",
    "PCIP52":"percentage_of_business_major",
    "PCIP54":"percentage_of_history_major",
    "UGDS":"student_count",
    "UGDS_WHITE":"enrollment_proportion_white",
    "UGDS_BLACK":"enrollment_proportion_black",
    "UGDS_HISP":"enrollment_proportion_hispanic",
    "UGDS_ASIAN":"enrollment_proportion_asian",
    "UGDS_AIAN":"enrollment_proportion_native",
    "UGDS_NHPI":"enrollment_proportion_pacific",
    "UGDS_2MOR":"enrollment_proportion_mix",
    "UGDS_NRA":"enrollment_proportion_nonresident",
    "UGDS_UNKN":"enrollment_proportion_unknown",
    "COSTT4_A" : "cost_of_admission",
    "COSTT4_P" : "cost_of_admission_p",
    "TUITFTE":"tuition",
    "INEXPFTE":"instructional_expenditure",
    "PCTPELL":"percentage_of_pellgrant",
    "PCTFLOAN":"percentage_of_federalloan",
    "UG25ABV":"pecentage_of_over25",
    "UGDS_MEN":"enrollment_percentage_man",
    "UGDS_WOMEN":"enrollment_percentage_women",
    "SCHTYPE": "public/private",
    "STUFACR":"faculity_ratio_student",
    "IRPS_2MOR":"faculity_ratio_mix",
    "IRPS_AIAN":"faculity_ratio_native",
    "IRPS_ASIAN":"faculity_ratio_asian",
    "IRPS_BLACK":"faculity_ratio_black",
    "IRPS_HISP":"faculity_ratio_hispanic",
    "IRPS_NHPI":"faculity_ratio_pacific",
    "IRPS_NRA":"faculity_ratio_nonresident",
    "IRPS_UNKN":"faculity_ratio_unknown",
    "IRPS_WHITE":"faculity_ratio_white",
    "IRPS_WOMEN":"faculity_ratio_women",
    "IRPS_MEN":"faculity_ratio_men"
}

# Make a copy of the DataFrame
test_rename = test_select_df.copy()

# Rename the columns using the dictionary
test_rename.rename(columns=column_dictionary, inplace=True)



In [21]:
test_rename["cost_of_admission"] = test_rename["cost_of_admission"].combine_first(test_rename["cost_of_admission_p"])

In [22]:
test_location = test_rename[[
    "id",
    "name",
    "city",
    "state",
    "main_campus",
    "highest_degree",
    "zipcode",
    "lat",
    "lng",
    "student_count",
    "public/private"
]]

test_location_clean = test_location.dropna()
test_location_clean

Unnamed: 0,id,name,city,state,main_campus,highest_degree,zipcode,lat,lng,student_count,public/private
0,100654,Alabama A & M University,Normal,AL,1,4,35762,34.783368,-86.568502,5098.0,1.0
1,100663,University of Alabama at Birmingham,Birmingham,AL,1,4,35294-0110,33.505697,-86.799345,13284.0,1.0
2,100690,Amridge University,Montgomery,AL,1,4,36117-3553,32.362609,-86.174010,251.0,2.0
3,100706,University of Alabama in Huntsville,Huntsville,AL,1,4,35899,34.724557,-86.640449,7358.0,1.0
4,100724,Alabama State University,Montgomery,AL,1,4,36104-0271,32.364317,-86.295677,3495.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
6041,497286,Universal Technical Institute-West Texas,Austin,TX,0,0,78753-9742,30.415828,-97.660482,0.0,3.0
6042,497301,Avalon Institute-Las Vegas,Las Vegas,NV,0,0,89102-0046,36.142355,-115.206409,170.0,3.0
6043,497310,Medspa Academies-National Institute of Modern ...,Las Vegas,NV,0,1,89169-6745,36.117236,-115.158173,97.0,3.0
6044,497329,American Institute-Cherry Hill,Cherry Hill,NJ,0,0,08002-4309,39.939428,-75.015417,83.0,3.0


In [23]:
test_type = test_rename[[
    "id",
    "name",
    "historicalblack_college",
    "black_college",
    "native_college",
    "tribal_college",
    "asian_college",
    "hispanic_college",
    "native_college",
    "men_college",
    "women_college"
]]

test_type_clean = test_type.dropna()
test_type_clean

Unnamed: 0,id,name,historicalblack_college,black_college,native_college,native_college.1,tribal_college,asian_college,hispanic_college,native_college.2,native_college.3,men_college,women_college
0,100654,Alabama A & M University,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,100663,University of Alabama at Birmingham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,100690,Amridge University,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,100706,University of Alabama in Huntsville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,100724,Alabama State University,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6041,497286,Universal Technical Institute-West Texas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6042,497301,Avalon Institute-Las Vegas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6043,497310,Medspa Academies-National Institute of Modern ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6044,497329,American Institute-Cherry Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
test_ethnicity = test_rename[[
    "id",
    "name",
    "enrollment_proportion_white",
    "enrollment_proportion_black",
    "enrollment_proportion_hispanic",
    "enrollment_proportion_asian",
    "enrollment_proportion_native",
    "enrollment_proportion_pacific",
    "enrollment_proportion_mix",
    "enrollment_proportion_nonresident",
    "enrollment_proportion_unknown",
    "enrollment_percentage_man",
    "enrollment_percentage_women",
    "faculity_ratio_student",
    "faculity_ratio_mix",
    "faculity_ratio_native",
    "faculity_ratio_asian",
    "faculity_ratio_black",
    "faculity_ratio_hispanic",
    "faculity_ratio_pacific",
    "faculity_ratio_nonresident",
    "faculity_ratio_unknown",
    "faculity_ratio_white",
    "faculity_ratio_women",
    "faculity_ratio_men"
]]

test_ethnicity_clean = test_ethnicity.dropna()
test_ethnicity_clean

Unnamed: 0,id,name,enrollment_proportion_white,enrollment_proportion_black,enrollment_proportion_hispanic,enrollment_proportion_asian,enrollment_proportion_native,enrollment_proportion_pacific,enrollment_proportion_mix,enrollment_proportion_nonresident,...,faculity_ratio_native,faculity_ratio_asian,faculity_ratio_black,faculity_ratio_hispanic,faculity_ratio_pacific,faculity_ratio_nonresident,faculity_ratio_unknown,faculity_ratio_white,faculity_ratio_women,faculity_ratio_men
0,100654,Alabama A & M University,0.0184,0.8978,0.0114,0.0014,0.0027,0.0010,0.0108,0.0086,...,0.0047,0.0995,0.6540,0.0000,0.0000,0.0758,0.0095,0.1469,0.5024,0.4976
1,100663,University of Alabama at Birmingham,0.5297,0.2458,0.0669,0.0767,0.0021,0.0004,0.0483,0.0233,...,0.0015,0.1575,0.0619,0.0319,0.0012,0.0569,0.0004,0.6808,0.4433,0.5567
2,100690,Amridge University,0.2470,0.6932,0.0438,0.0000,0.0000,0.0040,0.0000,0.0000,...,0.0000,0.0000,0.0294,0.0000,0.0000,0.0000,0.0000,0.9706,0.1471,0.8529
3,100706,University of Alabama in Huntsville,0.7196,0.0871,0.0610,0.0357,0.0080,0.0012,0.0446,0.0166,...,0.0085,0.1823,0.0513,0.0256,0.0000,0.0484,0.0000,0.6809,0.4644,0.5356
4,100724,Alabama State University,0.0152,0.9259,0.0129,0.0020,0.0017,0.0014,0.0132,0.0166,...,0.0000,0.1584,0.6063,0.0136,0.0000,0.0000,0.0090,0.2127,0.4796,0.5204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6040,497277,Arizona College-Glendale,0.1786,0.1964,0.5099,0.0139,0.0437,0.0079,0.0496,0.0000,...,0.0000,0.0000,0.1111,0.1111,0.0000,0.0000,0.1111,0.5556,0.8889,0.1111
6042,497301,Avalon Institute-Las Vegas,0.1412,0.2824,0.4059,0.0882,0.0000,0.0000,0.0647,0.0000,...,0.0000,0.0000,0.5000,0.0000,0.0000,0.0000,0.0000,0.5000,1.0000,0.0000
6043,497310,Medspa Academies-National Institute of Modern ...,0.3402,0.0515,0.2784,0.0825,0.0000,0.0206,0.2165,0.0000,...,0.0000,0.0000,0.0909,0.2727,0.0909,0.0000,0.0000,0.5455,1.0000,0.0000
6044,497329,American Institute-Cherry Hill,0.1566,0.4819,0.2410,0.0120,0.0000,0.0000,0.0241,0.0000,...,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1.0000,1.0000,0.0000


In [25]:
test_major = test_rename[[
    "id",
    "name",
    "percentage_of_agriculture_major",
    "percentage_of_conservation_major",
    "percentage_of_architecture_major",
    "percentage_of_ethniic_major",
    "percentage_of_communication_major",
    "percentage_of_techincalsupport_major",
    "percentage_of_computerscience_major",
    "percentage_of_culinary_major",
    "percentage_of_education_major",
    "percentage_of_engineer_major",
    "percentage_of_engineertech_major",
    "percentage_of_language_major",
    "percentage_of_family_major",
    "percentage_of_legal_major",
    "percentage_of_english_major",
    "percentage_of_humanities_major",
    "percentage_of_science_major",
    "percentage_of_biology_major",
    "percentage_of_math_major",
    "percentage_of_appliedscience_major",
    "percentage_of_interdisciplinary_major",
    "percentage_of_fitness_major",
    "percentage_of_philosophy_major",
    "percentage_of_theology_major",
    "percentage_of_physic_major",
    "percentage_of_sciencetech_major",
    "percentage_of_psychology_major",
    "percentage_of_security_major",
    "percentage_of_administration_major",
    "percentage_of_sociology_major",
    "percentage_of_trade_major",
    "percentage_of_mechanic_major",
    "percentage_of_production_major",
    "percentage_of_transportation_major",
    "percentage_of_visualart_major",
    "percentage_of_health_major",
    "percentage_of_business_major",
    "percentage_of_history_major"
]]

test_major_clean = test_major.dropna()
test_major_clean

Unnamed: 0,id,name,percentage_of_agriculture_major,percentage_of_conservation_major,percentage_of_architecture_major,percentage_of_ethniic_major,percentage_of_communication_major,percentage_of_techincalsupport_major,percentage_of_computerscience_major,percentage_of_culinary_major,...,percentage_of_administration_major,percentage_of_sociology_major,percentage_of_trade_major,percentage_of_mechanic_major,percentage_of_production_major,percentage_of_transportation_major,percentage_of_visualart_major,percentage_of_health_major,percentage_of_business_major,percentage_of_history_major
0,100654,Alabama A & M University,0.0445,0.0071,0.0053,0.000,0.0000,0.0285,0.0658,0.0,...,0.0409,0.0249,0.0,0.0,0.0,0.0,0.0231,0.0000,0.1637,0.0000
1,100663,University of Alabama at Birmingham,0.0000,0.0000,0.0000,0.002,0.0333,0.0000,0.0229,0.0,...,0.0195,0.0239,0.0,0.0,0.0,0.0,0.0249,0.2088,0.2159,0.0141
2,100690,Amridge University,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,0.0000,0.0,...,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.5000,0.0000
3,100706,University of Alabama in Huntsville,0.0000,0.0000,0.0000,0.000,0.0140,0.0000,0.0692,0.0,...,0.0000,0.0127,0.0,0.0,0.0,0.0,0.0407,0.1341,0.1930,0.0073
4,100724,Alabama State University,0.0000,0.0000,0.0000,0.000,0.0923,0.0000,0.0530,0.0,...,0.0648,0.0196,0.0,0.0,0.0,0.0,0.0511,0.0904,0.1513,0.0059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6041,497286,Universal Technical Institute-West Texas,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,0.0000,0.0,...,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0000
6042,497301,Avalon Institute-Las Vegas,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,0.0000,0.0,...,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0000
6043,497310,Medspa Academies-National Institute of Modern ...,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,0.0000,1.0,...,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0000
6044,497329,American Institute-Cherry Hill,0.0000,0.0000,0.0000,0.000,0.0000,0.0000,0.0000,0.0,...,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0000


In [26]:
test_financial = test_rename[[
    "id",
    "name",
    "tuition",
    "cost_of_admission",
    "instructional_expenditure",
    "percentage_of_pellgrant",
    "percentage_of_federalloan",
    "pecentage_of_over25"
]]

test_financial_clean = test_financial.dropna()
test_financial_clean

Unnamed: 0,id,name,tuition,cost_of_admission,instructional_expenditure,percentage_of_pellgrant,percentage_of_federalloan,pecentage_of_over25
0,100654,Alabama A & M University,9243.0,21924.0,7699.0,0.6853,0.6552,0.0617
1,100663,University of Alabama at Birmingham,11678.0,26248.0,14264.0,0.3253,0.4401,0.1794
3,100706,University of Alabama in Huntsville,8246.0,24869.0,8156.0,0.2377,0.3578,0.1519
4,100724,Alabama State University,8535.0,21938.0,9459.0,0.7205,0.7637,0.0677
5,100751,The University of Alabama,13731.0,31050.0,8912.0,0.1712,0.3454,0.0735
...,...,...,...,...,...,...,...,...
6027,497000,Remington College-Knoxville,8751.0,27799.0,3428.0,0.6809,0.6079,0.6786
6038,497222,Salon Professional Academy-Elevate Salon Insti...,12789.0,24070.0,5598.0,0.4167,0.5208,0.4348
6039,497268,Arizona College of Nursing-Salt Lake City,6378.0,38403.0,1206.0,0.4286,1.0000,0.5430
6040,497277,Arizona College-Glendale,9305.0,28608.0,1943.0,0.6985,0.6718,0.5496


In [27]:
# File path to save the CSV file

file_list = [
        "test_location_clean",
        "test_type_clean",
        "test_ethnicity_clean",
        "test_major_clean",
        "test_financial_clean"
]

file_path = [
       'data/school_location.csv',
       'data/school_type.csv',
       'data/school_ethnicity.csv',
       'data/school_major.csv',
       'data/school_financial.csv'
]

for i, filename in enumerate(file_list):
    dataframe = globals()[filename]
    file = file_path[i]

    # Save the dataframe as CSV using the specified file path
    dataframe.to_csv(file, index=False)

    print(f"CSV file '{filename}' saved successfully at '{file}'.")

print("All CSV files saved successfully.")

CSV file 'test_location_clean' saved successfully at 'data/school_location.csv'.
CSV file 'test_type_clean' saved successfully at 'data/school_type.csv'.
CSV file 'test_ethnicity_clean' saved successfully at 'data/school_ethnicity.csv'.
CSV file 'test_major_clean' saved successfully at 'data/school_major.csv'.
CSV file 'test_financial_clean' saved successfully at 'data/school_financial.csv'.
All CSV files saved successfully.
