In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd



In [5]:
# Load the cleaned datasets for the three years being compared, in the order
# 2018, 2014, 2011.
data_2018, data_2014, data_2011 = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../Resources/clean2018.csv",
        "../Resources/clean2014.csv",
        "../Resources/clean2011.csv",
    )
]

In [13]:
# We are focusing on public colleges, which (per our reading of the data)
# are the rows with CONTROL == 1. On top of that we keep only rows whose
# HIGHDEG code is 2.
# NOTE(review): HIGHDEG == 2 presumably means the highest degree awarded is an
# associate's degree (i.e. community colleges) -- confirm against the data dictionary.

public_2018, public_2014, public_2011 = (
    frame[(frame['CONTROL'] == 1) & (frame['HIGHDEG'] == 2)]
    for frame in (data_2018, data_2014, data_2011)
)

In [14]:
# Build one per-state (STABBR) grouping of the public schools for each year.

public_2018_state_gb, public_2014_state_gb, public_2011_state_gb = (
    frame.groupby("STABBR") for frame in (public_2018, public_2014, public_2011)
)

In [15]:
# Keep one college per state: the first row of each state group, in the row
# order of the filtered frame, then order the result by state abbreviation.
# NOTE(review): the intent is the largest college per state by total
# undergraduate enrollment, but head(1) only yields that if the input CSVs are
# already sorted by enrollment in descending order -- confirm, or sort
# explicitly before grouping.

top1_2018 = public_2018_state_gb.head(1).sort_values(by='STABBR')
top1_2014 = public_2014_state_gb.head(1).sort_values(by='STABBR')
top1_2011 = public_2011_state_gb.head(1).sort_values(by='STABBR')

In [16]:
# Re-group the selected colleges by state so per-state averages of the
# race/ethnicity enrollment columns can be computed next.

top1_2018_state_gb, top1_2014_state_gb, top1_2011_state_gb = (
    frame.groupby("STABBR") for frame in (top1_2018, top1_2014, top1_2011)
)


In [17]:
# Per-state mean of every numeric column, indexed by STABBR.
# numeric_only=True is passed explicitly: on pandas >= 2.0 a bare .mean() no
# longer drops non-numeric columns silently and raises TypeError if any are
# present. Older pandas versions accept the argument as well.
top1_2018_averages = top1_2018_state_gb.mean(numeric_only=True)
top1_2014_averages = top1_2014_state_gb.mean(numeric_only=True)
top1_2011_averages = top1_2011_state_gb.mean(numeric_only=True)




In [18]:
# Race/ethnicity enrollment columns carried into the comparison.
race_share_cols = ["UGDS_WHITE", "UGDS_BLACK", "UGDS_HISP", "UGDS_ASIAN", "UGDS_AIAN"]

# Take those columns from each year's averages and tag every column name with
# its year, so the three tables can be joined side by side without clashes.
diversity_2018_df = top1_2018_averages[race_share_cols].add_suffix('_2018')
diversity_2014_df = top1_2014_averages[race_share_cols].add_suffix('_2014')
diversity_2011_df = top1_2011_averages[race_share_cols].add_suffix('_2011')

In [19]:
# Outer-join the three per-year tables on their shared state index, so a state
# missing from one year is kept with NaN in that year's columns.
merge1 = diversity_2011_df.merge(
    diversity_2014_df,
    left_index=True,
    right_index=True,
    how='outer',
)
diversity_all_df = merge1.merge(
    diversity_2018_df,
    left_index=True,
    right_index=True,
    how='outer',
)




In [20]:
# Change in the UGDS_WHITE value between each pair of years
# (later year minus earlier year).
for new_col, later_col, earlier_col in (
    ("WHITE_change_14_18", "UGDS_WHITE_2018", "UGDS_WHITE_2014"),
    ("WHITE_change_11_18", "UGDS_WHITE_2018", "UGDS_WHITE_2011"),
    ("WHITE_change_11_14", "UGDS_WHITE_2014", "UGDS_WHITE_2011"),
):
    diversity_all_df[new_col] = diversity_all_df[later_col] - diversity_all_df[earlier_col]



In [21]:
# Change in the UGDS_BLACK value between each pair of years
# (later year minus earlier year).
for new_col, later_col, earlier_col in (
    ("BLACK_change_14_18", "UGDS_BLACK_2018", "UGDS_BLACK_2014"),
    ("BLACK_change_11_18", "UGDS_BLACK_2018", "UGDS_BLACK_2011"),
    ("BLACK_change_11_14", "UGDS_BLACK_2014", "UGDS_BLACK_2011"),
):
    diversity_all_df[new_col] = diversity_all_df[later_col] - diversity_all_df[earlier_col]

In [22]:
# Change in the UGDS_HISP value between each pair of years
# (later year minus earlier year).
for new_col, later_col, earlier_col in (
    ("HISP_change_14_18", "UGDS_HISP_2018", "UGDS_HISP_2014"),
    ("HISP_change_11_18", "UGDS_HISP_2018", "UGDS_HISP_2011"),
    ("HISP_change_11_14", "UGDS_HISP_2014", "UGDS_HISP_2011"),
):
    diversity_all_df[new_col] = diversity_all_df[later_col] - diversity_all_df[earlier_col]

In [23]:
# Change in the UGDS_ASIAN value between each pair of years
# (later year minus earlier year).
for new_col, later_col, earlier_col in (
    ("ASIAN_change_14_18", "UGDS_ASIAN_2018", "UGDS_ASIAN_2014"),
    ("ASIAN_change_11_18", "UGDS_ASIAN_2018", "UGDS_ASIAN_2011"),
    ("ASIAN_change_11_14", "UGDS_ASIAN_2014", "UGDS_ASIAN_2011"),
):
    diversity_all_df[new_col] = diversity_all_df[later_col] - diversity_all_df[earlier_col]

In [24]:
# Change in the UGDS_AIAN value between each pair of years
# (later year minus earlier year).
for new_col, later_col, earlier_col in (
    ("AIAN_change_14_18", "UGDS_AIAN_2018", "UGDS_AIAN_2014"),
    ("AIAN_change_11_18", "UGDS_AIAN_2018", "UGDS_AIAN_2011"),
    ("AIAN_change_11_14", "UGDS_AIAN_2014", "UGDS_AIAN_2011"),
):
    diversity_all_df[new_col] = diversity_all_df[later_col] - diversity_all_df[earlier_col]

In [25]:
# Preview the first rows instead of rendering the entire merged frame.
diversity_all_df.head(10)

Unnamed: 0_level_0,UGDS_WHITE_2011,UGDS_BLACK_2011,UGDS_HISP_2011,UGDS_ASIAN_2011,UGDS_AIAN_2011,UGDS_WHITE_2014,UGDS_BLACK_2014,UGDS_HISP_2014,UGDS_ASIAN_2014,UGDS_AIAN_2014,...,BLACK_change_11_14,HISP_change_14_18,HISP_change_11_18,HISP_change_11_14,ASIAN_change_14_18,ASIAN_change_11_18,ASIAN_change_11_14,AIAN_change_14_18,AIAN_change_11_18,AIAN_change_11_14
STABBR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AK,0.6429,0.013,0.0455,0.0,0.1234,0.5388,0.0112,0.0427,0.0157,0.1879,...,-0.0018,-0.0295,-0.0323,-0.0028,0.0698,0.0855,0.0157,0.5621,0.6266,0.0645
AL,0.6732,0.3067,0.0051,0.0026,0.0051,0.7255,0.2613,0.0044,0.0025,0.0044,...,-0.0454,0.0195,0.0188,-0.0007,0.0016,0.0015,-0.0001,-0.0003,-0.001,-0.0007
AR,0.855,0.0527,0.0299,0.0081,0.0054,0.8286,0.0514,0.0504,0.0093,0.0037,...,-0.0013,0.0103,0.0308,0.0205,-0.0004,0.0008,0.0012,0.0032,0.0015,-0.0017
AS,0.0019,0.0,0.0005,0.0033,0.0,,,,,,...,,,,,,,,,,
AZ,0.2069,0.0308,0.6124,0.0114,0.0117,0.1793,0.0292,0.6952,0.01,0.0114,...,-0.0016,0.0448,0.1276,0.0828,-0.0006,-0.002,-0.0014,0.0,-0.0003,-0.0003
CA,0.1534,0.2142,0.1548,0.2804,0.0025,0.1512,0.2131,0.218,0.3027,0.0037,...,-0.0011,0.0192,0.0824,0.0632,0.0328,0.0551,0.0223,-0.0027,-0.0015,0.0012
CO,0.6366,0.0224,0.2817,0.0118,0.0109,0.5721,0.0174,0.3617,0.0119,0.0054,...,-0.005,0.0288,0.1088,0.08,0.0006,0.0007,0.0001,0.0001,-0.0054,-0.0055
CT,0.7811,0.0801,0.0572,0.029,0.0015,0.731,0.0973,0.0828,0.0319,0.003,...,0.0172,0.0632,0.0888,0.0256,-0.0089,-0.006,0.0029,-0.001,0.0005,0.0015
DE,0.6769,0.1884,0.0575,0.0176,0.0044,0.6409,0.1796,0.0911,0.0205,0.0039,...,-0.0088,0.0383,0.0719,0.0336,0.01,0.0129,0.0029,0.0002,-0.0003,-0.0005
FL,0.7157,0.1219,0.0944,0.0222,0.0061,0.6742,0.1254,0.1145,0.0198,0.0054,...,0.0035,0.2246,0.2447,0.0201,0.0097,0.0073,-0.0024,-0.0029,-0.0036,-0.0007


In [26]:
# Persist the combined per-year values and their changes; with to_csv's
# defaults the STABBR index is written as the first column.
diversity_all_df.to_csv(path_or_buf="../Resources/diversity_cc_changes.csv")