In [9]:
import pandas as pd
import os

In [10]:
# Directory that holds the NHGIS extract. The default is the original
# external-volume location; set the EDU_ATTAINMENT_DIR environment variable to
# run the notebook against a copy stored elsewhere.
# os.path.join(..., '') guarantees a trailing separator, so building a file
# path by plain string concatenation onto this value keeps working.
absolute_filepath = os.path.join(
    os.environ.get('EDU_ATTAINMENT_DIR', '/Volumes/Lexar/educational_attainment/'),
    '')

In [11]:
# Lookup from raw column codes in the NHGIS extract to readable labels.
# Code layout as reflected by the labels: B69AA / B69AB / B69AC are the three
# attainment categories; the suffix is the year, with "125" standing for the
# 2008-2012 estimate and "125M" for its margin of error.
col_mapper = dict([
    # B69AA -- less than 9th grade
    ('B69AA1970', '1970: 25+ Less than 9th grade'),
    ('B69AA1980', '1980: 25+ Less than 9th grade'),
    ('B69AA1990', '1990: 25+ Less than 9th grade'),
    ('B69AA2000', '2000: 25+ Less than 9th grade'),
    ('B69AA125', '2008-2012: 25+ Less than 9th grade'),
    ('B69AA125M', 'Margin of error: 2008-2012: 25+ Less than 9th grade'),
    # B69AB -- middle category (its wording changes from 1990 onward)
    ('B69AB1970', '1970: 25+ 9th grade to 3 years of college'),
    ('B69AB1980', '1980: 25+ 9th grade to 3 years of college'),
    ('B69AB1990', '1990: 25+ 9th grade to some college or associates degree'),
    ('B69AB2000', '2000: 25+ 9th grade to some college or associates degree'),
    ('B69AB125', '2008-2012: 25+ 9th grade to some college or associates degree'),
    ('B69AB125M', 'Margin of error: 2008-2012: 25+ 9th grade to some college or associates degree'),
    # B69AC -- highest category (its wording changes from 1990 onward)
    ('B69AC1970', '1970: 25+ 4 or more years of college'),
    ('B69AC1980', '1980: 25+ 4 or more years of college'),
    ('B69AC1990', '1990: 25+ bachelors degree or higher'),
    ('B69AC2000', '2000: 25+ bachelors degree or higher'),
    ('B69AC125', '2008-2012: Persons: 25+ bachelors degree or higher'),
    ('B69AC125M', 'Margin of error: 2008-2012: Persons: 25+ bachelors degree or higher'),
])

In [12]:
# Columns removed before export, in two groups: the relabelled attainment
# count / margin-of-error columns, then the county, state and per-year name
# columns.
drop_cols = (
    [
        '1970: 25+ Less than 9th grade',
        '1980: 25+ Less than 9th grade',
        '1990: 25+ Less than 9th grade',
        '2000: 25+ Less than 9th grade',
        '2008-2012: 25+ Less than 9th grade',
        'Margin of error: 2008-2012: 25+ Less than 9th grade',
        '1970: 25+ 9th grade to 3 years of college',
        '1980: 25+ 9th grade to 3 years of college',
        '1990: 25+ 9th grade to some college or associates degree',
        '2000: 25+ 9th grade to some college or associates degree',
        '2008-2012: 25+ 9th grade to some college or associates degree',
        'Margin of error: 2008-2012: 25+ 9th grade to some college or associates degree',
        '1970: 25+ 4 or more years of college',
        '1980: 25+ 4 or more years of college',
        '1990: 25+ bachelors degree or higher',
        '2000: 25+ bachelors degree or higher',
        '2008-2012: Persons: 25+ bachelors degree or higher',
        'Margin of error: 2008-2012: Persons: 25+ bachelors degree or higher',
    ]
    + [
        'COUNTYFP',
        'COUNTYNH',
        'NAME1970',
        'NAME1980',
        'NAME1990',
        'NAME2000',
        'NAME2012',
        'STATEFP',
        'STATENH',
    ]
)

In [13]:
# Load the educational attainment extract (tract file) into a DataFrame.
# The path is built once and reused, and it is included in the error message so
# that a missing file or an unmounted volume is easy to diagnose.
edu_attainment_path = os.path.join(absolute_filepath, 'nhgis0014_ts_nominal_tract.csv')
# isfile (rather than exists) also rejects a directory with that name.
if not os.path.isfile(edu_attainment_path):
    raise IOError('This file does not exist in this location: ' + edu_attainment_path)
# The file is decoded as ISO-8859-1; the first CSV column becomes the index.
edu_attainment_df = pd.read_csv(edu_attainment_path, encoding="ISO-8859-1", index_col=0)

In [14]:
# Replace the raw column codes with the readable labels from col_mapper.
# The renamed frame is rebound to the same name instead of using inplace=True:
# rename returns a new frame, and rebinding avoids mutating an object in place.
edu_attainment_df = edu_attainment_df.rename(columns=col_mapper)

In [15]:
# Keep only the rows whose STATE value is 'Illinois'.
# NOTE(review): the "chi_" prefix suggests Chicago, but this filter is
# statewide -- confirm whether a narrower (county/city) filter was intended.
is_illinois = edu_attainment_df['STATE'].eq('Illinois')
chi_edu_df = edu_attainment_df.loc[is_illinois]

In [16]:
# Per-year total of the 25+ population: row-wise sum of the three attainment
# categories for that year.
# Columns are selected with a list. A tuple inside .loc is the MultiIndex key
# syntax and only behaved like a list here because the columns are flat.
# NOTE(review): sum() skips NaN by default, so a row with no data for a given
# year gets a total of 0 rather than NaN -- confirm that this is intended.
total_1970 = chi_edu_df[['1970: 25+ Less than 9th grade',
                         '1970: 25+ 9th grade to 3 years of college',
                         '1970: 25+ 4 or more years of college']].sum(axis=1)
total_1980 = chi_edu_df[['1980: 25+ Less than 9th grade',
                         '1980: 25+ 9th grade to 3 years of college',
                         '1980: 25+ 4 or more years of college']].sum(axis=1)
total_1990 = chi_edu_df[['1990: 25+ Less than 9th grade',
                         '1990: 25+ 9th grade to some college or associates degree',
                         '1990: 25+ bachelors degree or higher']].sum(axis=1)
total_2000 = chi_edu_df[['2000: 25+ Less than 9th grade',
                         '2000: 25+ 9th grade to some college or associates degree',
                         '2000: 25+ bachelors degree or higher']].sum(axis=1)
# The 2008-2012 estimate is stored under the "2010" name.
total_2010 = chi_edu_df[['2008-2012: 25+ Less than 9th grade',
                         '2008-2012: 25+ 9th grade to some college or associates degree',
                         '2008-2012: Persons: 25+ bachelors degree or higher']].sum(axis=1)
# Attach all five totals in a single assign call (returns a new frame).
chi_edu_df = chi_edu_df.assign(
    Total_1970=total_1970,
    Total_1980=total_1980,
    Total_1990=total_1990,
    Total_2000=total_2000,
    Total_2010=total_2010,
)

In [17]:
# Share of each year's 25+ total that falls in the highest attainment
# category: top-category count divided by the matching Total_<year> column.
prop_1970 = chi_edu_df['1970: 25+ 4 or more years of college'] / chi_edu_df['Total_1970']
prop_1980 = chi_edu_df['1980: 25+ 4 or more years of college'] / chi_edu_df['Total_1980']
prop_1990 = chi_edu_df['1990: 25+ bachelors degree or higher'] / chi_edu_df['Total_1990']
prop_2000 = chi_edu_df['2000: 25+ bachelors degree or higher'] / chi_edu_df['Total_2000']
prop_2010 = (chi_edu_df['2008-2012: Persons: 25+ bachelors degree or higher']
             / chi_edu_df['Total_2010'])
# Add the five proportion columns in one step.
chi_edu_df = chi_edu_df.assign(
    High_Attainment_1970=prop_1970,
    High_Attainment_1980=prop_1980,
    High_Attainment_1990=prop_1990,
    High_Attainment_2000=prop_2000,
    High_Attainment_2010=prop_2010,
)

In [18]:
# Remove every column listed in drop_cols, leaving the remaining columns.
# The result is rebound rather than dropped with inplace=True: drop returns a
# new frame, and rebinding avoids mutating an object in place.
chi_edu_df = chi_edu_df.drop(drop_cols, axis=1)

In [20]:
# Write the final frame as a pipe-delimited file; the relative name resolves
# against the current working directory.
chi_edu_df.to_csv(path_or_buf="edu_attainment_data_1970_2010.csv", sep="|")