# Figure 3
This file produces the results from "Figure 3. Highest Education Level Attained". 

In [4]:
#Import packages
import pandas as pd
import matplotlib as mpl
import numpy as np
import math
import os
pd.options.mode.chained_assignment = None #gets rid of warning for chained alterations

In [5]:
#set directories
# NOTE(review): this is a hard-coded absolute path on one user's machine; os.chdir
# will raise on any machine where this folder does not exist, so edit it before running.
os.chdir("/Users/briangoggin/Dropbox/CP 201A/Fruitvale")
# cwd is the project folder just switched into; it is reused below to build the
# input path (via root) and the export path.
cwd = os.getcwd()
root = cwd #root is directory to raw files

In [6]:
#set input data source
# Path of the education CSV inside root's "Raw Data" folder; this is the file
# handed to import_census() when the data is loaded.
education = root+"/Raw Data/ACS_14_5YR_B15003_with_ann.csv"


In [7]:
#identify Fruitvale Census Tracts
# Tract identifiers that import_census() matches against the 'Id2' column
# to flag a row as belonging to Fruitvale.
Fruitvale = [
    6001406100,
    6001406201,
    6001406202,
    6001406300,
    6001406500,
    6001407101,
    6001407102,
    6001407200,
    6001407300,
    6001407400,
    6001407500,
    6001407600,
]


In [8]:
#Define function for import and standard ACS cleaning operations
def import_census(file, tracts=None):
    """Read a census CSV, flag the Fruitvale tracts and duplicate them.

    Parameters
    ----------
    file : str or file-like
        Path (or open buffer) of the CSV. The second line of the file is used
        as the header row (``header=1``); the data must contain an 'Id2' column.
    tracts : list of int, optional
        Tract ids to flag. Defaults to the module-level ``Fruitvale`` list.

    Returns
    -------
    pandas.DataFrame
        The original rows with a boolean 'Fruitvale' column, followed by a
        second copy of the flagged rows whose 'Fruitvale' value is False.
        The original row index is kept, so the duplicated rows repeat
        index labels.
    """
    if tracts is None:
        tracts = Fruitvale
    df = pd.read_csv(file, header = 1)
    df['Fruitvale'] = df['Id2'].isin(tracts)
    # Duplicate the Fruitvale rows with the flag cleared so that a later
    # groupby('Fruitvale') counts them in the False (county) group as well.
    # .copy() gives an independent frame, so no chained-assignment workaround is needed.
    county_copy = df[df['Fruitvale']].copy()
    county_copy['Fruitvale'] = False
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pd.concat([df, county_copy])

#Define function for proportions MOE
def prop_MOE(numerator, denominator, num_moe, den_moe):
    """Margin of error of the proportion numerator/denominator.

    Computes sqrt(num_moe**2 - p**2 * den_moe**2) / denominator with
    p = numerator/denominator. When the value under the square root is
    negative, the sign is flipped to '+' (the ratio formula), which is the
    fallback the Census Bureau recommends for derived proportions; without it
    the result would be NaN (Series) or a complex number (plain floats).

    Parameters
    ----------
    numerator, denominator : number or pandas.Series
        Estimates of the part and of the whole.
    num_moe, den_moe : number or pandas.Series
        Margins of error of the numerator and the denominator.

    Returns
    -------
    float or pandas.Series
        The margin of error of the proportion (as a fraction, not a percent).
    """
    proportion = numerator/denominator
    den_term = (proportion**2)*(den_moe**2)
    radicand = num_moe**2 - den_term
    fallback = num_moe**2 + den_term
    if isinstance(radicand, (pd.Series, pd.DataFrame)):
        # .where keeps the pandas index; rows with a negative radicand get the fallback
        radicand = radicand.where(radicand >= 0, fallback)
    else:
        radicand = np.where(radicand < 0, fallback, radicand)
    # np.sqrt avoids relying on the value of (1/2), which is 0 under integer division
    return np.sqrt(radicand)/denominator

In [9]:
#Import data.
# Loads the education CSV through import_census(); the returned frame carries the
# boolean 'Fruitvale' column that the later groupby uses to split the two groups.
ed = import_census(education)

In [10]:
#rename variables
# (short name, ACS column label) pairs. For every pair two integer columns are
# added to ed: <short name> from the "Estimate; ..." column and
# <short name>_moe from the matching "Margin of Error; ..." column.
# Kept as an ordered list so the new columns are created in the same order as before.
acs_columns = [
    ('total', "Total:"),
    ('none', "Total: - No schooling completed"),
    ('prek', "Total: - Nursery school"),
    ('k', "Total: - Kindergarten"),
    ('1', "Total: - 1st grade"),
    ('2', "Total: - 2nd grade"),
    ('3', "Total: - 3rd grade"),
    ('4', "Total: - 4th grade"),
    ('5', "Total: - 5th grade"),
    ('6', "Total: - 6th grade"),
    ('7', "Total: - 7th grade"),
    ('8', "Total: - 8th grade"),
    ('9', "Total: - 9th grade"),
    ('10', "Total: - 10th grade"),
    ('11', "Total: - 11th grade"),
    ('12', "Total: - 12th grade, no diploma"),
    ('high', "Total: - Regular high school diploma"),
    ('ged', "Total: - GED or alternative credential"),
    ('somec', "Total: - Some college, less than 1 year"),
    ('somec2', "Total: - Some college, 1 or more years, no degree"),
    ('ass', "Total: - Associate's degree"),
    ('bachelors', "Total: - Bachelor's degree"),
    ('masters', "Total: - Master's degree"),
    ('professional', "Total: - Professional school degree"),
    ('doctorate', "Total: - Doctorate degree"),
]

for short_name, acs_label in acs_columns:
    ed[short_name] = ed["Estimate; " + acs_label].astype(int)
    ed[short_name + '_moe'] = ed["Margin of Error; " + acs_label].astype(int)

# Count of people holding a bachelor's degree or any higher degree
ed['at_least'] = ed['bachelors']+ed['masters']+ed['professional']+ed['doctorate']


In [11]:
#list moes to transform
# (named moes_to_convert rather than `list` so the builtin list type is not shadowed
# for the rest of the notebook)
moes_to_convert = ['none_moe', 'prek_moe', 'k_moe', '1_moe', '2_moe', '3_moe',
                   '4_moe', '5_moe', '6_moe', '7_moe', '8_moe', '9_moe', '10_moe',
                   '11_moe', '12_moe', 'high_moe', 'ged_moe', 'somec_moe',
                   'somec2_moe', 'ass_moe', 'bachelors_moe',
                   'masters_moe', 'professional_moe', 'doctorate_moe',
                   'total_moe']

#convert MOEs to 95% confidence level
def convert(column):
    """Rescale a margin of error by 1.96/1.645.

    1.96 and 1.645 are the standard normal critical values for two-sided 95%
    and 90% intervals, so this turns a 90%-level MOE into a 95%-level one.
    NOTE(review): assumes the input MOEs are published at the 90% level - confirm
    against the data source.
    """
    return column*(1.96/1.645)

#square columns
def square(column):
    """Return the element-wise square of column."""
    return column**2

# NOTE: this loop overwrites each MOE column in place, so running the cell a second
# time without re-running the cells above would apply the conversion factor twice.
for item in moes_to_convert:
    ed[item] = convert(ed[item])
    # <name>_moe2 holds the squared (converted) MOE, used when MOEs are combined
    ed[item+'2']= square(ed[item])

In [12]:
#Highest completion columns
# Each aggregate level is built from a group of detailed columns:
#   <level>      = sum of the component estimates
#   <level>_moe  = square root of the sum of the component squared MOEs
#   <level>_moe2 = <level>_moe squared (used when rows are summed later)
# Components are listed in the order in which their squared MOEs are added.
levels = [
    ('nhigh', ['none', 'prek', 'k', '1', '2', '3', '4', '5', '6', '7', '8',
               '9', '10', '11', '12']),
    ('highc', ['high', 'ged', 'somec', 'somec2']),
    ('pro', ['ass', 'professional']),
    ('edu', ['masters', 'doctorate', 'bachelors']),
]

for level, parts in levels:
    ed[level] = sum(ed[part] for part in parts)
    ed[level + '_moe'] = sum(ed[part + '_moe2'] for part in parts)**(1/2)
    ed[level + '_moe2'] = (ed[level + '_moe'])**2

In [13]:
#list variables to include in sum
# For every level keep its estimate and its squared MOE; squared MOEs are the
# quantity that is summed across rows.
eds = [column
       for level in ('total', 'nhigh', 'highc', 'pro', 'edu')
       for column in (level, level + '_moe2')]

# One row per value of the Fruitvale flag, each column summed over the group's rows
grouped = ed.groupby('Fruitvale')
exed = grouped[eds].sum()

In [None]:
#Create new MOEs after sum
moe_list = ['total_moe2', 'nhigh_moe2', 'highc_moe2', 'pro_moe2', 'edu_moe2']

def sq_root(column):
    """Return the element-wise square root of column."""
    # 0.5 rather than (1/2): identical under true division, and not 0 under integer division
    return column**0.5

for item in moe_list:
    # Drop exactly the trailing '2' to get the MOE column name (e.g. 'total_moe2' -> 'total_moe').
    # str.strip('2') would remove every leading and trailing '2', which is only
    # accidentally correct for the current names.
    moe_name = item[:-1] if item.endswith('2') else item
    exed[moe_name] = sq_root(exed[item])

In [None]:
#Create Percentages
pct_list = ['nhigh', 'highc', 'pro', 'edu']

def pct(column):
    """Return exed[column] as a percentage of exed['total']."""
    share = exed[column]/exed['total']
    return 100*share

# Adds one pct_<level> column to exed per level
for item in pct_list:
    pct_name = 'pct_'+item
    exed[pct_name] = pct(item)

In [None]:
#Create New Proportions MOEs
# For each level, the MOE of its share of the total, scaled to percentage points
for level in ('nhigh', 'highc', 'pro', 'edu'):
    level_moe = prop_MOE(exed[level], exed['total'], exed[level + '_moe'], exed['total_moe'])
    exed['pct_' + level + '_moe'] = 100*(level_moe)


In [None]:
#list data to include in export
export = [
    # estimates
    'total', 'nhigh', 'highc', 'pro', 'edu',
    # margins of error of the estimates
    'total_moe', 'nhigh_moe', 'highc_moe', 'pro_moe', 'edu_moe',
    # percentages of the total
    'pct_nhigh', 'pct_highc', 'pct_pro', 'pct_edu',
    # margins of error of the percentages
    'pct_nhigh_moe', 'pct_highc_moe', 'pct_pro_moe', 'pct_edu_moe',
]

# keep only the export columns (in this order) and round every value to 2 decimals
exed = exed[export].round(2)

In [None]:
#export data to csv
# Writes the summary table exed to a CSV file under the project directory (cwd).
# NOTE(review): the Output/python_output folder is assumed to exist already;
# nothing here creates it - confirm before running on a fresh checkout.
export_path = cwd+'/Output/python_output/ed_finalproject.csv'
exed.to_csv(export_path)