# Final Project Tables
This notebook conducts data analysis for tables and figures that I did not include in previous assignments. For the tables and figures for the final project that I take from other assignments, I use the code from those assignments.

In [72]:
#Import packages
import pandas as pd
import matplotlib as mpl
#import matplotlib.pyplot as plt
import numpy as np
import math
import os
pd.options.mode.chained_assignment = None #gets rid of warning for chained alterations

In [73]:
#set directories
#The project folder defaults to the original location; set the FRUITVALE_ROOT
#environment variable to run the notebook from a different machine or folder.
project_dir = os.environ.get("FRUITVALE_ROOT", "/Users/briangoggin/Dropbox/CP 201A/Fruitvale")
os.chdir(project_dir)
cwd = os.getcwd()
root = cwd #root is directory to raw files

# Section 1. Accessibility Table

In [74]:
#Load the 26-minute accessibility CSV from the Output folder under root
input_data = "{}{}".format(root, "/Output/AC_26minute_accessibility.csv")
df = pd.read_csv(filepath_or_buffer=input_data)

In [75]:
#Drop unneeded variables
#Columns to keep; named keep_cols rather than `list` so the builtin list type
#is not shadowed for the rest of the kernel session.
keep_cols = ['high_jobs_26', 'low_jobs_26', 'middle_jobs_26', 'total_jobs_26', 'block_id']
df = df[keep_cols]
df.head()

Unnamed: 0,high_jobs_26,low_jobs_26,middle_jobs_26,total_jobs_26,block_id
0,22,16,7,45,60014001001007
1,18,3,5,26,60014001001008
2,26,15,7,48,60014001001017
3,7,9,6,22,60014001001024
4,26,20,21,67,60014001001026


In [76]:
#Tract ids treated as Fruitvale
Fruitvale = [
    6001406100, 6001406201, 6001406202, 6001406300,
    6001406500, 6001407101, 6001407102, 6001407200,
    6001407300, 6001407400, 6001407500, 6001407600,
]
#The first 10 characters of the block id are used as the tract id (kept as text)
tract_ids = df['block_id'].astype(str).str.slice(0, 10)
df['tract'] = tract_ids
#Flag blocks whose tract id (as an integer) is in the Fruitvale list
df['Fruitvale'] = tract_ids.astype(int).isin(Fruitvale)
df.head()

Unnamed: 0,high_jobs_26,low_jobs_26,middle_jobs_26,total_jobs_26,block_id,tract,Fruitvale
0,22,16,7,45,60014001001007,6001400100,False
1,18,3,5,26,60014001001008,6001400100,False
2,26,15,7,48,60014001001017,6001400100,False
3,7,9,6,22,60014001001024,6001400100,False
4,26,20,21,67,60014001001026,6001400100,False


In [77]:
#add duplicate observations for Fruitvale before collapsing to the average
#Fruitvale rows are copied with the flag set to False, so the False group
#contains every block (Fruitvale included) while the True group is Fruitvale only.
#.copy() makes the duplicate an independent frame before its flag is overwritten.
df2 = df[df['Fruitvale']==True].copy()
df2['Fruitvale'] = False
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, df2])
df.head()

Unnamed: 0,high_jobs_26,low_jobs_26,middle_jobs_26,total_jobs_26,block_id,tract,Fruitvale
0,22,16,7,45,60014001001007,6001400100,False
1,18,3,5,26,60014001001008,6001400100,False
2,26,15,7,48,60014001001017,6001400100,False
3,7,9,6,22,60014001001024,6001400100,False
4,26,20,21,67,60014001001026,6001400100,False


In [78]:
#Look up a single block id to inspect its rows after the duplication step
df.loc[df['block_id'] == 60014063001000]

Unnamed: 0,high_jobs_26,low_jobs_26,middle_jobs_26,total_jobs_26,block_id,tract,Fruitvale
2039,68321,31975,37368,137665,60014063001000,6001406300,True
2039,68321,31975,37368,137665,60014063001000,6001406300,False


In [79]:
#Average each job-accessibility measure within the two Fruitvale flag groups
varlist = [
    'total_jobs_26',
    'high_jobs_26',
    'middle_jobs_26',
    'low_jobs_26',
]
exdf = df.groupby(by=['Fruitvale'])[varlist].agg('mean')
exdf.head()

Unnamed: 0_level_0,total_jobs_26,high_jobs_26,middle_jobs_26,low_jobs_26
Fruitvale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,76166.0562,38174.323151,20691.628626,17300.110769
True,129767.801354,64792.469526,35256.735892,29718.604966


In [80]:
#Flip the table so measures are rows and the two groups are columns, then write it out
exdf = exdf.T
export_path = root+"/Output/python_output"
exdf.to_csv(path_or_buf=export_path+"/acc_comparison.csv")

# Section 2. Census Tables

In [101]:
#set data roots
#Each name below holds the path of one raw ACS CSV inside root's "Raw Data" folder;
#the paths are passed to import_census in the sections that follow.
poverty = root+"/Raw Data/ACS_14_5YR_B17001_with_ann.csv"
education = root+"/Raw Data/ACS_14_5YR_B15003_with_ann.csv"
industry = root+"/Raw Data/ACS_14_5YR_B08126_with_ann.csv"
cmincome1 = root+"/Raw Data/ACS_14_5YR_B08119_with_ann.csv"
cmincome2 = root+"/Raw Data/ACS_14_5YR_B08121_with_ann.csv"

#identify Fruitvale Census Tracts
#(integer tract ids; import_census compares them against the 'Id2' column)
Fruitvale = [6001406100, 6001406201, 6001406202, 6001406300, 6001406500, 6001407101, 6001407102, 6001407200, 6001407300, 6001407400, 6001407500, 6001407600]


In [102]:
#Define function for import and standard ACS cleaning operations
def import_census(file, tracts=None):
    """Read an ACS table and flag / duplicate the study-area tracts.

    Parameters
    ----------
    file : str or file-like
        Passed to pandas.read_csv; the second line of the file supplies the
        column names (header=1).
    tracts : list of int, optional
        Tract ids matched against the 'Id2' column. When omitted, the
        notebook-level Fruitvale list is used.

    Returns
    -------
    pandas.DataFrame
        The table with a boolean 'Fruitvale' column. Rows of the listed tracts
        appear twice: once flagged True and once flagged False, so that group
        statistics for the False group also include those tracts.
    """
    if tracts is None:
        tracts = Fruitvale
    df = pd.read_csv(file, header = 1)
    df['Fruitvale'] = df['Id2'].isin(tracts)
    #create duplicate observations of Fruitvale to include them in county averages
    #(.copy() gives an independent frame, so overwriting the flag is safe)
    df2 = df[df['Fruitvale']].copy()
    df2['Fruitvale'] = False
    #pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    return pd.concat([df, df2])

#Define function for proportions MOE
def prop_MOE(numerator, denominator, num_moe, den_moe):
    """Margin of error of the proportion numerator/denominator.

    Computes sqrt(num_moe^2 - p^2 * den_moe^2) / denominator with
    p = numerator/denominator. When the value under the root is negative the
    plus-sign (ratio) form sqrt(num_moe^2 + p^2 * den_moe^2) / denominator is
    used instead, rather than returning NaN (or a complex number for plain
    floats).

    Parameters may be scalars, numpy arrays or pandas Series; the arithmetic is
    element-wise.
    """
    proportion = numerator/denominator
    den_term = (proportion**2)*(den_moe**2)
    radicand = num_moe**2 - den_term
    fallback = num_moe**2 + den_term
    if hasattr(radicand, 'where'):
        #pandas objects: keep the index, swap in the fallback where negative
        radicand = radicand.where(radicand >= 0, fallback)
    else:
        #scalars and numpy arrays
        radicand = np.where(radicand < 0, fallback, radicand)
    return radicand**0.5/denominator

In [103]:
##########################################
##Section 1: Commute Mode by Income
##########################################

#Read both commute tables through the shared ACS import helper
cm1, cm2 = [import_census(path) for path in (cmincome1, cmincome2)]

In [104]:
#check the dimensions of cm2 right after import (rows, columns)
cm2.shape

(373, 18)

In [105]:
#Copy the long ACS column labels into short working names
cm2_labels = [
    ('total', "Estimate; Median earnings in the past 12 months -- - Total:"),
    ('total_moe', "Margin of Error; Median earnings in the past 12 months -- - Total:"),
    ('transit', "Estimate; Median earnings in the past 12 months -- - Total: - Public transportation (excluding taxicab)"),
    ('transit_moe', "Margin of Error; Median earnings in the past 12 months -- - Total: - Public transportation (excluding taxicab)"),
]
for short_name, acs_label in cm2_labels:
    cm2[short_name] = cm2[acs_label]


In [106]:
#drop observations with missing values
#Rows holding the placeholder strings '-' or '**' in any of these columns are
#removed, then the columns are cast to int. The list is named median_cols
#rather than `vars` so the builtin vars() is not shadowed.
median_cols = ['total', 'total_moe', 'transit', 'transit_moe']
for item in median_cols:
    cm2 = cm2[(cm2[item] != '-') & (cm2[item] != '**')]

for item in median_cols:
    cm2[item] = cm2[item].astype(int)

In [107]:
#re-check the dimensions of cm2 (rows, columns) at this point
cm2.shape

(365, 22)

In [None]:
#Display the rows flagged as Fruitvale
cm2.loc[cm2['Fruitvale']==True]

In [109]:
#convert MOEs to 95% confidence interval (multiply by 1.96/1.645)
for moe_col in ('total_moe', 'transit_moe'):
    cm2[moe_col] = cm2[moe_col]*(1.96/1.645)

#collapse to one row per Fruitvale flag value, averaging every listed column
sumvars = [
    'total',
    'total_moe',
    'transit',
    'transit_moe',
]
cm2_sum = cm2.groupby('Fruitvale')[sumvars].agg('mean')

In [110]:
#display the first rows of cm2_sum
cm2_sum.head()

Unnamed: 0_level_0,total,total_moe,transit,transit_moe
Fruitvale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,48116.790368,10330.452414,57055.1983,39578.728226
True,24016.833333,6423.914894,16118.916667,15770.056738


In [111]:
#Copy the long ACS column labels of the transit-by-income table into short names.
#Pairs are (short name, ACS label); each estimate is followed by its margin of error.
cm1_labels = [
    ('total', "Estimate; Total: - Public transportation (excluding taxicab):"),
    ('total_moe', "Margin of Error; Total: - Public transportation (excluding taxicab):"),
    ('10', "Estimate; Total: - Public transportation (excluding taxicab): - $1 to $9,999 or loss"),
    ('10_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $1 to $9,999 or loss"),
    ('15', "Estimate; Total: - Public transportation (excluding taxicab): - $10,000 to $14,999"),
    ('15_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $10,000 to $14,999"),
    ('25', "Estimate; Total: - Public transportation (excluding taxicab): - $15,000 to $24,999"),
    ('25_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $15,000 to $24,999"),
    ('35', "Estimate; Total: - Public transportation (excluding taxicab): - $25,000 to $34,999"),
    ('35_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $25,000 to $34,999"),
    ('50', "Estimate; Total: - Public transportation (excluding taxicab): - $35,000 to $49,999"),
    ('50_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $35,000 to $49,999"),
    ('65', "Estimate; Total: - Public transportation (excluding taxicab): - $50,000 to $64,999"),
    ('65_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $50,000 to $64,999"),
    ('75', "Estimate; Total: - Public transportation (excluding taxicab): - $65,000 to $74,999"),
    ('75_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $65,000 to $74,999"),
    ('75plus', "Estimate; Total: - Public transportation (excluding taxicab): - $75,000 or more"),
    ('75plus_moe', "Margin of Error; Total: - Public transportation (excluding taxicab): - $75,000 or more"),
]
for short_name, acs_label in cm1_labels:
    cm1[short_name] = cm1[acs_label]


In [112]:
#Margin-of-error columns that get rescaled and squared below
moes = [
    'total_moe',
    '10_moe', '15_moe', '25_moe', '35_moe',
    '50_moe', '65_moe', '75_moe', '75plus_moe',
]

#convert MOEs to 95% confidence level
def convert(column):
    """Return column multiplied by the factor 1.96/1.645."""
    scale = 1.96/1.645
    return scale*column

#square columns
def square(column):
    """Return column raised to the power 2 (element-wise for a Series)."""
    return pow(column, 2)

#For every MOE column: overwrite it with the rescaled value, then store the
#square of the rescaled value under '<name>2'
for moe_col in moes:
    rescaled = convert(cm1[moe_col])
    cm1[moe_col] = rescaled
    cm1[moe_col+'2'] = square(rescaled)
    

In [113]:
#Create Categories
#Each wider bracket is the sum of its component estimates. Its MOE is the square
#root of the summed squared component MOEs, and the squared MOE is stored again
#under '<bracket>_moe2'.
bracket_parts = [
    ('0to25', ['10', '15', '25']),
    ('_25to50', ['35', '50']),
    ('50to75', ['65', '75']),
]
for bracket, parts in bracket_parts:
    estimate = cm1[parts[0]]
    moe_sq = cm1[parts[0]+'_moe2']
    for part in parts[1:]:
        estimate = estimate + cm1[part]
        moe_sq = moe_sq + cm1[part+'_moe2']
    cm1[bracket] = estimate
    cm1[bracket+'_moe'] = moe_sq**0.5
    cm1[bracket+'_moe2'] = cm1[bracket+'_moe']**2

In [114]:
#list variables to include in sum: each estimate followed by its squared MOE
cms = []
for name in ['total', '0to25', '_25to50', '50to75', '75plus']:
    cms.extend([name, name+'_moe2'])

#one row per Fruitvale flag value, summing every listed column
excm1 = cm1.groupby('Fruitvale')[cms].agg('sum')

In [115]:
#Create new MOEs after sum
#Squared-MOE columns whose square roots are taken next
moe_list = [
    'total_moe2',
    '0to25_moe2',
    '_25to50_moe2',
    '50to75_moe2',
    '75plus_moe2',
]

def sq_root(column):
    """Return column raised to the power 0.5 (element-wise for a Series)."""
    return pow(column, 0.5)

for item in moe_list:
    #item[:-1] removes only the trailing '2'. str.strip('2') would also remove a
    #leading '2', which would corrupt a name such as '25_moe2'.
    excm1[item[:-1]] = sq_root(excm1[item])

In [117]:
#Create Percentages
pct_list = ['0to25', '_25to50', '50to75', '75plus']

def pct(column):
    """Return excm1[column] as a percentage (times 100) of excm1['total']."""
    share = excm1[column]/excm1['total']
    return 100*share

#one 'pct_<bracket>' column per income bracket
for bracket in pct_list:
    excm1['pct_'+bracket] = pct(bracket)

In [119]:
#Create New Proportions MOEs
#One 'pct_<bracket>_moe' column per bracket: prop_MOE of the bracket against the
#total, multiplied by 100
for bracket in ['0to25', '_25to50', '50to75', '75plus']:
    share_moe = prop_MOE(excm1[bracket], excm1['total'], excm1[bracket+'_moe'], excm1['total_moe'])
    excm1['pct_'+bracket+'_moe'] = 100*(share_moe)


In [121]:
#list data to include in export
export = [
    'pct_0to25', 'pct__25to50', 'pct_50to75', 'pct_75plus',
    'pct_0to25_moe', 'pct__25to50_moe', 'pct_50to75_moe', 'pct_75plus_moe',
]

#keep only the export columns and round them to two decimals
excm1 = excm1.loc[:, export].round(2)

In [122]:
#write the commute-by-income table to the python_output folder
export_path = cwd+'/Output/python_output/cm1_finalproject.csv'
excm1.to_csv(path_or_buf=export_path)

In [55]:
##############################
##Section 1: Poverty
##############################

#Import data: read the file at `poverty` through import_census, which also adds
#the 'Fruitvale' flag column and the duplicated Fruitvale rows.
pov = import_census(poverty)

In [56]:
#rename variables: copy the long ACS labels into short integer columns
pov_labels = [
    ('total', "Estimate; Total:"),
    ('total_moe', "Margin of Error; Total:"),
    ('poor', "Estimate; Income in the past 12 months below poverty level:"),
    ('poor_moe', "Margin of Error; Income in the past 12 months below poverty level:"),
]
for short_name, acs_label in pov_labels:
    pov[short_name] = pov[acs_label].astype(int)

#convert MOEs to 95% confidence interval
for moe_col in ('total_moe', 'poor_moe'):
    pov[moe_col] = pov[moe_col]*(1.96/1.645)

#prepare variables for collapse: squared MOEs are what gets summed per group
for moe_col in ('total_moe', 'poor_moe'):
    pov[moe_col+'2'] = pov[moe_col]**2

In [57]:
#sumdata: one row per Fruitvale flag value
sumvars = ['total', 'total_moe2', 'poor', 'poor_moe2']
pov_sum = pov.groupby('Fruitvale')[sumvars].agg('sum')
#group MOE = square root of the summed squared MOEs
for name in ('total', 'poor'):
    pov_sum[name+'_moe'] = pov_sum[name+'_moe2']**0.5
#share below poverty level and its margin of error (both still as proportions)
pov_sum['pct_poor'] = pov_sum['poor'].div(pov_sum['total'])
pov_sum['pct_poor_moe'] = prop_MOE(
    numerator=pov_sum['poor'],
    denominator=pov_sum['total'],
    num_moe=pov_sum['poor_moe'],
    den_moe=pov_sum['total_moe'],
)

In [58]:
#final cleaning
#Scale the two proportion columns to percentages first and round afterwards.
#Rounding to 4 decimals and then multiplying by 100 can leave floating-point
#residue (e.g. 28.490000000000002) in the exported CSV.
pct_cols = ['pct_poor', 'pct_poor_moe']
pov_sum[pct_cols] = 100*pov_sum[pct_cols]
#percent columns keep 2 decimals (same precision as before); all others keep 4
decimals = {col: 4 for col in pov_sum.columns}
decimals.update({col: 2 for col in pct_cols})
pov_sum = pov_sum.round(decimals)


In [59]:
export_path = cwd+'/Output/python_output/pov_finalproject.csv'
pov_sum.to_csv(export_path)

In [60]:
##############################
##Section 2: Education
##############################

In [19]:
#Import data: read the file at `education` through import_census, which also
#adds the 'Fruitvale' flag column and the duplicated Fruitvale rows.
ed = import_census(education)

In [20]:
#rename variables
#Pairs are (short name, ACS label stem). For every pair the "Estimate; <stem>"
#column is copied to <short> and the "Margin of Error; <stem>" column to
#<short>_moe, both cast to int. Each pair is listed once (the original cell
#assigned somec2 / somec2_moe twice).
ed_labels = [
    ('total', "Total:"),
    ('none', "Total: - No schooling completed"),
    ('prek', "Total: - Nursery school"),
    ('k', "Total: - Kindergarten"),
    ('1', "Total: - 1st grade"),
    ('2', "Total: - 2nd grade"),
    ('3', "Total: - 3rd grade"),
    ('4', "Total: - 4th grade"),
    ('5', "Total: - 5th grade"),
    ('6', "Total: - 6th grade"),
    ('7', "Total: - 7th grade"),
    ('8', "Total: - 8th grade"),
    ('9', "Total: - 9th grade"),
    ('10', "Total: - 10th grade"),
    ('11', "Total: - 11th grade"),
    ('12', "Total: - 12th grade, no diploma"),
    ('high', "Total: - Regular high school diploma"),
    ('ged', "Total: - GED or alternative credential"),
    ('somec', "Total: - Some college, less than 1 year"),
    ('somec2', "Total: - Some college, 1 or more years, no degree"),
    ('ass', "Total: - Associate's degree"),
    ('bachelors', "Total: - Bachelor's degree"),
    ('masters', "Total: - Master's degree"),
    ('professional', "Total: - Professional school degree"),
    ('doctorate', "Total: - Doctorate degree"),
]
for short_name, stem in ed_labels:
    ed[short_name] = ed["Estimate; " + stem].astype(int)
    ed[short_name+'_moe'] = ed["Margin of Error; " + stem].astype(int)

#count of people holding at least a bachelor's degree
ed['at_least'] = ed['bachelors']+ed['masters']+ed['professional']+ed['doctorate']


In [21]:
#list moes to transform
#(named ed_moe_cols rather than `list` so the builtin list type is not shadowed)
ed_moe_cols = ['none_moe', 'prek_moe', 'k_moe', '1_moe', '2_moe', '3_moe',
        '4_moe', '5_moe', '6_moe', '7_moe', '8_moe', '9_moe', '10_moe',
        '11_moe', '12_moe', 'high_moe', 'ged_moe', 'somec_moe',
               'somec2_moe', 'ass_moe', 'bachelors_moe',
              'masters_moe', 'professional_moe', 'doctorate_moe',
              'total_moe']

#convert MOEs to 95% confidence level
def convert(column):
    """Return column multiplied by the factor 1.96/1.645."""
    return column*(1.96/1.645)

#square columns
def square(column):
    """Return column raised to the power 2 (element-wise for a Series)."""
    return column**2

#rescale each MOE column in place, then store its square under '<name>2'
for item in ed_moe_cols:
    ed[item] = convert(ed[item])
    ed[item+'2']= square(ed[item])

In [22]:
#Highest completion columns
#Each entry is (new column, estimate columns summed in order, MOE stems whose
#squared columns are summed in order). The group MOE is the square root of that
#sum, and its square is stored again under '<group>_moe2'.
attainment_groups = [
    ('nhigh',
     ['none', 'prek', 'k', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'],
     ['none', 'prek', 'k', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']),
    ('highc',
     ['high', 'ged', 'somec', 'somec2'],
     ['high', 'ged', 'somec', 'somec2']),
    ('pro',
     ['ass', 'professional'],
     ['ass', 'professional']),
    ('edu',
     ['bachelors', 'masters', 'doctorate'],
     ['masters', 'doctorate', 'bachelors']),
]
for group, est_parts, moe_parts in attainment_groups:
    estimate = ed[est_parts[0]]
    for part in est_parts[1:]:
        estimate = estimate + ed[part]
    moe_sq = ed[moe_parts[0]+'_moe2']
    for part in moe_parts[1:]:
        moe_sq = moe_sq + ed[part+'_moe2']
    ed[group] = estimate
    ed[group+'_moe'] = moe_sq**0.5
    ed[group+'_moe2'] = ed[group+'_moe']**2

In [67]:
#test code to check summation: show the component columns and highc for one tract
check_cols = ['high' , 'ged', 'somec','somec2',  'ass','bachelors', 'masters', 'professional', 'doctorate', 'highc']
ed.loc[ed['Id2'] == 6001406100, check_cols]

Unnamed: 0,high,ged,somec,somec2,ass,bachelors,masters,professional,doctorate,highc
62,367,19,126,455,127,484,276,26,36,967
62,367,19,126,455,127,484,276,26,36,967


In [23]:
#list variables to include in sum: each estimate followed by its squared MOE
eds = []
for name in ['total', 'nhigh', 'highc', 'pro', 'edu']:
    eds.extend([name, name+'_moe2'])

#one row per Fruitvale flag value, summing every listed column
exed = ed.groupby('Fruitvale')[eds].agg('sum')

In [24]:
#Create new MOEs after sum
moe_list = ['total_moe2', 'nhigh_moe2', 'highc_moe2', 'pro_moe2', 'edu_moe2']

def sq_root(column):
    """Return column raised to the power 1/2 (element-wise for a Series)."""
    return column**(1/2)

for item in moe_list:
    #item[:-1] removes only the trailing '2'. str.strip('2') would also remove a
    #leading '2' from a column name.
    exed[item[:-1]] = sq_root(exed[item])

In [25]:
#Create Percentages
pct_list = ['nhigh', 'highc', 'pro', 'edu']

def pct(column):
    """Return exed[column] as a percentage (times 100) of exed['total']."""
    share = exed[column]/exed['total']
    return 100*share

#one 'pct_<group>' column per attainment group
for group in pct_list:
    exed['pct_'+group] = pct(group)

In [26]:
#Create New Proportions MOEs
#One 'pct_<group>_moe' column per group: prop_MOE of the group against the
#total, multiplied by 100
for group in ['nhigh', 'highc', 'pro', 'edu']:
    share_moe = prop_MOE(exed[group], exed['total'], exed[group+'_moe'], exed['total_moe'])
    exed['pct_'+group+'_moe'] = 100*(share_moe)


In [27]:
#list data to include in export
export = [
    'total', 'nhigh', 'highc', 'pro', 'edu',
    'total_moe', 'nhigh_moe', 'highc_moe', 'pro_moe', 'edu_moe',
    'pct_nhigh', 'pct_highc', 'pct_pro', 'pct_edu',
    'pct_nhigh_moe', 'pct_highc_moe', 'pct_pro_moe', 'pct_edu_moe',
]

#keep only the export columns and round them to two decimals
exed = exed.loc[:, export].round(2)

In [28]:
#write the education table to the python_output folder
export_path = cwd+'/Output/python_output/ed_finalproject.csv'
exed.to_csv(path_or_buf=export_path)

In [74]:
##############################
##Section 3: Industry
##############################
#Import data: read the file at `industry` through import_census, which also
#adds the 'Fruitvale' flag column and the duplicated Fruitvale rows.
ind = import_census(industry)

In [None]:
#display the first rows of the imported industry table
ind.head()

In [76]:
#rename variables: copy the long ACS labels into short integer columns.
#Pairs are (short name, ACS label); each estimate is followed by its margin of error.
ind_labels = [
    ('total', "Estimate; Total:"),
    ('total_moe', "Margin of Error; Total:"),
    ('ag', "Estimate; Total: - Agriculture, forestry, fishing and hunting, and mining"),
    ('ag_moe', "Margin of Error; Total: - Agriculture, forestry, fishing and hunting, and mining"),
    ('const', "Estimate; Total: - Construction"),
    ('const_moe', "Margin of Error; Total: - Construction"),
    ('manu', "Estimate; Total: - Manufacturing"),
    ('manu_moe', "Margin of Error; Total: - Manufacturing"),
    ('wholesale', "Estimate; Total: - Wholesale trade"),
    ('wholesale_moe', "Margin of Error; Total: - Wholesale trade"),
    ('retail', "Estimate; Total: - Retail trade"),
    ('retail_moe', "Margin of Error; Total: - Retail trade"),
    ('trans', "Estimate; Total: - Transportation and warehousing, and utilities"),
    ('trans_moe', "Margin of Error; Total: - Transportation and warehousing, and utilities"),
    ('info', "Estimate; Total: - Information"),
    ('info_moe', "Margin of Error; Total: - Information"),
    ('fin', "Estimate; Total: - Finance and insurance, and real estate and rental and leasing"),
    ('fin_moe', "Margin of Error; Total: - Finance and insurance, and real estate and rental and leasing"),
    ('pro', "Estimate; Total: - Professional, scientific, and management, and administrative and waste management services"),
    ('pro_moe', "Margin of Error; Total: - Professional, scientific, and management, and administrative and waste management services"),
    ('ed', "Estimate; Total: - Educational services, and health care and social assistance"),
    ('ed_moe', "Margin of Error; Total: - Educational services, and health care and social assistance"),
    ('art', "Estimate; Total: - Arts, entertainment, and recreation, and accommodation and food services"),
    ('art_moe', "Margin of Error; Total: - Arts, entertainment, and recreation, and accommodation and food services"),
    ('oth', "Estimate; Total: - Other services (except public administration)"),
    ('oth_moe', "Margin of Error; Total: - Other services (except public administration)"),
    ('pub', "Estimate; Total: - Public administration"),
    ('pub_moe', "Margin of Error; Total: - Public administration"),
    ('arm', "Estimate; Total: - Armed forces"),
    ('arm_moe', "Margin of Error; Total: - Armed forces"),
]
for short_name, acs_label in ind_labels:
    ind[short_name] = ind[acs_label].astype(int)


In [77]:
#list moes to transform
#(named ind_moe_cols rather than `list` so the builtin list type is not shadowed)
ind_moe_cols = ['total_moe', 'ag_moe', 'const_moe',
               'manu_moe', 'wholesale_moe', 'retail_moe',
              'trans_moe', 'info_moe', 'fin_moe',
              'pro_moe', 'ed_moe', 'art_moe', 'oth_moe',
               'pub_moe', 'arm_moe']


#convert MOEs to 95% confidence level
def convert(column):
    """Return column multiplied by the factor 1.96/1.645."""
    return column*(1.96/1.645)

#square columns
def square(column):
    """Return column raised to the power 2 (element-wise for a Series)."""
    return column**2

#rescale each MOE column in place, then store its square under '<name>2'
for item in ind_moe_cols:
    ind[item] = convert(ind[item])
    ind[item+'2']= square(ind[item])

In [78]:
#list values to collapse: each estimate followed by its squared MOE
inds = []
for name in ['total', 'ag', 'const', 'manu', 'wholesale', 'retail', 'trans',
             'info', 'fin', 'pro', 'ed', 'art', 'oth', 'pub', 'arm']:
    inds.extend([name, name+'_moe2'])

#one row per Fruitvale flag value, summing every listed column
exind = ind.groupby('Fruitvale')[inds].agg('sum')

In [79]:
#Create new MOEs after sum
moe_list = ['total_moe2','ag_moe2', 'const_moe2',
              'manu_moe2',  'wholesale_moe2', 'retail_moe2',
              'trans_moe2', 'info_moe2', 'fin_moe2',
              'pro_moe2',  'ed_moe2',  'art_moe2', 'oth_moe2',
               'pub_moe2', 'arm_moe2']

def sq_root(column):
    """Return column raised to the power 1/2 (element-wise for a Series)."""
    return column**(1/2)

for item in moe_list:
    #item[:-1] removes only the trailing '2'. str.strip('2') would also remove a
    #leading '2' from a column name.
    exind[item[:-1]] = sq_root(exind[item])

In [80]:
#Create Percentages
pct_list = [
    'ag', 'const', 'manu', 'wholesale', 'retail',
    'trans', 'info', 'fin', 'pro', 'ed',
    'art', 'oth', 'pub', 'arm',
]

def pct(column):
    """Return exind[column] as a percentage (times 100) of exind['total']."""
    share = exind[column]/exind['total']
    return 100*share

#one 'pct_<industry>' column per industry
for industry_name in pct_list:
    exind['pct_'+industry_name] = pct(industry_name)

In [81]:
#Create New Proportions MOEs
#One 'pct_<industry>_moe' column per entry of pct_list: prop_MOE of the industry
#against the total, multiplied by 100
for industry_name in pct_list:
    share_moe = prop_MOE(exind[industry_name], exind['total'], exind[industry_name+'_moe'], exind['total_moe'])
    exind['pct_'+industry_name+'_moe'] = 100*(share_moe)


In [82]:
#display the first rows of exind
exind.head()

Unnamed: 0_level_0,total,total_moe2,ag,ag_moe2,const,const_moe2,manu,manu_moe2,wholesale,wholesale_moe2,...,pct_retail_moe,pct_trans_moe,pct_info_moe,pct_fin_moe,pct_pro_moe,pct_ed_moe,pct_art_moe,pct_oth_moe,pct_pub_moe,pct_arm_moe
Fruitvale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,701525,33993270.0,2669,276392.474423,34196,2550498.0,73742,5000255.0,19966,1455984.0,...,0.317748,0.227174,0.164647,0.238324,0.365931,0.416676,0.335296,0.231793,0.187305,0.064199
True,22470,1883471.0,150,9768.590312,2725,325760.7,1652,162055.5,532,36135.69,...,1.761568,0.978333,0.589604,1.132159,1.882084,2.310414,2.473065,1.509837,0.754995,0.246603


In [83]:
#list data to include in export: percentages, their MOEs, then the raw counts
export = [
    'pct_ag', 'pct_const', 'pct_manu', 'pct_trans', 'pct_wholesale', 'pct_retail',
    'pct_info', 'pct_fin', 'pct_pro', 'pct_ed', 'pct_art', 'pct_oth',
    'pct_pub', 'pct_arm',
    'pct_ag_moe', 'pct_const_moe', 'pct_manu_moe', 'pct_trans_moe',
    'pct_wholesale_moe', 'pct_retail_moe', 'pct_info_moe', 'pct_fin_moe',
    'pct_pro_moe', 'pct_ed_moe', 'pct_art_moe', 'pct_oth_moe',
    'pct_pub_moe', 'pct_arm_moe',
    'ag', 'const', 'manu', 'wholesale', 'retail',
    'trans', 'info', 'fin', 'pro', 'ed', 'art', 'oth',
    'pub', 'arm',
]

#keep only the export columns and round them to two decimals
exind = exind.loc[:, export].round(2)

In [84]:
#write the industry table to the python_output folder
export_path = cwd+'/Output/python_output/id_finalproject.csv'
exind.to_csv(path_or_buf=export_path)