# Figure 5
This notebook produces the results for "Figure 5. Income Distribution of Transit Users".

In [1]:
#Import packages
import pandas as pd
import matplotlib as mpl
import numpy as np
import math
import os
pd.options.mode.chained_assignment = None #gets rid of warning for chained alterations

In [2]:
#set directories
# The project folder defaults to the original author's location, but can be
# overridden by setting the FRUITVALE_ROOT environment variable so the notebook
# can be re-run on another machine without editing this cell.
os.chdir(os.environ.get("FRUITVALE_ROOT", "/Users/briangoggin/Dropbox/CP 201A/Fruitvale"))
cwd = os.getcwd()  # also used later to build the export path
root = cwd #root is directory to raw files

In [3]:
#set input data source
# Raw ACS extract read by import_census (file name suggests 2014 5-year table B08119)
acs_table = "ACS_14_5YR_B08119_with_ann.csv"
cmincome1 = root + "/Raw Data/" + acs_table


In [4]:
#identify Fruitvale Census Tracts
# Tract identifiers that import_census matches against the 'Id2' column.
Fruitvale = [
    6001406100, 6001406201, 6001406202, 6001406300,
    6001406500, 6001407101, 6001407102, 6001407200,
    6001407300, 6001407400, 6001407500, 6001407600,
]


In [5]:
#Define function for import and standard ACS cleaning operations
def import_census(file, tracts=None):
    """Read an ACS csv and flag the rows that belong to the study area.

    Parameters
    ----------
    file : str
        Path to the csv. The second line of the file is used as the header
        (``header=1``), and the data must contain an 'Id2' column.
    tracts : list, optional
        Identifiers to match against 'Id2'. Defaults to the module-level
        ``Fruitvale`` list.

    Returns
    -------
    pandas.DataFrame
        Every original row with a boolean 'Fruitvale' column, followed by a
        second copy of the flagged rows with 'Fruitvale' set to False. The
        duplicates make a ``groupby('Fruitvale')`` put the study-area tracts
        into the False group as well, so that group covers all rows of the file.
        The original row index is kept, so index labels of the duplicated
        rows repeat.
    """
    if tracts is None:
        tracts = Fruitvale
    df = pd.read_csv(file, header=1)
    df['Fruitvale'] = df['Id2'].isin(tracts)
    # Independent copy of the flagged rows, relabelled as not-Fruitvale.
    # (assign returns a new frame, so no chained-assignment workaround is needed.)
    county_copies = df[df['Fruitvale']].assign(Fruitvale=False)
    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
    return pd.concat([df, county_copies])

#Define function for proportions MOE
def prop_MOE(numerator, denominator, num_moe, den_moe):
    """Margin of error of the proportion ``numerator / denominator``.

    Computes ``sqrt(num_moe**2 - p**2 * den_moe**2) / denominator`` with
    ``p = numerator / denominator``. When the value under the root is negative
    the proportion formula has no real solution; following Census Bureau ACS
    guidance the ratio formula (plus sign instead of minus) is used for those
    entries instead of returning NaN.

    Parameters
    ----------
    numerator, denominator : scalar or pandas Series/DataFrame
        Subgroup estimate and total estimate.
    num_moe, den_moe : scalar or pandas Series/DataFrame
        Margins of error of the numerator and the denominator.

    Returns
    -------
    Same kind as the inputs: the MOE of the proportion (as a fraction, not
    a percentage).
    """
    proportion = numerator / denominator
    denominator_term = (proportion ** 2) * (den_moe ** 2)
    radicand = num_moe ** 2 - denominator_term
    ratio_radicand = num_moe ** 2 + denominator_term
    if isinstance(radicand, (pd.Series, pd.DataFrame)):
        # mask keeps the pandas index/labels intact
        radicand = radicand.mask(radicand < 0, ratio_radicand)
    else:
        radicand = np.where(radicand < 0, ratio_radicand, radicand)
    return np.sqrt(radicand) / denominator

In [6]:
#Import data
# Load the ACS extract; rows come back with the boolean 'Fruitvale' flag added
cm1 = import_census(file=cmincome1)

In [7]:
#rename variables
# Each short name gets two columns copied from the ACS headers:
#   <short>      <- "Estimate; <transit label><bracket>"
#   <short>_moe  <- "Margin of Error; <transit label><bracket>"
# An empty bracket selects the all-incomes public-transportation total.
transit_label = "Total: - Public transportation (excluding taxicab):"
income_brackets = [
    ('total', ""),
    ('10', " - $1 to $9,999 or loss"),
    ('15', " - $10,000 to $14,999"),
    ('25', " - $15,000 to $24,999"),
    ('35', " - $25,000 to $34,999"),
    ('50', " - $35,000 to $49,999"),
    ('65', " - $50,000 to $64,999"),
    ('75', " - $65,000 to $74,999"),
    ('75plus', " - $75,000 or more"),
]

for short_name, bracket in income_brackets:
    cm1[short_name] = cm1["Estimate; " + transit_label + bracket]
    cm1[short_name + '_moe'] = cm1["Margin of Error; " + transit_label + bracket]


In [8]:
#Create MOEs and prepare them for collapsing across observations
# Names of the margin-of-error columns that get rescaled and squared below
moes = [
    prefix + '_moe'
    for prefix in ('total', '10', '15', '25', '35', '50', '65', '75', '75plus')
]

#convert MOEs to 95% confidence level
def convert(column):
    """Rescale a margin of error by the factor 1.96/1.645.

    Works on scalars and on pandas columns alike; the input is not modified.
    """
    scale = 1.96/1.645
    return column*scale

#square columns
def square(column):
    """Return the element-wise square of a scalar or pandas column."""
    return pow(column, 2)

# Overwrite every MOE column with its rescaled value, then keep the square of
# that value in a companion column named '<column>2'.
# NOTE: the overwrite is cumulative, so running this cell twice rescales twice.
for moe_col in moes:
    rescaled = convert(cm1[moe_col])
    cm1[moe_col] = rescaled
    cm1[moe_col + '2'] = square(rescaled)

In [9]:
#Create Categories
# Broad income band -> the detailed bracket columns it adds together
income_bands = [
    ('0to25', ['10', '15', '25']),
    ('_25to50', ['35', '50']),
    ('50to75', ['65', '75']),
]

for band, brackets in income_bands:
    estimate = cm1[brackets[0]]
    moe_sq_sum = cm1[brackets[0] + '_moe2']
    for bracket in brackets[1:]:
        estimate = estimate + cm1[bracket]
        moe_sq_sum = moe_sq_sum + cm1[bracket + '_moe2']
    cm1[band] = estimate
    # MOE of the band is the root of the summed squared bracket MOEs
    cm1[band + '_moe'] = moe_sq_sum ** 0.5
    # squared again so it can be summed across rows in the next step
    cm1[band + '_moe2'] = cm1[band + '_moe'] ** 2

In [10]:
#list variables to include in sum
# Each estimate is paired with its squared MOE so both can be added across rows
cms = ['total', 'total_moe2']
for band in ('0to25', '_25to50', '50to75', '75plus'):
    cms.extend([band, band + '_moe2'])

# One row per value of the 'Fruitvale' flag, holding the column sums
by_area = cm1.groupby('Fruitvale')
excm1 = by_area[cms].sum()

In [11]:
#Create new MOEs after sum
# Summed squared-MOE columns that still need their square root taken
moe_list = [band + '_moe2' for band in ('total', '0to25', '_25to50', '50to75', '75plus')]

def sq_root(column):
    """Return the element-wise square root of a scalar or pandas column."""
    return column ** 0.5

# Turn each summed squared MOE back into an MOE, stored under the same name
# without the trailing '2'. Only the final character is removed: str.strip('2')
# would strip every leading and trailing '2', mangling any column whose name
# starts with '2'.
for item in moe_list:
    moe_name = item[:-1] if item.endswith('2') else item
    excm1[moe_name] = sq_root(excm1[item])

In [12]:
#Create Percentages
# Income bands that are expressed as a share of the 'total' column
pct_list = '0to25 _25to50 50to75 75plus'.split()

def pct(column):
    """Return the named excm1 column as a percentage of excm1['total'].

    Reads the module-level ``excm1`` frame; ``column`` is a column name.
    """
    share = excm1[column]/excm1['total']
    return 100*share

# Add one 'pct_<band>' column per income band
for band in pct_list:
    excm1['pct_' + band] = pct(band)

In [13]:
#Create New Proportions MOEs
# For every band: MOE of (band / total), scaled to percentage points and
# stored as 'pct_<band>_moe'.
for band in ('0to25', '_25to50', '50to75', '75plus'):
    band_moe = prop_MOE(excm1[band], excm1['total'], excm1[band + '_moe'], excm1['total_moe'])
    excm1['pct_' + band + '_moe'] = 100*band_moe


In [14]:
#list data to include in export
# Percentage columns first, then their margins of error in the same order
pct_columns = ['pct_0to25', 'pct__25to50', 'pct_50to75', 'pct_75plus']
export = pct_columns + [name + '_moe' for name in pct_columns]

#keep only the export columns and change everything to rounded percents
excm1 = excm1[export].round(2)

In [15]:
#export data to csv
# Destination is relative to the working directory captured in cwd
export_path = cwd + '/Output/python_output/cm1_finalproject.csv'
excm1.to_csv(path_or_buf=export_path)