#### Idea #1 - comparison of segregation in Boston vs Philly (now and prior)

Ways of measuring:

* Compare the percentage of geographies (from blocks up to ZIP codes) that are predominantly one race or ethnicity

* The same, but white vs. minority in general

* The same as either, but also considering each tract's population and its distance to the nearest tract whose predominant group differs from its own. Classify the tracts accordingly

In [453]:
# Imports for the whole notebook: standard library, third-party, project-local.
import os

import numpy as np   # used by reformat_race (np.where)
import pandas as pd  # used by every data-handling cell below
from census import Census
from us import states

import charts_function_list

# Project helper; `data` is later used as the working directory for CSV output.
base,data,outputs = charts_function_list.folder_setup()

# SECURITY: API keys should not live in a notebook. The CENSUS_API_KEY
# environment variable takes precedence; the literal fallback is kept only so
# existing runs keep working and should be rotated and removed.
key = os.environ.get("CENSUS_API_KEY", "3d0b37b61b0d8f27d82ee122b988a9b4b982ea17")
c = Census(key)

Some info on the `census` and `us` Python libraries, taken from:

https://github.com/datamade/census

## Description

#### ACS5 
state(fields, state_fips)


state_county(fields, state_fips, county_fips)


state_county_blockgroup(fields, state_fips, county_fips, blockgroup)


state_county_subdivision(fields, state_fips, county_fips, subdiv_fips)


state_county_tract(fields, state_fips, county_fips, tract)


state_place(fields, state_fips, place)


state_district(fields, state_fips, district)


us(fields)

zipcode(fields, zip5)

#### ACS1

ACS1 Geographies

state(fields, state_fips)

state_district(fields, state_fips, district)
us(fields)


## Datasets
* acs5: ACS 5 Year Estimates (2016, 2015, 2014, 2013, 2012, 2011, 2010)

* acs1dp: ACS 1 Year Estimates, Data Profiles (2016, 2015, 2014, 2013, 2012)

* sf1: Census Summary File 1 (2010, 2000, 1990)

* sf3: Census Summary File 3 (2000, 1990)

In [591]:
class state_things(object):
    """Convenience wrapper around ACS 5-year queries for a single state.

    Uses the module-level Census client ``c`` for every request.

    Attributes:
        state_name: the name passed to the constructor.
        state_code: the 'state' value of the API row whose 'NAME' equals
            ``state_name``.
        county_codes: dict mapping each 'county' value to its 'NAME' for
            the counties of this state.
    """

    def __init__(self,state_name):
        """Resolve the state's code and build its county code -> name mapping.

        Raises:
            ValueError: if no row returned by the API has exactly this NAME.
        """
        all_states = pd.DataFrame(c.acs5.state('NAME', Census.ALL))
        self.state_name = state_name
        matches = all_states.loc[all_states['NAME']==self.state_name, 'state']
        if matches.empty:
            # Previously surfaced as a bare IndexError from `.values[0]`.
            raise ValueError('No state named {!r} in the ACS5 state list'.format(state_name))
        self.state_code = matches.values[0]

        counties = pd.DataFrame(c.acs5.get('NAME', geo={'for': 'county:*',
                                                        'in': 'state:'+self.state_code}))
        self.county_codes = dict(zip(counties['county'],counties['NAME']))

    @staticmethod
    def _request_fields(additional_fields):
        """Return the fields argument: 'NAME' alone, or a tuple of 'NAME' plus extras."""
        # `is None` (not `== None`) so array-like field lists are not compared elementwise.
        if additional_fields is None:
            return 'NAME'
        return ('NAME',)+tuple(additional_fields)

    def search_for_county(self,county_name):
        """Return {county_code: county_name} for counties matching ``county_name``.

        Matching is a case-insensitive substring test against the stored
        county names. ``county_codes`` is a plain dict, so the previous
        DataFrame-style lookup (``['NAME'].str.contains``) always raised.
        """
        needle = county_name.lower()
        return {code: name for code, name in self.county_codes.items()
                if needle in name.lower()}

    def get_by_tract(self,county_code,additional_fields=None):
        """Return a DataFrame of all tracts in ``county_code`` for this state.

        Args:
            county_code: county code passed through to the API.
            additional_fields: optional iterable of field names requested in
                addition to 'NAME'.

        Returns:
            DataFrame of the API rows plus a 'county_map' column holding the
            county name looked up from ``self.county_codes``.
        """
        tract_df = pd.DataFrame(c.acs5.state_county_tract(self._request_fields(additional_fields),
                                                          self.state_code,county_code, Census.ALL))
        tract_df['county_map'] = tract_df['county'].map(self.county_codes)
        return tract_df

    def get_places(self,county_code,additional_fields=None):
        """Return a DataFrame of all places in this state.

        Args:
            county_code: accepted for backward compatibility; the request is
                state-wide and does not use it.
            additional_fields: optional iterable of field names requested in
                addition to 'NAME'.

        Returns:
            DataFrame of the API rows. A 'county_map' column is added only
            when the response carries a 'county' column.
        """
        if additional_fields is None:
            fields = 'NAME'
        else:
            fields = ('NAME',)+tuple(additional_fields)
        place_df = pd.DataFrame(c.acs5.state_place(fields, self.state_code, place=Census.ALL))
        # NOTE(review): place-level responses are not expected to include a
        # 'county' column, so mapping it unconditionally raised KeyError.
        if 'county' in place_df.columns:
            place_df['county_map'] = place_df['county'].map(self.county_codes)
        return place_df

#find fields in a particular table and return as a dictionary
def fields_in_table(table_number):
    """Return {field_name: label} for ACS5 fields whose name contains ``table_number``.

    Args:
        table_number: substring to look for in the field names, e.g. 'B03002'.

    Returns:
        dict mapping each matching field name to its 'label' entry, in the
        order the field catalogue lists them. Empty when nothing matches.
    """
    # Fetch the catalogue once; it was previously re-requested for every match.
    all_fields = c.acs5.fields()
    return {name: meta['label'] for name, meta in all_fields.items()
            if table_number in name}

#create categories
def reformat_race(df):
    """Rename B03002 columns, build combined race/ethnicity groups and their shares.

    Args:
        df: DataFrame holding the B03002 estimate columns listed in
            ``race_mapping`` (or their already-renamed equivalents) plus
            'NAME', 'county', 'state', 'tract' and 'county_map'.

    Returns:
        A new DataFrame (the input is not modified) with:
          * the renamed count columns and the identifier columns;
          * 'AK', 'API', 'OTHER_MIX', 'HISP' combined counts. Hispanic
            residents counted under AK/Asian/PI are kept in those groups and
            subtracted from 'HISP'; 'OTHER_MIX' sums only the non-Hispanic
            other and mixed counts;
          * '<GROUP>_PER' shares of 'TOTAL' for the six groups (not guarded
            against TOTAL == 0);
          * 'MAX_ETH_PER' / 'MAX_ETH': the largest share and its column name;
          * 'Above 70', 'Above 80', 'Above 90': 1 when the largest share is
            >= .7 / .8 / .9, else 0.

    Raises:
        KeyError: if any required column is missing.
    """
    race_mapping = {'B03002_001E':'TOTAL',
                    'B03002_003E':'WHITE', #White = White NH
                    'B03002_004E':'BLACK', #Black = Black NH
                    'B03002_005E':'AK_NH',
                    'B03002_006E':'ASIAN_NH',
                    'B03002_007E':'PI_NH',
                    'B03002_008E':'OTHER_NH',
                    'B03002_009E':'MIX_NH',
                    'B03002_012E':'HISP_ALL',
                    'B03002_013E':'WHITE_H',
                    'B03002_014E':'BLACK_H',
                    'B03002_015E':'AK_H',
                    'B03002_016E':'ASIAN_H',
                    'B03002_017E':'PI_H',
                    'B03002_018E':'OTHER_H',
                    'B03002_019E':'MIX_H'}
    count_columns = list(race_mapping.values())
    id_columns = ['NAME', 'county', 'state', 'tract','county_map']

    # .copy(): the derived columns below are written to an independent frame,
    # not to a slice (which triggered SettingWithCopyWarning).
    df = df.rename(columns=race_mapping)[count_columns+id_columns].copy()

    # Counts may arrive as strings depending on the client version; arithmetic
    # on strings would concatenate instead of add.
    for column in count_columns:
        df[column] = pd.to_numeric(df[column])

    df['AK'] = df['AK_NH']+df['AK_H']
    df['API']= df['ASIAN_H']+df['ASIAN_NH']+df['PI_H']+df['PI_NH']
    df['OTHER_MIX']=df['OTHER_NH']+df['MIX_NH']
    df['HISP'] = df['HISP_ALL']-df['AK_H']-df['PI_H']-df['ASIAN_H']

    # Shares are assigned column by column rather than merged on the index:
    # an index merge multiplies rows whenever the index holds duplicates.
    percent_columns = []
    for group in ['WHITE','BLACK','API','AK','HISP','OTHER_MIX']:
        percent_column = group+'_PER'
        df[percent_column] = df[group]/df['TOTAL']
        percent_columns.append(percent_column)

    percent_frame = df[percent_columns]
    df['MAX_ETH_PER']= percent_frame.max(axis=1)
    df['MAX_ETH']= percent_frame.idxmax(axis=1)

    # The largest share is already in MAX_ETH_PER; reuse it for every threshold.
    for label, threshold in (('Above 70',.7),('Above 80',.8),('Above 90',.9)):
        df[label] = np.where(df['MAX_ETH_PER']>=threshold,1,0)
    return df

In [592]:
# One state_things wrapper per state studied, created in the order PA, MA, CA.
pa, ma, ca = (
    state_things(state_name=full_name)
    for full_name in ('Pennsylvania', 'Massachusetts', 'California')
)

In [None]:
# {field name: label} for every field in the Race by Hispanic Origin table
hisp_race_dict = fields_in_table(table_number='B03002')

# only the field names are needed to request the data
fields_we_want = [field_name for field_name in hisp_race_dict]

In [593]:
# Fetch tract-level data and derive the race/ethnicity columns, in this order:
#   Philadelphia County (PA, county 101)
#   Suffolk County      (MA, county 025)
#   Los Angeles County  (CA, county 037)
philly_county_data, suffolk_county_data, la_county_data = (
    reformat_race(state.get_by_tract(county_code=county_fips,
                                     additional_fields=fields_we_want))
    for state, county_fips in ((pa, '101'), (ma, '025'), (ca, '037'))
)


In [603]:
# Stack the Philadelphia and Suffolk tract tables; each frame keeps its own
# row index, so index values can repeat in the combined frame.
philly_suffolk = pd.concat([philly_county_data,suffolk_county_data])
# NOTE: this changes the working directory for every later cell as well.
os.chdir(data)
# Relative path, so the file lands in the directory selected just above.
philly_suffolk.to_csv('philly_suffolk_tract.csv')

In [499]:
# Share of tracts flagged 'Above 90' (largest group >= 90%), two decimals per county.
for heading, tracts in (('Philly', philly_county_data),
                        ('\n'+'Boston', suffolk_county_data)):
    print(heading)
    flagged_share = tracts['Above 90'].sum()/len(tracts)
    print(format(flagged_share,'.2f'))

Philly
0.18

Boston
0.07


In [510]:
#percent of population that lives in a tract that is >=x% of one ethnic group
def homogenous_pop(df,threshold_column):
    """Return the percent of 'TOTAL' living in rows flagged by ``threshold_column``.

    Rows count as flagged when ``threshold_column`` equals 1. The result is a
    string formatted with one decimal place.
    """
    is_flagged = df[threshold_column]==1
    flagged_population = df.loc[is_flagged,'TOTAL'].sum()
    share = flagged_population/df['TOTAL'].sum()
    return format(share*100,'.1f')

In [514]:
# Percent of Philadelphia County residents living in tracts flagged 'Above 90'.
homogenous_pop(philly_county_data,'Above 90')

'18.4'

In [515]:
# Percent of Suffolk County residents living in tracts flagged 'Above 90'.
homogenous_pop(suffolk_county_data,'Above 90')

'5.4'

In [516]:
# Percent of LA County residents living in tracts flagged 'Above 90'.
homogenous_pop(la_county_data,'Above 90')

'7.9'

# Some extra stuff

In [None]:
#For those who are non-white and Hispanic, which to include within the "hispanic" group? Let's assess

# For each county, print the mean per-tract share of HISP_ALL held by each
# Hispanic sub-group. Column names must match the upper-case names produced by
# reformat_race; the frames are already reformatted, so they are used directly.
for counties in [la_county_data, philly_county_data, suffolk_county_data]:
    for item in ['WHITE_H','BLACK_H','AK_H','ASIAN_H','PI_H','OTHER_H','MIX_H']:
        print(item+' '+format((counties[item]/counties['HISP_ALL']).mean()*100,'.1f')+'%')
    print('------')
    

In [None]:
## extra tutorial stuff

# Field requested in the example below; it must be assigned before the call
# (it was previously used before assignment, and the call was duplicated).
table_name ='B25034_010E'

#The get method is the core data access method on both the ACS and SF1 data sets.
#The first parameter is either a single string column or a tuple of columns.
#The second parameter is a geometry dict with a `for` key and an optional `in` key.
c.acs5.get(('NAME', table_name),
          {'for': 'state:{}'.format(states.MD.fips)},year=2011) #set different years