## Application: 2000/2010 Political Campaign Contributions by Race

Using ethnicolr, we look to answer three basic questions:
<ol>
<li>What proportion of contributions were made by blacks, whites, Hispanics, and Asians? 
<li>What proportion of unique contributors were blacks, whites, Hispanics, and Asians?
<li>What proportion of total donations were given by blacks, whites, Hispanics, and Asians?
</ol>

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('/opt/names/fec_contrib/contribDB_2000.csv', nrows=100)
df.columns

Index([u'cycle', u'transaction_id', u'transaction_type', u'amount', u'date',
       u'bonica_cid', u'contributor_name', u'contributor_lname',
       u'contributor_fname', u'contributor_mname', u'contributor_suffix',
       u'contributor_title', u'contributor_ffname', u'contributor_type',
       u'contributor_gender', u'contributor_address', u'contributor_city',
       u'contributor_state', u'contributor_zipcode', u'contributor_occupation',
       u'contributor_employer', u'contributor_category',
       u'contributor_category_order', u'is_corp', u'organization_name',
       u'parent_organization_name', u'recipient_name', u'bonica_rid',
       u'recipient_party', u'recipient_type', u'recipient_state',
       u'recipient_category', u'recipient_category_order',
       u'recipient_district', u'seat', u'election_type',
       u'contributor_cfscore', u'candidate_cfscore', u'latitude', u'longitude',
       u'gis_confidence', u'contributor_district_90s',
       u'contributor_district_00s', u'co

In [3]:
from ethnicolr import census_ln

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


**Load and Subset on Individual Contributors**

In [4]:
df = pd.read_csv('/opt/names/fec_contrib/contribDB_2000.csv', usecols=['amount', 'contributor_type', 'contributor_lname', 'contributor_fname', 'contributor_name'])
sdf = df[df.contributor_type=='I'].copy()
rdf2000 = census_ln(sdf, 'contributor_lname', 2000)
rdf2000['year'] = 2000

df = pd.read_csv('/opt/names/fec_contrib/contribDB_2010.csv.zip', usecols=['amount', 'contributor_type', 'contributor_lname', 'contributor_fname', 'contributor_name'])
sdf = df[df.contributor_type=='I'].copy()
rdf2010 = census_ln(sdf, 'contributor_lname', 2010)
rdf2010['year'] = 2010

rdf = pd.concat([rdf2000, rdf2010])
rdf.head(20)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,amount,contributor_name,contributor_lname,contributor_fname,contributor_type,pctwhite,pctblack,pctapi,pctaian,pct2prace,pcthispanic,year
0,180.0,"JOHNSON, KENYIE PAUL",johnson,kenyie,I,61.55,33.8,0.42,0.91,1.82,1.50,2000
1,743.0,"KIRSCH, STEVEN T",kirsch,steven,I,97.32,0.16,0.34,0.32,0.63,1.23,2000
2,180.0,"MCCOY, TIMOTHY D",mccoy,timothy,I,69.0,26.39,0.38,0.81,1.73,1.69,2000
3,188.0,"WILLIAMS, VICTOR K",williams,victor,I,48.52,46.72,0.37,0.78,2.01,1.60,2000
4,211.0,"ELDER, CHESTER H",elder,chester,I,80.67,15.68,0.38,0.44,1.48,1.36,2000
5,13000.0,"MACARTHUR, GREG",macarthur,greg,I,92.36,2.32,0.62,1.11,1.83,1.76,2000
6,13972.0,"ABELE, CHRIS",abele,chris,I,97.03,0.58,(S),(S),0.71,1.23,2000
7,15000.0,"PRICE, SOL",price,sol,I,76.11,19.79,0.42,0.64,1.58,1.46,2000
8,13600.0,"KIRSCH, STEVEN T",kirsch,steven,I,97.32,0.16,0.34,0.32,0.63,1.23,2000
9,22146.0,"KIRSCH, STEVEN T",kirsch,steven,I,97.32,0.16,0.34,0.32,0.63,1.23,2000


In [5]:
rdf.replace('(S)', 0, inplace=True)

In [6]:
rdf[['pctwhite', 'pctblack', 'pctapi', 'pctaian', 'pct2prace', 'pcthispanic']] = rdf[['pctwhite', 'pctblack', 'pctapi', 'pctaian', 'pct2prace', 'pcthispanic']].astype(float)

In [8]:
gdf.apply(lambda r: r / r.sum(), axis=1).style.format("{:.2%}")

Unnamed: 0_level_0,hispanic,white,api,black
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000,3.24%,83.49%,2.22%,11.04%
2010,4.32%,82.71%,2.74%,10.22%


###  What proportion of contributons were by blacks, whites, Hispanics, and Asians?

In [9]:
rdf['white'] = rdf.pctwhite / 100.0
rdf['black'] = rdf.pctblack / 100.0
rdf['api'] = rdf.pctapi / 100.0
rdf['hispanic'] = rdf.pcthispanic / 100.0
gdf = rdf.groupby(['year']).agg({'white': 'sum', 'black': 'sum', 'api': 'sum', 'hispanic': 'sum'})
gdf.apply(lambda r: r / r.sum(), axis=1).style.format("{:.2%}")

Unnamed: 0_level_0,hispanic,white,api,black
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000,3.43%,83.53%,2.00%,11.03%
2010,5.49%,80.43%,2.75%,11.33%


### What proportion of the donors were blacks, whites, Hispanics, and Asians?

In [10]:
udf = rdf.drop_duplicates(subset=['contributor_name']).copy()
udf['white'] = udf.pctwhite / 100.0
udf['black'] = udf.pctblack / 100.0
udf['api'] = udf.pctapi / 100.0
udf['hispanic'] = udf.pcthispanic / 100.0
gdf = udf.groupby(['year']).agg({'white': 'sum', 'black': 'sum', 'api': 'sum', 'hispanic': 'sum'})
gdf.apply(lambda r: r / r.sum(), axis=1).style.format("{:.2%}")

Unnamed: 0_level_0,hispanic,white,api,black
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000,3.82%,83.61%,2.41%,10.16%
2010,6.12%,80.43%,3.69%,9.76%


### What proportion of the total donation was given by blacks, whites, Hispanics, and Asians?

In [12]:
rdf['white'] = rdf.amount * rdf.pctwhite / 100.0
rdf['black'] = rdf.amount * rdf.pctblack / 100.0
rdf['api'] = rdf.amount * rdf.pctapi / 100.0
rdf['hispanic'] = rdf.amount * rdf.pcthispanic / 100.0
gdf = rdf.groupby(['year']).agg({'white': 'sum', 'black': 'sum', 'api': 'sum', 'hispanic': 'sum'}) / 10e6
gdf.style.format("{:0.2f}")


Unnamed: 0_level_0,hispanic,white,api,black
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000,5.03,129.64,3.45,17.15
2010,17.06,326.51,10.83,40.35


In [13]:
gdf.apply(lambda r: r / r.sum(), axis=1).style.format("{:.2%}")

Unnamed: 0_level_0,hispanic,white,api,black
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000,3.24%,83.49%,2.22%,11.04%
2010,4.32%,82.71%,2.74%,10.22%
