In [0]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Imports

In [0]:
import numpy as np
import pandas as pd
import requests

In [0]:
import os
import json
from copy import deepcopy

In [0]:
# NOTE: a stray `logging.basicConfig?` help lookup used to live in this cell. It ran
# before `import logging`, so it could only report that the object was not found.

Object `logging.basicConfig` not found.


In [0]:
import logging

# Root logger used throughout this notebook; adjust the level here for more or less detail.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Only let errors through from the requests library's own logger.
logging.getLogger("requests").setLevel(logging.ERROR)

# Timestamp / level / message layout for records emitted via the root logger.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

In [0]:
# Root of the OpenFEC v1 API. HTTPS is used because every request carries the API key
# as a query parameter, which plain HTTP would send in clear text.
BASE_URL = 'https://api.open.fec.gov/v1'

In [0]:
# Read the data.gov API key from a file under the user's home directory.
# The `with` block closes the file handle as soon as the key has been read.
with open(os.path.expanduser('~/.api-keys/data.gov'), 'r') as _key_file:
    API_KEY = _key_file.read().strip()

# Utils

In [0]:
def all_results(endpoint, params=None):
    """Yield every record from a paginated API endpoint, one record at a time.

    This is a generator: no request is made until iteration starts. The first
    response supplies the pagination metadata (`pages`, `count`, `page`); the
    remaining pages are then requested in order by setting the `page` parameter.

    Args:
        endpoint: Path appended to BASE_URL, e.g. '/candidates'.
        params: Optional dict of query parameters. It is copied, never mutated;
            the API key is added to the copy.

    Yields:
        Each element of the `results` list of every page.

    Raises:
        Whatever `Response.raise_for_status()` raises when a request comes back
        with an HTTP error status.
    """
    _params = deepcopy(params) if params is not None else {}
    _params.update({'api_key': API_KEY})
    _url = BASE_URL + endpoint
    logging.info('querying endpoint: {}'.format(_url))

    initial_resp = requests.get(_url, params=_params)
    # Fail on HTTP errors here rather than with an opaque KeyError on
    # 'pagination' when the body is an error payload.
    initial_resp.raise_for_status()

    # The final URL embeds the API key as a query parameter; mask it so the
    # secret never ends up in log output.
    full_url = initial_resp.url
    if API_KEY:
        full_url = full_url.replace(API_KEY, 'REDACTED')
    logging.debug('full url eg: {}'.format(full_url))

    initial_data = initial_resp.json()

    num_pages = initial_data['pagination']['pages']
    num_records = initial_data['pagination']['count']
    logging.info('{p} pages to be retrieved, with {n} records'.format(
            p=num_pages, n=num_records))

    current_page = initial_data['pagination']['page']
    logging.debug('page {} retrieved'.format(current_page))

    for record in initial_data['results']:
        yield record

    # Walk the remaining pages, starting after whichever page the API reported.
    while current_page < num_pages:
        current_page += 1
        _params.update({'page': current_page})
        page_resp = requests.get(_url, params=_params)
        page_resp.raise_for_status()
        _data = page_resp.json()
        logging.debug('page {} retrieved'.format(current_page))
        for record in _data['results']:
            yield record

    logging.info('all pages retrieved')
    
def count_results(endpoint, params=None):
    """Return the total record count the API reports for a query.

    Makes a single request and reads `pagination.count` from the response.

    Args:
        endpoint: Path appended to BASE_URL, e.g. '/candidates'.
        params: Optional dict of query parameters. It is copied, never mutated;
            the API key is added to the copy.

    Returns:
        The value of `pagination['count']` in the JSON response.

    Raises:
        Whatever `Response.raise_for_status()` raises when the request comes
        back with an HTTP error status.
    """
    _params = deepcopy(params) if params is not None else {}
    _params.update({'api_key': API_KEY})
    _url = BASE_URL + endpoint

    resp = requests.get(_url, params=_params)
    # Fail on HTTP errors here rather than with a KeyError on 'pagination'.
    resp.raise_for_status()

    return resp.json()['pagination']['count']

# Candidates

## API

In [0]:
# Query parameters selecting every candidate in the 2012 cycle.
q_all_2012_candidates = dict(cycle=2012)

In [0]:
# How many records does the API report for the 2012 candidate query?
count_results(endpoint='/candidates', params=q_all_2012_candidates)

3022

In [0]:
# Drain the paginated generator into a list so the records can be reused below.
candidates_2012 = list(all_results('/candidates', q_all_2012_candidates))

INFO:root:querying endpoint: http://api.open.fec.gov/v1/candidates
INFO:root:152 pages to be retrieved, with 3022 records
INFO:root:all pages retrieved


In [0]:
# Spot check: raw API records whose name contains 'OBAMA'.
[cand for cand in candidates_2012 if 'OBAMA' in cand['name']]

[{u'active_through': 2012,
  u'candidate_id': u'P80003338',
  u'candidate_status': u'N',
  u'candidate_status_full': u'Not yet a statutory candidate',
  u'cycles': [2008, 2010, 2012],
  u'district': None,
  u'election_years': [2008, 2012],
  u'incumbent_challenge': u'I',
  u'incumbent_challenge_full': u'Incumbent',
  u'name': u'OBAMA, BARACK',
  u'office': u'P',
  u'office_full': u'President',
  u'party': u'DEM',
  u'party_full': u'Democratic Party',
  u'state': u'US'}]

In [0]:
# One row per API record, one column per record key.
candidates_2012_df = pd.DataFrame(data=candidates_2012)

In [0]:
# Case-insensitive lookup of names matching 'romney' or 'obama'; transposed so each
# matching candidate is shown as a column.
candidates_2012_df.loc[
    candidates_2012_df['name'].str.match('romney|obama', case=False)
].T

Unnamed: 0,2011,2331
active_through,2012,2012
candidate_id,P80003338,P80003353
candidate_status,N,C
candidate_status_full,Not yet a statutory candidate,Statutory candidate
cycles,"[2008, 2010, 2012]","[2008, 2010, 2012]"
district,,
election_years,"[2008, 2012]","[2008, 2012]"
incumbent_challenge,I,C
incumbent_challenge_full,Incumbent,Challenger
name,"OBAMA, BARACK","ROMNEY, MITT / PAUL D. RYAN"


In [0]:
# Names of candidates with office 'P' whose candidate_status matches 'C'.
candidates_2012_df.loc[
    (candidates_2012_df['office'] == 'P')
    & candidates_2012_df['candidate_status'].str.match('C'),
    'name'
]

104               BACHMANN, MICHELE
141           BARR, ROSEANNE CHERRI
333                 BROWN, HARLEY D
1186                HERMAN, RAPHAEL
1208            HILL, CHRISTOPHER V
1404                   KARGER, FRED
1614                  LINDSAY, PETA
1769           MCCALL, JAMES HATTON
1838                MESPLAY, KENT P
2203        RAKOWITZ, ARTHUR FABIAN
2331    ROMNEY, MITT / PAUL D. RYAN
2697          TERRY, RANDALL A. MR.
2877          WELLS, ROBERT CARR JR
2909      WIFORD, SAMUEL TIMOTHY II
Name: name, dtype: object

## Bulk CSV (cn12.txt)

In [0]:
# Download the header row for the bulk candidate file (the data file itself has none).
resp = requests.get('http://www.fec.gov/finance/disclosure/metadata/cn_header_file.csv')
# Stop here on an HTTP error instead of parsing an error page as column names later.
resp.raise_for_status()

In [0]:
# Use the decoded text rather than the raw bytes: on Python 3 `resp.content` is
# bytes, and splitting it on the str ',' raises TypeError.
cn_headers = resp.text.strip().split(',')

In [0]:
# Load the pipe-delimited bulk candidate file, which has no header row of its own.
# District and election year are read as `object` so values such as '00' keep their
# leading zeros. Builtin `object` replaces the `np.object` alias, which newer NumPy
# releases no longer provide.
cn12 = pd.read_csv('_data/cn.txt', sep='|', header=None, names=cn_headers,
                   dtype={'CAND_OFFICE_DISTRICT': object, 'CAND_ELECTION_YR': object})

In [0]:
# Show the column names parsed from the downloaded header file.
cn_headers

['CAND_ID',
 'CAND_NAME',
 'CAND_PTY_AFFILIATION',
 'CAND_ELECTION_YR',
 'CAND_OFFICE_ST',
 'CAND_OFFICE',
 'CAND_OFFICE_DISTRICT',
 'CAND_ICI',
 'CAND_STATUS',
 'CAND_PCC',
 'CAND_ST1',
 'CAND_ST2',
 'CAND_CITY',
 'CAND_ST',
 'CAND_ZIP']

In [0]:
# Bulk-CSV column name -> the field name used for the same attribute in the API
# records. Listed in CSV column order; columns not listed here are not mapped.
map_fieldnames = dict([
    ('CAND_ID', 'candidate_id'),
    ('CAND_NAME', 'name'),
    ('CAND_PTY_AFFILIATION', 'party'),
    ('CAND_ELECTION_YR', 'cycle'),
    ('CAND_OFFICE_ST', 'state'),
    ('CAND_OFFICE', 'office'),
    ('CAND_OFFICE_DISTRICT', 'district'),
    ('CAND_ICI', 'incumbent_challenge'),
    ('CAND_STATUS', 'candidate_status'),
    ('CAND_PCC', 'principal_committee'),
])

In [0]:
# Rename the CSV columns listed in map_fieldnames into a new frame; `cn12` itself
# is left untouched, and columns absent from the mapping keep their original names.
cn12_rename = cn12.rename(columns=map_fieldnames)

In [0]:
# Left join on candidate_id: every CSV row is kept, and API columns are null where the
# API returned no record with that id. Column names present on both sides get a
# __CSV / __API suffix.
comparison = (
    cn12_rename
    .set_index('candidate_id')
    .join(
        other=candidates_2012_df.set_index('candidate_id'),
        how='left',
        lsuffix='__CSV',
        rsuffix='__API',
    )
)

In [0]:
# Office breakdown of the CSV rows that found a match in the API data.
comparison['office__API'].value_counts()

H    2066
P     427
S     423
dtype: int64

In [0]:
# Office breakdown of CSV rows whose election year (read as a string) is '2012'.
comparison.loc[comparison['cycle'] == '2012', 'office__CSV'].value_counts()

H    2595
S     481
P     438
dtype: int64

In [0]:
# Shape of the subset of rows with a non-null API office, i.e. rows that matched.
comparison.loc[comparison['office__API'].notnull()].shape

(2916, 28)

In [0]:
# Shape of the matched rows where the API and CSV disagree on candidate_status.
comparison.loc[
    comparison['candidate_status__API'].notnull()
    & (comparison['candidate_status__API'] != comparison['candidate_status__CSV'])
].shape

(1281, 28)

In [0]:
# Shape of the matched rows where the API and CSV disagree on office.
comparison.loc[
    comparison['office__API'].notnull()
    & (comparison['office__API'] != comparison['office__CSV'])
].shape

(0, 28)

In [0]:
# District breakdown of the CSV rows that have no API counterpart.
comparison.loc[comparison['name__API'].isnull(), 'district__CSV'].value_counts()

00    689
02    243
01    242
03    218
04    160
05    138
07    114
08    102
06     98
09     65
13     63
10     63
11     46
12     42
17     33
14     33
19     31
20     26
23     24
24     22
16     20
18     20
15     18
25     16
27     14
28     13
21     13
22     12
26     11
29     10
36     10
33      9
32      9
30      9
47      8
37      7
50      6
31      5
52      5
35      4
45      4
34      4
48      4
39      3
46      3
42      3
51      3
53      3
38      2
43      2
41      2
49      1
44      1
40      1
dtype: int64

In [0]:
# CSV rows with no API counterpart (the API-side name is null after the left join).
nonmatches = comparison.loc[comparison['name__API'].isnull()]

In [0]:
# Count unmatched rows per (district, office) pair.
nonmatches.pivot_table(
    values='name__CSV',
    index='district__CSV',
    columns='office__CSV',
    aggfunc=np.size,
)

office__CSV,H,P,S
district__CSV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,70,181.0,438.0
1,242,,
2,243,,
3,218,,
4,160,,
5,138,,
6,98,,
7,114,,
8,102,,
9,65,,


In [0]:
# Check whether any CSV row uses district "99".
cn12_rename.loc[cn12_rename['district'] == "99"]

Unnamed: 0,candidate_id,name,party,cycle,state,office,district,incumbent_challenge,candidate_status,principal_committee,CAND_ST1,CAND_ST2,CAND_CITY,CAND_ST,CAND_ZIP


In [0]:
# Same (district, office) count as for the unmatched rows, over every CSV row.
comparison.pivot_table(
    values='name__CSV',
    index='district__CSV',
    columns='office__CSV',
    aggfunc=np.size,
)

office__CSV,H,P,S
district__CSV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,129,608.0,861.0
1,423,,
2,468,,
3,366,,
4,321,,
5,258,,
6,238,,
7,223,,
8,201,,
9,160,,


# Committees

# 