In [4]:
from datetime import datetime
import json
from pprint import pprint
import sys

sys.path.append('..')

from votesmart import APIHandler

In [5]:
# Timestamp of this run; YEAR is used to tag the saved senator CSV.
DATE = datetime.now()
YEAR = DATE.year

# Folders, relative to the notebook: CONFIG holds keys.json, DATA receives output files.
CONFIG = '../config'
DATA = '../data'

In [6]:
def init_vs():
    """Create the Vote Smart API client.

    Reads ``{CONFIG}/keys.json``, takes the value stored under
    ``['votesmart']['APIKey']`` and passes it to ``APIHandler``.

    Returns:
        The ``APIHandler`` instance built from that key.
    """
    keys_path = f'{CONFIG}/keys.json'
    with open(keys_path) as key_file:
        stored_keys = json.load(key_file)
    return APIHandler(stored_keys['votesmart']['APIKey'])

In [7]:
# Build the API client from the stored key.
vs = init_vs()
# Current senators; 'df' presumably selects a DataFrame return format (see APIHandler).
senators_df = vs.get_current_senators('df')
print(senators_df.head())
print('Saving senator data...')
# One CSV per calendar year, written without the index column.
senators_df.to_csv(f'{DATA}/senators_{YEAR}.csv', index=False)
# Bios are looked up by candidateId string in the cells that follow.
senator_bios = vs.get_senator_bios()

Getting current senator data...
  candidateId firstName nickName middleName preferredName   lastName suffix  \
0       53273    Dianne                            Dianne  Feinstein          
0      120012    Kamala                  D.        Kamala     Harris          
0       53298     Mitch                             Mitch  McConnell          
0      117285      Rand                              Rand       Paul          
0       23644   Jeffery     Jeff         A.          Jeff    Merkley          

     title        ballotName electionParties  ... officeParties officeStatus  \
0  Senator                                    ...    Democratic       active   
0  Senator  Kamala D. Harris      Democratic  ...    Democratic       active   
0  Senator                                    ...    Republican       active   
0  Senator                                    ...    Republican       active   
0  Senator                                    ...    Democratic       active   

  officeDist

In [8]:
# candidateId of Mitch McConnell (see the senator preview above); used as the worked example.
MITCH_ID = '53298'
mitch = senator_bios[MITCH_ID]
mitch.keys()

dict_keys(['bio'])

In [9]:
# The senator record has a single 'bio' key; unwrap it and look at its sections.
bio = mitch['bio']
bio.keys()

dict_keys(['generalInfo', 'candidate', 'office'])

### bio > generalInfo
Not needed: it only holds the page title and a link back to votesmart.org.

In [10]:
bio['generalInfo'].keys()

dict_keys(['title', 'linkBack'])

In [11]:
bio['generalInfo']['title']

'Project Vote Smart - Bio -  Mitch McConnell'

In [12]:
bio['generalInfo']['linkBack']

'http://votesmart.org/bio.php?can_id=53298'

### bio > candidate

In [13]:
bio['candidate'].keys()

dict_keys(['candidateId', 'crpId', 'photo', 'firstName', 'nickName', 'middleName', 'preferredName', 'lastName', 'suffix', 'birthDate', 'birthPlace', 'pronunciation', 'gender', 'family', 'homeCity', 'homeState', 'education', 'profession', 'political', 'congMembership', 'orgMembership', 'religion', 'specialMsg'])

In [14]:
# Show string fields verbatim; for nested sections show only their type.
for k, v in bio['candidate'].items():
    print(f'{k}: {v if type(v) is str else type(v)}')

candidateId: 53298
crpId: N00003389
photo: https://static.votesmart.org/canphoto/53298.jpg
firstName: Mitch
nickName: 
middleName: 
preferredName: Mitch
lastName: McConnell
suffix: 
birthDate: 02/20/1942
birthPlace: Sheffield, AL
pronunciation: 
gender: Male
family: Wife: Elaine Chao; 3 Children: Elly, Claire, Porter
homeCity: Louisville
homeState: KY
education: <class 'dict'>
profession: <class 'dict'>
political: <class 'dict'>
congMembership: <class 'dict'>
orgMembership: <class 'dict'>
religion: Baptist
specialMsg: 


#### bio > candidate > education

In [15]:
bio['candidate']['education'].keys()

dict_keys(['institution'])

In [16]:
bio['candidate']['education']['institution']

[{'degree': 'JD',
  'field': '',
  'school': 'University of Kentucky Law School',
  'span': '1967',
  'gpa': '',
  'fullText': 'JD, University of Kentucky Law School, 1967'},
 {'degree': 'BA',
  'field': '',
  'school': 'University of Louisville',
  'span': '1964',
  'gpa': '',
  'fullText': 'BA, University of Louisville, 1964'}]

#### bio > candidate > profession

In [17]:
bio['candidate']['profession'].keys()

dict_keys(['experience'])

In [18]:
bio['candidate']['profession']['experience']

[{'title': 'Judge-Executive',
  'organization': 'Jefferson County',
  'span': '1978-1985',
  'special': '',
  'district': '',
  'fullText': 'Judge-Executive, Jefferson County, 1978-1985'},
 {'title': 'Acting Assistant',
  'organization': 'United States Attorney General',
  'span': '1975',
  'special': '',
  'district': '',
  'fullText': 'Acting Assistant, United States Attorney General, 1975'},
 {'title': 'Deputy Assistant',
  'organization': 'United States Attorney General',
  'span': '1974-1975',
  'special': '',
  'district': '',
  'fullText': 'Deputy Assistant, United States Attorney General, 1974-1975'},
 {'title': 'Chief Legislative Assistant',
  'organization': 'United States Senator Marlow Cook',
  'span': '1968-1970',
  'special': '',
  'district': '',
  'fullText': 'Chief Legislative Assistant, United States Senator Marlow Cook, 1968-1970'},
 {'title': 'Intern',
  'organization': 'Senator John Sherman Cooper',
  'span': '',
  'special': '',
  'district': '',
  'fullText': 'In

#### bio > candidate > political 

In [19]:
bio['candidate']['political'].keys()

dict_keys(['experience'])

In [20]:
bio['candidate']['political']['experience']

[{'title': 'Majority Leader',
  'organization': 'United States Senate',
  'span': '2015-present',
  'special': '',
  'district': '',
  'fullText': 'Majority Leader, United States Senate, 2015-present'},
 {'title': 'Minority Leader',
  'organization': 'United States Senate',
  'span': '2006-2015',
  'special': '',
  'district': '',
  'fullText': 'Minority Leader, United States Senate, 2006-2015'},
 {'title': 'Majority Whip',
  'organization': 'United States Senate',
  'span': '2002-2006',
  'special': '',
  'district': '',
  'fullText': 'Majority Whip, United States Senate, 2002-2006'},
 {'title': 'Senator',
  'organization': 'United States Senate',
  'span': '1985-present',
  'special': '',
  'district': '',
  'fullText': 'Senator, United States Senate, 1985-present'}]

#### bio > candidate > congMembership

In [21]:
bio['candidate']['congMembership'].keys()

dict_keys(['experience'])

In [22]:
bio['candidate']['congMembership']['experience']

[{'title': 'Chair',
  'organization': 'National Republican Senatorial Committee',
  'span': '1998, 2000',
  'special': '',
  'district': '',
  'fullText': 'Chair, National Republican Senatorial Committee, 1998, 2000'},
 {'title': 'Chair',
  'organization': 'Kentucky Task Force on Exploited and Missing Children',
  'span': '1982',
  'special': '',
  'district': '',
  'fullText': 'Chair, Kentucky Task Force on Exploited and Missing Children, 1982'},
 {'title': 'Founder',
  'organization': 'Kentucky Task Force on Exploited and Missing Children',
  'span': '1982',
  'special': '',
  'district': '',
  'fullText': 'Founder, Kentucky Task Force on Exploited and Missing Children, 1982'},
 {'title': 'Member',
  'organization': 'National Child Tragedies Coalition',
  'span': '1981',
  'special': '',
  'district': '',
  'fullText': 'Member, National Child Tragedies Coalition, 1981'},
 {'title': 'Vice Chair',
  'organization': 'Ethics Study Commission',
  'span': '',
  'special': '',
  'district':

#### bio > candidate > orgMembership

In [23]:
bio['candidate']['orgMembership'].keys()

dict_keys(['experience'])

In [24]:
bio['candidate']['orgMembership']['experience']

[{'title': 'Member',
  'organization': 'National Institute of Justice',
  'span': '1982-1984',
  'special': '',
  'district': '',
  'fullText': 'Member, National Institute of Justice, 1982-1984'},
 {'title': 'President',
  'organization': 'Kentucky Association of County Judge/Executives',
  'span': '1982',
  'special': '',
  'district': '',
  'fullText': 'President, Kentucky Association of County Judge/Executives, 1982'},
 {'title': 'Member',
  'organization': 'Kentucky Bar Association',
  'span': '1967',
  'special': '',
  'district': '',
  'fullText': 'Member, Kentucky Bar Association, 1967'},
 {'title': 'President',
  'organization': 'University of Kentucky Student Bar Association',
  'span': '',
  'special': '',
  'district': '',
  'fullText': 'President, University of Kentucky Student Bar Association'},
 {'title': 'President',
  'organization': 'University of Louisville Student Body',
  'span': '',
  'special': '',
  'district': '',
  'fullText': 'President, University of Louisvil

In [25]:
bio['office'].keys()

dict_keys(['name', 'parties', 'title', 'shortTitle', 'type', 'status', 'firstElect', 'lastElect', 'nextElect', 'termStart', 'termEnd', 'district', 'districtId', 'stateId', 'committee'])

In [26]:
# Show string fields verbatim; for nested values show only their type.
for k, v in bio['office'].items():
    print(f'{k}: {v if type(v) is str else type(v)}')

name: <class 'list'>
parties: Republican
title: Senator
shortTitle: Sen.
type: Congressional
status: active
firstElect: 11/06/1984
lastElect: 11/04/2014
nextElect: 2020
termStart: 01/03/1985
termEnd: 
district: Senior Seat
districtId: 20508
stateId: KY
committee: <class 'list'>


In [27]:
bio['office']['name']

['U.S. Senate', 'U.S. Senate']

In [28]:
bio['office']['committee']

[{'committeeId': '14073',
  'committeeName': 'Agriculture, Nutrition and Forestry'},
 {'committeeId': '25', 'committeeName': 'Appropriations'},
 {'committeeId': '42', 'committeeName': 'Rules and Administration'},
 {'committeeId': '4064',
  'committeeName': 'Subcommittee on Agriculture, Rural Development, Food and Drug Administration, and Related Agencies'},
 {'committeeId': '13907',
  'committeeName': 'Subcommittee on Commodities, Risk Management and Trade'},
 {'committeeId': '4066', 'committeeName': 'Subcommittee on Defense'},
 {'committeeId': '4067',
  'committeeName': 'Subcommittee on Energy nd Water Development'},
 {'committeeId': '4070',
  'committeeName': 'Subcommittee on Interior, Environment, and Related Agencies'},
 {'committeeId': '4073',
  'committeeName': 'Subcommittee on Military Construction, Veterans Affairs, and Related Agencies'},
 {'committeeId': '13911',
  'committeeName': 'Subcommittee on Nutrition, Agricultural Research, and Specialty Crops'},
 {'committeeId': '144

In [29]:
def flatten_other(other, pref):
    """Turn a section's list of experience records into parallel lists.

    Args:
        other: Dict with an ``'experience'`` key holding an iterable of dicts.
        pref: String prepended (with an underscore) to every field name.

    Returns:
        Dict mapping ``f'{pref}_{field}'`` to a list containing that field's
        value from each record, in record order. The ``'fullText'`` field is
        left out.
    """
    skipped = ('fullText',)
    columns = {}
    for record in other['experience']:
        for field, value in record.items():
            if field not in skipped:
                columns.setdefault(f'{pref}_{field}', []).append(value)
    return columns

In [30]:
def flatten_education(education, pref):
    """Turn the education section's institution records into parallel lists.

    Args:
        education: Dict with an ``'institution'`` key holding an iterable of
            dicts.
        pref: String prepended (with an underscore) to every field name.

    Returns:
        Dict mapping ``f'{pref}_{field}'`` to a list containing that field's
        value from each institution, in order. The ``'fullText'`` field is
        left out.
    """
    skipped = ('fullText',)
    columns = {}
    for school in education['institution']:
        for field, value in school.items():
            if field not in skipped:
                columns.setdefault(f'{pref}_{field}', []).append(value)
    return columns

In [31]:
def flatten_candidate(candidate):
    """Flatten the ``candidate`` section of a bio into a single-level dict.

    String values are copied under their original key. Every non-string value
    is treated as a nested section: it is flattened with
    ``flatten_education`` (for ``'education'``) or ``flatten_other`` (for
    everything else) using a short per-section prefix, and the resulting
    columns are merged in. The ``'pronunciation'`` field is skipped.

    Raises:
        KeyError: If a non-string field has no entry in the prefix table.
    """
    skipped = ('pronunciation',)
    section_prefix = {
        'education': 'edu',
        'profession': 'prof',
        'political': 'pol',
        'congMembership': 'cong_mem',
        'orgMembership': 'org_mem',
    }
    section_flattener = {'education': flatten_education}

    flat = {}
    for field, value in candidate.items():
        if field in skipped:
            continue
        if type(value) is not str:
            handler = section_flattener.get(field, flatten_other)
            flat.update(handler(value, pref=section_prefix[field]))
            continue
        flat[field] = value
    return flat

In [32]:
def flatten_office(office):
    """Flatten the ``office`` section of a bio into a single-level dict.

    String values are stored under ``off_<key>``. A non-string ``'name'`` is
    stored as-is under ``'off_name'``. A ``'committee'`` list is split into
    ``'committees'`` (names) and ``'committee_ids'``. ``'shortTitle'`` is
    skipped, and any other non-string value is dropped.
    """
    skipped = ('shortTitle',)
    flat = {}
    for field, value in office.items():
        if field in skipped:
            continue
        if type(value) is str:
            flat[f'off_{field}'] = value
            continue
        if field == 'name':
            flat['off_name'] = value
        elif field == 'committee':
            flat['committees'] = [entry['committeeName'] for entry in value]
            flat['committee_ids'] = [entry['committeeId'] for entry in value]
    return flat

In [33]:
def senator_to_df(senator_bio):
    """Flatten one senator's bio into a single row of string values.

    Despite the name, this returns a plain dict rather than a DataFrame.

    Args:
        senator_bio: Dict with ``'candidate'`` and ``'office'`` sections.

    Returns:
        Dict combining the flattened candidate columns and the flattened
        office columns (office values win on a key clash), with every value
        converted through ``str``.
    """
    merged = {
        **flatten_candidate(senator_bio['candidate']),
        **flatten_office(senator_bio['office']),
    }
    return {column: str(value) for column, value in merged.items()}
    
# Smoke-test the flattener on the example senator.
mitch_bio = senator_bios[MITCH_ID]['bio']
test = senator_to_df(mitch_bio)
print('\n\nTest:', test, sep='\n')



Test:
{'candidateId': '53298', 'crpId': 'N00003389', 'photo': 'https://static.votesmart.org/canphoto/53298.jpg', 'firstName': 'Mitch', 'nickName': '', 'middleName': '', 'preferredName': 'Mitch', 'lastName': 'McConnell', 'suffix': '', 'birthDate': '02/20/1942', 'birthPlace': 'Sheffield, AL', 'gender': 'Male', 'family': 'Wife: Elaine Chao; 3 Children: Elly, Claire, Porter', 'homeCity': 'Louisville', 'homeState': 'KY', 'edu_degree': "['JD', 'BA']", 'edu_field': "['', '']", 'edu_school': "['University of Kentucky Law School', 'University of Louisville']", 'edu_span': "['1967', '1964']", 'edu_gpa': "['', '']", 'prof_title': "['Judge-Executive', 'Acting Assistant', 'Deputy Assistant', 'Chief Legislative Assistant', 'Intern']", 'prof_organization': "['Jefferson County', 'United States Attorney General', 'United States Attorney General', 'United States Senator Marlow Cook', 'Senator John Sherman Cooper']", 'prof_span': "['1978-1985', '1975', '1974-1975', '1968-1970', '']", 'prof_special': "[

In [55]:
def get_all_keys_in_dict(dct, prefix='', keys=None):
    """Collect the dotted paths of every leaf key in a nested dict.

    Args:
        dct: Dict to walk. Values may be scalars, ``None``, dicts, or lists.
        prefix: Path of ``dct`` inside its parent, including the trailing
            dot; empty for the top level.
        keys: Optional set to add the paths to. A new set is created when
            omitted, so separate calls never share results.

    Returns:
        The set of paths (``keys`` itself when one was supplied). Nested
        dicts contribute ``parent.child`` paths that carry the full chain of
        ancestors. Dicts inside a list are walked under the list's own path;
        a list holding no dicts counts as a single leaf.
    """
    # A default of `set()` would be created once and shared by every call.
    if keys is None:
        keys = set()
    for k, v in dct.items():
        path = f'{prefix}{k}'
        if isinstance(v, dict):
            # Carry the whole path down, not just the immediate parent key.
            get_all_keys_in_dict(v, prefix=f'{path}.', keys=keys)
        elif isinstance(v, list) and any(isinstance(item, dict) for item in v):
            for item in v:
                if isinstance(item, dict):
                    get_all_keys_in_dict(item, prefix=f'{path}.', keys=keys)
        else:
            keys.add(path)
    return keys

In [56]:
def get_all_keys_in_list_of_dicts(list_of_dicts):
    """Return the union of the flattened key paths found across several dicts.

    Args:
        list_of_dicts: Iterable of dicts accepted by ``get_all_keys_in_dict``.

    Returns:
        A new set holding every key path seen in any of the dicts.
    """
    keys = set()
    for d in list_of_dicts:
        # Hand over our own accumulator rather than relying on the callee's
        # default `keys` argument, so results cannot leak between calls.
        get_all_keys_in_dict(d, keys=keys)
    return keys

In [148]:
def flatten(json_obj, prefix='', out=None):
    """Flatten a nested JSON-like object into a single-level dict.

    Keys are joined with underscores along the path from the root. The keys
    ``'generalInfo'``, ``'pronunciation'`` and ``'shortTitle'`` are skipped
    wherever they appear as dict keys on that path.

    Args:
        json_obj: A dict to flatten, or a bare string (appended to the list
            stored under ``prefix``).
        prefix: Path of ``json_obj`` inside its parent; empty for the root.
        out: Optional dict to write into. A new dict is created when omitted,
            so repeated calls do not accumulate into each other.

    Returns:
        ``out``, populated as follows:
          * scalars and ``None`` are stored under the joined path;
          * dicts are flattened recursively;
          * lists that are empty or whose first item is not a dict are stored
            unchanged under the joined path;
          * lists of dicts are merged column-wise: each field becomes
            ``<prefix>_<field>`` holding one value per item. The list's own
            key is not part of that name.
    """
    # A default of `{}` would be created once and shared by every call.
    if out is None:
        out = {}
    ignore = ['generalInfo', 'pronunciation', 'shortTitle']

    if type(json_obj) is str:
        out.setdefault(prefix, []).append(json_obj)
        return out

    for k, v in json_obj.items():
        if k in ignore:
            continue
        k_adj = f'{prefix}_{k}' if prefix else k
        if type(v) in [str, int, float, bool] or v is None:
            out[k_adj] = v
        elif type(v) is dict:
            flatten(v, k_adj, out)
        elif type(v) is list:
            # Guard the empty list before peeking at the first element.
            if not v or type(v[0]) is not dict:
                out[k_adj] = v
            else:
                for d in v:
                    for lk, lv in d.items():
                        # Same joining rule as k_adj: no leading underscore
                        # when there is no prefix.
                        lk_adj = f'{prefix}_{lk}' if prefix else lk
                        out.setdefault(lk_adj, []).append(lv)
    return out

In [149]:
mitch_flat = flatten(mitch_bio)

In [150]:
pprint(mitch_flat)

{'candidate_birthDate': '02/20/1942',
 'candidate_birthPlace': 'Sheffield, AL',
 'candidate_candidateId': '53298',
 'candidate_congMembership_district': ['', '', '', '', '', '', '', '', ''],
 'candidate_congMembership_fullText': ['Chair, National Republican Senatorial '
                                       'Committee, 1998, 2000',
                                       'Chair, Kentucky Task Force on '
                                       'Exploited and Missing Children, 1982',
                                       'Founder, Kentucky Task Force on '
                                       'Exploited and Missing Children, 1982',
                                       'Member, National Child Tragedies '
                                       'Coalition, 1981',
                                       'Vice Chair, Ethics Study Commission',
                                       'Former Member, Livestock, Marketing, '
                                       'and Agriculture Security Subcom