In [1]:
import pandas as pd
import yaml

# Import a list of states
from states import states as all_states

# Using GitHub
We'll use the data from the excellent `congress-legislators` database, which is maintained in this GitHub repo:

https://github.com/unitedstates/congress-legislators

In [2]:
# Load social data
# The first YAML document in the file is kept as `social`; the cell that
# builds df_social iterates over it, reading each record's 'social' and 'id'
# mappings.
# safe_load_all replaces load_all: it only constructs plain Python objects and
# needs no explicit Loader argument (which PyYAML >= 6 requires for load_all).
# The file is opened as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open('../../../data/congress-legislators/legislators-social-media.yaml', 'r',
          encoding='utf-8') as f:
    # safe_load_all is lazy, so the document must be pulled while f is open.
    social = next(yaml.safe_load_all(f))

In [3]:
# Load person data
# The first YAML document in the file is kept as `people`; the cell that
# builds df_people iterates over it, reading each record's 'id', 'name' and
# 'terms' entries.
# safe_load_all replaces load_all: it only constructs plain Python objects and
# needs no explicit Loader argument (which PyYAML >= 6 requires for load_all).
# The file is opened as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open('../../../data/congress-legislators/legislators-current.yaml', 'r',
          encoding='utf-8') as f:
    # safe_load_all is lazy, so the document must be pulled while f is open.
    people = next(yaml.safe_load_all(f))

In [4]:
def _social_row(entry):
    """Flatten one social-media record into a flat dict (one table row).

    Reads the 'twitter' and 'facebook' handles from ``entry['social']`` and
    the 'bioguide' and 'govtrack' identifiers from ``entry['id']``; any of the
    four that is absent becomes None.
    """
    accounts = entry['social']
    ids = entry['id']
    return {
        'bioguide': ids.get('bioguide'),
        'govtrack': ids.get('govtrack'),
        'twitter': accounts.get('twitter'),
        'facebook': accounts.get('facebook'),
    }


# One row per record in `social`.
df_social = pd.DataFrame([_social_row(entry) for entry in social])

In [5]:
def _person_row(entry):
    """Flatten one legislator record into a flat dict (one table row).

    The identifiers come from ``entry['id']`` (None when absent), the name
    from ``entry['name']['official_full']``, and party/state/chamber from the
    last element of ``entry['terms']`` (presumably the most recent term --
    verify against the data source). Chamber is that term's 'type' value.
    """
    ids = entry['id']
    bioguide_id = ids.get('bioguide')
    govtrack_id = ids.get('govtrack')
    full_name = entry['name']['official_full']
    last_term = entry['terms'][-1]
    return {
        'bioguide': bioguide_id,
        'govtrack': govtrack_id,
        'name': full_name,
        'party': last_term['party'],
        'state': last_term['state'],
        'chamber': last_term['type'],
    }


# One row per record in `people`.
df_people = pd.DataFrame([_person_row(entry) for entry in people])

In [83]:
def _with_single_id(df):
    """Return a copy of ``df`` where one ``id`` column replaces ``govtrack``/``bioguide``.

    The govtrack id is preferred; the bioguide id is used only in rows where
    the govtrack id is missing. The new ``id`` column is placed last.

    Raises:
        ValueError: if any row has neither identifier.
    """
    # pandas may store a missing govtrack id as NaN rather than None, and NaN
    # passes an ``is not None`` test. notna() treats both as missing, so the
    # bioguide fallback and the error below are actually reachable.
    has_govtrack = df['govtrack'].notna()
    unified = df['govtrack'].where(has_govtrack, df['bioguide'])
    if unified.isna().any():
        raise ValueError('Need one of the two...')
    # NOTE(review): if only one of the two frames needs the bioguide fallback,
    # their `id` dtypes will differ (numeric vs. object) -- confirm the later
    # merge on 'id' still behaves as intended in that case.
    return df.drop(['govtrack', 'bioguide'], axis=1).assign(id=unified)


# Build new frames instead of mutating with inplace=True / per-cell .loc writes.
df_social = _with_single_id(df_social)
df_people = _with_single_id(df_people)

In [101]:
# Join the social-media rows with the person rows on the shared `id`
# (inner join: only ids present in both frames are kept), then replace each
# `state` value with its entry in `all_states`. A value that is not a key of
# `all_states` raises KeyError.
df_all = (
    df_social
    .merge(df_people, how='inner', on='id')
    .assign(state=lambda merged: merged['state'].map(all_states.__getitem__))
)

# Save to disk
df_all.to_csv('../data/congressperson_all_info.csv')