In [110]:
import os
import requests
import json
import collections

import config

### First, get data about members of Congress from the [ProPublica Congress API](https://projects.propublica.org/api-docs/congress-api/).

In [3]:
def get_congress_api(chamber, congress=115):
    '''
    Build the ProPublica Congress API URL that lists the members of a chamber.

    chamber: str, either 'house' or 'senate'
    congress: int, number of the congress to query; defaults to 115, the
        value that used to be hard-coded in the URL

    returns: str, the URL of the members endpoint
    raises: ValueError if chamber is not 'senate' or 'house'
    '''
    if chamber not in ('senate', 'house'):
        raise ValueError("chamber can only take the values: 'senate' or 'house'")
    return "https://api.propublica.org/congress/v1/{}/{}/members.json".format(congress, chamber)

In [4]:
def get_congress_members(chamber, features=None, timeout=30):
    '''
    Fetch the member listing of one chamber from the ProPublica Congress API.

    chamber: str, either 'house' or 'senate'
    features: tuple, the features to include for each returned member;
        when None or empty every field sent by the API is kept
    timeout: number of seconds to wait for the server before giving up

    returns: dict, the first entry of the response's 'results' list, with
        its 'members' list trimmed to `features` when they are given
    raises: ValueError if chamber is invalid (from get_congress_api),
        requests.HTTPError if the API answers with an error status,
        KeyError if a requested feature is absent from a member
    '''
    headers = {'X-API-Key': config.congress_api_key}

    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(get_congress_api(chamber), headers=headers, timeout=timeout)
    # Fail here on HTTP errors (e.g. a rejected API key) instead of failing
    # later with an unrelated KeyError while parsing the error payload.
    r.raise_for_status()

    out = r.json()['results'][0]
    if features:
        out['members'] = [
            {feat: member[feat] for feat in features}
            for member in out['members']
        ]
    return out

In [108]:
def filter_members(members, required_features):
    '''
    Keep only the members for which every required feature has a truthy value.

    members: list, list of dicts where each represents a member of congress
    required_features: list, list of features that should not be None or empty
        (any falsy value, including False, is treated as missing)

    returns: tuple (kept, missing_counts) where kept is the list of members
        that passed and missing_counts is a defaultdict(int) mapping a feature
        to the number of members rejected because of it. Only the first
        missing feature of a rejected member is counted.
    '''
    missing_counts = collections.defaultdict(int)
    kept = []
    for candidate in members:
        for name in required_features:
            if not candidate[name]:
                # Stop at the first missing feature: the member is rejected
                # and charged to that feature only.
                missing_counts[name] += 1
                break
        else:
            # The loop ended without a break: nothing was missing.
            kept.append(candidate)
    return kept, missing_counts

In [98]:
# Fields kept for every member returned by the API.
features = [
    'id',
    'first_name',
    'last_name',
    'date_of_birth',
    'party',
    'twitter_account',
    'in_office',
    'ideal_point'
]

# ideal_point is not required. Exclude it by name rather than by position so
# that reordering or extending `features` cannot drop the wrong entry.
required_features = [feat for feat in features if feat != 'ideal_point']

In [99]:
# Request the member listing of each chamber, keeping only the fields named in `features`.
house = get_congress_members('house', features)
senate = get_congress_members('senate', features)

In [102]:
# Show the first member of each chamber as indented JSON to inspect the selected fields.
print("First house member:\n{}".format(json.dumps(house['members'][0], indent=2)))
print("First senate member:\n{}".format(json.dumps(senate['members'][0], indent=2)))

First house member:
{
  "id": "A000374",
  "first_name": "Ralph",
  "last_name": "Abraham",
  "date_of_birth": "1954-09-16",
  "party": "R",
  "twitter_account": "RepAbraham",
  "in_office": true,
  "ideal_point": null
}
First senate member:
{
  "id": "A000360",
  "first_name": "Lamar",
  "last_name": "Alexander",
  "date_of_birth": "1940-07-03",
  "party": "R",
  "twitter_account": "SenAlexander",
  "in_office": true,
  "ideal_point": null
}


In [105]:
# Report the 'num_results' value returned by the API for each chamber.
print('Num members house: {}'.format(house['num_results']))
print('Num members senate: {}'.format(senate['num_results']))

Num members house: 446
Num members senate: 101


In [112]:
# Drop members lacking any required feature; the second value returned counts,
# per feature, how many members were dropped because of it.
house_filter, house_missing_feats = filter_members(house['members'], required_features)
senate_filter, senate_missing_feats = filter_members(senate['members'], required_features)
# Number of members remaining in each chamber after filtering.
print("Members house pos-filter: ", len(house_filter))
print("Members senate pos-filter: ", len(senate_filter))

Members house pos-filter:  419
Members senate pos-filter:  97


In [115]:
# Per-feature count of house members removed by filter_members.
house_missing_feats

defaultdict(int, {'in_office': 6, 'twitter_account': 21})

### Next, get the Twitter accounts of political entities listed in Appendix A of Barbera's paper
The accounts @GOP, @HouseGOP, @HillaryClinton and @realDonaldTrump were added to the list.

In [130]:
def get_hand_chosen_accounts(filename):
    '''
    Read hand-picked Twitter account names from a text file, one per line.

    filename: str, path of the text file to read

    returns: list of str, the account names with surrounding whitespace and
        any leading '@' removed; blank lines are skipped
    '''
    with open(filename, 'r') as fin:
        cleaned = (line.strip().lstrip('@') for line in fin)
        # Skip lines that are empty once cleaned (blank lines, a trailing
        # newline, a lone '@') so no empty account name ends up in the list.
        return [account for account in cleaned if account]

In [131]:
# Text file holding the hand-selected political accounts, one per line.
filename = "us_political_entities_twitter.txt"
hand_chosen_accnts = get_hand_chosen_accounts(filename)
print("Number accounts: ", len(hand_chosen_accnts))

Number accounts:  51


Combine the Twitter accounts of the members of Congress with the hand-selected ones.

In [132]:
# Twitter handles of the filtered house members, then the filtered senate
# members, followed by the hand-selected accounts.
accounts = [
    member['twitter_account']
    for member in house_filter + senate_filter
] + hand_chosen_accnts

In [133]:
# Size of the combined list: congress members' accounts plus the hand-selected ones.
print("Total number accounts: ", len(accounts))

Total number accounts:  567


### Filter political entities' Twitter accounts

### Get all followers of the Twitter accounts and filter them

### Get French political entities' Twitter accounts

### Get all Twitter followers of the French entities and filter them