In [11]:
import os
import requests
import json
import config

### First get data about members of congress from [ProPublica Congress API](https://projects.propublica.org/api-docs/congress-api/).

In [2]:
def get_congress_api(chamber):
    '''
    chamber: str, either 'house' or 'senate'
    '''
    if chamber not in ('senate', 'house'):
        raise ValueError("chamber can only take the values: 'senate' or 'house'")
    return "https://api.propublica.org/congress/v1/115/{}/members.json".format(chamber)

In [3]:
def get_congress_members(chamber, features=None):
    '''
    chamber: str, either 'house' or 'senate'
    features: tuple, the features to include for each returned member
    '''
    congress_api_key = config.congress_api_key
    headers = {'X-API-Key': congress_api_key}
    
    r = requests.get(get_congress_api(chamber), headers=headers)

    out = r.json()['results'][0]
    if features:
        for i, member in enumerate(out['members']):
            out['members'][i] = {feat: member[feat] for feat in features}
    return out

In [4]:
features = (
        'id', 
        'first_name',
        'last_name',
        'date_of_birth',
        'party',
        'twitter_account',
        'in_office',
        'ideal_point')

In [5]:
house = get_congress_members('house', features)
senate = get_congress_members('senate', features)

In [6]:
print('Num members house: {}'.format(house['num_results']))
print('Num members senate: {}'.format(senate['num_results']))

Num members house: 446
Num members senate: 101


In [7]:
print("First house member:\n{}".format(json.dumps(house['members'][0], indent=2)))
print("First senate member:\n{}".format(json.dumps(senate['members'][0], indent=2)))

First house member:
{
  "first_name": "Ralph",
  "ideal_point": null,
  "last_name": "Abraham",
  "twitter_account": "RepAbraham",
  "in_office": true,
  "date_of_birth": "1954-09-16",
  "id": "A000374",
  "party": "R"
}
First senate member:
{
  "first_name": "Lamar",
  "ideal_point": null,
  "last_name": "Alexander",
  "twitter_account": "SenAlexander",
  "in_office": true,
  "date_of_birth": "1940-07-03",
  "id": "A000360",
  "party": "R"
}


In [8]:
def filter_members(members, required_features):
    '''
    Remove members that don't have the required_features
    
    members: list, list of dicts where each represents a member of congress
    required_features: list, list of features that should not be None or empty
    '''
    updated_members = []
    for member in members:
        for feat in required_features:
            if not member[feat]:
                continue
        updated_members.append(member)
    return updated_members

In [9]:
len(filter_members(house['members'], features))

446

In [10]:
len(filter_members(senate['members'], features))

101

### Next get data from political entities using Appendix A of Barbera's paper
Added the @GOP, @HouseGOP and @HillaryClinton to the list

In [12]:
filename = "us_political_entities_twitter.txt"


In [13]:
with open(filename, 'rb') as fin:
    accounts = [account for account in fin]

In [14]:
print(accounts)

[b'@maddow\n', b'@MotherJones\n', b'@MMFlint\n', b'@dccc (Democratic Congressional Campaign Committee)\n', b'@KeithOlberman\n', b'@current\n', b'@HRC (Human Rights Campaign)\n', b'@OccupyWallSt\n', b'@Obama2012\n', b'@TheDemocrats\n', b'@HouseDemocrats\n', b'@SenateDems\n', b'@thinkprogress\n', b'@democracynow\n', b'@algore\n', b'@msnbc\n', b'@ClintonTweet\n', b'@HillaryClinton\n', b'@JerryBrownGov\n', b'@nytimes\n', b'@congressorg\n', b'@JonHuntsman\n', b'@HealthCaucus\n', b'@Schwarzenegger\n', b'@GovGaryJohnson\n', b'@RonPaul\n', b'@MegWhitman\n', b'@johnboehner\n', b'@timpawlenty\n', b'@newtgingrich\n', b'@NRCC\n', b'@NRSC\n', b'@gopconference\n', b'@SarahPalinUSA\n', b'@MittRomney\n', b'@RickSantorum\n', b'@KarlRove\n', b'@GovernorPerry\n', b'@GOPoversigh\n', b'@FoxNews\n', b'@Senate_GOPs\n', b'@GovMikeHuckabee\n', b'@ConnieMackIV\n', b'@DRUDGE\n', b'@THEHermanCain\n', b'@Heritage\n', b'@limbaugh\n', b'@glennbeck\n', b'@GOP\n', b'@HouseGOP']


In [17]:
#TODO: use twitter api to filter accounts who have a minimum amount of followers and that are active
# also consider adding more accounts to the list