In [7]:
import os
import pandas as pd
import numpy as np
from urllib.request import urlopen
import json
import matplotlib as plt
%matplotlib inline
import random
import seaborn as sns
sns.set(color_codes=True)
sns.set_context('talk',font_scale=1.2, rc={"lines.linewidth": 2.5})
sns.set_style("whitegrid")
from textblob import TextBlob,Word
import time

##
## each user needs his/her API key. These can be obtained from the New York Times at:
## http://developer.nytimes.com/apps/mykeys


In [8]:
# api keys for NYTIMES

def get_articles(q,begin_date,end_date,page):
    base_url = 'http://api.nytimes.com/svc/search/v2/articlesearch.json?'
    article_key='<your api key>'
    qdict = {}
    if len(q) > 0:
        qdict['q'] = q
    if len(begin_date) > 0:
        qdict['begin_date'] = begin_date
    if len(end_date) > 0:
        qdict['end_date'] = end_date
    if len(page) > 0:
        qdict['page'] = page
    qdict['api-key'] = article_key
    for k,v in qdict.items():
        s = '&' + k + '=' + v
        base_url += s
    time.sleep(1)
    response = urlopen(base_url.replace('?&',"?")).read()
    return(json.loads(response.decode('utf-8')))
        

def get_congress(q,chamber,session):
    congress_key = '<your api key>'
    if q == 'current':
        base_url = 'http://api.nytimes.com/svc/politics/v3/us/legislative/congress/'
        base_url += chamber + '/'
        base_url +=  session + '/members/current.json?api-key=' + congress_key
    time.sleep(1)
    #print(base_url)
    response = urlopen(base_url.replace('?&',"?")).read()
    return(json.loads(response.decode('utf-8')))
    





In [9]:
def parse_articles(articles):
    '''
    This function takes in a response to the NYT api and parses
    the articles into a list of dictionaries
    '''
    news = []
    for i in articles['response']['docs']:
        dic = {}
        dic['id'] = i['_id']
        if i['abstract'] is not None:
            dic['abstract'] = i['abstract']#.encode("utf8")
        dic['headline'] = i['headline']['main']#.encode("utf8")
        dic['desk'] = i['news_desk']
        dic['date'] = i['pub_date'][0:10] # cutting time of day.
        dic['section'] = i['section_name']
        if i['snippet'] is not None:
            dic['snippet'] = i['snippet']#.encode("utf8")
        dic['source'] = i['source']
        dic['type'] = i['type_of_material']
        dic['url'] = i['web_url']
        dic['word_count'] = i['word_count']
        # locations
        locations = []
        for x in range(0,len(i['keywords'])):
            if 'glocations' in i['keywords'][x]['name']:
                locations.append(i['keywords'][x]['value'])
        dic['locations'] = locations
        # subject
        subjects = []
        for x in range(0,len(i['keywords'])):
            if 'subject' in i['keywords'][x]['name']:
                subjects.append(i['keywords'][x]['value'])
        dic['subjects'] = subjects   
        news.append(dic)
    return(news) 


In [10]:
page = 0
articles_list = []
count = 0
while True:
    page += 1
    response = get_articles('gun+control','20151204','20151207',str(page))
    articles = parse_articles(response)
    articles_list += articles
    if (response['response']['meta']['offset'])  > (response['response']['meta']['hits'])+20:
        break
len(articles_list)

68

In [21]:
#articles_list

In [11]:
df = pd.DataFrame(columns = ['abstract', 'date', 'desk', 'headline', 'id', 'locations', 'section',
       'snippet', 'source', 'subjects', 'type', 'url', 'word_count'])
for article in articles_list:
    df = df.append(article,ignore_index=True)


In [12]:
df

Unnamed: 0,abstract,date,desk,headline,id,locations,section,snippet,source,subjects,type,url,word_count
0,Ross Douthat Op-Ed column points out that libe...,2015-12-06,OpEd,Liberalism’s Gun Problem,56633b407988103fe7de52a8,"[France, Australia]",Opinion,Self-righteous reactions to mass shootings won...,The New York Times,"[Gun Control, Liberalism (US Politics), Conser...",Op-Ed,http://www.nytimes.com/2015/12/06/opinion/sund...,817
1,Get recommendations from New York Times report...,2015-12-04,,What We&#8217;re Reading,566228567988100bbf963e9b,[Japan],Blogs,Get recommendations from New York Times report...,The New York Times,"[Gun Control, Muslims and Islam, San Bernardin...",Blog,http://news.blogs.nytimes.com/2015/12/04/what-...,370
2,Vote by New Jersey Assembly to override Gov Ch...,2015-12-04,Metro,Christie’s Veto of Gun-Control Bill Is Upheld ...,5660c01c7988104d062b2892,[New Jersey],N.Y. / Region,Democratic leaders invoked the mass shooting i...,The New York Times,"[Vetoes (US), Gun Control, Mental Health and D...",News,http://www.nytimes.com/2015/12/04/nyregion/chr...,592
3,,2015-12-06,National,Obama Calls for Stronger Gun Laws After Califo...,5662c3c17988100bbf963f83,[United States],U.S.,"President Obama, in his weekly radio address, ...",The New York Times,"[Terrorism, Gun Control, San Bernardino, Calif...",News,http://www.nytimes.com/2015/12/06/us/politics/...,470
4,Nicholas Kristof Op-Ed column warns that real ...,2015-12-06,OpEd,"Hysteria About Refugees, but Blindness on Guns",56633b417988103fe7de52a9,[Syria],Opinion,If politicians want to tackle a threat to our ...,The New York Times,"[Refugees and Displaced Persons, Gun Control, ...",Op-Ed,http://www.nytimes.com/2015/12/06/opinion/sund...,946
5,Gail Collins Op-Ed column laments that Senate ...,2015-12-05,OpEd,The Senate Goes Gaga on Guns,566248747988100bbf963ee7,[],Opinion,"In Washington, it’s always the same story on g...",The New York Times,"[Gun Control, San Bernardino, Calif, Shooting ...",Op-Ed,http://www.nytimes.com/2015/12/05/opinion/the-...,798
6,Timothy Egan Op-Ed column attempts to make sen...,2015-12-05,OpEd,No More Thoughts and Prayers,566228587988100bbf963e9c,[Colorado Springs (Colo)],Opinion,"A day after the carnage, the Senate voted down...",The New York Times,"[Murders, Attempted Murders and Homicides, San...",Op-Ed,http://www.nytimes.com/2015/12/05/opinion/no-m...,837
7,Editorial excoriates Senate Republicans for vo...,2015-12-04,Editorial,Tough Talk and a Cowardly Vote on Terrorism,56614d007988104d062b29ff,[],Opinion,"Faced with another bloody massacre, Republican...",The New York Times,"[Editorials, United States Politics and Govern...",Editorial,http://www.nytimes.com/2015/12/04/opinion/toug...,641
8,Pres Obama addresses nation from Oval Office a...,2015-12-07,National,Obama Says of Terrorist Threat: ‘We Will Overc...,5664dc47798810625e85ff64,[San Bernardino (Calif)],U.S.,"In a prime-time address, President Obama blunt...",The New York Times,"[Terrorism, San Bernardino, Calif, Shooting (2...",News,http://www.nytimes.com/2015/12/07/us/politics/...,1509
9,Jim Dwyer About New York column examines respo...,2015-12-04,Metro,Wanting More Than a Veto and a Prayer After Ma...,5660f3197988104d062b292e,"[New Jersey, San Bernardino (Calif)]",N.Y. / Region,In an era when the United States has had an av...,The New York Times,"[Murders, Attempted Murders and Homicides, Gun...",News,http://www.nytimes.com/2015/12/04/nyregion/wan...,731


In [13]:
congress = get_congress('current','113','house')
congress_list = congress['results']
df_congress = pd.DataFrame()
members_list = congress_list[0]['members']
for member_json in members_list:
    df_congress = df_congress.append(member_json,ignore_index = True)
int_list = list(range(0, df_congress.shape[0]))
for x in int_list:
    df_congress.set_value(x,'twitter_account','@'+df_congress.ix[x]['twitter_account'])

senate = get_congress('current','113','senate')
senate_list = senate['results']
df_senate = pd.DataFrame()
members_list = senate_list[0]['members']
for member_json in members_list:
    df_senate = df_senate.append(member_json,ignore_index = True)
int_list = list(range(0, df_senate.shape[0]))
for x in int_list:
    df_senate.set_value(x,'twitter_account','@'+df_senate.ix[x]['twitter_account'])



In [14]:
twitter_congress = df_congress[['first_name','last_name','twitter_account']]
twitter_senate = df_senate[['first_name','last_name','twitter_account']]

In [15]:
twitter_congress.head(25)

Unnamed: 0,first_name,last_name,twitter_account
0,Robert,Aderholt,@Robert_Aderholt
1,Justin,Amash,@RepJustinAmash
2,Mark,Amodei,@MarkAmodeiNV2
3,Alma,Adams,@RepAdams
4,Spencer,Bachus,@BachusAL06
5,Joe,Barton,@RepJoeBarton
6,Xavier,Becerra,@RepBecerra
7,Sanford,Bishop Jr.,@SanfordBishop
8,Earl,Blumenauer,@BlumenauerMedia
9,John,Boehner,@SpeakerBoehner


In [16]:
twitter_senate.head(25)

Unnamed: 0,first_name,last_name,twitter_account
0,Lamar,Alexander,@SenAlexander
1,Kelly,Ayotte,@KellyAyotte
2,Roy,Blunt,@RoyBlunt
3,Barbara,Boxer,@SenatorBoxer
4,Sherrod,Brown,@SenSherrodBrown
5,Richard,Burr,@SenatorBurr
6,Tammy,Baldwin,@SenatorBaldwin
7,John,Boozman,@JohnBoozman
8,John,Barrasso,@SenJohnBarrasso
9,Mark,Begich,@


In [17]:
voting_record_congress = df_congress[['first_name','last_name','party','state','missed_votes', 'missed_votes_pct','total_votes', 'votes_with_party_pct']]
voting_record_senate = df_senate[['first_name','last_name','party','state','missed_votes', 'missed_votes_pct','total_votes', 'votes_with_party_pct']]


In [18]:
voting_record_congress.head(30)

Unnamed: 0,first_name,last_name,party,state,missed_votes,missed_votes_pct,total_votes,votes_with_party_pct
0,Robert,Aderholt,R,AL,110,9.23,1192,93.99
1,Justin,Amash,R,MI,0,0.0,1192,77.01
2,Mark,Amodei,R,NV,95,7.97,1192,94.17
3,Alma,Adams,D,NC,2,4.17,48,97.83
4,Spencer,Bachus,R,AL,31,2.6,1192,92.94
5,Joe,Barton,R,TX,76,6.38,1192,94.27
6,Xavier,Becerra,D,CA,40,3.36,1192,95.57
7,Sanford,Bishop Jr.,D,GA,74,6.21,1192,85.87
8,Earl,Blumenauer,D,OR,59,4.98,1185,93.52
9,John,Boehner,R,OH,1165,97.73,1192,88.89


In [19]:
voting_record_senate.head(30)

Unnamed: 0,first_name,last_name,party,state,missed_votes,missed_votes_pct,total_votes,votes_with_party_pct
0,Lamar,Alexander,R,TN,44,6.7,657,85.97
1,Kelly,Ayotte,R,NH,10,1.52,657,81.45
2,Roy,Blunt,R,MO,39,5.94,657,89.32
3,Barbara,Boxer,D,CA,44,6.7,657,98.04
4,Sherrod,Brown,D,OH,10,1.52,657,97.84
5,Richard,Burr,R,NC,19,2.89,657,91.38
6,Tammy,Baldwin,D,WI,3,0.46,657,98.62
7,John,Boozman,R,AR,68,10.35,657,92.53
8,John,Barrasso,R,WY,6,0.91,657,91.86
9,Mark,Begich,D,AK,49,7.46,657,95.72
