In [1]:
import numpy
import scipy
import pandas

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
sns.set(style="whitegrid", color_codes=True)

%matplotlib inline

In [3]:
# `os` and `sys` are imported here because no earlier cell imports them;
# without these lines the cell fails with NameError on a fresh kernel.
import os
import sys

# Put the project's `src` and `data` directories (two levels above the
# notebook's working directory) on the module search path so that
# project-local modules can be imported by later cells.
project_root = os.path.abspath('../..')
for subdir in ('src', 'data'):
    extra_path = os.path.join(project_root, subdir)
    # Guard against duplicates so re-running the cell leaves sys.path unchanged.
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [4]:
import utils

In [5]:
# Load the '2014' election data for each chamber through the project-local
# `utils` module: one frame for the 'senate' and one for the 'house'.
senators = utils.load_election_data('senate', '2014')
reprs = utils.load_election_data('house', '2014')

In [6]:
senators.head()

Unnamed: 0,District,Session,Chamber,Name,Party,Incombant,Won,Votes
0,1,2014,S,Stan White,D,,False,27957.0
1,1,2014,S,Bill Cook,R,,True,32143.0
2,2,2014,S,Carroll Ipock II,D,,False,23925.0
3,2,2014,S,Norman Sanderson,R,,True,36562.0
4,3,2014,S,Erica Smith-Ingram,D,,True,


In [7]:
reprs.head()

Unnamed: 0,District,Session,Chamber,Name,Party,Incombant,Won,Votes
0,1,2014,H,Garry Meiggs,D,,False,10082
1,1,2014,H,Bob Steinburg,R,,True,15713
2,2,2014,H,Ray Jeffers,D,,False,10259
3,2,2014,H,Larry Yarborough,R,,True,13423
4,3,2014,H,Whit Whitley,D,,False,10524


In [8]:
# Load the filed-bill data set through the project-local `utils` helper.
btfc = utils.load_filed_bill_data()

In [9]:
btfc.head()

Unnamed: 0,session,house,bill,content,long_title,table_info,keywords
0,20150000.0,H,1,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,A HOUSE RESOLUTION adopting the permanent rule...,"{'Sponsors': 'Representative Lewis.', 'Referre...","[ADOPTED, GENERAL ASSEMBLY, RESOLUTIONS, SIMPL..."
1,20150000.0,H,2,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,A JOINT RESOLUTIOn providing for adjournment s...,"{'Sponsors': 'Representative Lewis.', 'Referre...","[ADJOURNMENT, GENERAL ASSEMBLY, RESOLUTIONS, J..."
2,20150000.0,H,3,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT to provide further REGULATORY RELIEF TO...,{'Sponsors': 'Representatives McGrady and Dixo...,"[ADMINISTRATION DEPT., ADMINISTRATIVE CODE, AD..."
3,20150000.0,H,4,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT directing the department of transportat...,"{'Sponsors': 'Representatives Cotham, Bradford...","[BRIDGES, CONTRACTS, COUNTIES, INFRASTRUCTURE,..."
4,20150000.0,H,5,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT to clarify the service area for communi...,"{'Sponsors': 'Representative S. Martin.', 'Sho...","[COUNTIES, EDGECOMBE COUNTY, INFORMATION TECHN..."


In [10]:
btfc.shape

(2098, 7)

## Sponsor Names from Text

In [11]:
btfc.table_info[0]['Sponsors']

'Representative Lewis.'

In [12]:
import re

In [13]:
# Separators between individual names in a plural sponsor list.
# `\band\b` (rather than a bare ' and') keeps the split from firing inside a
# surname that merely starts with the letters "and".
sponsor_split = re.compile(r';|,|\band\b')
known_modifiers = {'By Request', 'Primary   Sponsors', 'Primary Sponsor', 'Primary Sponsors'}

def extract_sponsors(table_info):
    """
    Extract the list of sponsor names from a bill's info table.

    Parameters
    ----------
    table_info : mapping
        Must contain a 'Sponsors' key whose value is the raw sponsor string,
        e.g. 'Representatives McGrady and Dixon.'.

    Returns
    -------
    list of str
        One entry per sponsor. Titles ('Representative(s)', 'Senator(s)') and
        trailing parenthesised modifiers such as '(Primary Sponsors)' are
        removed, and spaces inside a name are dropped ('S. Martin' becomes
        'S.Martin'). Strings starting with 'Committee' are returned whole as
        a single entry. A segment with no recognised title is returned as a
        single entry as well, and empty input yields an empty list.

    Raises
    ------
    KeyError
        If `table_info` has no 'Sponsors' entry.
    """

    def strip_modifiers(s):
        # "Lewis (By Request)" -> "Lewis"
        if s.endswith(')'):
            s = s.split('(')[0].strip()
        return s

    def parse_sponsor_string(sponsors_str, base_token):
        if sponsors_str.startswith(base_token + 's'):
            # Plural title: the remainder is a delimited list of names.
            names = sponsors_str[len(base_token) + 1:].strip()
            sponsors_toks = [tok.strip() for tok in sponsor_split.split(names)]
        else:
            # Singular title: the remainder is exactly one name.
            sponsors_toks = [sponsors_str[len(base_token):].strip()]

        sponsors_toks = [strip_modifiers(tok) for tok in sponsors_toks]
        sponsors_toks = [tok.replace(' ', '').strip() for tok in sponsors_toks]

        # Drop tokens that are empty, or became empty once modifiers were removed.
        return [tok for tok in sponsors_toks if tok]

    def main(sponsors):
        # Strip whitespace first: segments produced by splitting on '/'
        # usually carry a leading space that would defeat the prefix test.
        sponsors = sponsors.strip().strip('.').strip()

        for base_token in ('Representative', 'Senator'):
            if sponsors.startswith(base_token):
                return parse_sponsor_string(sponsors, base_token)

        # No recognised title: keep the text as a single sponsor instead of
        # failing on an unbound `base_token`.
        return [sponsors] if sponsors else []

    sponsors_raw = table_info['Sponsors'].strip()

    if sponsors_raw.startswith('Committee'):
        return [sponsors_raw]

    # A '/' separates independently titled groups; a string without '/'
    # is simply a single group.
    sponsors = []
    for segment in sponsors_raw.split('/'):
        sponsors.extend(main(segment))

    return sponsors

In [14]:
# Parse every bill's info table into a list of sponsor names.
btfc['sponsors'] = btfc['table_info'].apply(extract_sponsors)

In [15]:
def find_modifiers(sponsors_lists):
    """
    Collect the distinct parenthesised modifiers attached to sponsor names.

    `sponsors_lists` is an iterable of lists of name strings. For each name
    ending in ')', the text after its last '(' (with ')' characters trimmed
    from both ends) is recorded. When that text is empty, the list containing
    the name is printed. Returns the modifiers as a set.
    """
    found = set()
    for names in sponsors_lists:
        for name in names:
            if not name.endswith(')'):
                continue
            modifier = name.rsplit('(', 1)[-1].strip(')')
            found.add(modifier)
            if not modifier:
                # Surface entries with an empty "()" so they can be inspected.
                print(names)
    return found

## Match Bill Sponsor Data to Senator and Representative Data

## Counting, Basic Analysis

In [16]:
# Accumulates one record per (bill, sponsor) pair; turned into a DataFrame below.
sponsor_count_data = []

def update_scd(entry):
    """
    Append one record to `sponsor_count_data` for each sponsor of a bill.

    `entry` is a row of `btfc`; its 'house', 'bill', 'session' and 'sponsors'
    fields are read. The 'lab' field combines the sponsor name with the
    chamber code, e.g. "Lewis (H)".
    """
    chamber = entry['house']
    bill_id = entry['bill']
    session = entry['session']
    sponsor_count_data.extend(
        {'house' : chamber,
         'bill' : bill_id,
         'session' : session,
         'name' : sponsor,
         'lab' : sponsor + " (" + chamber + ")"}
        for sponsor in entry['sponsors']
    )

# Row-wise apply is used only for its side effect on the list above.
_ = btfc.apply(update_scd, axis=1)

sponsor_count_data = pandas.DataFrame(sponsor_count_data)

In [17]:
sponsor_count_data.head()

Unnamed: 0,bill,house,lab,name,session
0,1,H,Lewis (H),Lewis,20150000.0
1,2,H,Lewis (H),Lewis,20150000.0
2,3,H,McGrady (H),McGrady,20150000.0
3,3,H,Dixon (H),Dixon,20150000.0
4,4,H,Cotham (H),Cotham,20150000.0


In [18]:
# Number of sponsored bills per legislator label ("Name (chamber)"),
# largest first. `size()` counts the rows of each group directly; it replaces
# `apply(len)`, which runs a Python-level call per group and triggers the
# pandas deprecation warning about `apply` operating on the grouping column.
counts = (
    sponsor_count_data
    .groupby('lab')
    .size()
    .sort_values(ascending=False)
)

In [19]:
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.tools import FigureFactory as FF
import plotly.graph_objs as pogo
from plotly.graph_objs import Marker, Line, Data

init_notebook_mode(connected=True)

In [20]:
# Bar chart of sponsorship counts: one green bar per legislator label,
# in the order already established by `counts`.
sponsor_labels = list(counts.index)

trace0 = pogo.Bar(
    x=sponsor_labels,
    y=list(counts.values),
    text=sponsor_labels,  # hover text repeats the label
    marker={'color': 'green'},
)
layout = pogo.Layout(title='Senators / Reps Bill Sponsorship Counts, 2015-2016')
data = [trace0]

fig = pogo.Figure(data=data, layout=layout)
iplot(fig)

In [27]:
# Build `lu`: sponsor name -> row position of that sponsor in `house_2016_ele`.
#
# Only sponsors whose chamber code is 'H' are looked up. A name containing a
# '.' is compared against the FILN column; any other name is compared against
# the LastName column. A name maps to the matching row position only when
# exactly one row matches; zero matches and multiple matches both map to -1.
#
# NOTE(review): `house_2016_ele` is not defined in any cell visible in this
# notebook, and the recorded output of this cell is
# "AttributeError: 'DataFrame' object has no attribute 'LastName'".
# Confirm where that frame is loaded and what its columns are actually called.
# NOTE(review): FILN presumably means "first initial + last name" — TODO confirm.
lu = {}
for n,h in zip(sponsor_count_data.name, sponsor_count_data.house):
    if h == 'H':
        if n.find('.') > -1:
            ind = numpy.where(house_2016_ele.FILN == n)[0]
        else:
            ind = numpy.where(house_2016_ele.LastName == n)[0]
        # Keep the position only for an unambiguous (single-row) match.
        if ind.shape[0] == 1:
            ind = ind[0]
        else:
            ind = -1
        lu[n] = ind

AttributeError: 'DataFrame' object has no attribute 'LastName'