In [1]:
import numpy
import scipy
import pandas

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
# Apply seaborn's "whitegrid" style to subsequent plots; color_codes=True
# remaps matplotlib's single-letter color codes to the seaborn palette.
sns.set(style="whitegrid", color_codes=True)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Make the project's `src` and `data` directories (two levels above the
# working directory) importable. `os` and `sys` are imported here because no
# earlier cell imports them, so the cell previously failed on a fresh kernel.
import os
import sys

_project_root = os.path.abspath('../..')
for _subdir in ('src', 'data'):
    _path = os.path.join(_project_root, _subdir)
    # Skip entries already present so re-running the cell does not keep
    # growing sys.path with duplicates.
    if _path not in sys.path:
        sys.path.append(_path)

In [4]:
from importlib import reload

In [5]:
import utils

In [110]:
# Re-import `utils` so edits made to the module on disk are picked up
# without restarting the kernel.
reload(utils)

<module 'utils' from '/home/immersinn/gits/ncga/src/utils.py'>

In [113]:
# Load the legislator table through the project helper (implemented in
# utils, not visible here). '2014' presumably selects the session -- confirm
# against utils.load_repr_data.
all_reps = utils.load_repr_data('2014')

In [114]:
# Dimensions of the legislator table as (rows, columns).
all_reps.shape

(182, 6)

In [115]:
# Preview the first five rows of the legislator table.
all_reps.head()

Unnamed: 0,District,Session,Chamber,Name,Party,Incombant
0,1,2014,S,Bill Cook,R,
1,2,2014,S,Norman Sanderson,R,
2,3,2014,S,Erica Smith-Ingram,D,
3,4,2014,S,Angela Bryant,D,
4,5,2014,S,Donald Davis,D,


In [98]:
# Load the filed-bill table through the project helper (implemented in
# utils, not visible here).
btfc = utils.load_filed_bill_data()

In [99]:
# Preview the first five bills; `table_info` holds the per-bill dict that
# carries the 'Sponsors' string.
btfc.head()

Unnamed: 0,session,house,bill,content,long_title,table_info,keywords
0,20150000.0,H,1,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,A HOUSE RESOLUTION adopting the permanent rule...,"{'Sponsors': 'Representative Lewis.', 'Referre...","[ADOPTED, GENERAL ASSEMBLY, RESOLUTIONS, SIMPL..."
1,20150000.0,H,2,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,A JOINT RESOLUTIOn providing for adjournment s...,"{'Sponsors': 'Representative Lewis.', 'Referre...","[ADJOURNMENT, GENERAL ASSEMBLY, RESOLUTIONS, J..."
2,20150000.0,H,3,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT to provide further REGULATORY RELIEF TO...,{'Sponsors': 'Representatives McGrady and Dixo...,"[ADMINISTRATION DEPT., ADMINISTRATIVE CODE, AD..."
3,20150000.0,H,4,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT directing the department of transportat...,"{'Sponsors': 'Representatives Cotham, Bradford...","[BRIDGES, CONTRACTS, COUNTIES, INFRASTRUCTURE,..."
4,20150000.0,H,5,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT to clarify the service area for communi...,"{'Sponsors': 'Representative S. Martin.', 'Ref...","[COUNTIES, EDGECOMBE COUNTY, INFORMATION TECHN..."


In [100]:
# Dimensions of the filed-bill table as (rows, columns).
btfc.shape

(2098, 7)

## Sponsor Names from Bill Info

In [60]:
import re

In [61]:
# Separators between individual sponsors inside a plural sponsor string.
sponsor_split = re.compile(r';|,| and')
# Parenthetical modifiers seen after sponsor names. Not referenced by
# extract_sponsors itself (any trailing parenthetical is dropped).
known_modifiers = {'By Request', 'Primary   Sponsors', 'Primary Sponsor', 'Primary Sponsors'}

def extract_sponsors(table_info):
    """Return the list of sponsor names found in ``table_info['Sponsors']``.

    Handled layouts:

    * ``'Committee ...'``: returned unchanged (whitespace-trimmed) as a
      single-element list.
    * ``'Representative X.'`` / ``'Senator X.'``: one sponsor.
    * ``'Representatives A, B and C.'`` / ``'Senators A; B.'``: several
      sponsors split on ``;``, ``,`` and `` and``.
    * Several of the above joined by ``/``: each segment is parsed on its
      own and the results are concatenated.

    A trailing parenthetical such as ``(Primary Sponsor)`` is removed from
    each name, and inner spaces are removed (``'S. Martin'`` becomes
    ``'S.Martin'``).

    A segment that starts with neither title is returned as a one-element
    list holding the cleaned text, and an empty segment contributes nothing.
    Previously both cases raised ``UnboundLocalError``.

    Parameters
    ----------
    table_info : mapping
        Must contain a ``'Sponsors'`` key holding a string.

    Returns
    -------
    list of str

    Raises
    ------
    KeyError
        If ``table_info`` has no ``'Sponsors'`` entry.
    """
    title_tokens = ('Representative', 'Senator')

    def strip_modifiers(s):
        """Drop a trailing parenthetical, keeping the text before the first '('."""
        if s.endswith(')'):
            s = s.split('(')[0].strip()
        return s

    def parse_sponsor_string(sponsors_str, base_token):
        """Split a string that starts with ``base_token`` into sponsor names."""
        if sponsors_str.startswith(base_token + 's'):
            # Plural title: the remainder lists several sponsors.
            remainder = sponsors_str[len(base_token) + 1:].strip()
            pieces = (piece.strip() for piece in sponsor_split.split(remainder))
            sponsors_toks = [piece for piece in pieces if piece]
        else:
            # Singular title: the whole remainder is one sponsor, so it is
            # not split (the name itself may contain a comma).
            sponsors_toks = [sponsors_str[len(base_token):].strip()]

        sponsors_toks = [strip_modifiers(s) for s in sponsors_toks]
        # Remove inner spaces so 'S. Martin' becomes 'S.Martin'.
        return [s.replace(' ', '').strip() for s in sponsors_toks]

    def main(sponsors):
        """Parse one '/'-free segment and always return a list."""
        # Trim whitespace around the segment as well as the final period, so
        # a segment such as ' Senator X.' is still recognised.
        sponsors = sponsors.strip().strip('.').strip()
        base_token = next((t for t in title_tokens if sponsors.startswith(t)), None)
        if base_token is None:
            # Unknown title: pass the text through as a single entry so the
            # caller can see it, instead of failing on an unbound variable.
            return [sponsors] if sponsors else []
        return parse_sponsor_string(sponsors, base_token)

    sponsors_raw = table_info['Sponsors']

    if sponsors_raw.strip().startswith('Committee'):
        return [sponsors_raw.strip()]

    if '/' in sponsors_raw:
        sponsors = []
        for segment in sponsors_raw.split('/'):
            sponsors.extend(main(segment))
        return sponsors

    return main(sponsors_raw)

In [101]:
# Parse each bill's table_info dict into its list of sponsor names.
btfc['sponsors'] = btfc['table_info'].apply(extract_sponsors)

In [102]:
# Flatten the per-bill sponsor lists into one row per (bill, sponsor) pair.
# A plain comprehension replaces DataFrame.apply(axis=1): apply can hand back
# a DataFrame rather than a Series of lists (e.g. for an empty frame), in
# which case the flattening step would iterate over column labels instead of
# sponsor records. Passing `columns` keeps the expected columns present even
# when there are no sponsors at all.
sponsors_info = pandas.DataFrame(
    [{'bill_ix': bill_ix, 'name': sponsor, 'chamber': chamber}
     for bill_ix, chamber, bill_sponsors in zip(btfc.index, btfc['house'], btfc['sponsors'])
     for sponsor in bill_sponsors],
    columns=['bill_ix', 'name', 'chamber'],
)

## Match Bill Sponsor Data to Senator and Representative Data

In [103]:
# Generational suffixes that may follow the surname (compared without '.').
name_suffix_list = ['Jr', 'Sr', 'II', 'III', 'IV']

def get_last_name(full_name):
    """Return the surname from a whitespace-separated full name.

    The surname is the final token, unless that token (ignoring periods) is
    one of ``name_suffix_list``; then it is the token before it with any
    comma removed, e.g. ``'John Smith, Jr.'`` gives ``'Smith'``.

    An empty or whitespace-only name returns ``''``, and a name made of a
    single token is returned as that token even if it looks like a suffix.
    Both cases previously raised ``IndexError``.
    """
    # str.split() with no argument already trims and drops empty tokens.
    name_parts = full_name.split()
    if not name_parts:
        return ''

    has_suffix = len(name_parts) > 1 and name_parts[-1].strip('.') in name_suffix_list
    if has_suffix:
        return name_parts[-2].strip(',')
    return name_parts[-1]

def get_firstinit(full_name):
    """Return the first non-whitespace character of ``full_name``.

    Leading whitespace is skipped so a padded name does not yield a blank
    initial, and an empty or whitespace-only name returns ``''`` instead of
    raising ``IndexError``.
    """
    # Slicing (rather than indexing) is safe on an empty string.
    return full_name.lstrip()[:1]

In [104]:
# Derive the two lookup forms for every legislator: the bare last name (LN)
# and "<first initial>.<last name>" (FILN), stored alongside the chamber and
# indexed like all_reps.
last_names = list(map(get_last_name, all_reps.Name))
firstinits = list(map(get_firstinit, all_reps.Name))
name_info = pandas.DataFrame(
    {
        'LN': last_names,
        'FILN': ['.'.join((initial, last)) for initial, last in zip(firstinits, last_names)],
        'Chamber': all_reps.Chamber,
    },
    index=all_reps.index,
)

In [105]:
# Lookup tables from "<name>-<chamber>" to the legislator's row index: one
# keyed by last name, one by "<first initial>.<last name>". When two rows
# produce the same key, the later row wins.
ln_dict = dict(
    ('-'.join((last, chamber)), row_ix)
    for last, chamber, row_ix in zip(name_info.LN, name_info.Chamber, name_info.index)
)
filn_dict = dict(
    ('-'.join((init_last, chamber)), row_ix)
    for init_last, chamber, row_ix in zip(name_info.FILN, name_info.Chamber, name_info.index)
)

# Hand correct some issues: make these last names resolve under the other
# chamber's key as well (raises KeyError if the source key is absent).
ln_dict['Adcock-S'] = ln_dict['Adcock-H']
ln_dict['Robinson-H'] = ln_dict['Robinson-S']

def name_lookup(name, chamber):
    """Return the row index stored for ``name`` in ``chamber``, or -1.

    The key ``"<name>-<chamber>"`` is tried against ``filn_dict`` first and
    ``ln_dict`` second; -1 signals that neither table contains it.
    """
    lookup_key = name + '-' + chamber
    for table in (filn_dict, ln_dict):
        if lookup_key in table:
            return table[lookup_key]
    return -1

In [106]:
# Resolve every sponsor row to a legislator row index (-1 when unmatched).
# Columns are read with [] because attribute access on a column called
# 'name' is easy to confuse with pandas' own `.name` attribute, and zipping
# the two columns avoids one label lookup per row and per column.
sponsors_info['sponsor_ix'] = [
    name_lookup(sponsor, chamber)
    for sponsor, chamber in zip(sponsors_info['name'], sponsors_info['chamber'])
]

In [108]:
# Sponsor rows that name_lookup could not match (sponsor_ix of -1), and the
# distinct names involved.
missing = sponsors_info.loc[sponsors_info['sponsor_ix'].eq(-1)]
missing_names = set(missing['name'])

In [109]:
# Inspect the unmatched sponsor names.
missing_names

{'Committee on Ethics.',
 'Committee on Rules, Calendar, and Operations of the   House.'}

In [125]:
# Keep only the sponsor rows that were matched to a legislator.
sponsors_info = sponsors_info.loc[sponsors_info['sponsor_ix'].ne(-1)]

In [126]:
# Preview the matched (bill, sponsor) rows.
sponsors_info.head()

Unnamed: 0,bill_ix,chamber,name,sponsor_ix
0,0,H,Lewis,160
1,1,H,Lewis,160
2,2,H,McGrady,255
3,2,H,Dixon,87
4,3,H,Cotham,232


In [119]:
import bill_sponsor_analysis_pipeline

In [120]:
# Re-import the pipeline module so edits made on disk are picked up without
# restarting the kernel.
reload(bill_sponsor_analysis_pipeline)

<module 'bill_sponsor_analysis_pipeline' from '/home/immersinn/gits/ncga/src/bill_sponsor_analysis_pipeline.py'>

In [121]:
# Run the module's entry point; it returns three objects that the following
# cells preview. NOTE(review): they look like the module-based counterparts
# of all_reps, btfc (with sponsors) and sponsors_info -- confirm against
# bill_sponsor_analysis_pipeline.main.
ar, bi, si = bill_sponsor_analysis_pipeline.main('2014')

In [122]:
# Preview the first object returned by the pipeline.
ar.head()

Unnamed: 0,District,Session,Chamber,Name,Party,Incombant
0,1,2014,S,Bill Cook,R,
1,2,2014,S,Norman Sanderson,R,
2,3,2014,S,Erica Smith-Ingram,D,
3,4,2014,S,Angela Bryant,D,
4,5,2014,S,Donald Davis,D,


In [123]:
# Preview the second object returned by the pipeline (includes a `sponsors` column).
bi.head()

Unnamed: 0,session,house,bill,content,long_title,table_info,keywords,sponsors
0,20150000.0,H,1,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,A HOUSE RESOLUTION adopting the permanent rule...,"{'Sponsors': 'Representative Lewis.', 'Referre...","[ADOPTED, GENERAL ASSEMBLY, RESOLUTIONS, SIMPL...",[Lewis]
1,20150000.0,H,2,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,A JOINT RESOLUTIOn providing for adjournment s...,"{'Sponsors': 'Representative Lewis.', 'Referre...","[ADJOURNMENT, GENERAL ASSEMBLY, RESOLUTIONS, J...",[Lewis]
2,20150000.0,H,3,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT to provide further REGULATORY RELIEF TO...,{'Sponsors': 'Representatives McGrady and Dixo...,"[ADMINISTRATION DEPT., ADMINISTRATIVE CODE, AD...","[McGrady, Dixon]"
3,20150000.0,H,4,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT directing the department of transportat...,"{'Sponsors': 'Representatives Cotham, Bradford...","[BRIDGES, CONTRACTS, COUNTIES, INFRASTRUCTURE,...","[Cotham, Bradford, J.Moore]"
4,20150000.0,H,5,GENERAL ASSEMBLY OF NORTH CAROLINA FOURTH EXT...,AN ACT to clarify the service area for communi...,"{'Sponsors': 'Representative S. Martin.', 'Ref...","[COUNTIES, EDGECOMBE COUNTY, INFORMATION TECHN...",[S.Martin]


In [124]:
# Preview the third object returned by the pipeline.
si.head()

Unnamed: 0,bill_ix,chamber,name,sponsor_ix
0,0,H,Lewis,102
1,1,H,Lewis,102
2,2,H,McGrady,166
3,2,H,Dixon,53
4,3,H,Cotham,149
