# Congressional Bill Cosponsorship Network Analysis

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import ipywidgets as widgets
import json

In [2]:
# create graph
df = pd.read_csv('data/edges.csv', names = ['from', 'to', 'weight'], index_col = False)
G = nx.from_pandas_edgelist(df, source = 'from', target = 'to', create_using = nx.DiGraph(), edge_attr='weight')

  df = pd.read_csv('data/edges.csv', names = ['from', 'to', 'weight'], index_col = False)


In [3]:
# load MOC data
moc = pd.read_csv('data/legislators.csv', dtype = {'district': 'Int64', 'thomas_id': 'object'})
moc = moc.drop(['full_name', 'middle_name', 'suffix', 'url', 'address', 'phone',
       'contact_form', 'rss_url', 'twitter', 'facebook', 'youtube',
       'youtube_id', 'opensecrets_id', 'lis_id',
       'fec_ids', 'cspan_id', 'govtrack_id', 'votesmart_id', 'ballotpedia_id',
       'washington_post_id', 'icpsr_id', 'wikipedia_id'], axis = 1)
moc['thomas_id'] = moc.thomas_id.astype('float').astype('Int64')
moc.tail()

Unnamed: 0.1,Unnamed: 0,last_name,first_name,nickname,birthday,gender,type,state,district,senate_class,party,bioguide_id,thomas_id
12593,533,Finstad,Brad,,1976-05-30,M,rep,MN,1,,Republican,F000475,
12594,534,Peltola,Mary,,1973-08-31,F,rep,AK,0,,Democrat,P000619,
12595,535,Ryan,Patrick,,1982-03-28,M,rep,NY,19,,Democrat,R000579,
12596,536,Sempolinski,Joseph,,1982-02-10,M,rep,NY,23,,Republican,S001219,
12597,537,Yakym,Rudy,,1984-02-24,M,rep,IN,2,,Republican,Y000067,


In [4]:
# get features
# takes ~1 hour on my macbook air

# get pagerank
rank = nx.pagerank(G)
features = pd.DataFrame(list(rank.keys()), columns = ['id'])
# features = pd.DataFrame(index = list(rank.keys()))
features['pagerank'] = rank.values()

# get clustering coefficient
cluster = nx.clustering(G)
features['clustering'] = cluster.values()

# get centrality
centrality = nx.degree_centrality(G)
features['centrality'] = centrality.values()

# get closeness
closeness = nx.closeness_centrality(G)
features['closeness'] = closeness.values()

# get betweenness
betweenness = nx.betweenness_centrality(G)
features['betweenness'] = betweenness.values()

features.head()

Unnamed: 0,id,pagerank,clustering,centrality,closeness,betweenness
0,181,0.000157,0.788433,0.398574,0.31262,3.4e-05
1,513,9.4e-05,0.864077,0.301158,0.29701,1.2e-05
2,528,0.000406,0.572125,0.399168,0.316033,0.001627
3,570,8e-05,0.909239,0.238194,0.284927,5e-06
4,656,8.8e-05,0.913392,0.238194,0.286336,5e-06


In [5]:
# combine network measures with MOC info
df1 = features.merge(moc, how = 'left', left_on = 'id', right_on = 'bioguide_id')
df2 = features.merge(moc, how = 'left', left_on = 'id', right_on = 'thomas_id')
df = df1.combine_first(df2)

# fix columns and dtypes
df = df.drop(['Unnamed: 0'], axis = 1)
df = df.set_index('id')
df.index = df.index.astype('string')

df.head()

Unnamed: 0_level_0,pagerank,clustering,centrality,closeness,betweenness,last_name,first_name,nickname,birthday,gender,type,state,district,senate_class,party,bioguide_id,thomas_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
181,0.000157,0.788433,0.398574,0.31262,3.4e-05,Carter,Tim,,1910-09-02,M,rep,KY,5.0,,Republican,C000201,181
513,9.4e-05,0.864077,0.301158,0.29701,1.2e-05,Hastings,James,,1926-04-10,M,rep,NY,39.0,,Republican,H000327,513
528,0.000406,0.572125,0.399168,0.316033,0.001627,Heinz,Henry,,1938-10-23,M,sen,PA,,1.0,Republican,H000456,528
570,8e-05,0.909239,0.238194,0.284927,5e-06,Hudnut,William,,1932-10-17,M,rep,IN,11.0,,Republican,H000906,570
656,8.8e-05,0.913392,0.238194,0.286336,5e-06,Kyros,Peter,,1925-07-11,M,rep,ME,1.0,,Democrat,K000356,656


In [6]:
# get bipartisan score

# make + show progress bar
progress_bar = widgets.IntProgress(
    min=0, max=len(G.nodes), description="calculating bipartisanship scores")
display(progress_bar)

# get same-party edge weight and total edge weight sums
for node1 in G.nodes:
    same_party_edges = 0
    # get same-party nodes
    for node2 in G[node1]:
        if df.loc[str(node1)].party == df.loc[str(node2)].party:
            same_party_edges += G[node1][node2]["weight"]
    # get total weights for node
    total_edge_weight = 0
    for nbr, datadict in G.adj[node1].items():
        total_edge_weight += datadict['weight']
    df.loc[str(node1), ['same_party_edges', 'total_edge_weight']] = [same_party_edges, total_edge_weight]
    
    progress_bar.value += 1
    
# calculate bipartisan_cosponsorship_score
df['bipartisan_cosponsorship_score'] = df['same_party_edges'] / df['total_edge_weight']
    
df.head()

IntProgress(value=0, description='calculating bipartisanship scores', max=3368)

Unnamed: 0_level_0,pagerank,clustering,centrality,closeness,betweenness,last_name,first_name,nickname,birthday,gender,type,state,district,senate_class,party,bioguide_id,thomas_id,same_party_edges,total_edge_weight,bipartisan_cosponsorship_score
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
181,0.000157,0.788433,0.398574,0.31262,3.4e-05,Carter,Tim,,1910-09-02,M,rep,KY,5.0,,Republican,C000201,181,13553.0,31049.0,0.436504
513,9.4e-05,0.864077,0.301158,0.29701,1.2e-05,Hastings,James,,1926-04-10,M,rep,NY,39.0,,Republican,H000327,513,2183.0,4856.0,0.449547
528,0.000406,0.572125,0.399168,0.316033,0.001627,Heinz,Henry,,1938-10-23,M,sen,PA,,1.0,Republican,H000456,528,30767.0,72513.0,0.424296
570,8e-05,0.909239,0.238194,0.284927,5e-06,Hudnut,William,,1932-10-17,M,rep,IN,11.0,,Republican,H000906,570,1375.0,2670.0,0.514981
656,8.8e-05,0.913392,0.238194,0.286336,5e-06,Kyros,Peter,,1925-07-11,M,rep,ME,1.0,,Democrat,K000356,656,3072.0,4349.0,0.706369


In [7]:
# load moc job info
with open('data/moc_info.json') as f:
    j = json.loads(f.read())
moc_info = pd.DataFrame.from_dict(j, orient = 'index')
moc_info = moc_info.drop(['usCongressBioId', 'familyName', 'givenName', 'middleName',
       'honorificPrefix', 'unaccentedFamilyName', 'unaccentedGivenName',
       'unaccentedMiddleName', 'birthDate', 'birthCirca', 'deathDate',
       'deathCirca', 'image', 'profileText', 'relationship', 'creativeWork', 
        'researchRecord', 'honorificSuffix', 'nickName', 'deleted'], axis = 1)
moc_info.head()

Unnamed: 0,jobPositions
W000374,"[{'job': {'name': 'Representative', 'jobType':..."
L000226,"[{'job': {'name': 'Representative', 'jobType':..."
F000260,"[{'job': {'name': 'Representative', 'jobType':..."
M000777,"[{'job': {'name': 'Representative', 'jobType':..."
W000724,"[{'job': {'name': 'Representative', 'jobType':..."


In [8]:
# add congress # info for each MOC

# get list of congresses per MOC
congresses = []
for i in range(len(moc_info)):
    congress_list = []
    #jobs = json.loads(moc_info.iloc[i]['jobPositions'])
    jobs = moc_info.iloc[i]['jobPositions']
    for job in jobs:
        # avoid errors from non-congress jobs (like SCOTUS)
        try:
            congress_list.append(job['congressAffiliation']['congress']['congressNumber'])
        except:
            pass
    congresses.append(congress_list)
moc_info['congresses'] = congresses
moc_info = moc_info.drop(['jobPositions'], axis = 1)

# get avg congress per MOC
moc_info['avg_congress'] = [np.mean(lst) for lst in moc_info.congresses]

# add cols to df
moc_info['bioguide_id'] = moc_info.index
df = df.merge(moc_info, how = 'left', on = 'bioguide_id')

df.head()

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,pagerank,clustering,centrality,closeness,betweenness,last_name,first_name,nickname,birthday,gender,...,district,senate_class,party,bioguide_id,thomas_id,same_party_edges,total_edge_weight,bipartisan_cosponsorship_score,congresses,avg_congress
0,0.000157,0.788433,0.398574,0.31262,3.4e-05,Carter,Tim,,1910-09-02,M,...,5.0,,Republican,C000201,181,13553.0,31049.0,0.436504,"[89, 90, 91, 92, 93, 94, 95, 96]",92.5
1,9.4e-05,0.864077,0.301158,0.29701,1.2e-05,Hastings,James,,1926-04-10,M,...,39.0,,Republican,H000327,513,2183.0,4856.0,0.449547,"[91, 92, 93, 94]",92.5
2,0.000406,0.572125,0.399168,0.316033,0.001627,Heinz,Henry,,1938-10-23,M,...,,1.0,Republican,H000456,528,30767.0,72513.0,0.424296,"[92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102]",97.0
3,8e-05,0.909239,0.238194,0.284927,5e-06,Hudnut,William,,1932-10-17,M,...,11.0,,Republican,H000906,570,1375.0,2670.0,0.514981,[93],93.0
4,8.8e-05,0.913392,0.238194,0.286336,5e-06,Kyros,Peter,,1925-07-11,M,...,1.0,,Democrat,K000356,656,3072.0,4349.0,0.706369,"[90, 91, 92, 93]",91.5


In [10]:
# final cleaning
df = df[df.party.isin(['Democrat', 'Republican'])]
df = df[['first_name', 'last_name', 'nickname', 'birthday', 'gender', 'type',
       'state', 'district', 'senate_class', 'party', 'bioguide_id',
       'thomas_id', 'same_party_edges', 'total_edge_weight',
       'bipartisan_cosponsorship_score', 'pagerank', 'clustering', 
       'centrality', 'closeness', 'betweenness','congresses', 'avg_congress']]
df['birthday'] = pd.to_datetime(df.birthday)
df['birthyear'] = df.birthday.dt.year
df['congresses'] = df['congresses'].astype('str')

# current congress is 117; 111 started 2009
# 1244 True, ~50%
df['since_111'] = df.congresses.str.contains('11')

# gender already exists, but it's notable that of 318 women
# 89 are Republicans
# print(len(df[df.gender == 'F']))
# print(len(df[(df.gender == 'F') & (df.party == 'Republican')]))

df.head()

Unnamed: 0,first_name,last_name,nickname,birthday,gender,type,state,district,senate_class,party,...,bipartisan_cosponsorship_score,pagerank,clustering,centrality,closeness,betweenness,congresses,avg_congress,birthyear,since_111
0,Tim,Carter,,1910-09-02,M,rep,KY,5.0,,Republican,...,0.436504,0.000157,0.788433,0.398574,0.31262,3.4e-05,"[89, 90, 91, 92, 93, 94, 95, 96]",92.5,1910.0,False
1,James,Hastings,,1926-04-10,M,rep,NY,39.0,,Republican,...,0.449547,9.4e-05,0.864077,0.301158,0.29701,1.2e-05,"[91, 92, 93, 94]",92.5,1926.0,False
2,Henry,Heinz,,1938-10-23,M,sen,PA,,1.0,Republican,...,0.424296,0.000406,0.572125,0.399168,0.316033,0.001627,"[92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102]",97.0,1938.0,False
3,William,Hudnut,,1932-10-17,M,rep,IN,11.0,,Republican,...,0.514981,8e-05,0.909239,0.238194,0.284927,5e-06,[93],93.0,1932.0,False
4,Peter,Kyros,,1925-07-11,M,rep,ME,1.0,,Democrat,...,0.706369,8.8e-05,0.913392,0.238194,0.286336,5e-06,"[90, 91, 92, 93]",91.5,1925.0,False


In [11]:
#df.to_csv('data/MOC_features.csv')