In [1]:
import pandas as pd
import numpy as np
import networkx as nx

# Intro

In [21]:
# create graph
df = pd.read_csv('data/edges.csv', names = ['from', 'to', 'weight'], index_col = False)
G = nx.from_pandas_edgelist(df, source = 'from', target = 'to', create_using = nx.DiGraph())

In [41]:
# count nodes and edges
print(f'This graph has {len(list(G.nodes()))} nodes representing members of congress.')
print(f'This graph has {len(list(G.edges()))} edges representing cosponsorship relationships.')

This graph has 2821 nodes representing members of congress.
This graph has 1702866 edges representing cosponsorship relationships.


In [None]:
# pagerank
rank = nx.pagerank(G)

In [78]:
# make features df
features = pd.DataFrame(list(rank.keys()), columns = ['id'])
features['pagerank'] = rank.values()
features.head()

Unnamed: 0,id,pagerank
0,t00181,0.000368
1,t00513,0.000297
2,t00528,0.000488
3,t00570,0.000247
4,t00656,0.000246


In [44]:
# get features for every MOC
# get pagerank
rank = nx.pagerank(G)
features = pd.DataFrame(index = list(rank.keys()))
features['pagerank'] = rank.values()

# get clustering coefficient
cluster = nx.clustering(G)
features['clustering'] = cluster.values()

# get centrality
centrality = nx.degree_centrality(G)
features['centrality'] = centrality.values()

# get closeness
closeness = nx.closeness_centrality(G)
features['closeness'] = closeness.values()

# get betweenness
betweenness = nx.betweenness_centrality(G)
features['betweenness'] = betweenness.values()

# add features to reviews data
reviews = reviews.set_index('id')
reviews = reviews.join(features)

return reviews

KeyboardInterrupt: 

In [42]:
avg_distance = nx.average_shortest_path_length(G)
print(avg_distance)

NetworkXError: Graph is not weakly connected.

In [63]:
# load MOC data
moc = pd.read_csv('data/legislators.csv', dtype = {'district': 'Int64', 'thomas_id': 'object'})
moc = moc.drop(['last_name', 'first_name', 'middle_name', 'suffix', 'url', 'address', 'phone',
       'contact_form', 'rss_url', 'twitter', 'facebook', 'youtube',
       'youtube_id', 'opensecrets_id', 'lis_id',
       'fec_ids', 'cspan_id', 'govtrack_id', 'votesmart_id', 'ballotpedia_id',
       'washington_post_id', 'icpsr_id', 'wikipedia_id'], axis = 1)
moc.tail()

Unnamed: 0.1,Unnamed: 0,nickname,full_name,birthday,gender,type,state,district,senate_class,party,bioguide_id,thomas_id
12593,533,,Brad Finstad,1976-05-30,M,rep,MN,1,,Republican,F000475,
12594,534,,Mary Sattler Peltola,1973-08-31,F,rep,AK,0,,Democrat,P000619,
12595,535,,Patrick Ryan,1982-03-28,M,rep,NY,19,,Democrat,R000579,
12596,536,,Joseph Sempolinski,1982-02-10,M,rep,NY,23,,Republican,S001219,
12597,537,,,1984-02-24,M,rep,IN,2,,Republican,Y000067,


In [50]:
moc_double_id = moc[moc.bioguide_id.notna() & moc.thomas_id.notna()]
len(moc_double_id)

2181

In [69]:
# combine 
df1 = features.merge(moc, how = 'left', left_on = 'id', right_on = 'bioguide_id')
df2 = features.merge(moc, how = 'left', left_on = 'id', right_on = 'thomas_id')
df3 = df1.combine_first(df2)

In [71]:
df3

Unnamed: 0.1,id,pagerank,Unnamed: 0,nickname,full_name,birthday,gender,type,state,district,senate_class,party,bioguide_id,thomas_id
0,t00181,0.000368,,,,,,,,,,,,
1,t00513,0.000297,,,,,,,,,,,,
2,t00528,0.000488,,,,,,,,,,,,
3,t00570,0.000247,,,,,,,,,,,,
4,t00656,0.000246,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2816,bS001212,0.000295,,,,,,,,,,,,
2817,bP000615,0.000294,,,,,,,,,,,,
2818,bR000615,0.000222,,,,,,,,,,,,
2819,bK000377,0.000220,,,,,,,,,,,,


Unnamed: 0,id,pagerank
0,t00181,0.000368
1,t00513,0.000297
2,t00528,0.000488
3,t00570,0.000247
4,t00656,0.000246
