In [None]:
# https://www.propublica.org/datastore/dataset/congressional-data-bulk-legislation-bills

# Get party affiliations from https://projects.propublica.org/api-docs/congress-api/members/

In [7]:
%%bash

rm -rf data
mkdir -p data
rm -rf *.zip *_bills

for i in {93..117..1}
do
   echo "Downloading ${i}.zip"
   wget -P data "https://s3.amazonaws.com/pp-projects-static/congress/bills/${i}.zip"
   # unzip -o -q "${i}.zip" -d "${i}_bills"
done

Process is interrupted.


In [19]:
import os
import zipfile
import json
from tqdm import tqdm

In [48]:
db = []
LIMIT = 5


for i, z in enumerate(os.listdir('data')):
    if i > LIMIT:
        break
    zip_file = os.path.join('data', z)
    with zipfile.ZipFile(zip_file, 'r') as f:
        names = [file for file in f.namelist() if file.endswith('data.json')]
        for file in tqdm(names, f'Opening from {zip_file}: ', position=0):
            with f.open(file) as bill:
                data = json.load(bill)
                db.append({'id': data['bill_id'],
                           'title': data['official_title'],
                           'sponsor': data['sponsor'],
                           'cosponsors': data['cosponsors']})

            

Opening from data/100.zip: 100%|██████████| 11278/11278 [00:07<00:00, 1514.38it/s]
Opening from data/101.zip: 100%|██████████| 11787/11787 [00:09<00:00, 1231.31it/s]
Opening from data/102.zip: 100%|██████████| 12016/12016 [00:10<00:00, 1145.22it/s]
Opening from data/103.zip: 100%|██████████| 9822/9822 [00:08<00:00, 1154.35it/s]
Opening from data/104.zip: 100%|██████████| 7991/7991 [00:07<00:00, 1071.90it/s]
Opening from data/105.zip: 100%|██████████| 9141/9141 [00:08<00:00, 1131.86it/s]


In [84]:
members = {}
edges = {}

def get_id(member):
    try:
        member_id = int(member['thomas_id'])
    except:
        member_id = 'None'
    return member_id

def add_to_members(entry):
    member_id = get_id(entry)
    
    if member_id == 'None':
        members[member_id] = ('N/A', 'N/A')
        return 0
    
    else:
        name = f"{entry['title']} {entry['name']}"
        state = entry['state']

        if member_id not in members.keys():
            members[member_id] = (name, state)

        return 0
        
def compare_cosponsors(sponsor, cosponsor_list, bill_name):
    sponsor_id = get_id(sponsor)
    if sponsor_id not in edges.keys():
        edges[sponsor_id] = {}
    
    for cosponsor in cosponsor_list + [sponsor]:  # Get that diagonal lol
        cosponsor_id = get_id(cosponsor)
        if cosponsor_id not in edges[sponsor_id].keys():
            edges[sponsor_id][cosponsor_id] = []
        edges[sponsor_id][cosponsor_id].append(bill_name)
    
def summarize_edges():
    out = []
    for sponsor, cosponsor_list in edges.items():
        for cosponsor, bills in cosponsor_list.items():
            out.append((sponsor, cosponsor, len(bills)))
    return out
    

for bill in tqdm(db, 'Processing database: ', position=0):
    bill_name = f"{bill['id']}: {bill['title']}"
    sponsor = bill['sponsor']
    cosponsors = bill['cosponsors']
    
    add_to_members(sponsor)
    for cosponsor in cosponsors:
        add_to_members(cosponsor)
    
    compare_cosponsors(sponsor, cosponsors, bill_name)
    
len(summarize_edges())

Processing database: 100%|██████████| 62035/62035 [00:01<00:00, 36537.51it/s]


256373

In [None]:
# Todo: add reciprocal entries so that it becomes symmetrical

In [81]:
from pyvis.network import Network
import pandas as pd

got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='white')

# set the physics layout of the network
got_net.barnes_hut()

smry = summarize_edges()

sources = [i[0] for i in smry]
targets = [i[1] for i in smry]
weights = [i[2] for i in smry]

edge_data = zip(sources, targets, weights)


for m_id, vals in members.items():
    full_name = f'{vals[0]} (???-{vals[1]})'
    try:
        got_net.add_node(m_id, m_id, title=full_name)
    except:
        print(m_id)
        print(vals)
        raise ValueError

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show('congress.html')

KeyboardInterrupt: 