In [10]:
import pandas as pd
import numpy as np
import os
from itertools import combinations

In [11]:
# Make sure both per-chamber output folders exist before anything is written to them.
for output_dir in ("../Data/bills-senate/", "../Data/bills-house/"):
    os.makedirs(output_dir, exist_ok=True)

In [12]:
## labels
def _most_common(values):
    """Return the most frequent item in ``values``.

    Counting is a single pass over the list. When several items share the
    highest count, the one that appears first in ``values`` wins, so the
    result does not depend on set iteration order.
    """
    counts = {}
    for value in values:
        counts[value] = counts.get(value, 0) + 1
    # dicts keep insertion order and max() returns the first maximal key,
    # which gives the "first seen wins" tie-break.
    return max(counts, key=counts.get)


def _party_labels(members_df):
    """Collapse a member table to one row per ``id``.

    Returns a frame with columns ``id``, ``party`` (the list of every party
    value recorded for that id) and ``group_code`` (the most common value in
    that list).
    """
    labels = members_df.groupby("id")['party'].apply(list).reset_index()
    labels['group_code'] = labels['party'].apply(_most_common)
    return labels


senate_df = pd.read_csv("../RawData/congress/Cosponsor/senate.csv")
senate_labels = _party_labels(senate_df)
senate_labels[['id', 'group_code']].to_csv("../Data/bills-senate/labels.csv", index=False)

house_df = pd.read_csv("../RawData/congress/Cosponsor/house.csv")
house_labels = _party_labels(house_df)
house_labels[['id', 'group_code']].to_csv("../Data/bills-house/labels.csv", index=False)

In [13]:
## edges and triangles
# The four files are stitched together row by row further down, so row i of
# each frame must correspond to line i of its file. read_csv drops blank lines
# by default, which would shift every following row; skip_blank_lines=False
# keeps an empty line as a NaN row instead.
bills_df = pd.read_csv("../RawData/congress/Cosponsor/bills.txt", names=['bill_name'],
                       skip_blank_lines=False)
sponsors_df = pd.read_csv("../RawData/congress/Cosponsor/sponsors.txt", names=['sponsor_id'],
                          skip_blank_lines=False)
# dtype=str: a line holding a single ID must stay text so it can be split like
# the multi-ID lines (missing entries are still read as NaN).
cosponsors_df = pd.read_csv("../RawData/congress/Cosponsor/cosponsors.txt", names=['cosponsor_list'],
                            skip_blank_lines=False, dtype=str)
dates_df = pd.read_csv("../RawData/congress/Cosponsor/dates.txt", names=['date'],
                       skip_blank_lines=False)

full_df = bills_df.copy()
full_df['sponsor_id'] = sponsors_df['sponsor_id']
full_df['cosponsor_list'] = cosponsors_df['cosponsor_list']
full_df['date'] = pd.to_datetime(dates_df['date'])

# 't' is the bill date expressed as whole days elapsed since 1973-01-01.
# The reference timestamp is parsed once instead of once per row.
EPOCH = pd.to_datetime("1973-01-01")
full_df['t'] = full_df['date'].apply(lambda x: (x - EPOCH).days)

# Rows without a sponsor cannot be turned into a node list, so drop them.
full_df = full_df.dropna(subset=['sponsor_id'])
def make_list(x):
    """Return the participant IDs of one bill row as a list of ints.

    Parameters
    ----------
    x : mapping or pandas.Series
        Must provide ``'sponsor_id'`` (non-null, convertible to int) and
        ``'cosponsor_list'`` (whitespace-separated IDs as text, a single
        numeric ID, or a null value when there are no cosponsors).

    Returns
    -------
    list of int
        The sponsor ID first, followed by the cosponsor IDs in file order.
    """
    sponsor = int(x['sponsor_id'])
    raw = x['cosponsor_list']
    if pd.isnull(raw):
        return [sponsor]
    if isinstance(raw, str):
        # split() with no argument splits on any run of whitespace, so doubled,
        # leading or trailing spaces do not yield empty tokens for int().
        return [sponsor] + [int(token) for token in raw.split()]
    # A lone ID may have been parsed as a number rather than as text.
    return [sponsor] + [int(raw)]
# Build the per-bill participant list row by row with make_list
# (sponsor first, then any cosponsors) and store it in the 'nodes' column.
full_df['nodes'] = full_df.apply(make_list, axis=1)

In [14]:
# Bills whose node list is longer than this are skipped entirely.
MAX_BILL_SIZE = 4

senate_edges = []
senate_triangles = []
house_edges = []
house_triangles = []

house_simplices = []
senate_simplices = []

# Bill-name prefix -> the (edges, triangles, simplices) accumulators it feeds.
# Bills whose name starts with neither prefix contribute nothing.
outputs_by_prefix = {
    "HR": (house_edges, house_triangles, house_simplices),
    "SN": (senate_edges, senate_triangles, senate_simplices),
}

# Iterate the three needed columns directly; this avoids building a Series per
# row and keeps the loop variables from shadowing each other.
for bill_name, nodes, t in zip(full_df['bill_name'], full_df['nodes'], full_df['t']):
    if len(nodes) > MAX_BILL_SIZE:
        continue
    for prefix, (edges, triangles, simplices) in outputs_by_prefix.items():
        if not bill_name.startswith(prefix):
            continue
        # Each pair / triple is stored with its members in ascending order,
        # followed by the bill's time stamp.
        edges.extend((*sorted(pair), t) for pair in combinations(nodes, 2))
        triangles.extend((*sorted(triple), t) for triple in combinations(nodes, 3))
        # The simplex keeps the original node order (sponsor first) plus the time stamp.
        simplices.append(nodes + [t])

In [15]:
# House edges: drop the time stamp and keep each node pair once.
# Column names are passed to the constructor so that an empty input list
# yields an empty, correctly-labelled frame instead of a length-mismatch error.
house_edge_df = pd.DataFrame(house_edges, columns=['node_1', 'node_2', 't'])
house_edge_df = house_edge_df[['node_1', 'node_2']].drop_duplicates()
house_edge_df.to_csv("../Data/bills-house/edges.csv", index=False)

# House triangles: written as collected, one row per (triple, time stamp).
house_triangle_df = pd.DataFrame(house_triangles, columns=['node_1', 'node_2', 'node_3', 't'])
house_triangle_df.to_csv("../Data/bills-house/triangles.csv", index=False)

In [17]:
# Senate edges: drop the time stamp and keep each node pair once.
# Column names are passed to the constructor so that an empty input list
# yields an empty, correctly-labelled frame instead of a length-mismatch error.
senate_edge_df = pd.DataFrame(senate_edges, columns=['node_1', 'node_2', 't'])
senate_edge_df = senate_edge_df[['node_1', 'node_2']].drop_duplicates()
senate_edge_df.to_csv("../Data/bills-senate/edges.csv", index=False)

# Senate triangles: written as collected, one row per (triple, time stamp).
senate_triangle_df = pd.DataFrame(senate_triangles, columns=['node_1', 'node_2', 'node_3', 't'])
senate_triangle_df.to_csv("../Data/bills-senate/triangles.csv", index=False)

In [18]:
# For each chamber (house first, then senate): order the simplices in place by
# their last entry, the time stamp, and write one comma-separated line per simplex.
for chamber_simplices, out_path in (
    (house_simplices, f'../Data/bills-house/simplices.csv'),
    (senate_simplices, f'../Data/bills-senate/simplices.csv'),
):
    chamber_simplices.sort(key=lambda simplex: simplex[-1])
    with open(out_path, 'w') as out_file:
        out_file.writelines(
            ",".join(map(str, simplex)) + "\n" for simplex in chamber_simplices
        )