In [3]:
import networkx as nx
import simplejson as json
from networkx.readwrite import json_graph
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objs as go
import plotly.express as px

In [None]:
# parse the gml file and build the graph object
g = nx.read_gml('networks/data/karate.gml', label='id')
# create a dictionary in a node-link format that is suitable for JSON serialization
# NOTE(review): the disabled export below targets power_grid_westernus.json although the
# graph loaded above comes from karate.gml; confirm the output name before re-enabling.
# d = json_graph.node_link_data(g)
# with open('networks/data/power_grid_westernus.json', 'w') as fp:
#     json.dump(d, fp)

In [None]:
# Show the graph as adjacency-format data (this is the cell's output).
nx.adjacency_data(g)

In [None]:
# Quick visual check of the graph with node labels drawn.
nx.draw(g, with_labels=True)

In [None]:
# Load the curated 2022 House vote table.
vote_data = pd.read_csv('networks/data/house_votes_2022_curated.csv')

In [None]:
# Parse the DATE column into pandas datetimes.
vote_data["DATE"] = pd.to_datetime(vote_data["DATE"])

In [None]:
# Keep only the calendar date of each entry.
vote_data["DATE"] = vote_data["DATE"].dt.date

In [None]:
# Order the rows chronologically (modifies vote_data in place).
vote_data.sort_values(by=['DATE'], inplace=True)

In [None]:
# Renumber the rows after the sort, discarding the old index.
vote_data.reset_index(drop=True, inplace=True)

In [None]:
# Make sure representative names are plain strings.
vote_data["REPRESENTATIVE"] = vote_data["REPRESENTATIVE"].astype(str)

In [None]:
# Add an explicit "index" column holding each row's position. After the
# groupby(...).count() calls below it holds per-group counts, which the link
# builders further down read as row["index"].
vote_data["index"] = np.arange(len(vote_data))

In [None]:
# Count rows per (BILL, VOTE) combination.
yea_nay_totals = vote_data.groupby(["BILL", "VOTE"]).count()

In [None]:
# YEA rows only, grouped per (BILL, REPRESENTATIVE, PARTY).
reps_voted_yea_on_same_bill = vote_data[vote_data["VOTE"] == "YEA"].groupby(["BILL", "REPRESENTATIVE", "PARTY"]).count()

In [None]:
# NAY rows only, grouped per (BILL, REPRESENTATIVE, PARTY).
reps_voted_nay_on_same_bill = vote_data[vote_data["VOTE"] == "NAY"].groupby(["BILL", "REPRESENTATIVE", "PARTY"]).count()

In [None]:
# Group by every descriptive column; count() leaves per-group counts in the
# columns that are not group keys (here the "index" column).
reps_voted_same_on_bill = vote_data.groupby(["BILL", "REPRESENTATIVE", "PARTY", "VOTE", "DATE", "BILL_NAME", "COMMITTEE", "STATE"]).count()

In [None]:
# Flat copy of the grouped frame with the group keys turned back into columns.
voted_same_df = reps_voted_same_on_bill.reset_index()

In [None]:
# Persist the grouped frame (the MultiIndex version, not voted_same_df).
reps_voted_same_on_bill.to_csv('networks/data/reps_voted_same_on_bill.csv')

In [None]:
def create_node_link_json_from_vote_data(vote_data, reps_voted_yea_on_same_bill, reps_voted_nay_on_same_bill):
    """Build a ``{"nodes": [...], "links": [...]}`` dictionary from the vote tables.

    Parameters
    ----------
    vote_data : pandas.DataFrame
        One row per recorded vote. Must provide the columns ``REPRESENTATIVE``,
        ``PARTY``, ``VOTE``, ``BILL`` and ``DATE``; every ``DATE`` value must
        support ``strftime``.
    reps_voted_yea_on_same_bill, reps_voted_nay_on_same_bill : pandas.DataFrame
        Grouped frames whose index entries are tuples with at least two
        elements and which carry an ``index`` column.

    Returns
    -------
    dict
        ``nodes`` holds one entry per row of ``vote_data``. ``links`` holds one
        entry per row of the yea frame followed by one per row of the nay
        frame: ``source`` is the first index element, ``target`` the second and
        ``value`` the ``index`` column. For the grouped frames built in this
        notebook the first two index levels are BILL and REPRESENTATIVE.
    """
    nodes = [
        {
            "id": record["REPRESENTATIVE"],
            "group": record["PARTY"],
            "vote": record["VOTE"],
            "bill": record["BILL"],
            "date": record["DATE"].strftime("%Y-%m-%d"),
        }
        for _, record in vote_data.iterrows()
    ]

    links = []
    for grouped in (reps_voted_yea_on_same_bill, reps_voted_nay_on_same_bill):
        # tolist() yields built-in Python numbers. Reading the value through
        # iterrows() on an all-integer frame yields NumPy scalars, which the
        # json encoder refuses to serialise.
        for key, weight in zip(grouped.index, grouped["index"].tolist()):
            links.append({"source": key[0], "target": key[1], "value": weight})

    return {"nodes": nodes, "links": links}

In [None]:
def nodes_links_same_votes(vote_data, reps_voted_same_on_bill):
    """Build a ``{"nodes": [...], "links": [...]}`` dictionary from the vote tables.

    Parameters
    ----------
    vote_data : pandas.DataFrame
        One row per recorded vote with the columns ``REPRESENTATIVE``,
        ``PARTY``, ``VOTE``, ``BILL`` and ``DATE`` (``DATE`` values must
        support ``strftime``).
    reps_voted_same_on_bill : pandas.DataFrame
        Grouped frame whose index entries are tuples with at least two
        elements and which carries an ``index`` column.

    Returns
    -------
    dict
        ``nodes`` has one entry per row of ``vote_data``; ``links`` has one
        entry per row of the grouped frame, with ``source`` / ``target`` taken
        from the first two index elements and ``value`` from the ``index``
        column. For the grouped frame built in this notebook the first two
        index levels are BILL and REPRESENTATIVE.
    """
    nodes = [
        {
            "id": record["REPRESENTATIVE"],
            "group": record["PARTY"],
            "vote": record["VOTE"],
            "bill": record["BILL"],
            "date": record["DATE"].strftime("%Y-%m-%d"),
        }
        for _, record in vote_data.iterrows()
    ]

    # tolist() yields built-in Python numbers. Reading the value through
    # iterrows() on an all-integer frame yields NumPy scalars, which the json
    # encoder refuses to serialise when the result is dumped to disk.
    links = [
        {"source": key[0], "target": key[1], "value": weight}
        for key, weight in zip(
            reps_voted_same_on_bill.index,
            reps_voted_same_on_bill["index"].tolist(),
        )
    ]

    return {"nodes": nodes, "links": links}

In [None]:
# Build the node/link dictionary from the vote table and the fully grouped frame.
net_data = nodes_links_same_votes(vote_data, reps_voted_same_on_bill)

In [None]:
# Displays the whole dictionary as cell output.
net_data

In [None]:
# Write the node/link dictionary to disk as JSON.
with open('networks/data/house_votes_2022_curated.json', 'w') as fp:
    json.dump(net_data, fp)

In [None]:
def convert_yea_nay_to_binary(vote_data):
    """Recode the ``VOTE`` column to 1 for "YEA" and 0 for everything else.

    The frame is modified in place and also returned. Values that are already
    1 stay 1, so running the conversion again on a converted frame (for
    example by re-executing the notebook cell) leaves it unchanged instead of
    turning every vote into 0.

    Parameters
    ----------
    vote_data : pandas.DataFrame
        Frame with a ``VOTE`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    vote_data["VOTE"] = vote_data["VOTE"].apply(
        lambda vote: 1 if (vote == "YEA" or vote == 1) else 0
    )
    return vote_data

In [None]:
# Recode VOTE to 1/0. The helper writes into vote_data itself and returns the
# same object, so this rebinding does not create a copy.
vote_data = convert_yea_nay_to_binary(vote_data)

In [None]:
def create_same_votes_heatmap(vote_data):
    """Pivot the vote table into a representative-by-bill matrix.

    Rows are indexed by (``REPRESENTATIVE``, ``PARTY``), columns are the
    distinct ``BILL`` values and each cell holds that representative's
    ``VOTE`` for the bill. Combinations with no entry are filled with 0.

    pandas raises ``ValueError`` from ``pivot`` when the same
    (representative, party, bill) combination appears with more than one
    distinct ``VOTE`` value.
    """
    group_keys = ["BILL", "REPRESENTATIVE", "PARTY", "VOTE"]
    return (
        vote_data.groupby(group_keys)
        .count()
        .reset_index()
        .pivot(index=["REPRESENTATIVE", "PARTY"], columns="BILL", values="VOTE")
        .fillna(0)
    )

In [None]:
# Representative-by-bill matrix built from the recoded vote table.
s_v = create_same_votes_heatmap(vote_data)

In [None]:
# NOTE(review): values is not referenced again in the cells shown here.
values = [0]

In [None]:
# Keep rows whose '1319' column differs from the string '0'.
# NOTE(review): svhm is not defined in any visible cell (the matrix above is
# bound to s_v). The string column name and string '0' suggest a frame that
# was re-read from CSV; confirm where svhm comes from.
samevotes_fixed = svhm[svhm['1319'] !='0']

In [None]:
# Same filter on the '8404' column.
samevotes_fixed = samevotes_fixed[samevotes_fixed['8404'] !='0']

In [None]:
# Rows where the '1319' and '8404' columns hold the same value.
bills_passed_samevotes = samevotes_fixed[samevotes_fixed['1319'] == samevotes_fixed['8404']]

In [None]:
# Rebinds samevotes_fixed to the contents of the CSV file, replacing the
# frame filtered in the cells above.
samevotes_fixed = pd.read_csv('networks/data/samevotes_fixed.csv')

In [None]:
# Writes net_data (built earlier by nodes_links_same_votes) to a second file.
with open('networks/data/house_votes_network.json', 'w') as fp:
    json.dump(net_data, fp)

### All votes for the 1st and 2nd sessions of the 117th Congress (House of Representatives)

In [None]:
# Load the member table from H117_members.csv.
congress_members = pd.read_csv('networks/data/congress/H117_members.csv')

In [None]:
def party_code_to_party_name_in_df_column(df, column_name):
    """Replace numeric party codes in ``df[column_name]`` with party names.

    Code 100 becomes "Democrat"; every other value becomes "Republican".
    A value that is already "Democrat" is kept, so applying the function to
    an already converted column (for example when the notebook cell is
    executed again) no longer relabels every row as "Republican".

    The frame is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to convert.
    column_name : str
        Name of the column holding the party codes.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    df[column_name] = df[column_name].apply(
        lambda code: "Democrat" if (code == 100 or code == "Democrat") else "Republican"
    )
    return df

In [None]:
# Replace the numeric party_code column with party names. The helper modifies
# congress_members in place and returns it, so congress_mems is the same
# DataFrame object under a second name.
congress_mems = party_code_to_party_name_in_df_column(congress_members, "party_code")

In [None]:
def match_value_to_icpsr(df, members=None, column_map=None):
    """Copy member attributes onto ``df`` by matching on the ``icpsr`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``icpsr`` column. It is modified in place.
    members : pandas.DataFrame, optional
        Member table with an ``icpsr`` column plus the source columns named in
        ``column_map``. Defaults to the notebook-level ``congress_mems``.
    column_map : dict, optional
        Maps the column to create on ``df`` to the member-table column it is
        read from. Defaults to ``{"state": "state_abbrev"}``. The other
        mappings this notebook has used are ``{"nomdim1": "nominate_dim1",
        "rep_name": "bioname", "party": "party_code", "born": "born"}``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new column(s) added.

    Raises
    ------
    IndexError
        If ``df`` contains an ``icpsr`` value that is absent from the member
        table.
    """
    if members is None:
        # Fall back to the notebook-level member table.
        members = congress_mems
    if column_map is None:
        column_map = {"state": "state_abbrev"}

    # Build the lookup once instead of re-filtering the member table for every
    # row of df. The first row per icpsr wins, matching the ".values[0]"
    # selection this replaces.
    lookup = (
        members.dropna(subset=["icpsr"])
        .drop_duplicates(subset="icpsr")
        .set_index("icpsr")
    )

    unknown = df.loc[~df["icpsr"].isin(lookup.index), "icpsr"]
    if not unknown.empty:
        raise IndexError(
            "icpsr value(s) missing from the member table: "
            + ", ".join(str(value) for value in unknown.unique()[:10])
        )

    for target_column, member_column in column_map.items():
        df[target_column] = df["icpsr"].map(lookup[member_column])
    return df

In [None]:
# Add the member's state to every vote row.
# NOTE(review): all_votes_117 is not defined in any cell visible here; it has
# to be loaded before this cell can run.
data = match_value_to_icpsr(all_votes_117)

In [None]:
def get_times_reps_voted_same(votes_all_fixed):
    """Pivot roll-call votes into a member-by-rollcall matrix of cast codes.

    Rows are indexed by (``icpsr``, ``party``, ``rep_name``, ``nomdim1``,
    ``age``), columns are the distinct ``rollnumber`` values and each cell
    holds the member's ``cast_code`` for that roll call. Combinations with no
    entry are filled with 0.
    """
    member_keys = ["icpsr", "party", "rep_name", "nomdim1", "age"]
    group_keys = ["rollnumber", "icpsr", "party", "cast_code", "rep_name", "nomdim1", "age"]

    # The grouping collapses the table to one row per distinct key
    # combination; the counts themselves are not used by the pivot.
    per_combination = votes_all_fixed.groupby(group_keys).count().reset_index()
    matrix = per_combination.pivot(index=member_keys, columns="rollnumber", values="cast_code")
    return matrix.fillna(0)

In [None]:
# Member-by-rollcall matrix of cast codes.
# NOTE(review): the helper groups on party, rep_name, nomdim1 and age, but
# match_value_to_icpsr as currently written only adds "state" (the other
# assignments are commented out), so data must already contain those columns.
d2 = get_times_reps_voted_same(data)

In [None]:
# Order members by nomdim1, highest first. nomdim1 is an index level of the
# pivoted frame. The sort modifies d2 in place.
d2.sort_values(by="nomdim1", ascending=False, inplace=True)

In [4]:
# Load the previously saved vote matrix from CSV.
v_d = pd.read_csv('networks/data/congress/same_votes.csv')

In [6]:
# Count non-null entries per member (grouped by the five identifying columns).
num_votes_per_rep = v_d.groupby(["icpsr", "party", "rep_name", "nomdim1", "age"]).count()

In [12]:
def drop_rows_with_more_than_n_zero_values(df, n):
    """Return the rows of ``df`` that contain at most ``n`` cells equal to 0.

    Every column takes part in the count. The input frame is not modified.
    """
    zeros_per_row = df.eq(0).sum(axis=1)
    within_limit = zeros_per_row.le(n)
    return df[within_limit]

In [15]:
# Keep only members whose row has at most 50 zero cells.
v_d_f = drop_rows_with_more_than_n_zero_values(v_d, 50)

In [16]:
# Save the filtered matrix. to_csv writes the row index as an extra first
# column by default.
v_d_f.to_csv('networks/data/congress/sv_fixed_ab.csv')

Unnamed: 0,icpsr,party,rep_name,nomdim1,age,1,2,3,4,5,...,858,859,860,861,862,863,864,865,866,867
0,22123,Republican,"HERRELL, Yvette",0.936,58,6,1,6,6,6,...,6,1,1,6,6,1,1,6,6,6
1,21705,Republican,"BIGGS, Andrew S.",0.849,64,6,1,6,6,6,...,6,6,1,6,6,6,1,6,6,6
2,21753,Republican,"NORMAN, Ralph",0.841,69,6,1,6,6,6,...,9,9,9,9,9,9,9,6,6,6
3,22121,Republican,"GREENE, Marjorie Taylor",0.800,48,6,1,6,6,6,...,9,9,9,9,9,9,9,6,6,6
4,21961,Republican,"ROY, Charles",0.800,50,6,1,6,6,6,...,6,6,1,6,6,6,9,6,6,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,29106,Democrat,"WATERS, Maxine",-0.656,84,1,9,1,1,1,...,1,1,6,1,1,1,6,1,1,1
448,21906,Democrat,"CASTEN, Sean",-0.657,51,1,1,1,1,1,...,1,1,6,1,1,1,1,1,1,1
449,21726,Democrat,"JAYAPAL, Pramila",-0.681,57,1,1,1,1,1,...,1,1,6,1,1,1,6,1,1,1
450,29778,Democrat,"LEE, Barbara",-0.681,76,1,1,1,1,1,...,1,1,6,1,1,1,6,1,1,1


### Stack Overflow data

In [None]:
# Edge list of the Stack Overflow network.
stack_links = pd.read_csv("networks/data/stack_network_links.csv")

In [None]:
# Display the edge table as cell output.
stack_links

In [None]:
# Node table of the Stack Overflow network.
stack_nodes = pd.read_csv("networks/data/stack_network_nodes.csv")

In [None]:
def create_nodes_links_json_from_stack_data(stack_nodes, stack_links):
    """Convert the Stack Overflow node and edge tables to a node-link dictionary.

    ``stack_nodes`` must provide the columns ``name``, ``group`` and
    ``nodesize``; ``stack_links`` must provide ``source``, ``target`` and
    ``value``. The result has the shape ``{"nodes": [...], "links": [...]}``
    with one entry per input row, in row order.
    """
    node_entries = [
        {"id": record["name"], "group": record["group"], "size": record["nodesize"]}
        for _, record in stack_nodes.iterrows()
    ]
    link_entries = [
        {"source": record["source"], "target": record["target"], "value": record["value"]}
        for _, record in stack_links.iterrows()
    ]
    return {"nodes": node_entries, "links": link_entries}

In [None]:
# Node-link dictionary for the Stack Overflow network.
stack_net = create_nodes_links_json_from_stack_data(stack_nodes, stack_links)

In [14]:
def write_json_to_file(filename, data):
    """Serialise ``data`` as JSON into ``filename``.

    The file is opened in write mode, so an existing file is overwritten.
    """
    with open(filename, 'w') as destination:
        json.dump(data, destination)

In [15]:
# Save the Stack Overflow node-link dictionary as JSON.
write_json_to_file("networks/data/stack_network.json", stack_net)

In [16]:
def netjson_to_gml(netjson):
    """Build an undirected NetworkX graph from a node-link dictionary.

    Despite the name, the return value is a ``networkx.Graph`` object, not
    GML text; callers pass it on to ``nx.generate_gml`` or ``nx.write_gml``.

    Each entry of ``netjson["nodes"]`` must provide ``id``, ``group`` and
    ``size``; each entry of ``netjson["links"]`` must provide ``source``,
    ``target`` and ``value``. These become node and edge attributes.
    """
    graph = nx.Graph()
    graph.add_nodes_from(
        (entry["id"], {"group": entry["group"], "size": entry["size"]})
        for entry in netjson["nodes"]
    )
    graph.add_edges_from(
        (entry["source"], entry["target"], {"value": entry["value"]})
        for entry in netjson["links"]
    )
    return graph

In [19]:
# generate_gml returns a generator of GML lines; s_g is not consumed in the
# cells shown here.
s_g = nx.generate_gml(netjson_to_gml(stack_net))

In [22]:
# Build the graph again from stack_net and write it to disk in GML format.
nx.write_gml(netjson_to_gml(stack_net), "networks/data/stack_network.gml")

In [None]:
def gml_to_json(gml_file):
    """Read a GML file and return its node-link representation.

    Parameters
    ----------
    gml_file : str or path-like
        Location of the GML file. A file handle opened in binary mode is also
        accepted, because it is forwarded to ``nx.read_gml`` unchanged.

    Returns
    -------
    dict
        Node-link data produced by ``json_graph.node_link_data``.
    """
    # Hand the path straight to NetworkX: read_gml opens the file in binary
    # mode itself and decodes every line. A handle opened in text mode yields
    # str lines, which cannot be decoded again and make the read fail.
    g = nx.read_gml(gml_file)
    return json_graph.node_link_data(g)

In [23]:
# Read the exported graph back from disk. No label argument is passed here,
# unlike the karate.gml load at the top of the notebook (label='id').
stack_gml = nx.read_gml("networks/data/stack_network.gml")

In [25]:
def show_graph(g, seed=None):
    """Draw ``g`` with a spring layout and display the figure.

    Parameters
    ----------
    g : networkx.Graph
        Graph to draw; node labels are shown.
    seed : int, optional
        Seed forwarded to ``nx.spring_layout``. The layout starts from random
        positions, so pass a fixed value to get the same picture on every
        run. The default ``None`` keeps the layout random.
    """
    pos = nx.spring_layout(g, seed=seed)
    nx.draw(g, pos, with_labels=True)
    plt.show()