In [1]:
import networkx as nx
import json
from pandas.io.json import json_normalize
from networkx.readwrite import json_graph
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy

## Net stuff

In [None]:
# parse the gml file and build the graph object
g = nx.read_gml('networks/data/karate.gml', label='id')
# create a dictionary in a node-link format that is suitable for JSON serialization
# d = json_graph.node_link_data(g)
# with open('networks/data/power_grid_westernus.json', 'w') as fp:
#     json.dump(d, fp)

In [None]:
nx.adjacency_data(g)

In [None]:
nx.draw(g, with_labels=True)

In [None]:
vote_data = pd.read_csv('networks/data/house_votes_2022_curated.csv')

In [None]:
vote_data["DATE"] = pd.to_datetime(vote_data["DATE"])

In [None]:
vote_data["DATE"] = vote_data["DATE"].dt.date

In [None]:
vote_data.sort_values(by=['DATE'], inplace=True)

In [None]:
vote_data.reset_index(drop=True, inplace=True)

In [None]:
vote_data["REPRESENTATIVE"] = vote_data["REPRESENTATIVE"].astype(str)

In [None]:
vote_data["index"] = np.arange(len(vote_data))

In [None]:
yea_nay_totals = vote_data.groupby(["BILL", "VOTE"]).count()

In [None]:
reps_voted_yea_on_same_bill = vote_data[vote_data["VOTE"] == "YEA"].groupby(["BILL", "REPRESENTATIVE", "PARTY"]).count()

In [None]:
reps_voted_nay_on_same_bill = vote_data[vote_data["VOTE"] == "NAY"].groupby(["BILL", "REPRESENTATIVE", "PARTY"]).count()

In [None]:
reps_voted_same_on_bill = vote_data.groupby(["BILL", "REPRESENTATIVE", "PARTY", "VOTE", "DATE", "BILL_NAME", "COMMITTEE", "STATE"]).count()

In [None]:
voted_same_df = reps_voted_same_on_bill.reset_index()

In [None]:
reps_voted_same_on_bill.to_csv('networks/data/reps_voted_same_on_bill.csv')

In [None]:
def create_node_link_json_from_vote_data(vote_data, reps_voted_yea_on_same_bill, reps_voted_nay_on_same_bill):
    nodes = []
    links = []
    for index, row in vote_data.iterrows():
        print(index)
        nodes.append({"id": row["REPRESENTATIVE"], "group": row["PARTY"], "vote": row["VOTE"], "bill": row["BILL"], "date": row["DATE"].strftime("%Y-%m-%d")})
    for index, row in reps_voted_yea_on_same_bill.iterrows():
        links.append({"source": index[0], "target": index[1], "value": row["index"]})
    for index, row in reps_voted_nay_on_same_bill.iterrows():
        links.append({"source": index[0], "target": index[1], "value": row["index"]})
    return {"nodes": nodes, "links": links}

In [None]:
def nodes_links_same_votes(vote_data, reps_voted_same_on_bill):
    nodes = []
    links = []
    for index, row in vote_data.iterrows():
        nodes.append({"id": row["REPRESENTATIVE"], "group": row["PARTY"], "vote": row["VOTE"], "bill": row["BILL"], "date": row["DATE"].strftime("%Y-%m-%d")})
    for index, row in reps_voted_same_on_bill.iterrows():
        links.append({"source": index[0], "target": index[1], "value": row["index"]})
    return {"nodes": nodes, "links": links}

In [None]:
net_data = nodes_links_same_votes(vote_data, reps_voted_same_on_bill)

In [None]:
net_data

In [None]:
with open('networks/data/house_votes_2022_curated.json', 'w') as fp:
    json.dump(net_data, fp)

In [None]:
def convert_yea_nay_to_binary(vote_data):
    vote_data["VOTE"] = vote_data["VOTE"].apply(lambda x: 1 if x == "YEA" else 0)
    return vote_data

In [None]:
vote_data = convert_yea_nay_to_binary(vote_data)

In [None]:
def create_same_votes_heatmap(vote_data):
    same_votes = vote_data.groupby(["BILL", "REPRESENTATIVE", "PARTY", "VOTE"]).count()
    same_votes = same_votes.reset_index()
    same_votes = same_votes.pivot(index=["REPRESENTATIVE", "PARTY"], columns="BILL", values="VOTE")
    same_votes = same_votes.fillna(0)
    return same_votes

In [None]:
s_v = create_same_votes_heatmap(vote_data)

In [None]:
values = [0]

In [None]:
samevotes_fixed = svhm[svhm['1319'] !='0']

In [None]:
samevotes_fixed = samevotes_fixed[samevotes_fixed['8404'] !='0']

In [None]:
bills_passed_samevotes = samevotes_fixed[samevotes_fixed['1319'] == samevotes_fixed['8404']]

In [None]:
samevotes_fixed = pd.read_csv('networks/data/samevotes_fixed.csv')

In [None]:
with open('networks/data/house_votes_network.json', 'w') as fp:
    json.dump(net_data, fp)

### all votes for the 1st and 2nd Sessions of the 117th Congress (House of Representatives)

In [None]:
congress_members = pd.read_csv('networks/data/congress/H117_members.csv')

In [None]:
def party_code_to_party_name_in_df_column(df, column_name):
    df[column_name] = df[column_name].apply(lambda x: "Democrat" if x == 100 else "Republican")
    return df

In [None]:
congress_mems = party_code_to_party_name_in_df_column(congress_members, "party_code")

In [None]:
def match_value_to_icpsr(df):
    # df["nomdim1"] = df["icpsr"].apply(lambda x: congress_mems[congress_mems["icpsr"] == x]["nominate_dim1"].values[0])
    # df["rep_name"] = df["icpsr"].apply(lambda x: congress_mems[congress_mems["icpsr"] == x]["bioname"].values[0])
    # df["party"] = df["icpsr"].apply(lambda x: congress_mems[congress_mems["icpsr"] == x]["party_code"].values[0])
    # df["born"] = df["icpsr"].apply(lambda x: congress_mems[congress_mems["icpsr"] == x]["born"].values[0])
    df["state"] = df["icpsr"].apply(lambda x: congress_mems[congress_mems["icpsr"] == x]["state_abbrev"].values[0])
    return df

In [None]:
data = match_value_to_icpsr(all_votes_117)

In [None]:
def get_times_reps_voted_same(votes_all_fixed):
    same_votes = votes_all_fixed.groupby(["rollnumber", "icpsr", "party", "cast_code", "rep_name", "nomdim1", "age"]).count()
    same_votes = same_votes.reset_index()
    same_votes = same_votes.pivot(index=["icpsr", "party", "rep_name", "nomdim1", "age"], columns="rollnumber", values="cast_code")
    same_votes = same_votes.fillna(0)
    return same_votes

In [None]:
d2 = get_times_reps_voted_same(data)

In [None]:
d2.sort_values(by="nomdim1", ascending=False, inplace=True)

In [None]:
v_d = pd.read_csv('networks/data/congress/same_votes.csv')

In [None]:
num_votes_per_rep = v_d.groupby(["icpsr", "party", "rep_name", "nomdim1", "age"]).count()

In [None]:
def drop_rows_with_more_than_n_zero_values(df, n):
    df = df[(df == 0).sum(axis=1) <= n]
    return df

In [None]:
v_d_f = drop_rows_with_more_than_n_zero_values(v_d, 50)

In [None]:
v_d_f.to_csv('networks/data/congress/sv_fixed_ab.csv')

### Stack Overflow data

In [3]:
stack_links = pd.read_csv("networks/data/stack_overflow/stack_network_links.csv")

In [10]:
stack_links.drop_duplicates(subset="value", keep="first", inplace=True)

In [4]:
stack_nodes = pd.read_csv("networks/data/stack_overflow/stack_network_nodes.csv")

In [11]:
stack_d = pd.read_csv("networks/data/stack_overflow/stack_data_table.csv")

In [5]:
stack_nodes

Unnamed: 0,name,group,nodesize
0,html,6,272.45
1,css,6,341.17
2,hibernate,8,29.83
3,spring,8,52.84
4,ruby,3,70.14
...,...,...,...
110,perl,13,19.38
111,cloud,9,10.66
112,photoshop,6,12.62
113,powershell,5,9.85


In [13]:
stack_d.sort_values(by="Id", ascending=False)

KeyError: 'id'

In [10]:
s_nodes 

NameError: name 's_nodes' is not defined

In [10]:
pd.set_option('display.max_rows', 200)

In [11]:
stack_nodes

Unnamed: 0,name,group,nodesize
14,javascript,6,649.16
42,java,8,610.65
46,python,1,438.67
18,php,6,361.22
1,css,6,341.17
12,c#,2,321.13
0,html,6,272.45
10,c++,1,268.11
41,android,4,229.86
15,jquery,6,208.29


In [10]:
stack_nodes.to_csv("networks/data/stack_overflow/stack_network_nodeasdasds.csv")

In [14]:
def create_nodes_links_json_from_stack_data(stack_nodes, stack_links):
    nodes = []
    links = []
    for index, row in stack_nodes.iterrows():
        nodes.append({"id": row["name"], "group": row["group"], "size": row["nodesize"]})
    for index, row in stack_links.iterrows():
        links.append({"source": row["source"], "target": row["target"], "value": row["value"]})
    return {"nodes": nodes, "links": links}

In [15]:
stack_net = create_nodes_links_json_from_stack_data(stack_nodes, stack_links)

In [16]:
def write_json_to_file(filename, data):
    with open(filename, 'w') as fp:
        json.dump(data, fp)

In [None]:
write_json_to_file("networks/data/stack_overflow/edited_s_net.json", stack_net)

In [None]:
def netjson_to_gml(netjson):
    g = nx.Graph()
    for node in netjson["nodes"]:
        g.add_node(node["id"], group=node["group"], size=node["size"])
    for link in netjson["links"]:
        g.add_edge(link["source"], link["target"], value=link["value"])
    return g

In [None]:
s_g = nx.generate_gml(netjson_to_gml(stack_net))

In [None]:
def gml_to_json(gml_file):
    with open(gml_file) as f:
        g = nx.read_gml(f)
    return json_graph.node_link_data(g)

In [None]:
stack_gml = nx.read_gml("networks/data/stack_overflow/stack_network.gml")

In [4]:
with open('networks/data/stack_overflow/stack_network.json') as s_n:
    stack_net = json.load(s_n)

In [None]:
df = pd.DataFrame.from_dict(stack_net["nodes"])

In [27]:
adj_matrix = nx.adjacency_matrix(stack_gml)

  adj_matrix = nx.adjacency_matrix(stack_gml)


In [31]:
am = pd.DataFrame(adj_matrix.todense())

In [33]:
am.to_csv("networks/data/stack_overflow/stack_adj_matrix.csv")

In [9]:
def get_node_group_link_values_from_netjson(netjson):
    nodes = []
    links = []
    for node in netjson["nodes"]:
        nodes.append(node["group"])
    for link in netjson["links"]:
        links.append(link["value"])
    return nodes, links

In [10]:
gl = get_group_link_values(stack_net)

KeyError: 'group'