In [1]:
import pandas as pd
import json



In [2]:
# Input CSV file names (resolved relative to the notebook's working directory)
VERTEX_DATA_FILE = "county_covid_data_OR_25-8-20_prepped.csv"
EDGE_DATA_FILE = "County edges.csv"

# Read the vertex table and the edge table into dataframes
v_df = pd.read_csv(filepath_or_buffer=VERTEX_DATA_FILE)
e_df = pd.read_csv(filepath_or_buffer=EDGE_DATA_FILE)



In [3]:
# Positional (0-based) column indices of the fields looked up in the v_df dataframe
DATE_LOC, CNTY_LOC = 0, 1
NCASE_LOC, NDEATH_LOC = 4, 5

# Start with an empty graph: no vertices and no edges yet
graph = {"or_grph_swf": {"Vertices": {}, "Edges": []}}



In [4]:
# Partition the row positions of v_df by county: cnty_part maps each county
# value to the list of row positions (in file order) where it occurs.
cnty_part = {}
for row in range(v_df.shape[0]):
    cnty_part.setdefault(v_df.iloc[row, CNTY_LOC], []).append(row)

# Give every county its own (initially empty) dictionary in the graph
vertices = graph["or_grph_swf"]["Vertices"]
for cnty in cnty_part:
    vertices[cnty] = {}

# Fill each county dictionary: date -> [number of cases, number of deaths]
for cnty, per_date in vertices.items():
    for row in cnty_part[cnty]:
        date = v_df.iloc[row, DATE_LOC]
        ncase = v_df.iloc[row, NCASE_LOC]
        ndeath = v_df.iloc[row, NDEATH_LOC]

        # int() turns the pandas/numpy scalars into plain Python ints
        per_date[date] = [int(ncase), int(ndeath)]

# Make edge list
# Both spreadsheets had problems: the latest spreadsheet was missing items, and the
# original spreadsheet contained repeated edges once edges are treated as unoriented.
# To fix this, sort the two names within each edge, sort the edge list, and then
# remove duplicates by 1) changing the list of lists to a list of tuples, 2) changing that
# list to a set (possible because tuples are hashable), and 3) changing the set back to a list.



In [5]:
# The following is the nice way to do it.
# Raw edges exactly as listed in the first two columns of e_df; the same
# unoriented edge may appear more than once (e.g. as A-B and as B-A).
broken_edge_list = [list(pair) for pair in zip(e_df.iloc[:, 0], e_df.iloc[:, 1])]

# Sort the two names within each edge so that A-B and B-A become the same tuple,
# drop duplicates through a set, and sort the edges LAST. A set has no defined
# iteration order (for strings it can change from run to run), so sorting before
# the set conversion would be thrown away and the result would not be reproducible.
nice_edge_list = sorted({tuple(sorted(edge)) for edge in broken_edge_list})



In [6]:
# Store the edge list in the graph. nice_edge_list already holds each unoriented
# edge once, as a tuple of sorted names, so it is reused here instead of being
# rebuilt from e_df a second time. Sorting it makes the exported edge order the
# same on every run regardless of how the list was ordered when it was built.
graph["or_grph_swf"]["Edges"] = sorted(nice_edge_list)

# Export dictionary of graphs (json writes each edge tuple as a two-element array)
with open('or_grph_swf.txt', 'w') as file:
    json.dump(graph, file)
