In [1]:
import os, sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Vineyard functions
from VineyardFinal import vineyard_from_pds, Get_Adjacency_Persistence, Homotopy, Get_W_Infinity, vdist, min_vc, fD, fL

# Matrix Operations
import numpy as np
import pandas as pd

# misc
from tqdm import tqdm

# tda
import gudhi
import gudhi.hera

# Geospatial and graphs
import geopandas as gpd
from gerrychain import Graph
import networkx as nx

import warnings
warnings.filterwarnings('ignore', message='Found islands', category=UserWarning) # Getting rid of pesky degree-0 graph warnings 

INFINITY = 1e6
n = 100

## Load Data

In [2]:
state_dict = {
    '01': {'name': 'Alabama'},
    '02': {'name': 'Alaska'},
    '04': {'name': 'Arizona'},
    '05': {'name': 'Arkansas'},
    '06': {'name': 'California'},
    '08': {'name': 'Colorado'},
    '09': {'name': 'Connecticut'},
    '10': {'name': 'Delaware'},
    '11': {'name': 'District of Columbia'},
    '12': {'name': 'Florida'},
    '13': {'name': 'Georgia'},
    '15': {'name': 'Hawaii'},
    '16': {'name': 'Idaho'},
    '17': {'name': 'Illinois'},
    '18': {'name': 'Indiana'},
    '19': {'name': 'Iowa'},
    '20': {'name': 'Kansas'},
    '21': {'name': 'Kentucky'},
    '22': {'name': 'Louisiana'},
    '23': {'name': 'Maine'},
    '24': {'name': 'Maryland'},
    '25': {'name': 'Massachusetts'},
    '26': {'name': 'Michigan'},
    '27': {'name': 'Minnesota'},
    '28': {'name': 'Mississippi'},
    '29': {'name': 'Missouri'},
    '30': {'name': 'Montana'},
    '31': {'name': 'Nebraska'},
    '32': {'name': 'Nevada'},
    '33': {'name': 'New Hampshire'},
    '34': {'name': 'New Jersey'},
    '35': {'name': 'New Mexico'},
    '36': {'name': 'New York'},
    '37': {'name': 'North Carolina'},
    '38': {'name': 'North Dakota'},
    '39': {'name': 'Ohio'},
    '40': {'name': 'Oklahoma'},
    '41': {'name': 'Oregon'},
    '42': {'name': 'Pennsylvania'},
    '44': {'name': 'Rhode Island'},
    '45': {'name': 'South Carolina'},
    '46': {'name': 'South Dakota'},
    '47': {'name': 'Tennessee'},
    '48': {'name': 'Texas'},
    '49': {'name': 'Utah'},
    '50': {'name': 'Vermont'},
    '51': {'name': 'Virginia'},
    '53': {'name': 'Washington'},
    '54': {'name': 'West Virginia'},
    '55': {'name': 'Wisconsin'},
    '56': {'name': 'Wyoming'}
}

graph_folder = '../dataIn/elections/state_graphs_2020'
graphs = [Graph.from_json(f'{graph_folder}/{f}') for f in os.listdir(graph_folder) if f.endswith('.json')]


## Calculate Metrics

In [3]:
###### Analsysis Params ###### 
elections = [2000, 2004, 2008, 2012, 2016, 2020]
demographic = 'BVAP'
# length of homotopy
t = 100

###### Out Dir ###### 

outDir = '../dataOut'
os.makedirs(outDir, exist_ok = True)

cols = ['State', 'StateFP', 'Election_Year', 'w', 'w1', 'v', 'mvc', 'l1', 'l1w', 'l2',
     'l2w', 'linf', 'linfw', 'r', 'r^2', 
     'BVAP%', 'Dem%', 'REP%', 'Votes', 'VAP','Winner']

df_out = pd.DataFrame(columns = cols)

for G in tqdm(graphs, desc = "Processing States"):
    n = list(G.nodes)[0]
    stateFP = G.nodes[n].get('STATEFP')
    
    for election in elections:
        # Index of DataFrame is {stateFP}{Election Year}
        idx = f'{stateFP}{election}'

        # State name and voting age population
        state_name = state_dict[stateFP]['name']
        VAP = np.sum([G.nodes[v]["VAP"] for v in G.nodes])


###### Node Values ###### 
        # Democrat%
        D = np.array(list((nx.get_node_attributes(G,f'{election}_D').values())))
        # Republican%
        R = np.array(list((nx.get_node_attributes(G,f'{election}_R').values())))
        # Total Votes
        V = np.array(list((nx.get_node_attributes(G,f'{election}_V').values())))
        # Demographic%
        B = np.nan_to_num(np.array(list((nx.get_node_attributes(G,demographic).values()))))
        # VAP
        VAP = np.array(list((nx.get_node_attributes(G,'VAP').values())))

###### Compute Vineyard ###### 

        # Persistence diagrams
        PD0 = []

        # Straight Line Homotopy
        Hs = Homotopy(D,B, t = 50)
        for h in Hs:
            for i, v in enumerate(G.nodes()):
                G.nodes[v]['Homotopy'] = h[i]
            PD0.append(Get_Adjacency_Persistence(G,'Homotopy', dimension = 0, popCol = "VAP", popMin = 10))

###### Calculate Metrics ###### 
        # Wasserstein Distance
        w = Get_W_Infinity(PD0[0], PD0[-1])
        # 1-Wasserstein Distance
        w1 = gudhi.hera.wasserstein_distance(PD0[0], PD0[-1])
        # Vineyard Distance (F(D) = D, F(L) = 1)
        v = vdist(vineyard_from_pds(PD0), fD, fL)
        # MVC 
        mvc = min_vc(PD0[0], PD0[-1])
        #l1 Norm
        l1 = np.linalg.norm(D - B, ord = 1)
        # l2 Norm
        l2 = np.linalg.norm(D - B, ord = 2)
        #Linf Norm
        linf = np.linalg.norm(D - B, ord = np.inf)
        # Weighted l1 Norm
        l1w = l1 / len(G)
        # Weighted L2 Norm
        l2w = l2 / len(G)
        # Weighted Linf Norm
        linfw = linf / len(G)
        # Pearson Correlation
        r = np.corrcoef(D,B)[0,1]
        # R^2
        r2 = r ** 2
        # Total Votes
        tV = np.sum(V)
        # Democrat %
        dP = np.sum(D * V) / tV * 100.0 
        # Republican %
        rP = np.sum(R * V) / tV * 100.0 
        # Total VAP
        tVAP = np.sum(VAP)
        # Demographic Percentage
        bP = np.sum(B * VAP) / tVAP * 100.0
        # winner
        if dP > rP:
            winner = "Democrat"
        else:
            winner = "Republican"
###### Store Data ###### 

        data = pd.Series([state_name, stateFP, election, w, w1, v, mvc, l1, l1w, l2, l2w, linf, linfw, r, r2, bP, dP, rP, tV, tVAP, winner])
        
        df_out.loc[idx] = pd.Series({
            f'{col}' : d for col, d in zip(cols, data)
        })
        print(f'{state_name}-{election} is done')

###### Save ############################################################################## 
# Index is {StateFP}{ElectionYear}
df_out.index.name = '[StateFP][ElectionYear]'
df_out.to_csv(os.path.join(outDir, 'election_metrics.csv'), index = True)
print(f"Metric calculation complete. \nResults saved to {outDir}/election_metrics.csv")

Processing States:   0%|          | 0/48 [00:00<?, ?it/s]

Texas-2000 is done
Texas-2004 is done
Texas-2008 is done
Texas-2012 is done


Processing States:   2%|▏         | 1/48 [00:01<00:49,  1.05s/it]

Texas-2016 is done
Texas-2020 is done
Idaho-2000 is done
Idaho-2004 is done


Processing States:   4%|▍         | 2/48 [00:01<00:22,  2.00it/s]

Idaho-2008 is done
Idaho-2012 is done
Idaho-2016 is done
Idaho-2020 is done
Nebraska-2000 is done
Nebraska-2004 is done
Nebraska-2008 is done


Processing States:   6%|▋         | 3/48 [00:01<00:18,  2.43it/s]

Nebraska-2012 is done
Nebraska-2016 is done
Nebraska-2020 is done
Massachusetts-2000 is done
Massachusetts-2004 is done
Massachusetts-2008 is done
Massachusetts-2012 is done


Processing States:  10%|█         | 5/48 [00:01<00:08,  4.79it/s]

Massachusetts-2016 is done
Massachusetts-2020 is done
Arizona-2000 is done
Arizona-2004 is done
Arizona-2008 is done
Arizona-2012 is done
Arizona-2016 is done
Arizona-2020 is done
Georgia-2000 is done
Georgia-2004 is done
Georgia-2008 is done
Georgia-2012 is done
Georgia-2016 is done
Georgia-2020 is done


Processing States:  15%|█▍        | 7/48 [00:02<00:10,  3.92it/s]

Wisconsin-2000 is done
Wisconsin-2004 is done
Wisconsin-2008 is done
Wisconsin-2012 is done
Wisconsin-2016 is done
Wisconsin-2020 is done
New Hampshire-2000 is done
New Hampshire-2004 is done


Processing States:  19%|█▉        | 9/48 [00:02<00:07,  5.51it/s]

New Hampshire-2008 is done
New Hampshire-2012 is done
New Hampshire-2016 is done
New Hampshire-2020 is done
Wyoming-2000 is done
Wyoming-2004 is done
Wyoming-2008 is done
Wyoming-2012 is done
Wyoming-2016 is done
Wyoming-2020 is done
Kansas-2000 is done
Kansas-2004 is done
Kansas-2008 is done
Kansas-2012 is done
Kansas-2016 is done
Kansas-2020 is done
Missouri-2000 is done
Missouri-2004 is done
Missouri-2008 is done


Processing States:  23%|██▎       | 11/48 [00:03<00:09,  4.03it/s]

Missouri-2012 is done
Missouri-2016 is done
Missouri-2020 is done
Maine-2000 is done
Maine-2004 is done
Maine-2008 is done
Maine-2012 is done
Maine-2016 is done
Maine-2020 is done
Iowa-2000 is done
Iowa-2004 is done
Iowa-2008 is done
Iowa-2012 is done


Processing States:  27%|██▋       | 13/48 [00:03<00:08,  4.31it/s]

Iowa-2016 is done
Iowa-2020 is done
Arkansas-2000 is done
Arkansas-2004 is done
Arkansas-2008 is done
Arkansas-2012 is done


Processing States:  31%|███▏      | 15/48 [00:03<00:06,  4.75it/s]

Arkansas-2016 is done
Arkansas-2020 is done
Colorado-2000 is done
Colorado-2004 is done
Colorado-2008 is done
Colorado-2012 is done
Colorado-2016 is done
Colorado-2020 is done


Processing States:  33%|███▎      | 16/48 [00:04<00:06,  4.79it/s]

Alabama-2000 is done
Alabama-2004 is done
Alabama-2008 is done
Alabama-2012 is done
Alabama-2016 is done
Alabama-2020 is done
West Virginia-2000 is done


Processing States:  35%|███▌      | 17/48 [00:04<00:06,  5.03it/s]

West Virginia-2004 is done
West Virginia-2008 is done
West Virginia-2012 is done
West Virginia-2016 is done
West Virginia-2020 is done
New Jersey-2000 is done
New Jersey-2004 is done
New Jersey-2008 is done
New Jersey-2012 is done
New Jersey-2016 is done
New Jersey-2020 is done
Virginia-2000 is done
Virginia-2004 is done
Virginia-2008 is done
Virginia-2012 is done


Processing States:  40%|███▉      | 19/48 [00:05<00:09,  3.09it/s]

Virginia-2016 is done
Virginia-2020 is done
California-2000 is done


Processing States:  42%|████▏     | 20/48 [00:05<00:07,  3.55it/s]

California-2004 is done
California-2008 is done
California-2012 is done
California-2016 is done
California-2020 is done
Maryland-2000 is done
Maryland-2004 is done
Maryland-2008 is done
Maryland-2012 is done
Maryland-2016 is done
Maryland-2020 is done
Pennsylvania-2000 is done


Processing States:  46%|████▌     | 22/48 [00:05<00:05,  4.59it/s]

Pennsylvania-2004 is done
Pennsylvania-2008 is done
Pennsylvania-2012 is done
Pennsylvania-2016 is done
Pennsylvania-2020 is done
Connecticut-2000 is done
Connecticut-2004 is done
Connecticut-2008 is done
Connecticut-2012 is done
Connecticut-2016 is done
Connecticut-2020 is done
Rhode Island-2000 is done
Rhode Island-2004 is done
Rhode Island-2008 is done
Rhode Island-2012 is done
Rhode Island-2016 is done
Rhode Island-2020 is done
Washington-2000 is done
Washington-2004 is done


Processing States:  52%|█████▏    | 25/48 [00:05<00:03,  6.96it/s]

Washington-2008 is done
Washington-2012 is done
Washington-2016 is done
Washington-2020 is done
Michigan-2000 is done
Michigan-2004 is done
Michigan-2008 is done


Processing States:  54%|█████▍    | 26/48 [00:06<00:03,  6.08it/s]

Michigan-2012 is done
Michigan-2016 is done
Michigan-2020 is done
Kentucky-2000 is done
Kentucky-2004 is done
Kentucky-2008 is done
Kentucky-2012 is done
Kentucky-2016 is done


Processing States:  56%|█████▋    | 27/48 [00:06<00:04,  4.56it/s]

Kentucky-2020 is done
Delaware-2000 is done
Delaware-2004 is done
Delaware-2008 is done
Delaware-2012 is done
Delaware-2016 is done
Delaware-2020 is done
Indiana-2000 is done
Indiana-2004 is done
Indiana-2008 is done


Processing States:  60%|██████    | 29/48 [00:06<00:03,  5.14it/s]

Indiana-2012 is done
Indiana-2016 is done
Indiana-2020 is done
New Mexico-2000 is done
New Mexico-2004 is done
New Mexico-2008 is done
New Mexico-2012 is done
New Mexico-2016 is done
New Mexico-2020 is done


Processing States:  65%|██████▍   | 31/48 [00:07<00:02,  5.75it/s]

Florida-2000 is done
Florida-2004 is done
Florida-2008 is done
Florida-2012 is done
Florida-2016 is done
Florida-2020 is done
Oklahoma-2000 is done


Processing States:  67%|██████▋   | 32/48 [00:07<00:02,  5.45it/s]

Oklahoma-2004 is done
Oklahoma-2008 is done
Oklahoma-2012 is done
Oklahoma-2016 is done
Oklahoma-2020 is done
South Dakota-2000 is done
South Dakota-2004 is done


Processing States:  69%|██████▉   | 33/48 [00:07<00:02,  5.49it/s]

South Dakota-2008 is done
South Dakota-2012 is done
South Dakota-2016 is done
South Dakota-2020 is done
Vermont-2000 is done
Vermont-2004 is done
Vermont-2008 is done
Vermont-2012 is done
Vermont-2016 is done
Vermont-2020 is done
Utah-2000 is done
Utah-2004 is done
Utah-2008 is done
Utah-2012 is done


Processing States:  75%|███████▌  | 36/48 [00:07<00:01,  7.23it/s]

Utah-2016 is done
Utah-2020 is done
North Dakota-2000 is done
North Dakota-2004 is done
North Dakota-2008 is done
North Dakota-2012 is done
North Dakota-2016 is done
North Dakota-2020 is done
Minnesota-2000 is done


Processing States:  77%|███████▋  | 37/48 [00:07<00:01,  6.21it/s]

Minnesota-2004 is done
Minnesota-2008 is done
Minnesota-2012 is done
Minnesota-2016 is done
Minnesota-2020 is done
Montana-2000 is done


Processing States:  79%|███████▉  | 38/48 [00:08<00:01,  6.27it/s]

Montana-2004 is done
Montana-2008 is done
Montana-2012 is done
Montana-2016 is done
Montana-2020 is done
Tennessee-2000 is done
Tennessee-2004 is done
Tennessee-2008 is done
Tennessee-2012 is done
Tennessee-2016 is done


Processing States:  85%|████████▌ | 41/48 [00:08<00:01,  6.50it/s]

Tennessee-2020 is done
Nevada-2000 is done
Nevada-2004 is done
Nevada-2008 is done
Nevada-2012 is done
Nevada-2016 is done
Nevada-2020 is done
South Carolina-2000 is done
South Carolina-2004 is done
South Carolina-2008 is done
South Carolina-2012 is done
South Carolina-2016 is done
South Carolina-2020 is done
Mississippi-2000 is done
Mississippi-2004 is done
Mississippi-2008 is done
Mississippi-2012 is done
Mississippi-2016 is done


Processing States:  90%|████████▉ | 43/48 [00:09<00:00,  5.42it/s]

Mississippi-2020 is done
New York-2000 is done
New York-2004 is done
New York-2008 is done
New York-2012 is done
New York-2016 is done
New York-2020 is done
Illinois-2000 is done
Illinois-2004 is done
Illinois-2008 is done
Illinois-2012 is done
Illinois-2016 is done


Processing States:  92%|█████████▏| 44/48 [00:09<00:00,  4.71it/s]

Illinois-2020 is done
Oregon-2000 is done
Oregon-2004 is done
Oregon-2008 is done
Oregon-2012 is done
Oregon-2016 is done
Oregon-2020 is done
Louisiana-2000 is done
Louisiana-2004 is done
Louisiana-2008 is done


Processing States:  96%|█████████▌| 46/48 [00:09<00:00,  5.46it/s]

Louisiana-2012 is done
Louisiana-2016 is done
Louisiana-2020 is done
Ohio-2000 is done
Ohio-2004 is done


Processing States:  98%|█████████▊| 47/48 [00:09<00:00,  4.68it/s]

Ohio-2008 is done
Ohio-2012 is done
Ohio-2016 is done
Ohio-2020 is done
North Carolina-2000 is done


Processing States: 100%|██████████| 48/48 [00:10<00:00,  4.68it/s]

North Carolina-2004 is done
North Carolina-2008 is done
North Carolina-2012 is done
North Carolina-2016 is done
North Carolina-2020 is done
Metric calculation complete. 
Results saved to ../dataOut/election_metrics.csv





In [4]:
df_1 = pd.read_csv('../../PaperCodes/DataOut/Election_Metrics_P.csv')
df_2 = pd.read_csv('../dataOut/election_metrics.csv')
print(df_1.head())
print(df_2.head())



   [StateFP][ElectionYear]  State  StateFP  Election_Year         w        w1  \
0                   482000  Texas       48           2000  0.430731  2.592206   
1                   482004  Texas       48           2004  0.385406  2.548228   
2                   482008  Texas       48           2008  0.482466  2.702160   
3                   482012  Texas       48           2012  0.500909  2.624320   
4                   482016  Texas       48           2016  0.428717  2.566622   

          v       mvc         l1       l1w  ...      linf     linfw         R  \
0  0.485710  0.159006  67.117461  0.264242  ...  0.773962  0.003047  0.173609   
1  0.462384  0.158916  58.595791  0.230692  ...  0.743589  0.002928  0.228178   
2  0.545844  0.193575  62.744125  0.247024  ...  0.844335  0.003324  0.104491   
3  0.542655  0.203138  54.363558  0.214030  ...  0.862778  0.003397  0.089062   
4  0.529528  0.207460  47.361260  0.186462  ...  0.790586  0.003113  0.075331   

        R^2      BVAP%    