In [1]:
import numpy as np
import pandas as pd

from epitools import get_data

import os
import sys

# Location of the local PolaPy checkout. The original machine-specific path
# stays the default; set the POLAPY_PATH environment variable to point at a
# different checkout without editing the notebook.
POLAPY_PATH = os.environ.get("POLAPY_PATH", "C:/Users/cnava/Repos/PolaPy")

# Guard so that re-running this cell does not append the same entry again.
if POLAPY_PATH not in sys.path:
    sys.path.append(POLAPY_PATH)

from polapy.competitiveness import blais_lago, endersby_etal, grofman_selb, navarrete_etal as competitiveness
from polapy.polarization import navarrete_etal as polarization, esteban_ray

In [2]:
# Supported elections: for each country, the election year and the name of the
# geography column used for aggregation. Keeping them in one table means that
# changing `country` below switches all three settings together, instead of
# commenting and uncommenting three separate assignments.
ELECTION_PRESETS = {
    "France": {"year": 2022, "geography": "nuts_2"},
    "Chile": {"year": 2021, "geography": "region_id"},
    "United States": {"year": 2020, "geography": "state"},
}

country = "France"
year = ELECTION_PRESETS[country]["year"]
geography = ELECTION_PRESETS[country]["geography"]

In [3]:
def get_data(
    country,
    year,
    aggregation,
    candidate="candidate",
    election="runoff",
    method="nv"
):
    """Load one curated antagonism file and total it per aggregation unit.

    The file read is
    ``data_curated/{country}/antagonism_{year}_{aggregation}_{election}_{method}.csv.gz``,
    resolved relative to the current working directory.

    Parameters
    ----------
    country, year, aggregation, election, method
        Components of the file name. ``aggregation`` is also the name of the
        column the rows are grouped by.
    candidate : str
        Name of the candidate column; only used when ``method == "nv"``.

    Returns
    -------
    pandas.DataFrame
        * ``method == "nv"``: columns ``aggregation`` (cast to ``str``), ``ec``,
          ``ep`` and ``year``. Rows of type "EC" and "EP" are paired on
          (candidate, aggregation) and their ``antagonism`` values are summed
          per aggregation unit.
        * any other method: columns ``aggregation`` and ``value`` (summed per
          aggregation unit).

    Notes
    -----
    The notebook also imports a ``get_data`` from ``epitools``; this definition
    replaces that name for every later cell.
    """
    csv_path = f"data_curated/{country}/antagonism_{year}_{aggregation}_{election}_{method}.csv.gz"
    raw = pd.read_csv(csv_path)

    # Every method other than "nv" already stores a single "value" column.
    if method != "nv":
        return raw.groupby(aggregation).agg({"value": "sum"}).reset_index()

    def _rows_of_type(kind):
        # Independent copy of one row type, with a string-typed grouping key.
        subset = raw[raw["type"] == kind].copy()
        subset[aggregation] = subset[aggregation].astype(str)
        return subset

    between = _rows_of_type("EC")
    within = _rows_of_type("EP")

    # Left side is "EC" (suffix _x), right side is "EP" (suffix _y).
    paired = pd.merge(between, within, on=[candidate, aggregation])
    paired["ec"] = paired["antagonism_x"]
    paired["ep"] = paired["antagonism_y"]

    totals = (
        paired.groupby(aggregation)
        .agg({"ec": "sum", "ep": "sum"})
        .reset_index()
        .dropna()
    )
    totals["year"] = year
    totals["year"] = totals["year"].astype(int)
    return totals

In [4]:
# Spot check: load a single method ("er0.25", first round) for the configured election.
get_data(country=country, year=year, aggregation=geography, election="first_round", method="er0.25")

Unnamed: 0,nuts_2,value
0,FR10,0.063461
1,FRB0,0.056623
2,FRC1,0.060729
3,FRC2,0.071393
4,FRD1,0.058096
5,FRD2,0.066575
6,FRE1,0.072401
7,FRE2,0.069787
8,FRF1,0.071395
9,FRF2,0.070614


In [5]:
# Build one long-format frame holding every measure for the first round.
output = []
for method in ("std", "nv", "er0.25", "er1.0"):  # "tw0.5" and "tw0.75" are disabled
    method_df = get_data(
        country=country,
        year=year,
        aggregation=geography,
        election="first_round",
        method=method,
    )
    method_df["Method"] = method

    if method != "nv":
        output.append(method_df)
        continue

    # "nv" carries two measures; emit "ep" first, then "ec", each exposing its
    # score under the shared "value" column.
    for component in ("ep", "ec"):
        component_df = method_df.rename(columns={component: "value"})
        component_df["Method"] = component
        output.append(component_df)

df = pd.concat(output, ignore_index=True)

# Country-specific clean-up of the geography column.
if country == "United States":
    keep_rows = df["state"] != "DISTRICT OF COLUMBIA"
    df = df[keep_rows]
elif country == "France":
    has_z = df[geography].str.contains("Z")
    df = df[~has_z]
elif country == "Chile":
    df[geography] = df[geography].astype(str)

df.head()

Unnamed: 0,nuts_2,value,Method,ec,year,ep
0,FR10,0.46922,std,,,
1,FRB0,0.378376,std,,,
2,FRC1,0.410626,std,,,
3,FRC2,0.440087,std,,,
4,FRD1,0.356728,std,,,


In [6]:
# Raw method key -> numbered display label. The insertion order matters: later
# cells use `methods.values()` to order the columns of the correlation table.
# The Tsui-Wang variants ("tw0.5", "tw0.75") are currently left out.
methods = dict([
    ("ep", "(1) EP"),
    ("ec", "(2) EC"),
    ("er0.25", "(3) Esteban-Ray (0.25)"),
    ("er1.0", "(4) Esteban-Ray (1)"),
    ("std", "(5) Dispersion"),
])

df["Method"] = df["Method"].replace(to_replace=methods)

In [7]:
# Total "value" for every (geography, Method) pair, kept in long format.
dd = df.groupby([geography, "Method"])["value"].sum().reset_index()
dd.head()

Unnamed: 0,nuts_2,Method,value
0,FR10,(1) EP,0.306406
1,FR10,(2) EC,0.693623
2,FR10,(3) Esteban-Ray (0.25),0.063461
3,FR10,(4) Esteban-Ray (1),0.000111
4,FR10,(5) Dispersion,0.46922


In [8]:
# Wide layout: one row per geography unit, one column per measure. Units missing
# any measure are dropped, and columns follow the order defined in `methods`.
data = (
    dd.pivot(index=[geography], columns="Method", values="value")
    .dropna()
    [list(methods.values())]
)

data.corr().reset_index()

Method,Method.1,(1) EP,(2) EC,(3) Esteban-Ray (0.25),(4) Esteban-Ray (1),(5) Dispersion
0,(1) EP,1.0,0.379685,-0.04359,-0.32982,0.674882
1,(2) EC,0.379685,1.0,-0.511717,-0.71301,0.327832
2,(3) Esteban-Ray (0.25),-0.04359,-0.511717,1.0,0.864116,0.322671
3,(4) Esteban-Ray (1),-0.32982,-0.71301,0.864116,1.0,-0.057787
4,(5) Dispersion,0.674882,0.327832,0.322671,-0.057787,1.0


In [9]:
def to_latex(input_df, caption=None):
    """Print the pairwise correlations of ``input_df`` as a LaTeX table.

    Rows are labelled with the full column names; the header keeps only the
    first three characters of each name (for example "(1) EP" becomes "(1)").
    Correlations are rounded to two decimals and shown on and above the
    diagonal; cells below the diagonal are printed as "-".

    Parameters
    ----------
    input_df : pandas.DataFrame
        One column per measure; correlations come from ``input_df.corr()``.
        The name of the column axis is irrelevant.
    caption : str, optional
        Table caption. When omitted, no ``\\caption`` line is printed.

    Returns
    -------
    None
        The LaTeX source is written to standard output.
    """
    corr = input_df.corr().round(2)
    labels = [str(label) for label in corr.columns]
    size = len(labels)

    # Mask the square correlation matrix only. The row labels never pass
    # through the masking step, so they do not have to be restored afterwards
    # and no particular column-axis name is required.
    cells = corr.to_numpy(dtype=object, copy=True)
    cells[np.tril_indices(size, k=-1)] = "-"  # k=-1 keeps the diagonal visible

    header = ["Method"] + [label[0:3] for label in labels]
    col_spec = "l" + "c" * size  # left-aligned label column, centred values

    print("\\begin{table}[H]")
    print("\\begin{center}")
    print("\\begin{tabular}" + f"{{{col_spec}}}")
    print("\\hline")
    print(" & ".join(header) + " \\\\")
    print("\\hline")
    for label, row in zip(labels, cells):
        print(" & ".join([label] + [str(value) for value in row]) + " \\\\")

    print("\\hline")
    print("\\end{tabular}")
    # Skip the caption entirely rather than printing a literal "None".
    if caption is not None:
        print(f"\\caption{{{caption}}}")
    print("\\end{center}")
    print("\\end{table}")


# Print the LaTeX correlation table for the configured country and year.
to_latex(
    data,
    caption=f"Correlation matrix for measures of polarization in {country} ({year})",
)

\begin{table}[H]
\begin{center}
\begin{tabular}{lccccc}
\hline
Method & (1) & (2) & (3) & (4) & (5) \\
\hline
(1) EP & 1.0 & 0.38 & -0.04 & -0.33 & 0.67 \\
(2) EC & - & 1.0 & -0.51 & -0.71 & 0.33 \\
(3) Esteban-Ray (0.25) & - & - & 1.0 & 0.86 & 0.32 \\
(4) Esteban-Ray (1) & - & - & - & 1.0 & -0.06 \\
(5) Dispersion & - & - & - & - & 1.0 \\
\hline
\end{tabular}
\caption{Correlation matrix for measures of polarization in France (2022)}
\end{center}
\end{table}


: 