# Polarization Models

In [11]:
import pandas as pd
import numpy as np
import scipy.stats as stats

%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


## Load Data
Zollman and Polarization Simulations

In [12]:
# Read the Zollman simulation results and label every row with its model name
zollman = pd.read_csv("data/complete_zollman.csv").assign(model="Zollman")

In [13]:
# Read the Polarization simulation results and label every row with its model name
polarization = pd.read_csv("data/complete_polarization.csv").assign(
    model="Polarization"
)

OConnorWeatherall

In [14]:
# Read the OConnorWeatherall distance simulation results; the `op` column is used
# in the following cells to split the rows by distance operator
distance = pd.read_csv("data/complete_distance.csv")

In [15]:
# Separate the distance results into two frames, one per operator.
# This one holds the square-root distance runs, labelled with the operator name.
distance1 = distance.query("op == 'OConnorWeatherallSquareRootDistanceOp'").assign(
    model="OConnorWeatherallSquareRootDistanceOp"
)

In [16]:
# Square distance runs, labelled with the operator name
distance2 = distance.query("op == 'OConnorWeatherallSquareDistanceOp'").assign(
    model="OConnorWeatherallSquareDistanceOp"
)

### Combine and Group Data

In [8]:
# Stack the four labelled result sets into one frame; ignore_index=True discards
# the per-file row labels and renumbers the rows from 0
df = pd.concat([zollman, polarization, distance1, distance2], ignore_index=True)

In [9]:
# Replace every missing value, in every column, with 0 so that no rows are
# dropped by the groupby in the next cell (groupby skips NaN keys by default)
df = df.fillna(0)

In [10]:
# Count the simulation runs for every parameter combination, with one column per
# final action ("?", "A", "B").
# `size()` counts rows directly. The previous count()/droplevel/drop_duplicates
# approach removed duplicated columns *by value*, which silently drops a genuine
# column whenever two columns happen to hold identical numbers, and it turned
# every column into object dtype through the double transpose.
pr = (
    df.groupby(["size", "trials", "epsilon", "model", "mistrust", "action"])
    .size()
    .unstack("action", fill_value=0)
    .reset_index()
    .rename_axis(None, axis=1)
)

# Make sure every action column exists even if that action never occurred
for action_label in ("?", "A", "B"):
    if action_label not in pr.columns:
        pr[action_label] = 0

# A later cell writes the string label "NA" into `mistrust` for the Zollman rows,
# so keep this column as a generic object column rather than float
pr["mistrust"] = pr["mistrust"].astype(object)

# Calculate not B's (runs that ended on A or undecided).
# The counts are already integers, so no float-to-int conversion is needed.
pr["not_B"] = pr["A"] + pr["?"]

pr

Unnamed: 0,size,trials,epsilon,model,mistrust,?,A,B,not_B
0,2,8,0.001,Zollman,0.0,0.0,59,41,59
1,2,8,0.01,Zollman,0.0,0.0,48,52,48
2,2,8,0.1,Zollman,0.0,0.0,37,63,37
3,2,16,0.001,Zollman,0.0,0.0,47,53,47
4,2,16,0.01,Zollman,0.0,0.0,47,53,47
...,...,...,...,...,...,...,...,...,...
404,128,2048,0.01,Zollman,0.0,0.0,0,100,0
405,128,4096,0.001,Zollman,0.0,0.0,1,99,1
406,128,4096,0.01,Zollman,0.0,0.0,0,100,0
407,128,8192,0.001,Zollman,0.0,0.0,1,99,1


In [73]:
# Total number of runs per parameter combination
pr["total"] = pr["B"] + pr["not_B"]

# Percentage of runs that ended on action B
pr["converged"] = pr["B"].div(pr["total"]).mul(100)

# Mistrust does not apply to Zollman, so replace its 0 placeholder with "NA"
pr.loc[pr["model"].eq("Zollman"), "mistrust"] = "NA"

pr

Unnamed: 0,size,trials,epsilon,model,mistrust,?,A,B,not_B,total,converged
0,2,8,0.001,Zollman,,0.0,59,41,59,100,41.0
1,2,8,0.01,Zollman,,0.0,48,52,48,100,52.0
2,2,8,0.1,Zollman,,0.0,37,63,37,100,63.0
3,2,16,0.001,Zollman,,0.0,47,53,47,100,53.0
4,2,16,0.01,Zollman,,0.0,47,53,47,100,53.0
...,...,...,...,...,...,...,...,...,...,...,...
404,128,2048,0.01,Zollman,,0.0,0,100,0,100,100.0
405,128,4096,0.001,Zollman,,0.0,1,99,1,100,99.0
406,128,4096,0.01,Zollman,,0.0,0,100,0,100,100.0
407,128,8192,0.001,Zollman,,0.0,1,99,1,100,99.0


In [74]:
# Write the grouped counts and convergence percentages to disk, omitting the row index
pr.to_csv("zollman_pol.csv", index=False)

## Polarization Mistrust > 1

### Chi2 Tests - Proportions

In [75]:
# List the distinct model labels present in the grouped data
pr["model"].unique()

array(['Zollman', 'Polarization', 'OConnorWeatherallSquareDistanceOp',
       'OConnorWeatherallSquareRootDistanceOp'], dtype=object)

In [92]:
# Grouped Zollman rows: the baseline every Polarization setting is compared with
zollman = pr.loc[pr["model"] == "Zollman"]

# Grouped Polarization rows whose mistrust is neither 0 nor 1
polarization_not_0 = pr.loc[
    (pr["model"] == "Polarization")
    & (pr["mistrust"] != 0.0)
    & (pr["mistrust"] != 1.0)
]

In [93]:
# Chi-squared test of B vs. not-B counts: each Polarization setting is compared
# with the Zollman row that shares its network size, trials and epsilon.
# Rows are collected in a list and turned into a dataframe once at the end,
# instead of re-concatenating the results frame on every iteration.
chi2_rows = []

# One group per (size, trials, epsilon, mistrust) combination, in sorted order
group_keys = ["size", "trials", "epsilon", "mistrust"]
for keys, mis_group in polarization_not_0.groupby(group_keys):
    size_name, trials_name, eps_name, mis_name = keys

    # Get matching data from Zollman
    test_data = zollman.query(
        "size == @size_name & trials == @trials_name & epsilon == @eps_name"
    )
    if test_data.empty:
        # No Zollman baseline for this configuration, nothing to compare with
        continue

    observed = [
        [mis_group["B"].iloc[0], mis_group["not_B"].iloc[0]],
        [test_data["B"].iloc[0], test_data["not_B"].iloc[0]],
    ]

    try:
        # Calculate Chi2
        chi2, p, _dof, _expected = stats.chi2_contingency(observed)
    except ValueError:
        # SciPy rejects tables with a zero expected frequency (for example when
        # neither model ever produced a not-B outcome); record the test as
        # undefined instead of hiding every possible error
        chi2, p = np.nan, np.nan

    chi2_rows.append(
        {
            "size": size_name,
            "trials": trials_name,
            "epsilon": eps_name,
            "polarization_mistrust": mis_name,
            "not_B": observed[0][1],
            "B": observed[0][0],
            "zollman_not_B": observed[1][1],
            "zollman_B": observed[1][0],
            "chi2": chi2,
            "pvalue": p,
        }
    )

# Dataframe of Chi2 results, one row per tested configuration
chi2_results = pd.DataFrame(chi2_rows)

In [97]:
# Render the results without the row index. `Styler.hide_index()` is deprecated
# (and removed in pandas 2.0); `Styler.hide(axis="index")` is its replacement.
chi2_results.style.hide(axis="index")

  chi2_results.style.hide_index()


size,trials,epsilon,polarization_mistrust,not_B,B,zollman_not_B,zollman_B,chi2,pvalue
4,4,0.001,1.1,180,280,151,349,8.06727,0.004507
4,4,0.001,1.5,299,161,151,349,115.119861,0.0
4,16,0.001,1.1,187,273,178,422,13.435695,0.000247
4,16,0.001,1.5,307,153,178,422,142.691278,0.0
4,16,0.01,1.1,163,297,27,73,2.244295,0.134108
4,16,0.01,1.5,294,166,27,73,44.254782,0.0
4,16,0.1,1.1,63,397,23,77,4.7783,0.028821
4,16,0.1,1.5,219,241,23,77,19.280836,1.1e-05
4,64,0.001,1.1,156,304,182,418,1.375834,0.240812
4,64,0.001,1.5,290,170,182,418,111.467015,0.0


### Mann-Whitney Test - Steps

In [98]:
# Reload the raw Zollman runs (the name `zollman` was reused for grouped data
# above) and keep only the runs that ended on action B
zollman = pd.read_csv("data/complete_zollman.csv").assign(model="Zollman")
zollman = zollman.loc[zollman["action"] == "B"]

# Raw Polarization runs that ended on B with a mistrust other than 0 or 1
polarization_not_0 = polarization.loc[
    (polarization["model"] == "Polarization")
    & (polarization["mistrust"] != 0.0)
    & (polarization["mistrust"] != 1.0)
    & (polarization["action"] == "B")
]

In [99]:
# Mann-Whitney U test on the number of steps: each Polarization setting is
# compared with the Zollman runs that share its network size, trials and epsilon.
# Rows are collected in a list and turned into a dataframe once at the end,
# instead of re-concatenating the results frame on every iteration.
mwu_rows = []

# One group per (size, trials, epsilon, mistrust) combination, in sorted order
group_keys = ["size", "trials", "epsilon", "mistrust"]
for keys, mis_group in polarization_not_0.groupby(group_keys):
    size_name, trials_name, eps_name, mis_name = keys

    # Get matching data from Zollman
    test_data = zollman.query(
        "size == @size_name & trials == @trials_name & epsilon == @eps_name"
    )
    if test_data.empty:
        # No Zollman runs for this configuration. Groups produced by groupby are
        # never empty, so this is the only emptiness check that is needed.
        continue

    # Calculate Mann Whitney U Test. The alternative is spelled out because the
    # SciPy default has changed between releases.
    stat, pvalue = stats.mannwhitneyu(
        mis_group["steps"], test_data["steps"], alternative="two-sided"
    )

    mwu_rows.append(
        {
            "size": size_name,
            "trials": trials_name,
            "epsilon": eps_name,
            "polarization_mistrust": mis_name,
            "polarization_count": len(mis_group),
            "mean_steps": mis_group["steps"].mean(),
            "zollman_count": len(test_data),
            "zollman_mean_steps": test_data["steps"].mean(),
            "stat": stat,
            "pvalue": pvalue,
        }
    )

# Dataframe of Mann-Whitney results, one row per tested configuration
mwu_results = pd.DataFrame(mwu_rows)

In [100]:
# Render the results without the row index. `Styler.hide_index()` is deprecated
# (and removed in pandas 2.0); `Styler.hide(axis="index")` is its replacement.
mwu_results.style.hide(axis="index")

  mwu_results.style.hide_index()


size,trials,epsilon,polarization_mistrust,polarization_count,mean_steps,zollman_count,zollman_mean_steps,stat,pvalue
4,4,0.001,1.1,280,128847.778571,349,59330.968481,72570.5,0.0
4,4,0.001,1.5,161,118857.434783,349,59330.968481,38103.5,0.0
4,16,0.001,1.1,273,30829.190476,422,14670.796209,81441.5,0.0
4,16,0.001,1.5,153,29849.588235,422,14670.796209,42569.5,0.0
4,16,0.01,1.1,297,306.296296,73,154.136986,15515.0,0.0
4,16,0.01,1.5,166,295.861446,73,154.136986,7909.0,0.000172
4,16,0.1,1.1,397,4.365239,77,2.415584,23720.5,0.0
4,16,0.1,1.5,241,4.605809,77,2.415584,14643.5,0.0
4,64,0.001,1.1,304,7214.878289,418,3802.253589,87067.0,0.0
4,64,0.001,1.5,170,5833.052941,418,3802.253589,45134.0,0.0


## Polarization Mistrust = 1, Sqr Root, Sqr Distance

### Chi2 Test - Proportions

In [82]:
# List the distinct model labels present in the grouped data
pr["model"].unique()

array(['Zollman', 'Polarization', 'OConnorWeatherallSquareDistanceOp',
       'OConnorWeatherallSquareRootDistanceOp'], dtype=object)

In [101]:
# Gather the grouped rows needed for this section.
# Zollman baseline:
zollman = pr.loc[pr["model"] == "Zollman"]

# Polarization rows with mistrust exactly 1
polarization_1 = pr.loc[
    (pr["model"] == "Polarization") & (pr["mistrust"] == 1.0)
].copy()

# Both distance operators in one frame: square rows first, then square-root rows
square = pd.concat(
    [
        pr.loc[pr["model"] == "OConnorWeatherallSquareDistanceOp"],
        pr.loc[pr["model"] == "OConnorWeatherallSquareRootDistanceOp"],
    ],
    ignore_index=True,
)

In [102]:
# Chi-squared test of B vs. not-B counts: each Polarization (mistrust = 1)
# setting is compared with the Zollman row that shares its network size, trials
# and epsilon.
# Rows are collected in a list and turned into a dataframe once at the end,
# instead of re-concatenating the results frame on every iteration.
chi2_rows = []

# One group per (size, trials, epsilon, mistrust) combination, in sorted order
group_keys = ["size", "trials", "epsilon", "mistrust"]
for keys, mis_group in polarization_1.groupby(group_keys):
    size_name, trials_name, eps_name, mis_name = keys

    # Get matching data from Zollman
    test_data = zollman.query(
        "size == @size_name & trials == @trials_name & epsilon == @eps_name"
    )
    if test_data.empty:
        # No Zollman baseline for this configuration, nothing to compare with
        continue

    observed = [
        [mis_group["B"].iloc[0], mis_group["not_B"].iloc[0]],
        [test_data["B"].iloc[0], test_data["not_B"].iloc[0]],
    ]

    try:
        # Calculate Chi2
        chi2, p, _dof, _expected = stats.chi2_contingency(observed)
    except ValueError:
        # SciPy rejects tables with a zero expected frequency (for example when
        # neither model ever produced a not-B outcome); record the test as
        # undefined instead of hiding every possible error
        chi2, p = np.nan, np.nan

    chi2_rows.append(
        {
            "model": "Polarization",
            "size": size_name,
            "trials": trials_name,
            "epsilon": eps_name,
            "mistrust": mis_name,
            "not_B": observed[0][1],
            "B": observed[0][0],
            "zollman_not_B": observed[1][1],
            "zollman_B": observed[1][0],
            "chi2": chi2,
            "pvalue": p,
        }
    )

# Dataframe of Chi2 results, one row per tested configuration
chi2_results = pd.DataFrame(chi2_rows)

In [103]:
# Chi-squared test of B vs. not-B counts for the two distance operators, each
# compared with the Zollman row that shares its network size, trials and epsilon.
# The rows are appended to the `chi2_results` built in the previous cell, so
# re-running this cell on its own appends them a second time.
square_chi2_rows = []

# One group per (model, size, trials, epsilon, mistrust) combination, sorted
group_keys = ["model", "size", "trials", "epsilon", "mistrust"]
for keys, mis_group in square.groupby(group_keys):
    model_name, size_name, trials_name, eps_name, mis_name = keys

    # Get matching data from Zollman
    test_data = zollman.query(
        "size == @size_name & trials == @trials_name & epsilon == @eps_name"
    )
    if test_data.empty:
        # No Zollman baseline for this configuration, nothing to compare with
        continue

    observed = [
        [mis_group["B"].iloc[0], mis_group["not_B"].iloc[0]],
        [test_data["B"].iloc[0], test_data["not_B"].iloc[0]],
    ]

    try:
        # Calculate Chi2
        chi2, p, _dof, _expected = stats.chi2_contingency(observed)
    except ValueError:
        # SciPy rejects tables with a zero expected frequency (for example when
        # neither model ever produced a not-B outcome); record the test as
        # undefined instead of hiding every possible error
        chi2, p = np.nan, np.nan

    square_chi2_rows.append(
        {
            "model": model_name,
            "size": size_name,
            "trials": trials_name,
            "epsilon": eps_name,
            "mistrust": mis_name,
            "not_B": observed[0][1],
            "B": observed[0][0],
            "zollman_not_B": observed[1][1],
            "zollman_B": observed[1][0],
            "chi2": chi2,
            "pvalue": p,
        }
    )

# Append to results dataframe in a single concat
if square_chi2_rows:
    chi2_results = pd.concat(
        [chi2_results, pd.DataFrame(square_chi2_rows)], ignore_index=True
    )

In [104]:
# Render the results without the row index. `Styler.hide_index()` is deprecated
# (and removed in pandas 2.0); `Styler.hide(axis="index")` is its replacement.
chi2_results.style.hide(axis="index")

  chi2_results.style.hide_index()


model,size,trials,epsilon,mistrust,not_B,B,zollman_not_B,zollman_B,chi2,pvalue
Polarization,4,4,0.001,1.0,135,325,151,349,0.047429,0.8276
Polarization,4,16,0.001,1.0,139,321,178,422,0.015982,0.899401
Polarization,4,16,0.01,1.0,118,342,27,73,0.023387,0.878455
Polarization,4,16,0.1,1.0,45,415,23,77,12.240867,0.000468
Polarization,4,64,0.001,1.0,145,315,182,418,0.121174,0.727764
Polarization,4,64,0.01,1.0,114,346,24,76,0.001338,0.970822
Polarization,4,64,0.1,1.0,35,425,6,94,0.121058,0.727889
Polarization,16,4,0.001,1.0,31,429,37,463,0.074428,0.784995
Polarization,16,16,0.001,1.0,30,430,42,558,0.033695,0.854358
Polarization,16,16,0.01,1.0,30,430,7,93,0.0,1.0


### Mann-Whitney Test - Steps

In [110]:
# Reload the raw Zollman runs (the name `zollman` was reused for grouped data
# above) and keep only the runs that ended on action B
zollman = pd.read_csv("data/complete_zollman.csv").assign(model="Zollman")
zollman = zollman.loc[zollman["action"] == "B"]

# Raw Polarization runs with mistrust exactly 1 that ended on B
polarization_1 = polarization.loc[
    (polarization["model"] == "Polarization")
    & (polarization["mistrust"] == 1.0)
    & (polarization["action"] == "B")
]

# Raw runs of both distance operators that ended on B: square-root rows first,
# then square rows
square = pd.concat(
    [runs.loc[runs["action"] == "B"] for runs in (distance1, distance2)],
    ignore_index=True,
)

In [111]:
# Mann-Whitney U test on the number of steps: each Polarization (mistrust = 1)
# setting is compared with the Zollman runs that share its network size, trials
# and epsilon.
# Rows are collected in a list and turned into a dataframe once at the end,
# instead of re-concatenating the results frame on every iteration.
mwu_rows = []

# One group per (size, trials, epsilon, mistrust) combination, in sorted order
group_keys = ["size", "trials", "epsilon", "mistrust"]
for keys, mis_group in polarization_1.groupby(group_keys):
    size_name, trials_name, eps_name, mis_name = keys

    # Get matching data from Zollman
    test_data = zollman.query(
        "size == @size_name & trials == @trials_name & epsilon == @eps_name"
    )
    if test_data.empty:
        # No Zollman runs for this configuration. Groups produced by groupby are
        # never empty, so this is the only emptiness check that is needed.
        continue

    # Calculate Mann Whitney U Test. The alternative is spelled out because the
    # SciPy default has changed between releases.
    stat, pvalue = stats.mannwhitneyu(
        mis_group["steps"], test_data["steps"], alternative="two-sided"
    )

    mwu_rows.append(
        {
            "model": "Polarization",
            "size": size_name,
            "trials": trials_name,
            "epsilon": eps_name,
            "polarization_mistrust": mis_name,
            "polarization_count": len(mis_group),
            "mean_steps": mis_group["steps"].mean(),
            "zollman_count": len(test_data),
            "zollman_mean_steps": test_data["steps"].mean(),
            "stat": stat,
            "pvalue": pvalue,
        }
    )

# Dataframe of Mann-Whitney results, one row per tested configuration
mwu_results = pd.DataFrame(mwu_rows)

In [112]:
# Mann-Whitney U test on the number of steps for the two distance operators,
# each compared with the Zollman runs that share its network size, trials and
# epsilon.
# The rows are appended to the `mwu_results` built in the previous cell, so
# re-running this cell on its own appends them a second time.
# NOTE(review): groupby skips rows whose `mistrust` is NaN, and `square` is built
# here from the raw distance frames, which were not passed through fillna(0) --
# confirm that no distance runs are silently left out.
square_mwu_rows = []

# One group per (model, size, trials, epsilon, mistrust) combination, sorted
group_keys = ["model", "size", "trials", "epsilon", "mistrust"]
for keys, mis_group in square.groupby(group_keys):
    model_name, size_name, trials_name, eps_name, mis_name = keys

    # Get matching data from Zollman
    test_data = zollman.query(
        "size == @size_name & trials == @trials_name & epsilon == @eps_name"
    )
    if test_data.empty:
        # No Zollman runs for this configuration. Groups produced by groupby are
        # never empty, so this is the only emptiness check that is needed.
        continue

    # Calculate Mann Whitney U Test. The alternative is spelled out because the
    # SciPy default has changed between releases.
    stat, pvalue = stats.mannwhitneyu(
        mis_group["steps"], test_data["steps"], alternative="two-sided"
    )

    square_mwu_rows.append(
        {
            "model": model_name,
            "size": size_name,
            "trials": trials_name,
            "epsilon": eps_name,
            "polarization_mistrust": mis_name,
            "polarization_count": len(mis_group),
            "mean_steps": mis_group["steps"].mean(),
            "zollman_count": len(test_data),
            "zollman_mean_steps": test_data["steps"].mean(),
            "stat": stat,
            "pvalue": pvalue,
        }
    )

# Append to results dataframe in a single concat
if square_mwu_rows:
    mwu_results = pd.concat(
        [mwu_results, pd.DataFrame(square_mwu_rows)], ignore_index=True
    )

In [115]:
# Render the results without the row index. `Styler.hide_index()` is deprecated
# (and removed in pandas 2.0); `Styler.hide(axis="index")` is its replacement.
mwu_results.style.hide(axis="index")

  mwu_results.style.hide_index()


model,size,trials,epsilon,polarization_mistrust,polarization_count,mean_steps,zollman_count,zollman_mean_steps,stat,pvalue
Polarization,4,4,0.001,1.0,325,129776.618462,349,59330.968481,82568.5,0.0
Polarization,4,16,0.001,1.0,321,31820.380062,422,14670.796209,94438.5,0.0
Polarization,4,16,0.01,1.0,342,361.994152,73,154.136986,17134.5,1e-06
Polarization,4,16,0.1,1.0,415,4.156627,77,2.415584,24512.0,0.0
Polarization,4,64,0.001,1.0,315,7941.726984,418,3802.253589,92550.0,0.0
Polarization,4,64,0.01,1.0,346,99.976879,76,39.052632,19005.5,0.0
Polarization,4,64,0.1,1.0,425,2.167059,94,1.180851,34380.0,0.0
Polarization,16,4,0.001,1.0,429,63539.634033,463,22135.591793,158682.5,0.0
Polarization,16,16,0.001,1.0,430,14648.95814,558,5121.329749,193902.0,0.0
Polarization,16,16,0.01,1.0,430,128.95814,93,53.11828,31830.5,0.0
