This is a tiny amount of scaffolding around the directed homophilic preferential attachment code written by Lisette Espin and others ([available here](https://github.com/gesiscss/Homophilic_Directed_ScaleFree_Networks)). In particular, we replicate a simple case where homophily within groups can lead to inequities between groups, defined as underrepresentation of minorities in the top k% of ranks.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import time

import os

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

In [None]:
! pip install powerlaw

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting powerlaw
  Downloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw
Successfully installed powerlaw-1.5


In [None]:
from DPAH4 import DPAH4

In [None]:
def compute_inequity(g, k):
    """Return the share of allies and of minorities among the k top-ranked nodes.

    Nodes of ``g`` are ranked by their PageRank score, highest first. A node
    counts as an ally when its ``'m'`` attribute equals 1 and as a minority
    when it equals 2; nodes with any other value are not counted.

    Returns:
        A pair ``(ally_share, minority_share)``. Both counts are divided by
        ``k`` itself, not by the number of nodes actually selected.
    """
    scores = nx.pagerank(g)
    # Stable sort: nodes with equal scores keep the order pagerank reported.
    ranked_nodes = sorted(scores, key=scores.get, reverse=True)
    groups = [g.nodes[node]['m'] for node in ranked_nodes[:k]]
    return groups.count(1) / k, groups.count(2) / k

In [None]:
# Number of nodes in every generated graph
N = 1000

# Fraction (not a percentage) of the top of the ranking to consider for
# inequity. The sweep cells below rebind this name while looping over beta_list.
beta = 0.05

# Top k nodes to consider for inequity. k = N * beta, truncated to an integer.
# Also rebound inside the sweep cells.
k = int(N * beta)

# Fraction minority: left disabled because the sweep cells loop over fm.
# fm = 0.2

# Fraction allies. The sweep cells visible here pass fa explicitly, so this
# default is not read by them.
fa = 0

# homophily within majority group
h_MM = 0.8

# homophily within minority group
h_mm = 0.8

print(k)

50


We sweep over different values of allies/coverts with homophily parameters indicating that majorities accept allies/coverts, but neither allies/coverts themselves nor the minorities accept them. **This is our scenario 1.**

In [None]:
# Scenario 1 sweeps: every graph is generated with acceptance="maj".
results_allies = []

# Fractions of the ranking at which inequity is measured (k = int(N * beta)).
beta_list = [
    0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
    0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1,
]

# Generator settings that are identical for every graph built in this cell.
dpah_kwargs = dict(
    N=N,
    d=0.0015,
    plo_M=3,
    plo_m=3,
    h_MM=h_MM,
    h_mm=h_mm,
    acceptance="maj",
    verbose=False,
)

# Allies sweep: fa runs from 0 to 0.5 - fm for each minority fraction fm.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_allies in np.linspace(0, 0.5 - fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            g = DPAH4(fm=fm, fa=fraction_allies, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_ally, prop_top_k_minority = compute_inequity(g, k)
                results_allies.append(
                    (fm, fraction_allies, beta, prop_top_k_ally, prop_top_k_minority)
                )


results_covert = []

# Covert sweep: fraction_covert is subtracted from fm and passed as fa.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_covert in np.linspace(0, fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            # "Allies" and "covert" are functionally the same
            g = DPAH4(fm=fm - fraction_covert, fa=fraction_covert, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_covert, prop_top_k_minority = compute_inequity(g, k)
                results_covert.append(
                    (fm, fraction_covert, beta, prop_top_k_covert, prop_top_k_minority)
                )

          

  0%|          | 0/10 [00:04<?, ?it/s]


KeyboardInterrupt: ignored

In [None]:
# Scenario 1 covert sweep on its own (acceptance="maj"); reuses beta_list
# defined by the preceding sweep cell.
results_covert = []

# Generator settings that are identical for every graph built in this cell.
dpah_kwargs = dict(
    N=N,
    d=0.0015,
    plo_M=3,
    plo_m=3,
    h_MM=h_MM,
    h_mm=h_mm,
    acceptance="maj",
    verbose=False,
)

# fraction_covert is subtracted from fm and passed as fa.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_covert in np.linspace(0, fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            # "Allies" and "covert" are functionally the same
            g = DPAH4(fm=fm - fraction_covert, fa=fraction_covert, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_covert, prop_top_k_minority = compute_inequity(g, k)
                results_covert.append(
                    (fm, fraction_covert, beta, prop_top_k_covert, prop_top_k_minority)
                )


100%|██████████| 10/10 [1:44:38<00:00, 627.80s/it]


In [None]:
# The allies frame is left disabled for scenario 1:
# df_results_allies = pd.DataFrame(results_allies, columns=['prop_min', 'prop_ally','beta' ,'prop_ally_top_k', 'prop_min_top_k'])

# One row per (fm, fraction_covert, beta) measurement from the covert sweep.
covert_columns = ['prop_min', 'prop_covert', 'beta', 'prop_covert_top_k', 'prop_overt_top_k']
df_results_covert = pd.DataFrame(results_covert, columns=covert_columns)
df_results_covert = df_results_covert.assign(
    # Covert fraction relative to the largest covert fraction in the whole sweep.
    prop_covert_rescaled=lambda df: df['prop_covert'] / df['prop_covert'].max(),
    # Combined covert and overt minority share among the top k.
    prop_all_minorities_top_k=lambda df: df['prop_covert_top_k'] + df['prop_overt_top_k'],
)

In [None]:
# Write the scenario 1 covert sweep to CSV without the index column. The allies
# export stays disabled, matching the commented-out df_results_allies frame.
# df_results_allies.to_csv("allies_sweep_per_top_k_setup1.csv", index=False)
df_results_covert.to_csv("covert_sweep_per_top_k_setup1.csv", index=False)

Now, sweep over allies and covert for the setting where majorities accept allies/coverts, but allies/coverts also accept each other. **This is our scenario 2.**

In [None]:
# Scenario 2 sweeps: every graph is generated with acceptance="maj/al".
results_allies = []

# Fractions of the ranking at which inequity is measured (k = int(N * beta)).
beta_list = [
    0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
    0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1,
]

# Generator settings that are identical for every graph built in this cell.
dpah_kwargs = dict(
    N=N,
    d=0.0015,
    plo_M=3,
    plo_m=3,
    h_MM=h_MM,
    h_mm=h_mm,
    acceptance="maj/al",
    verbose=False,
)

# Allies sweep: fa runs from 0 to 0.5 - fm for each minority fraction fm.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_allies in np.linspace(0, 0.5 - fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            g = DPAH4(fm=fm, fa=fraction_allies, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_ally, prop_top_k_minority = compute_inequity(g, k)
                results_allies.append(
                    (fm, fraction_allies, beta, prop_top_k_ally, prop_top_k_minority)
                )


results_covert = []

# Covert sweep: fraction_covert is subtracted from fm and passed as fa.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_covert in np.linspace(0, fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            # "Allies" and "covert" are functionally the same
            g = DPAH4(fm=fm - fraction_covert, fa=fraction_covert, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_covert, prop_top_k_minority = compute_inequity(g, k)
                results_covert.append(
                    (fm, fraction_covert, beta, prop_top_k_covert, prop_top_k_minority)
                )

         

100%|██████████| 10/10 [08:14<00:00, 49.47s/it]
100%|██████████| 10/10 [08:14<00:00, 49.50s/it]


In [None]:
# Each sweep row is a 5-tuple (fm, fraction, beta, group share, minority share),
# so five column names are needed; the leading 'prop_min' column holds fm.
# Passing only four names makes pd.DataFrame raise a ValueError.
df_results_allies = pd.DataFrame(
    results_allies,
    columns=['prop_min', 'prop_ally', 'beta', 'prop_ally_top_k', 'prop_min_top_k'],
)
df_results_covert = pd.DataFrame(
    results_covert,
    columns=['prop_min', 'prop_covert', 'beta', 'prop_covert_top_k', 'prop_overt_top_k'],
)
# Covert fraction relative to the largest covert fraction in the whole sweep.
df_results_covert['prop_covert_rescaled'] = df_results_covert['prop_covert'] / df_results_covert['prop_covert'].max()
# Combined covert and overt minority share among the top k.
df_results_covert['prop_all_minorities_top_k'] = df_results_covert['prop_covert_top_k'] + df_results_covert['prop_overt_top_k']

In [None]:
# Write both scenario 2 sweeps to CSV, leaving out the index column.
for _frame, _csv_name in (
    (df_results_allies, "allies_sweep_per_top_k_setup2.csv"),
    (df_results_covert, "covert_sweep_per_top_k_setup2.csv"),
):
    _frame.to_csv(_csv_name, index=False)

Finally, sweep over the combinations where everyone accepts allies/coverts. **This is our scenario 3.**

In [None]:
# Scenario 3 sweeps: every graph is generated with acceptance="all".
results_allies = []

# Fractions of the ranking at which inequity is measured (k = int(N * beta)).
beta_list = [
    0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
    0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1,
]

# Generator settings that are identical for every graph built in this cell.
dpah_kwargs = dict(
    N=N,
    d=0.0015,
    plo_M=3,
    plo_m=3,
    h_MM=h_MM,
    h_mm=h_mm,
    acceptance="all",
    verbose=False,
)

# Allies sweep: fa runs from 0 to 0.5 - fm for each minority fraction fm.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_allies in np.linspace(0, 0.5 - fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            g = DPAH4(fm=fm, fa=fraction_allies, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_ally, prop_top_k_minority = compute_inequity(g, k)
                results_allies.append(
                    (fm, fraction_allies, beta, prop_top_k_ally, prop_top_k_minority)
                )


results_covert = []

# Covert sweep: fraction_covert is subtracted from fm and passed as fa.
for fm in tqdm(np.linspace(0.05, 0.5, 10)):
    for fraction_covert in np.linspace(0, fm, 10):
        for i in range(10):  # i is passed to the generator as the seed
            # "Allies" and "covert" are functionally the same
            g = DPAH4(fm=fm - fraction_covert, fa=fraction_covert, seed=i, **dpah_kwargs)

            for beta in beta_list:
                # Top k nodes to consider for inequity. k = N * beta
                k = int(N * beta)
                prop_top_k_covert, prop_top_k_minority = compute_inequity(g, k)
                results_covert.append(
                    (fm, fraction_covert, beta, prop_top_k_covert, prop_top_k_minority)
                )

         

100%|██████████| 10/10 [08:16<00:00, 49.61s/it]
100%|██████████| 10/10 [08:16<00:00, 49.66s/it]


In [None]:
# Each sweep row is a 5-tuple (fm, fraction, beta, group share, minority share),
# so five column names are needed; the leading 'prop_min' column holds fm.
# Passing only four names makes pd.DataFrame raise a ValueError.
df_results_allies = pd.DataFrame(
    results_allies,
    columns=['prop_min', 'prop_ally', 'beta', 'prop_ally_top_k', 'prop_min_top_k'],
)
df_results_covert = pd.DataFrame(
    results_covert,
    columns=['prop_min', 'prop_covert', 'beta', 'prop_covert_top_k', 'prop_overt_top_k'],
)
# Covert fraction relative to the largest covert fraction in the whole sweep.
df_results_covert['prop_covert_rescaled'] = df_results_covert['prop_covert'] / df_results_covert['prop_covert'].max()
# Combined covert and overt minority share among the top k.
df_results_covert['prop_all_minorities_top_k'] = df_results_covert['prop_covert_top_k'] + df_results_covert['prop_overt_top_k']

In [None]:
# Write both scenario 3 sweeps to CSV, leaving out the index column.
for _frame, _csv_name in (
    (df_results_allies, "allies_sweep_per_top_k_setup3.csv"),
    (df_results_covert, "covert_sweep_per_top_k_setup3.csv"),
):
    _frame.to_csv(_csv_name, index=False)