In [1]:
import re, dispindiffs
import polars as pl
import pandas as pd
import numpy as np

In [2]:
# Node list: one row per person, with at least an "id" and a "role" column.
enron_nlist = pl.read_csv("./dat/enron_nlist.csv")

# Lookup table from node id to the raw role string of that node.
id_to_role = dict(
    zip(
        enron_nlist.get_column("id").to_list(),
        enron_nlist.get_column("role").to_list(),
    )
)

In [3]:
# Hierarchy categories; this list is also used as the category order when the
# role cross-tabulations are built.
role_categories = ["Executive Management", "Senior Management", "Managers", "Lawyers", "Traders", "Specialists", "Associates"]


def _hierarchy_of(role):
    """Return the hierarchy category for one raw role string.

    The part of ``role`` before the first comma is treated as the job title;
    a few categories are additionally detected by substring match on the whole
    role string. The placeholder role "NA" maps to "NA", which is not one of
    ``role_categories``.
    """
    if role == "NA":
        return "NA"

    title = role.split(",")[0]
    if title in ("CEO", "President"):
        return "Executive Management"
    if (
        title in ("Vice President", "Director", "Managing Director")
        or "Chief Operating Officer" in role
        or "Government Relation Executive" in role
    ):
        return "Senior Management"
    if title == "In House Lawyer":
        return "Lawyers"
    if title == "Manager":
        return "Managers"
    if title == "Trader":
        return "Traders"
    if "Specialist" in role or "Analyst" in role:
        return "Specialists"
    # Anything not matched above falls into the catch-all bucket.
    return "Associates"


# Map every distinct role string found in the node list to its category.
role_to_hierarchy = {role: _hierarchy_of(role) for role in id_to_role.values()}

In [4]:
# Load the edge list and drop self-loops (rows whose source equals target).
enron_elist = pl.read_csv("./dat/enron_elist.csv").filter(
    pl.col("source") != pl.col("target")
)

# N_T: distinct node ids appearing as either endpoint; E_T: remaining edge rows.
N_T = len(set(enron_elist["source"]).union(enron_elist["target"]))
E_T = enron_elist.height
print(N_T, E_T)

182 2829


In [5]:
# Build the analysis object from the edge list loaded above (self-loops already
# removed); every backbone-extraction cell below works on this instance.
Enron = dispindiffs.DisparityInDifferences(enron_elist)

In [6]:
# Run both scoring steps once, up front. The extr_disp_backbone /
# extr_disp_in_diffs_backbone calls in later cells presumably rely on these
# results -- confirm in dispindiffs.
# NOTE(review): the log output mentions pre-sampled values from beta
# distributions, so this step looks stochastic and no seed is set in this
# notebook -- confirm whether dispindiffs exposes a seed for reproducibility.
Enron.calc_disp()
Enron.calc_disp_in_diffs()

Merging bilateral relations
Generating pre-sampled values from beta distributions
Calculating statistical significance
Done


### Disparity Filter

In [7]:
# Sweep the threshold over 10**-20 ... 10**0 (exponent step 0.25) and record
# the threshold, node count and edge count returned for each backbone.
n_nodes_edges_by_th = []
for exponent in np.arange(20, -0.1, -0.25):
    _backbone, th_used, n_nodes, n_edges = Enron.extr_disp_backbone(th=10 ** (-exponent))
    n_nodes_edges_by_th.append((th_used, n_nodes, n_edges))

disp_info_by_th = pd.DataFrame(n_nodes_edges_by_th, columns=["th", "n_nodes", "n_edges"])
disp_info_by_th.to_csv("./outputs/enron_disp_info_by_th.csv", index=False)

In [8]:
# Backbone at th=0.01; only the edge table (first returned item) is used here.
disp_backbone, _, _, _ = Enron.extr_disp_backbone(th=0.01)

# Annotate both endpoints with their role, then with the hierarchy category.
# Ids/roles without a mapping become null, and edges with a null category on
# either side are dropped before converting to pandas.
disp_backbone = (
    disp_backbone
    .with_columns(
        pl.col("source").replace_strict(id_to_role, default=None).alias("source_role"),
        pl.col("target").replace_strict(id_to_role, default=None).alias("target_role"),
    )
    .with_columns(
        pl.col("source_role").replace_strict(role_to_hierarchy, default=None).alias("source_cat"),
        pl.col("target_role").replace_strict(role_to_hierarchy, default=None).alias("target_cat"),
    )
    .filter(
        pl.col("source_cat").is_not_null() & pl.col("target_cat").is_not_null()
    )
    .to_pandas()
)

# Use a fixed category order; values outside role_categories (e.g. "NA")
# become NaN here and therefore do not appear in the cross-tabulation.
for cat_col in ("source_cat", "target_cat"):
    disp_backbone[cat_col] = pd.Categorical(disp_backbone[cat_col], categories=role_categories)

# Source-category x target-category edge counts, with every category present
# on both axes even when it has no edges.
tab = pd.crosstab(disp_backbone["source_cat"], disp_backbone["target_cat"]).reindex(
    index=role_categories, columns=role_categories, fill_value=0
)
tab

target_cat,Executive Management,Senior Management,Managers,Lawyers,Traders,Specialists,Associates
source_cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Executive Management,5,3,0,0,0,0,0
Senior Management,8,16,1,0,0,0,4
Managers,2,1,1,0,1,0,0
Lawyers,0,0,0,0,0,0,0
Traders,0,0,0,0,0,0,1
Specialists,0,0,0,0,1,0,0
Associates,1,2,0,0,1,0,1


### Disparity-in-Differences

In [9]:
# Same threshold sweep as for the disparity filter (10**-20 ... 10**0, exponent
# step 0.25), now recording the disparity-in-differences backbone size.
n_nodes_edges_by_th = []
for exponent in np.arange(20, -0.1, -0.25):
    _backbone, th_used, n_nodes, n_edges = Enron.extr_disp_in_diffs_backbone(th=10 ** (-exponent))
    n_nodes_edges_by_th.append((th_used, n_nodes, n_edges))

disp_in_diffs_info_by_th = pd.DataFrame(n_nodes_edges_by_th, columns=["th", "n_nodes", "n_edges"])
disp_in_diffs_info_by_th.to_csv("./outputs/enron_disp_in_diffs_info_by_th.csv", index=False)

In [10]:
# Backbone at th=0.01; only the edge table (first returned item) is used here.
disp_in_diffs_backbone, _, _, _ = Enron.extr_disp_in_diffs_backbone(th=0.01)

# Annotate both endpoints with their role, then with the hierarchy category.
# Ids/roles without a mapping become null, and edges with a null category on
# either side are dropped before converting to pandas.
disp_in_diffs_backbone = (
    disp_in_diffs_backbone
    .with_columns(
        pl.col("source").replace_strict(id_to_role, default=None).alias("source_role"),
        pl.col("target").replace_strict(id_to_role, default=None).alias("target_role"),
    )
    .with_columns(
        pl.col("source_role").replace_strict(role_to_hierarchy, default=None).alias("source_cat"),
        pl.col("target_role").replace_strict(role_to_hierarchy, default=None).alias("target_cat"),
    )
    .filter(
        pl.col("source_cat").is_not_null() & pl.col("target_cat").is_not_null()
    )
    .to_pandas()
)

# Use a fixed category order; values outside role_categories (e.g. "NA")
# become NaN here and therefore do not appear in the cross-tabulation.
for cat_col in ("source_cat", "target_cat"):
    disp_in_diffs_backbone[cat_col] = pd.Categorical(disp_in_diffs_backbone[cat_col], categories=role_categories)

# Source-category x target-category edge counts, with every category present
# on both axes even when it has no edges.
tab = pd.crosstab(disp_in_diffs_backbone["source_cat"], disp_in_diffs_backbone["target_cat"]).reindex(
    index=role_categories, columns=role_categories, fill_value=0
)
tab

target_cat,Executive Management,Senior Management,Managers,Lawyers,Traders,Specialists,Associates
source_cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Executive Management,1,0,0,0,0,0,0
Senior Management,5,8,0,0,0,0,3
Managers,0,2,1,0,2,0,0
Lawyers,0,0,0,0,0,0,0
Traders,0,0,0,0,0,0,1
Specialists,0,0,0,0,0,0,0
Associates,1,1,0,0,0,0,3
