In [1]:
import numpy as np
import pandas as pd
from equiflow import TableZero

In [2]:
np.random.seed(42)
n = 100
data = {
    'age': np.random.randint(18, 80, size=n),
    'race': np.random.choice(['White', 'Black', 'Asian', 'Hispanic', None],
                             size=n),
    'sex': np.random.choice(['Male', 'Female'], size=n),
    'english': np.random.choice(['Fluent', 'Limited'], size=n),
}

for i in range(1, 11):
    data[f'var{i}'] = np.random.randn(n)

df = pd.DataFrame(data)

In [3]:
data_0 = df.copy()
data_1 = data_0.loc[data_0.sex == "Female"]
data_2 = data_1.loc[data_1.english == "Fluent"]

t = TableZero(dfs = [data_0, data_1, data_2],
              cols = ['race','sex', 'english'],
              format = 'N (%)',
              missingness = False,
              )

t1 = t.view_cohorts()
t1

Unnamed: 0_level_0,Unnamed: 1_level_0,Cohort,Cohort,Cohort
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2
Variable,Value,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,,100,52,18
race,Black,13 (16.2),8 (20.5),3 (20.0)
race,Hispanic,22 (27.5),8 (20.5),4 (26.7)
race,White,25 (31.2),11 (28.2),5 (33.3)
race,Asian,20 (25.0),12 (30.8),3 (20.0)
sex,Female,52 (52.0),52 (100.0),18 (100.0)
sex,Male,48 (48.0),0 (0.0),0 (0.0)
english,Limited,58 (58.0),34 (65.4),0 (0.0)
english,Fluent,42 (42.0),18 (34.6),18 (100.0)


In [4]:
t1.sort_index(level=0, key=lambda x: x == 'Overall', ascending=False, sort_remaining=False)


Unnamed: 0_level_0,Unnamed: 1_level_0,Cohort,Cohort,Cohort
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2
Variable,Value,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,,100,52,18
race,Black,13 (16.2),8 (20.5),3 (20.0)
race,Hispanic,22 (27.5),8 (20.5),4 (26.7)
race,White,25 (31.2),11 (28.2),5 (33.3)
race,Asian,20 (25.0),12 (30.8),3 (20.0)
sex,Female,52 (52.0),52 (100.0),18 (100.0)
sex,Male,48 (48.0),0 (0.0),0 (0.0)
english,Limited,58 (58.0),34 (65.4),0 (0.0)
english,Fluent,42 (42.0),18 (34.6),18 (100.0)


In [5]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(t1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Cohort,Cohort,Cohort
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2
Variable,Value,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,,100,52,18
race,Black,13 (16.2),8 (20.5),3 (20.0)
race,Hispanic,22 (27.5),8 (20.5),4 (26.7)
race,White,25 (31.2),11 (28.2),5 (33.3)
race,Asian,20 (25.0),12 (30.8),3 (20.0)
sex,Female,52 (52.0),52 (100.0),18 (100.0)
sex,Male,48 (48.0),0 (0.0),0 (0.0)
english,Limited,58 (58.0),34 (65.4),0 (0.0)
english,Fluent,42 (42.0),18 (34.6),18 (100.0)


In [6]:
t1.loc[('race', 'Missing')]

KeyError: 'Missing'