In [1]:
import numpy as np
import pandas as pd
from equiflow import EquiFlow

In [2]:
np.random.seed(42)
n = 100000
data = {
    'age': np.random.randint(18, 80, size=n),
    'sofa': np.random.choice([0,1,2,3,4,5,6,7,8,9,10,15, np.nan], size=n),
    'race': np.random.choice(['White', 'Black', 'Asian', 'Hispanic', None],
                             size=n),
    'sex': np.random.choice(['Male', 'Female'], size=n),
    'english': np.random.choice(['Fluent', 'Limited', np.nan, None], size=n),
}

for i in range(1, 11):
    data[f'var{i}'] = np.random.randn(n)

df = pd.DataFrame(data)

In [3]:
data_0 = df.copy()
data_1 = data_0.loc[data_0.english.notnull()]
data_2 = data_1.loc[data_1.sofa.notnull()]

ef = EquiFlow(dfs = [data_0, data_1, data_2])



In [4]:
ef.table_flows(
    label_suffix=True,
    thousands_sep=True,
)

Cohort Flow,0 to 1,1 to 2
,,
"Inital, n",100000.0,50022.0
"Removed, n",49978.0,3874.0
"Result, n",50022.0,46148.0


In [10]:
t = ef.table_characteristics(
    categorical = ['race','sex', 'english'],
    nonnormal = ['sofa'],
    normal = ['age'],
    format_cat = 'N (%)',
    format_cont = 'Mean ± SD',
    missingness = True,
    decimals = 1,
    label_suffix = True,
    thousands_sep = False,
    rename={'race': 'Race and Ethnicity',
            'english': 'English Proficiency',
            'sex':'Sex',
            'sofa': 'SOFA',
            'age': 'Age',  
            }
)
display(t)

Unnamed: 0_level_0,Unnamed: 1_level_0,Cohort,Cohort,Cohort
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2
Variable,Value,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,,100000,50022,46148
"Race and Ethnicity, N (%)",Hispanic,20024 (20.0),10011 (20.0),9197 (19.9)
"Race and Ethnicity, N (%)",Asian,19952 (20.0),9918 (19.8),9145 (19.8)
"Race and Ethnicity, N (%)",Black,20066 (20.1),10088 (20.2),9323 (20.2)
"Race and Ethnicity, N (%)",White,19931 (19.9),9978 (19.9),9218 (20.0)
"Race and Ethnicity, N (%)",Missing,20027 (20.0),10027 (20.0),9265 (20.1)
"Sex, N (%)",Male,50052 (50.1),25049 (50.1),23089 (50.0)
"Sex, N (%)",Female,49948 (49.9),24973 (49.9),23059 (50.0)
"Sex, N (%)",Missing,0 (0.0),0 (0.0),0 (0.0)
"English Proficiency, N (%)",Fluent,25134 (25.1),25134 (50.2),23223 (50.3)


In [6]:
t.loc[('Sex, N (%)', 'Male'), ('Cohort',0)] 

'50052 (50.1)'

In [7]:
t.loc[('Sex, N (%)', 'Male'), ('Cohort',1)] 


'25049 (50.1)'

In [8]:
t = ef.table_characteristics(
    categorical = ['race','sex', 'english'],
    nonnormal = ['sofa'],
    normal = ['age'],
    format = 'N',
    missingness = True,
    decimals = 1,
    label_suffix = True,
    rename={'race': 'Race and Ethnicity',
            'english': 'English Proficiency',
            'sex':'Sex',
            'sofa': 'SOFA',
            'age': 'Age',  
            }
)
display(t)

TypeError: BaseTable.__init__() got an unexpected keyword argument 'format'

In [None]:
t.loc[('Sex, N', 'Male'), ('Cohort',0)]

'50,052'

In [None]:
prop1 = [0.501]
prop1[1:]

[]